# featsql
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
``` python
from featsql.featsqlite import *
from featsql.featmysql import *
from featsql.featsnow import *
```
## Imports
``` python
import pandas as pd
```
``` python
import mysql.connector
from sqlalchemy import create_engine
pd.set_option('display.max_columns', None)
```
## Install
``` sh
pip install featsql
```
## 1. SQLITE
### Configurando a engine
``` python
# SQLAlchemy URL for a SQLite database file, given as a relative path.
url_db = "sqlite:///../../data/mydatabase.db"
# Engine passed to the pd.read_sql calls in the SQLite section.
engine = create_engine(url_db)
```
### Visão inicial do público
Primeiro, vamos observar o formato da tabela spine (`tb_spine`), que contém as colunas `ID`, `SAFRA_REF` e `Target`.
``` python
# Load the whole spine table into a DataFrame to inspect its layout.
df_spine = pd.read_sql("SELECT * FROM tb_spine", engine)
# Preview the first rows.
df_spine.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | Target |
|-----|-----|------------|--------|
| 0 | 4 | 2023-02-01 | 0 |
| 1 | 5 | 2023-02-01 | 0 |
| 2 | 6 | 2023-02-01 | 0 |
| 3 | 7 | 2023-02-01 | 0 |
| 4 | 10 | 2023-02-01 | 0 |
</div>
### Visão inicial da tabela de variáveis
A tabela de variáveis contém 4 variáveis, sendo duas numéricas e duas
categóricas. Perceba que existem mais IDs únicos e datas disponíveis
nessa tabela do que na tabela spine, caso que ocorre no dia a dia.
``` python
# Load the whole feature table into a DataFrame to inspect its layout.
df_data = pd.read_sql("SELECT * FROM tb_feat", engine)
# Preview the first rows.
df_data.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA | FEAT_NUM1 | FEAT_NUM2 | FEAT_CAT1 | FEAT_CAT2 |
|-----|-----|------------|-----------|-----------|-----------|-----------|
| 0 | 1 | 2023-01-01 | -97 | -44 | A | C |
| 1 | 2 | 2023-01-01 | 89 | 67 | C | B |
| 2 | 3 | 2023-01-01 | 53 | 24 | A | B |
| 3 | 4 | 2023-01-01 | -40 | 62 | B | C |
| 4 | 5 | 2023-01-01 | 41 | 62 | B | B |
</div>
### Criação de variáveis numéricas
A função sqlite_create_query_num() cria um texto com a query para a
criação de variáveis com as operações soma, mínimo, máximo e média das
variáveis listadas em feat_num_lista e com a janela de tempo listada em
lista_janela.
``` python
# Name of the spine (population) table the features are joined onto.
tb_publico = 'tb_spine'
# Name of the table holding the raw feature columns.
tb_feat = 'tb_feat'
# Key column used to join the spine and feature tables.
# NOTE(review): this name shadows the Python builtin `id` in the notebook.
id = 'ID'
# Reference-date column in the spine table.
safra_ref = 'SAFRA_REF'
# Date column in the feature table, compared against the reference date.
safra = 'SAFRA'
# Numeric columns to aggregate (SUM, MIN, MAX and AVG per window).
feat_num_lista = ['FEAT_NUM1','FEAT_NUM2']
# Look-back windows, in months; each one yields a `_<n>M` group of columns.
lista_janela = [1,2,3]
# Build the SQL text only; nothing is executed against the database here.
query_final_num_sqlite = sqlite_create_query_num(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)
```
``` python
# Run the generated numeric-feature query and load the result as a DataFrame.
df_num_sqlite = pd.read_sql(query_final_num_sqlite, engine)
# Preview the first rows.
df_num_sqlite.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | FEAT_NUM1_SUM_1M | FEAT_NUM1_MIN_1M | FEAT_NUM1_MAX_1M | FEAT_NUM1_AVG_1M | FEAT_NUM2_SUM_1M | FEAT_NUM2_MIN_1M | FEAT_NUM2_MAX_1M | FEAT_NUM2_AVG_1M | FEAT_NUM1_SUM_2M | FEAT_NUM1_MIN_2M | FEAT_NUM1_MAX_2M | FEAT_NUM1_AVG_2M | FEAT_NUM2_SUM_2M | FEAT_NUM2_MIN_2M | FEAT_NUM2_MAX_2M | FEAT_NUM2_AVG_2M | FEAT_NUM1_SUM_3M | FEAT_NUM1_MIN_3M | FEAT_NUM1_MAX_3M | FEAT_NUM1_AVG_3M | FEAT_NUM2_SUM_3M | FEAT_NUM2_MIN_3M | FEAT_NUM2_MAX_3M | FEAT_NUM2_AVG_3M |
|-----|-----|------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|
| 0 | 4 | 2023-02-01 | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 | -40 | -40 | -40 | -40.0 | 62 | 62 | 62 | 62.0 | -40 | -40 | -40 | -40.0 | 62 | 62 | 62 | 62.0 |
| 1 | 5 | 2023-02-01 | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 | 41 | 41 | 41 | 41.0 | 62 | 62 | 62 | 62.0 | 41 | 41 | 41 | 41.0 | 62 | 62 | 62 | 62.0 |
| 2 | 6 | 2023-02-01 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | 36 | 36 | 36 | 36.0 | 63 | 63 | 63 | 63.0 | 36 | 36 | 36 | 36.0 | 63 | 63 | 63 | 63.0 |
| 3 | 7 | 2023-02-01 | 47.0 | 47.0 | 47.0 | 47.0 | 44.0 | 44.0 | 44.0 | 44.0 | 47 | 47 | 47 | 47.0 | 44 | 44 | 44 | 44.0 | 47 | 47 | 47 | 47.0 | 44 | 44 | 44 | 44.0 |
| 4 | 10 | 2023-02-01 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | 29 | 29 | 29 | 29.0 | -7 | -7 | -7 | -7.0 | 29 | 29 | 29 | 29.0 | -7 | -7 | -7 | -7.0 |
</div>
Nota: este exemplo ainda deve ser ajustado para não criar necessariamente um DataFrame, por conta do tamanho do resultado. A query gerada pode ser inspecionada abaixo:
``` python
print(query_final_num_sqlite)
```
WITH
tb_public AS (
SELECT
*
FROM tb_spine
),
-- Criação de variáveis de janela de 1M
tb_janela_1M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_1M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_1M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_1M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_1M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_1M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_1M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_1M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_1M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+1 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis de janela de 2M
tb_janela_2M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_2M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_2M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_2M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_2M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_2M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_2M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_2M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_2M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+2 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis de janela de 3M
tb_janela_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_3M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
*
FROM tb_public
LEFT JOIN tb_janela_1M
ON tb_public.ID = tb_janela_1M.ID
AND tb_public.SAFRA_REF = tb_janela_1M.SAFRA_REF
LEFT JOIN tb_janela_2M
ON tb_public.ID = tb_janela_2M.ID
AND tb_public.SAFRA_REF = tb_janela_2M.SAFRA_REF
LEFT JOIN tb_janela_3M
ON tb_public.ID = tb_janela_3M.ID
AND tb_public.SAFRA_REF = tb_janela_3M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.FEAT_NUM1_SUM_1M,
tb_join.FEAT_NUM1_MIN_1M,
tb_join.FEAT_NUM1_MAX_1M,
tb_join.FEAT_NUM1_AVG_1M,
tb_join.FEAT_NUM2_SUM_1M,
tb_join.FEAT_NUM2_MIN_1M,
tb_join.FEAT_NUM2_MAX_1M,
tb_join.FEAT_NUM2_AVG_1M,
tb_join.FEAT_NUM1_SUM_2M,
tb_join.FEAT_NUM1_MIN_2M,
tb_join.FEAT_NUM1_MAX_2M,
tb_join.FEAT_NUM1_AVG_2M,
tb_join.FEAT_NUM2_SUM_2M,
tb_join.FEAT_NUM2_MIN_2M,
tb_join.FEAT_NUM2_MAX_2M,
tb_join.FEAT_NUM2_AVG_2M,
tb_join.FEAT_NUM1_SUM_3M,
tb_join.FEAT_NUM1_MIN_3M,
tb_join.FEAT_NUM1_MAX_3M,
tb_join.FEAT_NUM1_AVG_3M,
tb_join.FEAT_NUM2_SUM_3M,
tb_join.FEAT_NUM2_MIN_3M,
tb_join.FEAT_NUM2_MAX_3M,
tb_join.FEAT_NUM2_AVG_3M
FROM tb_join
### Criação de variáveis categóricas
A função sqlite_create_query_cat() cria um texto com a query para a
criação de variáveis com a moda de cada uma das variáveis categóricas
listadas em feat_num_lista (neste exemplo, a lista recebe as colunas
categóricas) na janela de tempo fornecida em lista_janela.
``` python
# Name of the spine (population) table the features are joined onto.
tb_publico = 'tb_spine'
# Name of the table holding the raw feature columns.
tb_feat = 'tb_feat'
# Key column used to join the spine and feature tables.
# NOTE(review): this name shadows the Python builtin `id` in the notebook.
id = 'ID'
# Reference-date column in the spine table.
safra_ref = 'SAFRA_REF'
# Date column in the feature table, compared against the reference date.
safra = 'SAFRA'
# Despite the `num` in its name, this list holds the categorical columns
# whose mode is computed per window.
feat_num_lista = ['FEAT_CAT1', 'FEAT_CAT2']
# Look-back windows, in months; each one yields a `_MODA_<n>M` column per feature.
lista_janela = [1, 3, 6]
# Build the SQL text only; nothing is executed against the database here.
query_final_cat_sqlite = sqlite_create_query_cat(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)
```
``` python
# Run the generated categorical-feature query and load the result as a DataFrame.
df_cat_sqlite_sqlite = pd.read_sql(query_final_cat_sqlite, engine)
# Preview the first rows.
df_cat_sqlite_sqlite.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | FEAT_CAT1_MODA_1M | FEAT_CAT2_MODA_1M | FEAT_CAT1_MODA_3M | FEAT_CAT2_MODA_3M | FEAT_CAT1_MODA_6M | FEAT_CAT2_MODA_6M |
|-----|-----|------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|
| 0 | 4 | 2023-02-01 | B | C | B | C | B | C |
| 1 | 5 | 2023-02-01 | B | B | B | B | B | B |
| 2 | 6 | 2023-02-01 | A | A | A | A | A | A |
| 3 | 7 | 2023-02-01 | C | B | C | B | C | B |
| 4 | 10 | 2023-02-01 | A | B | A | B | A | B |
</div>
``` python
print(query_final_cat_sqlite)
```
WITH
tb_public as (
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_janela_FEAT_CAT1_1M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT1,
COUNT(*) AS frequency_FEAT_CAT1
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+1 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1
),
tb_row_FEAT_CAT1_1M as (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT1 DESC
) as row_num_FEAT_CAT1_1M
FROM tb_janela_FEAT_CAT1_1M
),
tb_moda_FEAT_CAT1_1M AS(
SELECT
tb_row_FEAT_CAT1_1M .ID,
tb_row_FEAT_CAT1_1M .SAFRA_REF,
tb_row_FEAT_CAT1_1M.FEAT_CAT1 AS FEAT_CAT1_MODA_1M
FROM tb_row_FEAT_CAT1_1M
WHERE row_num_FEAT_CAT1_1M = 1
),
tb_janela_FEAT_CAT2_1M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT2,
COUNT(*) AS frequency_FEAT_CAT2
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+1 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2
),
tb_row_FEAT_CAT2_1M as (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT2 DESC
) as row_num_FEAT_CAT2_1M
FROM tb_janela_FEAT_CAT2_1M
),
tb_moda_FEAT_CAT2_1M AS(
SELECT
tb_row_FEAT_CAT2_1M .ID,
tb_row_FEAT_CAT2_1M .SAFRA_REF,
tb_row_FEAT_CAT2_1M.FEAT_CAT2 AS FEAT_CAT2_MODA_1M
FROM tb_row_FEAT_CAT2_1M
WHERE row_num_FEAT_CAT2_1M = 1
),
tb_janela_FEAT_CAT1_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT1,
COUNT(*) AS frequency_FEAT_CAT1
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1
),
tb_row_FEAT_CAT1_3M as (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT1 DESC
) as row_num_FEAT_CAT1_3M
FROM tb_janela_FEAT_CAT1_3M
),
tb_moda_FEAT_CAT1_3M AS(
SELECT
tb_row_FEAT_CAT1_3M .ID,
tb_row_FEAT_CAT1_3M .SAFRA_REF,
tb_row_FEAT_CAT1_3M.FEAT_CAT1 AS FEAT_CAT1_MODA_3M
FROM tb_row_FEAT_CAT1_3M
WHERE row_num_FEAT_CAT1_3M = 1
),
tb_janela_FEAT_CAT2_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT2,
COUNT(*) AS frequency_FEAT_CAT2
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2
),
tb_row_FEAT_CAT2_3M as (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT2 DESC
) as row_num_FEAT_CAT2_3M
FROM tb_janela_FEAT_CAT2_3M
),
tb_moda_FEAT_CAT2_3M AS(
SELECT
tb_row_FEAT_CAT2_3M .ID,
tb_row_FEAT_CAT2_3M .SAFRA_REF,
tb_row_FEAT_CAT2_3M.FEAT_CAT2 AS FEAT_CAT2_MODA_3M
FROM tb_row_FEAT_CAT2_3M
WHERE row_num_FEAT_CAT2_3M = 1
),
tb_janela_FEAT_CAT1_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT1,
COUNT(*) AS frequency_FEAT_CAT1
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1
),
tb_row_FEAT_CAT1_6M as (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT1 DESC
) as row_num_FEAT_CAT1_6M
FROM tb_janela_FEAT_CAT1_6M
),
tb_moda_FEAT_CAT1_6M AS(
SELECT
tb_row_FEAT_CAT1_6M .ID,
tb_row_FEAT_CAT1_6M .SAFRA_REF,
tb_row_FEAT_CAT1_6M.FEAT_CAT1 AS FEAT_CAT1_MODA_6M
FROM tb_row_FEAT_CAT1_6M
WHERE row_num_FEAT_CAT1_6M = 1
),
tb_janela_FEAT_CAT2_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT2,
COUNT(*) AS frequency_FEAT_CAT2
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2
),
tb_row_FEAT_CAT2_6M as (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT2 DESC
) as row_num_FEAT_CAT2_6M
FROM tb_janela_FEAT_CAT2_6M
),
tb_moda_FEAT_CAT2_6M AS(
SELECT
tb_row_FEAT_CAT2_6M .ID,
tb_row_FEAT_CAT2_6M .SAFRA_REF,
tb_row_FEAT_CAT2_6M.FEAT_CAT2 AS FEAT_CAT2_MODA_6M
FROM tb_row_FEAT_CAT2_6M
WHERE row_num_FEAT_CAT2_6M = 1
)
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_moda_FEAT_CAT1_1M.FEAT_CAT1_MODA_1M,
tb_moda_FEAT_CAT2_1M.FEAT_CAT2_MODA_1M,
tb_moda_FEAT_CAT1_3M.FEAT_CAT1_MODA_3M,
tb_moda_FEAT_CAT2_3M.FEAT_CAT2_MODA_3M,
tb_moda_FEAT_CAT1_6M.FEAT_CAT1_MODA_6M,
tb_moda_FEAT_CAT2_6M.FEAT_CAT2_MODA_6M
FROM tb_public
LEFT JOIN tb_moda_FEAT_CAT1_1M
ON tb_moda_FEAT_CAT1_1M.ID = tb_public.ID
AND tb_moda_FEAT_CAT1_1M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT2_1M
ON tb_moda_FEAT_CAT2_1M.ID = tb_public.ID
AND tb_moda_FEAT_CAT2_1M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT1_3M
ON tb_moda_FEAT_CAT1_3M.ID = tb_public.ID
AND tb_moda_FEAT_CAT1_3M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT2_3M
ON tb_moda_FEAT_CAT2_3M.ID = tb_public.ID
AND tb_moda_FEAT_CAT2_3M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT1_6M
ON tb_moda_FEAT_CAT1_6M.ID = tb_public.ID
AND tb_moda_FEAT_CAT1_6M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT2_6M
ON tb_moda_FEAT_CAT2_6M.ID = tb_public.ID
AND tb_moda_FEAT_CAT2_6M.SAFRA_REF = tb_public.SAFRA_REF
### Criação de variáveis agregadas
``` python
# Name of the spine (population) table the features are joined onto.
tb_publico = 'tb_spine'
# Look-back windows, in months.
lista_janela = [3, 6]
# Numeric columns to aggregate (SUM, MAX, MIN and AVG).
lista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']
# Categorical column used to filter the rows being aggregated.
feat_cat = 'FEAT_CAT1'
# Values of `feat_cat` to aggregate on; the generated SQL has one CTE per
# (value, window) pair.
lista_valor_agregador = ['A', 'B']
# Key column used to join the spine and feature tables.
# NOTE(review): this name shadows the Python builtin `id` in the notebook.
id = 'ID'
# Reference-date column in the spine table.
safra_ref = 'SAFRA_REF'
# Name of the table holding the raw feature columns.
tb_feat = 'tb_feat'
# Date column in the feature table, compared against the reference date.
safra = 'SAFRA'
# Build the SQL text only; nothing is executed against the database here.
query = sqlite_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agregador)
```
``` python
print(query)
```
WITH
tb_public as(
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_agrupada_FEAT_CAT1_A_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'A'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_A_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'A'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_B_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_B_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
*
FROM tb_public
LEFT JOIN tb_agrupada_FEAT_CAT1_A_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_A_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_6M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_6M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_join
``` python
# Run the generated aggregated-feature query and load the result as a DataFrame.
df_sqlite_agregada = pd.read_sql(query, engine)
# Preview the first rows; the rendered output shows NaN where no feature row
# matched the filtered value.
df_sqlite_agregada.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT1_A_3M | MAX_FEAT_NUM1_FEAT_CAT1_A_3M | MIN_FEAT_NUM1_FEAT_CAT1_A_3M | AVG_FEAT_NUM1_FEAT_CAT1_A_3M | SUM_FEAT_NUM2_FEAT_CAT1_A_3M | MAX_FEAT_NUM2_FEAT_CAT1_A_3M | MIN_FEAT_NUM2_FEAT_CAT1_A_3M | AVG_FEAT_NUM2_FEAT_CAT1_A_3M | SUM_FEAT_NUM1_FEAT_CAT1_A_6M | MAX_FEAT_NUM1_FEAT_CAT1_A_6M | MIN_FEAT_NUM1_FEAT_CAT1_A_6M | AVG_FEAT_NUM1_FEAT_CAT1_A_6M | SUM_FEAT_NUM2_FEAT_CAT1_A_6M | MAX_FEAT_NUM2_FEAT_CAT1_A_6M | MIN_FEAT_NUM2_FEAT_CAT1_A_6M | AVG_FEAT_NUM2_FEAT_CAT1_A_6M | SUM_FEAT_NUM1_FEAT_CAT1_B_3M | MAX_FEAT_NUM1_FEAT_CAT1_B_3M | MIN_FEAT_NUM1_FEAT_CAT1_B_3M | AVG_FEAT_NUM1_FEAT_CAT1_B_3M | SUM_FEAT_NUM2_FEAT_CAT1_B_3M | MAX_FEAT_NUM2_FEAT_CAT1_B_3M | MIN_FEAT_NUM2_FEAT_CAT1_B_3M | AVG_FEAT_NUM2_FEAT_CAT1_B_3M | SUM_FEAT_NUM1_FEAT_CAT1_B_6M | MAX_FEAT_NUM1_FEAT_CAT1_B_6M | MIN_FEAT_NUM1_FEAT_CAT1_B_6M | AVG_FEAT_NUM1_FEAT_CAT1_B_6M | SUM_FEAT_NUM2_FEAT_CAT1_B_6M | MAX_FEAT_NUM2_FEAT_CAT1_B_6M | MIN_FEAT_NUM2_FEAT_CAT1_B_6M | AVG_FEAT_NUM2_FEAT_CAT1_B_6M |
|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 |
| 1 | 5 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 |
| 2 | 6 | 2023-02-01 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 10 | 2023-02-01 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
</div>
``` python
# NOTE(review): this cell repeats the previous aggregated-feature example with
# the same values; only the name of the value list differs.
# Name of the spine (population) table the features are joined onto.
tb_publico = 'tb_spine'
# Look-back windows, in months.
lista_janela = [3, 6]
# Numeric columns to aggregate (SUM, MAX, MIN and AVG).
lista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']
# Categorical column used to filter the rows being aggregated.
feat_cat = 'FEAT_CAT1'
# Values of `feat_cat` to aggregate on.
# NOTE(review): spelled `agragador` here but `agregador` in the earlier cell.
lista_valor_agragador = ['A', 'B']
# Key column used to join the spine and feature tables.
id = 'ID'
# Reference-date column in the spine table.
safra_ref = 'SAFRA_REF'
# Name of the table holding the raw feature columns.
tb_feat = 'tb_feat'
# Date column in the feature table, compared against the reference date.
safra = 'SAFRA'
# Build the SQL text only; nothing is executed against the database here.
query = sqlite_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador)
```
``` python
print(query)
```
WITH
tb_public as(
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_agrupada_FEAT_CAT1_A_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'A'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_A_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'A'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_B_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_B_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)
AND (tb_feat.SAFRA < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
*
FROM tb_public
LEFT JOIN tb_agrupada_FEAT_CAT1_A_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_A_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_6M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_6M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_join
``` python
# Run the generated aggregated-feature query and load the result as a DataFrame.
df_sqlite_agregada = pd.read_sql(query, engine)
# Preview the first rows.
df_sqlite_agregada.head()
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT1_A_3M | MAX_FEAT_NUM1_FEAT_CAT1_A_3M | MIN_FEAT_NUM1_FEAT_CAT1_A_3M | AVG_FEAT_NUM1_FEAT_CAT1_A_3M | SUM_FEAT_NUM2_FEAT_CAT1_A_3M | MAX_FEAT_NUM2_FEAT_CAT1_A_3M | MIN_FEAT_NUM2_FEAT_CAT1_A_3M | AVG_FEAT_NUM2_FEAT_CAT1_A_3M | SUM_FEAT_NUM1_FEAT_CAT1_A_6M | MAX_FEAT_NUM1_FEAT_CAT1_A_6M | MIN_FEAT_NUM1_FEAT_CAT1_A_6M | AVG_FEAT_NUM1_FEAT_CAT1_A_6M | SUM_FEAT_NUM2_FEAT_CAT1_A_6M | MAX_FEAT_NUM2_FEAT_CAT1_A_6M | MIN_FEAT_NUM2_FEAT_CAT1_A_6M | AVG_FEAT_NUM2_FEAT_CAT1_A_6M | SUM_FEAT_NUM1_FEAT_CAT1_B_3M | MAX_FEAT_NUM1_FEAT_CAT1_B_3M | MIN_FEAT_NUM1_FEAT_CAT1_B_3M | AVG_FEAT_NUM1_FEAT_CAT1_B_3M | SUM_FEAT_NUM2_FEAT_CAT1_B_3M | MAX_FEAT_NUM2_FEAT_CAT1_B_3M | MIN_FEAT_NUM2_FEAT_CAT1_B_3M | AVG_FEAT_NUM2_FEAT_CAT1_B_3M | SUM_FEAT_NUM1_FEAT_CAT1_B_6M | MAX_FEAT_NUM1_FEAT_CAT1_B_6M | MIN_FEAT_NUM1_FEAT_CAT1_B_6M | AVG_FEAT_NUM1_FEAT_CAT1_B_6M | SUM_FEAT_NUM2_FEAT_CAT1_B_6M | MAX_FEAT_NUM2_FEAT_CAT1_B_6M | MIN_FEAT_NUM2_FEAT_CAT1_B_6M | AVG_FEAT_NUM2_FEAT_CAT1_B_6M |
|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 |
| 1 | 5 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 |
| 2 | 6 | 2023-02-01 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 10 | 2023-02-01 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
</div>
## 2. MySQL
### Configurando conexão
``` python
host = "localhost"
user = "sqluser"
password = "password"
database = "mydatabase"
# Conectar ao MySQL
connection = mysql.connector.connect(
host=host,
user=user,
password=password,
database=database
)
```
### Visão inicial do público
``` python
df_spine = pd.read_sql("SELECT * FROM tb_spine", connection)
df_spine.head()
```
/tmp/ipykernel_4407/661881290.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df_spine = pd.read_sql("SELECT * FROM tb_spine", connection)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | Target |
|-----|-----|------------|--------|
| 0 | 4 | 2023-02-01 | 1 |
| 1 | 5 | 2023-02-01 | 0 |
| 2 | 6 | 2023-02-01 | 0 |
| 3 | 7 | 2023-02-01 | 0 |
| 4 | 10 | 2023-02-01 | 0 |
</div>
### Visão inicial da tabela de variáveis
``` python
df_data = pd.read_sql("SELECT * FROM tb_feat", connection)
df_data.head()
```
/tmp/ipykernel_4407/1780842963.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df_data = pd.read_sql("SELECT * FROM tb_feat", connection)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA | FEAT_NUM1 | FEAT_NUM2 | FEAT_CAT1 | FEAT_CAT2 |
|-----|-----|------------|-----------|-----------|-----------|-----------|
| 0 | 1 | 2023-01-01 | 73 | 23 | B | B |
| 1 | 3 | 2023-01-01 | 15 | 1 | B | B |
| 2 | 5 | 2023-01-01 | 75 | 71 | A | A |
| 3 | 7 | 2023-01-01 | 73 | 82 | B | C |
| 4 | 9 | 2023-01-01 | 61 | 8 | C | B |
</div>
### Criação de variáveis numéricas
A função mysql_create_query_num() cria um texto com a query para a
criação de variáveis com as operações soma, mínimo, máximo e média das
variáveis listadas em feat_num_lista, para cada janela de tempo (em
meses) listada em lista_janela.
``` python
tb_publico = 'tb_spine'
tb_feat = 'tb_feat'
id = 'ID'
safra_ref = 'SAFRA_REF'
safra = 'SAFRA'
feat_num_lista = ['FEAT_NUM1','FEAT_NUM2']
lista_janela = [1,2,3]
query_final_num_mysql = mysql_create_query_num(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)
```
``` python
df_num_mysql = pd.read_sql(query_final_num_mysql, connection)
df_num_mysql.head()
```
/tmp/ipykernel_4407/2119439562.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df_num_mysql = pd.read_sql(query_final_num_mysql, connection)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | FEAT_NUM1_SUM_1M | FEAT_NUM1_MIN_1M | FEAT_NUM1_MAX_1M | FEAT_NUM1_AVG_1M | FEAT_NUM2_SUM_1M | FEAT_NUM2_MIN_1M | FEAT_NUM2_MAX_1M | FEAT_NUM2_AVG_1M | FEAT_NUM1_SUM_2M | FEAT_NUM1_MIN_2M | FEAT_NUM1_MAX_2M | FEAT_NUM1_AVG_2M | FEAT_NUM2_SUM_2M | FEAT_NUM2_MIN_2M | FEAT_NUM2_MAX_2M | FEAT_NUM2_AVG_2M | FEAT_NUM1_SUM_3M | FEAT_NUM1_MIN_3M | FEAT_NUM1_MAX_3M | FEAT_NUM1_AVG_3M | FEAT_NUM2_SUM_3M | FEAT_NUM2_MIN_3M | FEAT_NUM2_MAX_3M | FEAT_NUM2_AVG_3M |
|-----|-----|------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|
| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 5 | 2023-02-01 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 |
| 2 | 6 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 7 | 2023-02-01 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 |
| 4 | 10 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
</div>
``` python
print(query_final_num_mysql)
```
WITH
tb_public AS (
SELECT
*
FROM tb_spine
),
-- Criação de variáveis de janela de 1M
tb_janela_1M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 1
SUM(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_1M,
MIN(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_1M,
MAX(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_1M,
AVG(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_1M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 1
SUM(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_1M,
MIN(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_1M,
MAX(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_1M,
AVG(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_1M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 1 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis de janela de 2M
tb_janela_2M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 2
SUM(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_2M,
MIN(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_2M,
MAX(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_2M,
AVG(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_2M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 2
SUM(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_2M,
MIN(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_2M,
MAX(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_2M,
AVG(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_2M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 2 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis de janela de 3M
tb_janela_3M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 3
SUM(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_3M,
MIN(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_3M,
MAX(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_3M,
AVG(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_3M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 3
SUM(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_3M,
MIN(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_3M,
MAX(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_3M,
AVG(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
tb_public.*,
tb_janela_1M.FEAT_NUM1_SUM_1M,
tb_janela_1M.FEAT_NUM1_MIN_1M,
tb_janela_1M.FEAT_NUM1_MAX_1M,
tb_janela_1M.FEAT_NUM1_AVG_1M,
tb_janela_1M.FEAT_NUM2_SUM_1M,
tb_janela_1M.FEAT_NUM2_MIN_1M,
tb_janela_1M.FEAT_NUM2_MAX_1M,
tb_janela_1M.FEAT_NUM2_AVG_1M,
tb_janela_2M.FEAT_NUM1_SUM_2M,
tb_janela_2M.FEAT_NUM1_MIN_2M,
tb_janela_2M.FEAT_NUM1_MAX_2M,
tb_janela_2M.FEAT_NUM1_AVG_2M,
tb_janela_2M.FEAT_NUM2_SUM_2M,
tb_janela_2M.FEAT_NUM2_MIN_2M,
tb_janela_2M.FEAT_NUM2_MAX_2M,
tb_janela_2M.FEAT_NUM2_AVG_2M,
tb_janela_3M.FEAT_NUM1_SUM_3M,
tb_janela_3M.FEAT_NUM1_MIN_3M,
tb_janela_3M.FEAT_NUM1_MAX_3M,
tb_janela_3M.FEAT_NUM1_AVG_3M,
tb_janela_3M.FEAT_NUM2_SUM_3M,
tb_janela_3M.FEAT_NUM2_MIN_3M,
tb_janela_3M.FEAT_NUM2_MAX_3M,
tb_janela_3M.FEAT_NUM2_AVG_3M
FROM tb_public
LEFT JOIN tb_janela_1M
ON tb_public.ID = tb_janela_1M.ID
AND tb_public.SAFRA_REF = tb_janela_1M.SAFRA_REF
LEFT JOIN tb_janela_2M
ON tb_public.ID = tb_janela_2M.ID
AND tb_public.SAFRA_REF = tb_janela_2M.SAFRA_REF
LEFT JOIN tb_janela_3M
ON tb_public.ID = tb_janela_3M.ID
AND tb_public.SAFRA_REF = tb_janela_3M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.FEAT_NUM1_SUM_1M,
tb_join.FEAT_NUM1_MIN_1M,
tb_join.FEAT_NUM1_MAX_1M,
tb_join.FEAT_NUM1_AVG_1M,
tb_join.FEAT_NUM2_SUM_1M,
tb_join.FEAT_NUM2_MIN_1M,
tb_join.FEAT_NUM2_MAX_1M,
tb_join.FEAT_NUM2_AVG_1M,
tb_join.FEAT_NUM1_SUM_2M,
tb_join.FEAT_NUM1_MIN_2M,
tb_join.FEAT_NUM1_MAX_2M,
tb_join.FEAT_NUM1_AVG_2M,
tb_join.FEAT_NUM2_SUM_2M,
tb_join.FEAT_NUM2_MIN_2M,
tb_join.FEAT_NUM2_MAX_2M,
tb_join.FEAT_NUM2_AVG_2M,
tb_join.FEAT_NUM1_SUM_3M,
tb_join.FEAT_NUM1_MIN_3M,
tb_join.FEAT_NUM1_MAX_3M,
tb_join.FEAT_NUM1_AVG_3M,
tb_join.FEAT_NUM2_SUM_3M,
tb_join.FEAT_NUM2_MIN_3M,
tb_join.FEAT_NUM2_MAX_3M,
tb_join.FEAT_NUM2_AVG_3M
FROM tb_join
### Criação de variáveis categóricas
A função mysql_create_query_cat() cria um texto com a query para a
criação de variáveis com a moda de cada uma das variáveis categóricas
listadas em feat_num_lista, para cada janela de tempo (em meses)
listada em lista_janela.
``` python
tb_publico = 'tb_spine'
tb_feat = 'tb_feat'
id = 'ID'
safra_ref = 'SAFRA_REF'
safra = 'SAFRA'
feat_num_lista = ['FEAT_CAT1','FEAT_CAT2']
lista_janela = [1,2,3]
query_final_cat_mysql = mysql_create_query_cat(tb_publico, tb_feat, lista_janela, feat_num_lista, id, safra_ref, safra)
```
``` python
df_cat_sqlite_mysql = pd.read_sql(query_final_cat_mysql, connection)
df_cat_sqlite_mysql.head()
```
/tmp/ipykernel_4407/3114069227.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df_cat_sqlite_mysql = pd.read_sql(query_final_cat_mysql, connection)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | FEAT_CAT1_MODA_1M | FEAT_CAT2_MODA_1M | FEAT_CAT1_MODA_2M | FEAT_CAT2_MODA_2M | FEAT_CAT1_MODA_3M | FEAT_CAT2_MODA_3M |
|-----|-----|------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|
| 0 | 4 | 2023-02-01 | None | None | None | None | None | None |
| 1 | 5 | 2023-02-01 | A | A | A | A | A | A |
| 2 | 6 | 2023-02-01 | None | None | None | None | None | None |
| 3 | 7 | 2023-02-01 | B | C | B | C | B | C |
| 4 | 10 | 2023-02-01 | None | None | None | None | None | None |
</div>
``` python
print(query_final_cat_mysql)
```
WITH
tb_public AS (
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_janela_FEAT_CAT1_1M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT1,
COUNT(*) AS frequency_FEAT_CAT1
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 1 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1
),
tb_row_FEAT_CAT1_1M AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT1 DESC
) AS row_num_FEAT_CAT1_1M
FROM tb_janela_FEAT_CAT1_1M
),
tb_moda_FEAT_CAT1_1M AS (
SELECT
tb_row_FEAT_CAT1_1M.ID,
tb_row_FEAT_CAT1_1M.SAFRA_REF,
tb_row_FEAT_CAT1_1M.FEAT_CAT1 AS FEAT_CAT1_MODA_1M
FROM tb_row_FEAT_CAT1_1M
WHERE row_num_FEAT_CAT1_1M = 1
),
tb_janela_FEAT_CAT2_1M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT2,
COUNT(*) AS frequency_FEAT_CAT2
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 1 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2
),
tb_row_FEAT_CAT2_1M AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT2 DESC
) AS row_num_FEAT_CAT2_1M
FROM tb_janela_FEAT_CAT2_1M
),
tb_moda_FEAT_CAT2_1M AS (
SELECT
tb_row_FEAT_CAT2_1M.ID,
tb_row_FEAT_CAT2_1M.SAFRA_REF,
tb_row_FEAT_CAT2_1M.FEAT_CAT2 AS FEAT_CAT2_MODA_1M
FROM tb_row_FEAT_CAT2_1M
WHERE row_num_FEAT_CAT2_1M = 1
),
tb_janela_FEAT_CAT1_2M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT1,
COUNT(*) AS frequency_FEAT_CAT1
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 2 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1
),
tb_row_FEAT_CAT1_2M AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT1 DESC
) AS row_num_FEAT_CAT1_2M
FROM tb_janela_FEAT_CAT1_2M
),
tb_moda_FEAT_CAT1_2M AS (
SELECT
tb_row_FEAT_CAT1_2M.ID,
tb_row_FEAT_CAT1_2M.SAFRA_REF,
tb_row_FEAT_CAT1_2M.FEAT_CAT1 AS FEAT_CAT1_MODA_2M
FROM tb_row_FEAT_CAT1_2M
WHERE row_num_FEAT_CAT1_2M = 1
),
tb_janela_FEAT_CAT2_2M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT2,
COUNT(*) AS frequency_FEAT_CAT2
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 2 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2
),
tb_row_FEAT_CAT2_2M AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT2 DESC
) AS row_num_FEAT_CAT2_2M
FROM tb_janela_FEAT_CAT2_2M
),
tb_moda_FEAT_CAT2_2M AS (
SELECT
tb_row_FEAT_CAT2_2M.ID,
tb_row_FEAT_CAT2_2M.SAFRA_REF,
tb_row_FEAT_CAT2_2M.FEAT_CAT2 AS FEAT_CAT2_MODA_2M
FROM tb_row_FEAT_CAT2_2M
WHERE row_num_FEAT_CAT2_2M = 1
),
tb_janela_FEAT_CAT1_3M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT1,
COUNT(*) AS frequency_FEAT_CAT1
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1
),
tb_row_FEAT_CAT1_3M AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT1 DESC
) AS row_num_FEAT_CAT1_3M
FROM tb_janela_FEAT_CAT1_3M
),
tb_moda_FEAT_CAT1_3M AS (
SELECT
tb_row_FEAT_CAT1_3M.ID,
tb_row_FEAT_CAT1_3M.SAFRA_REF,
tb_row_FEAT_CAT1_3M.FEAT_CAT1 AS FEAT_CAT1_MODA_3M
FROM tb_row_FEAT_CAT1_3M
WHERE row_num_FEAT_CAT1_3M = 1
),
tb_janela_FEAT_CAT2_3M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_feat.FEAT_CAT2,
COUNT(*) AS frequency_FEAT_CAT2
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2
),
tb_row_FEAT_CAT2_3M AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY
ID,
SAFRA_REF
ORDER BY frequency_FEAT_CAT2 DESC
) AS row_num_FEAT_CAT2_3M
FROM tb_janela_FEAT_CAT2_3M
),
tb_moda_FEAT_CAT2_3M AS (
SELECT
tb_row_FEAT_CAT2_3M.ID,
tb_row_FEAT_CAT2_3M.SAFRA_REF,
tb_row_FEAT_CAT2_3M.FEAT_CAT2 AS FEAT_CAT2_MODA_3M
FROM tb_row_FEAT_CAT2_3M
WHERE row_num_FEAT_CAT2_3M = 1
)
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_moda_FEAT_CAT1_1M.FEAT_CAT1_MODA_1M,
tb_moda_FEAT_CAT2_1M.FEAT_CAT2_MODA_1M,
tb_moda_FEAT_CAT1_2M.FEAT_CAT1_MODA_2M,
tb_moda_FEAT_CAT2_2M.FEAT_CAT2_MODA_2M,
tb_moda_FEAT_CAT1_3M.FEAT_CAT1_MODA_3M,
tb_moda_FEAT_CAT2_3M.FEAT_CAT2_MODA_3M
FROM tb_public
LEFT JOIN tb_moda_FEAT_CAT1_1M
ON tb_moda_FEAT_CAT1_1M.ID = tb_public.ID
AND tb_moda_FEAT_CAT1_1M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT2_1M
ON tb_moda_FEAT_CAT2_1M.ID = tb_public.ID
AND tb_moda_FEAT_CAT2_1M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT1_2M
ON tb_moda_FEAT_CAT1_2M.ID = tb_public.ID
AND tb_moda_FEAT_CAT1_2M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT2_2M
ON tb_moda_FEAT_CAT2_2M.ID = tb_public.ID
AND tb_moda_FEAT_CAT2_2M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT1_3M
ON tb_moda_FEAT_CAT1_3M.ID = tb_public.ID
AND tb_moda_FEAT_CAT1_3M.SAFRA_REF = tb_public.SAFRA_REF
LEFT JOIN tb_moda_FEAT_CAT2_3M
ON tb_moda_FEAT_CAT2_3M.ID = tb_public.ID
AND tb_moda_FEAT_CAT2_3M.SAFRA_REF = tb_public.SAFRA_REF
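### Criação de variáveis agregadas
A função mysql_create_query_agregada() cria um texto com a query para a
criação de variáveis com as operações soma, máximo, mínimo e média das
variáveis listadas em lista_feat_num, considerando apenas os registros
em que feat_cat assume cada um dos valores de lista_valor_agragador,
para cada janela de tempo (em meses) listada em lista_janela.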
``` python
tb_publico = 'tb_spine'
lista_janela = [3, 6]
lista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']
feat_cat = 'FEAT_CAT1'
lista_valor_agragador = ['A', 'B']
id = 'ID'
safra_ref = 'SAFRA_REF'
tb_feat = 'tb_feat'
safra = 'SAFRA'
query = mysql_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador)
```
``` python
print(query)
```
WITH
tb_public as(
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_agrupada_FEAT_CAT1_A_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT1 = 'A'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_A_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT1 = 'A'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_B_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT1_B_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_agrupada_FEAT_CAT1_A_3M.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_3M.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_agrupada_FEAT_CAT1_A_6M.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_A_6M.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_6M.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_agrupada_FEAT_CAT1_B_6M.AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_public
LEFT JOIN tb_agrupada_FEAT_CAT1_A_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_A_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_6M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_6M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_6M
FROM tb_join
``` python
df_mysql_agregada = pd.read_sql(query, connection)
df_mysql_agregada.head()
```
/tmp/ipykernel_4407/724425866.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df_mysql_agregada = pd.read_sql(query, connection)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT1_A_3M | MAX_FEAT_NUM1_FEAT_CAT1_A_3M | MIN_FEAT_NUM1_FEAT_CAT1_A_3M | AVG_FEAT_NUM1_FEAT_CAT1_A_3M | SUM_FEAT_NUM2_FEAT_CAT1_A_3M | MAX_FEAT_NUM2_FEAT_CAT1_A_3M | MIN_FEAT_NUM2_FEAT_CAT1_A_3M | AVG_FEAT_NUM2_FEAT_CAT1_A_3M | SUM_FEAT_NUM1_FEAT_CAT1_A_6M | MAX_FEAT_NUM1_FEAT_CAT1_A_6M | MIN_FEAT_NUM1_FEAT_CAT1_A_6M | AVG_FEAT_NUM1_FEAT_CAT1_A_6M | SUM_FEAT_NUM2_FEAT_CAT1_A_6M | MAX_FEAT_NUM2_FEAT_CAT1_A_6M | MIN_FEAT_NUM2_FEAT_CAT1_A_6M | AVG_FEAT_NUM2_FEAT_CAT1_A_6M | SUM_FEAT_NUM1_FEAT_CAT1_B_3M | MAX_FEAT_NUM1_FEAT_CAT1_B_3M | MIN_FEAT_NUM1_FEAT_CAT1_B_3M | AVG_FEAT_NUM1_FEAT_CAT1_B_3M | SUM_FEAT_NUM2_FEAT_CAT1_B_3M | MAX_FEAT_NUM2_FEAT_CAT1_B_3M | MIN_FEAT_NUM2_FEAT_CAT1_B_3M | AVG_FEAT_NUM2_FEAT_CAT1_B_3M | SUM_FEAT_NUM1_FEAT_CAT1_B_6M | MAX_FEAT_NUM1_FEAT_CAT1_B_6M | MIN_FEAT_NUM1_FEAT_CAT1_B_6M | AVG_FEAT_NUM1_FEAT_CAT1_B_6M | SUM_FEAT_NUM2_FEAT_CAT1_B_6M | MAX_FEAT_NUM2_FEAT_CAT1_B_6M | MIN_FEAT_NUM2_FEAT_CAT1_B_6M | AVG_FEAT_NUM2_FEAT_CAT1_B_6M |
|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 5 | 2023-02-01 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 6 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 |
| 4 | 10 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
</div>
``` python
tb_publico = 'tb_spine'
lista_janela = [3, 6]
lista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']
feat_cat = 'FEAT_CAT2'
lista_valor_agragador = ['B', 'C']
id = 'ID'
safra_ref = 'SAFRA_REF'
tb_feat = 'tb_feat'
safra = 'SAFRA'
query = mysql_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador)
```
``` python
print(query)
```
WITH
tb_public as(
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_agrupada_FEAT_CAT2_B_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_B_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_B_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT2 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT2_B_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_B_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_B_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_B_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_B_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_B_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT2 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT2_C_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_C_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_C_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_C_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_C_3M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_C_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_C_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_C_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_C_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT2 = 'C'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_agrupada_FEAT_CAT2_C_6M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_C_6M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_C_6M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_C_6M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_C_6M,
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_C_6M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_C_6M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_C_6M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_C_6M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF
AND tb_feat.SAFRA < tb_public.SAFRA_REF
AND tb_feat.FEAT_CAT2 = 'C'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_agrupada_FEAT_CAT2_B_3M.SUM_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.MAX_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.MIN_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.AVG_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.SUM_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.MAX_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.MIN_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_3M.AVG_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_agrupada_FEAT_CAT2_B_6M.SUM_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.MAX_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.MIN_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.AVG_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.SUM_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.MAX_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.MIN_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_B_6M.AVG_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_agrupada_FEAT_CAT2_C_3M.SUM_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.MAX_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.MIN_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.AVG_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.SUM_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.MAX_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.MIN_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_3M.AVG_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_agrupada_FEAT_CAT2_C_6M.SUM_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.MAX_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.MIN_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.AVG_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.SUM_FEAT_NUM2_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.MAX_FEAT_NUM2_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.MIN_FEAT_NUM2_FEAT_CAT2_C_6M,
tb_agrupada_FEAT_CAT2_C_6M.AVG_FEAT_NUM2_FEAT_CAT2_C_6M
FROM tb_public
LEFT JOIN tb_agrupada_FEAT_CAT2_B_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT2_B_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_B_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT2_B_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT2_B_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_B_6M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT2_C_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT2_C_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_C_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT2_C_6M
ON tb_public.ID = tb_agrupada_FEAT_CAT2_C_6M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_C_6M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.SUM_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT2_B_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT2_B_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT2_B_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT2_B_6M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT2_C_3M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT2_C_3M,
tb_join.SUM_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_join.MAX_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_join.MIN_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_join.AVG_FEAT_NUM1_FEAT_CAT2_C_6M,
tb_join.SUM_FEAT_NUM2_FEAT_CAT2_C_6M,
tb_join.MAX_FEAT_NUM2_FEAT_CAT2_C_6M,
tb_join.MIN_FEAT_NUM2_FEAT_CAT2_C_6M,
tb_join.AVG_FEAT_NUM2_FEAT_CAT2_C_6M
FROM tb_join
``` python
df_mysql_agregada = pd.read_sql(query, connection)
df_mysql_agregada.head()
```
/tmp/ipykernel_4407/724425866.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df_mysql_agregada = pd.read_sql(query, connection)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT2_B_3M | MAX_FEAT_NUM1_FEAT_CAT2_B_3M | MIN_FEAT_NUM1_FEAT_CAT2_B_3M | AVG_FEAT_NUM1_FEAT_CAT2_B_3M | SUM_FEAT_NUM2_FEAT_CAT2_B_3M | MAX_FEAT_NUM2_FEAT_CAT2_B_3M | MIN_FEAT_NUM2_FEAT_CAT2_B_3M | AVG_FEAT_NUM2_FEAT_CAT2_B_3M | SUM_FEAT_NUM1_FEAT_CAT2_B_6M | MAX_FEAT_NUM1_FEAT_CAT2_B_6M | MIN_FEAT_NUM1_FEAT_CAT2_B_6M | AVG_FEAT_NUM1_FEAT_CAT2_B_6M | SUM_FEAT_NUM2_FEAT_CAT2_B_6M | MAX_FEAT_NUM2_FEAT_CAT2_B_6M | MIN_FEAT_NUM2_FEAT_CAT2_B_6M | AVG_FEAT_NUM2_FEAT_CAT2_B_6M | SUM_FEAT_NUM1_FEAT_CAT2_C_3M | MAX_FEAT_NUM1_FEAT_CAT2_C_3M | MIN_FEAT_NUM1_FEAT_CAT2_C_3M | AVG_FEAT_NUM1_FEAT_CAT2_C_3M | SUM_FEAT_NUM2_FEAT_CAT2_C_3M | MAX_FEAT_NUM2_FEAT_CAT2_C_3M | MIN_FEAT_NUM2_FEAT_CAT2_C_3M | AVG_FEAT_NUM2_FEAT_CAT2_C_3M | SUM_FEAT_NUM1_FEAT_CAT2_C_6M | MAX_FEAT_NUM1_FEAT_CAT2_C_6M | MIN_FEAT_NUM1_FEAT_CAT2_C_6M | AVG_FEAT_NUM1_FEAT_CAT2_C_6M | SUM_FEAT_NUM2_FEAT_CAT2_C_6M | MAX_FEAT_NUM2_FEAT_CAT2_C_6M | MIN_FEAT_NUM2_FEAT_CAT2_C_6M | AVG_FEAT_NUM2_FEAT_CAT2_C_6M |
|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 5 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 6 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 |
| 4 | 10 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
</div>
## 3. Snowflake
### Criação de variáveis numéricas
A função snow_create_query_num() cria um texto com a query para a
criação de variáveis com as operações soma, mínimo, máximo, média e
mediana das variáveis listadas em feat_num_lista, para cada janela de
tempo (em meses) listada em lista_janela.
``` python
tb_publico = 'tb_spine'
tb_feat = 'tb_feat'
id = 'ID'
safra_ref = 'SAFRA_REF'
safra = 'SAFRA'
feat_num_lista = ['FEAT_NUM1','FEAT_NUM2']
lista_janela = [1,2,3]
query_final_num_snow = snow_create_query_num(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)
```
``` python
print(query_final_num_snow)
```
WITH
tb_public AS (
SELECT
*
FROM tb_spine
),
-- Criação de variáveis de janela de 1M
tb_janela_1M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_1M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_1M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_1M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_1M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MEDIAN_1M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_1M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_1M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_1M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_1M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MEDIAN_1M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis de janela de 2M
tb_janela_2M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_2M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_2M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_2M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_2M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MEDIAN_2M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_2M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_2M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_2M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_2M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MEDIAN_2M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis de janela de 3M
tb_janela_3M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM1 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_3M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MEDIAN_3M,
-- Criação de variáveis numéricas a partir da coluna FEAT_NUM2 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_3M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MEDIAN_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_join AS (
SELECT
tb_public.*,
tb_janela_1M.FEAT_NUM1_SUM_1M,
tb_janela_1M.FEAT_NUM1_MIN_1M,
tb_janela_1M.FEAT_NUM1_MAX_1M,
tb_janela_1M.FEAT_NUM1_AVG_1M,
tb_janela_1M.FEAT_NUM1_MEDIAN_1M,
tb_janela_1M.FEAT_NUM2_SUM_1M,
tb_janela_1M.FEAT_NUM2_MIN_1M,
tb_janela_1M.FEAT_NUM2_MAX_1M,
tb_janela_1M.FEAT_NUM2_AVG_1M,
tb_janela_1M.FEAT_NUM2_MEDIAN_1M,
tb_janela_2M.FEAT_NUM1_SUM_2M,
tb_janela_2M.FEAT_NUM1_MIN_2M,
tb_janela_2M.FEAT_NUM1_MAX_2M,
tb_janela_2M.FEAT_NUM1_AVG_2M,
tb_janela_2M.FEAT_NUM1_MEDIAN_2M,
tb_janela_2M.FEAT_NUM2_SUM_2M,
tb_janela_2M.FEAT_NUM2_MIN_2M,
tb_janela_2M.FEAT_NUM2_MAX_2M,
tb_janela_2M.FEAT_NUM2_AVG_2M,
tb_janela_2M.FEAT_NUM2_MEDIAN_2M,
tb_janela_3M.FEAT_NUM1_SUM_3M,
tb_janela_3M.FEAT_NUM1_MIN_3M,
tb_janela_3M.FEAT_NUM1_MAX_3M,
tb_janela_3M.FEAT_NUM1_AVG_3M,
tb_janela_3M.FEAT_NUM1_MEDIAN_3M,
tb_janela_3M.FEAT_NUM2_SUM_3M,
tb_janela_3M.FEAT_NUM2_MIN_3M,
tb_janela_3M.FEAT_NUM2_MAX_3M,
tb_janela_3M.FEAT_NUM2_AVG_3M,
tb_janela_3M.FEAT_NUM2_MEDIAN_3M
FROM tb_public
LEFT JOIN tb_janela_1M
ON tb_public.ID = tb_janela_1M.ID
AND tb_public.SAFRA_REF = tb_janela_1M.SAFRA_REF
LEFT JOIN tb_janela_2M
ON tb_public.ID = tb_janela_2M.ID
AND tb_public.SAFRA_REF = tb_janela_2M.SAFRA_REF
LEFT JOIN tb_janela_3M
ON tb_public.ID = tb_janela_3M.ID
AND tb_public.SAFRA_REF = tb_janela_3M.SAFRA_REF
)
SELECT
tb_join.ID,
tb_join.SAFRA_REF,
tb_join.FEAT_NUM1_SUM_1M,
tb_join.FEAT_NUM1_MIN_1M,
tb_join.FEAT_NUM1_MAX_1M,
tb_join.FEAT_NUM1_AVG_1M,
tb_join.FEAT_NUM1_MEDIAN_1M,
tb_join.FEAT_NUM2_SUM_1M,
tb_join.FEAT_NUM2_MIN_1M,
tb_join.FEAT_NUM2_MAX_1M,
tb_join.FEAT_NUM2_AVG_1M,
tb_join.FEAT_NUM2_MEDIAN_1M,
tb_join.FEAT_NUM1_SUM_2M,
tb_join.FEAT_NUM1_MIN_2M,
tb_join.FEAT_NUM1_MAX_2M,
tb_join.FEAT_NUM1_AVG_2M,
tb_join.FEAT_NUM1_MEDIAN_2M,
tb_join.FEAT_NUM2_SUM_2M,
tb_join.FEAT_NUM2_MIN_2M,
tb_join.FEAT_NUM2_MAX_2M,
tb_join.FEAT_NUM2_AVG_2M,
tb_join.FEAT_NUM2_MEDIAN_2M,
tb_join.FEAT_NUM1_SUM_3M,
tb_join.FEAT_NUM1_MIN_3M,
tb_join.FEAT_NUM1_MAX_3M,
tb_join.FEAT_NUM1_AVG_3M,
tb_join.FEAT_NUM1_MEDIAN_3M,
tb_join.FEAT_NUM2_SUM_3M,
tb_join.FEAT_NUM2_MIN_3M,
tb_join.FEAT_NUM2_MAX_3M,
tb_join.FEAT_NUM2_AVG_3M,
tb_join.FEAT_NUM2_MEDIAN_3M
FROM tb_join
### Criação de variáveis categóricas
A função snow_create_query_cat() cria um texto com a query para a criação
de variáveis com a moda de cada uma das variáveis categóricas listadas em
feat_num_lista, para cada janela de tempo fornecida em lista_janela.
``` python
tb_publico = 'tb_spine'
tb_feat = 'tb_feat'
id = 'ID'
safra_ref = 'SAFRA_REF'
safra = 'SAFRA'
feat_num_lista = ['FEAT_CAT1','FEAT_CAT2']
lista_janela = [1,2,3]
query_final_cat_snow = snow_create_query_cat(tb_publico, tb_feat, lista_janela, feat_num_lista, id, safra_ref, safra)
```
``` python
print(query_final_cat_snow)
```
WITH
tb_public AS (
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
tb_janela_1M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
MODE(FEAT_CAT1) AS MODE_FEAT_CAT1_1M,
MODE(FEAT_CAT2) AS MODE_FEAT_CAT2_1M
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_janela_2M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
MODE(FEAT_CAT1) AS MODE_FEAT_CAT1_2M,
MODE(FEAT_CAT2) AS MODE_FEAT_CAT2_2M
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
tb_janela_3M AS (
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
MODE(FEAT_CAT1) AS MODE_FEAT_CAT1_3M,
MODE(FEAT_CAT2) AS MODE_FEAT_CAT2_3M
FROM tb_public
LEFT JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
GROUP BY tb_public.ID, tb_public.SAFRA_REF
)
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_janela_1M.MODE_FEAT_CAT1_1M,
tb_janela_1M.MODE_FEAT_CAT2_1M,
tb_janela_2M.MODE_FEAT_CAT1_2M,
tb_janela_2M.MODE_FEAT_CAT2_2M,
tb_janela_3M.MODE_FEAT_CAT1_3M,
tb_janela_3M.MODE_FEAT_CAT2_3M
FROM tb_public
LEFT JOIN tb_janela_1M
ON tb_public.ID = JOIN tb_janela_1M.ID
AND tb_public.SAFRA_REF = JOIN tb_janela_1M.SAFRA_REF
LEFT JOIN tb_janela_2M
ON tb_public.ID = JOIN tb_janela_2M.ID
AND tb_public.SAFRA_REF = JOIN tb_janela_2M.SAFRA_REF
LEFT JOIN tb_janela_3M
ON tb_public.ID = JOIN tb_janela_3M.ID
AND tb_public.SAFRA_REF = JOIN tb_janela_3M.SAFRA_REF
### Criação de variáveis agregadas
``` python
lista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']
feat_cat = 'FEAT_CAT1'
lista_valor_agragador = ['B', 'C']
id = 'ID'
safra_ref = 'SAFRA_REF'
tb_feat = 'tb_feat'
safra = 'SAFRA'
janelas = [1, 2, 3]
tb_publico = 'tb_spine'
print(snow_create_query_agregada(tb_publico, tb_feat, janelas, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador))
```
WITH
tb_public as(
SELECT
ID,
SAFRA_REF
FROM tb_spine
),
-- Criação de variáveis agrupadas com janela de 1M
tb_agrupada_FEAT_CAT1_B_1M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_1M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_1M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_1M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_1M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_B_1M,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_1M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_1M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_1M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_1M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_B_1M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis agrupadas com janela de 2M
tb_agrupada_FEAT_CAT1_B_2M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_2M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_2M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_2M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_2M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_B_2M,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_2M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_2M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_2M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_2M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_B_2M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis agrupadas com janela de 3M
tb_agrupada_FEAT_CAT1_B_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_B_3M,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_B_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'B'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis agrupadas com janela de 1M
tb_agrupada_FEAT_CAT1_C_1M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_C_1M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_C_1M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_C_1M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_C_1M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_C_1M,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 1
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_C_1M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_C_1M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_C_1M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_C_1M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_C_1M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'C'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis agrupadas com janela de 2M
tb_agrupada_FEAT_CAT1_C_2M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_C_2M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_C_2M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_C_2M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_C_2M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_C_2M,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 2
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_C_2M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_C_2M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_C_2M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_C_2M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_C_2M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'C'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
),
-- Criação de variáveis agrupadas com janela de 3M
tb_agrupada_FEAT_CAT1_C_3M as(
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_C_3M,
MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_C_3M,
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_C_3M,
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_C_3M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_C_3M,
-- Criação de variáveis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 3
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_C_3M,
MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_C_3M,
MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_C_3M,
AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_C_3M,
MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_C_3M
FROM tb_public
INNER JOIN tb_feat
ON tb_public.ID = tb_feat.ID
AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF)
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)
AND tb_feat.FEAT_CAT1 = 'C'
GROUP BY tb_public.ID, tb_public.SAFRA_REF
)
SELECT
tb_public.ID,
tb_public.SAFRA_REF,
tb_agrupada_FEAT_CAT1_B_1M.SUM_FEAT_NUM1_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.MAX_FEAT_NUM1_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.MIN_FEAT_NUM1_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.AVG_FEAT_NUM1_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.MEDIAN_FEAT_NUM1_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.SUM_FEAT_NUM2_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.MAX_FEAT_NUM2_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.MIN_FEAT_NUM2_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.AVG_FEAT_NUM2_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_1M.MEDIAN_FEAT_NUM2_FEAT_CAT1_B_1M,
tb_agrupada_FEAT_CAT1_B_2M.SUM_FEAT_NUM1_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.MAX_FEAT_NUM1_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.MIN_FEAT_NUM1_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.AVG_FEAT_NUM1_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.MEDIAN_FEAT_NUM1_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.SUM_FEAT_NUM2_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.MAX_FEAT_NUM2_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.MIN_FEAT_NUM2_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.AVG_FEAT_NUM2_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_2M.MEDIAN_FEAT_NUM2_FEAT_CAT1_B_2M,
tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MEDIAN_FEAT_NUM1_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_B_3M.MEDIAN_FEAT_NUM2_FEAT_CAT1_B_3M,
tb_agrupada_FEAT_CAT1_C_1M.SUM_FEAT_NUM1_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.MAX_FEAT_NUM1_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.MIN_FEAT_NUM1_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.AVG_FEAT_NUM1_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.MEDIAN_FEAT_NUM1_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.SUM_FEAT_NUM2_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.MAX_FEAT_NUM2_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.MIN_FEAT_NUM2_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.AVG_FEAT_NUM2_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_1M.MEDIAN_FEAT_NUM2_FEAT_CAT1_C_1M,
tb_agrupada_FEAT_CAT1_C_2M.SUM_FEAT_NUM1_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.MAX_FEAT_NUM1_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.MIN_FEAT_NUM1_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.AVG_FEAT_NUM1_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.MEDIAN_FEAT_NUM1_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.SUM_FEAT_NUM2_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.MAX_FEAT_NUM2_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.MIN_FEAT_NUM2_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.AVG_FEAT_NUM2_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_2M.MEDIAN_FEAT_NUM2_FEAT_CAT1_C_2M,
tb_agrupada_FEAT_CAT1_C_3M.SUM_FEAT_NUM1_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.MAX_FEAT_NUM1_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.MIN_FEAT_NUM1_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.AVG_FEAT_NUM1_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.MEDIAN_FEAT_NUM1_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.SUM_FEAT_NUM2_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.MAX_FEAT_NUM2_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.MIN_FEAT_NUM2_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.AVG_FEAT_NUM2_FEAT_CAT1_C_3M,
tb_agrupada_FEAT_CAT1_C_3M.MEDIAN_FEAT_NUM2_FEAT_CAT1_C_3M
FROM tb_public
LEFT JOIN tb_agrupada_FEAT_CAT1_B_1M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_1M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_1M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_2M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_2M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_2M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_C_1M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_C_1M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_C_1M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_C_2M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_C_2M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_C_2M.SAFRA_REF
LEFT JOIN tb_agrupada_FEAT_CAT1_C_3M
ON tb_public.ID = tb_agrupada_FEAT_CAT1_C_3M.ID
AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_C_3M.SAFRA_REF
Raw data
{
"_id": null,
"home_page": "https://github.com/ravennaro/featsql",
"name": "featsql",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.7",
"maintainer_email": "",
"keywords": "nbdev jupyter notebook python",
"author": "Ravenna Oliveria",
"author_email": "ravenna.rro@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/d1/02/d0c23ad70a008acfbd1fa5a697889ea53325c1c7cbfa29167dd5886db398/featsql-0.0.1.tar.gz",
"platform": null,
"description": "# featsql\n\n<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->\n\n``` python\nfrom featsql.featsqlite import *\nfrom featsql.featmysql import *\nfrom featsql.featsnow import *\n```\n\n## Imports\n\n``` python\nimport pandas as pd\n```\n\n``` python\nimport mysql.connector\nfrom sqlalchemy import create_engine\npd.set_option('display.max_columns', None)\n```\n\n## Install\n\n``` sh\npip install featsql\n```\n\n## 1. SQLITE\n\n### Configurando a engine\n\n``` python\nurl_db = \"sqlite:///../../data/mydatabase.db\" \n\nengine = create_engine(url_db)\n```\n\n### Vis\u00e3o inicial do p\u00fablico\n\nPrimeiro vamos observar o formato da tabela spine\n\n``` python\ndf_spine = pd.read_sql(\"SELECT * FROM tb_spine\", engine)\ndf_spine.head()\n```\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | Target |\n|-----|-----|------------|--------|\n| 0 | 4 | 2023-02-01 | 0 |\n| 1 | 5 | 2023-02-01 | 0 |\n| 2 | 6 | 2023-02-01 | 0 |\n| 3 | 7 | 2023-02-01 | 0 |\n| 4 | 10 | 2023-02-01 | 0 |\n\n</div>\n\n### Vis\u00e3o inicial da tabela de vari\u00e1veis\n\nA tabela de vari\u00e1veis cont\u00e9m 4 vari\u00e1veis, duas sendo num\u00e9ricas e duas\ncateg\u00f3rica. 
Perceba que existem mais ID\u2019s \u00fanicos e datas dispon\u00edveis\nnessa tabela do que na tabela spine, caso que ocorre no dia a dia.\n\n``` python\ndf_data = pd.read_sql(\"SELECT * FROM tb_feat\", engine)\ndf_data.head()\n```\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA | FEAT_NUM1 | FEAT_NUM2 | FEAT_CAT1 | FEAT_CAT2 |\n|-----|-----|------------|-----------|-----------|-----------|-----------|\n| 0 | 1 | 2023-01-01 | -97 | -44 | A | C |\n| 1 | 2 | 2023-01-01 | 89 | 67 | C | B |\n| 2 | 3 | 2023-01-01 | 53 | 24 | A | B |\n| 3 | 4 | 2023-01-01 | -40 | 62 | B | C |\n| 4 | 5 | 2023-01-01 | 41 | 62 | B | B |\n\n</div>\n\n### Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas\n\nA fun\u00e7\u00e3o sqlite_create_query_num() cria um texto com a query para a\ncria\u00e7\u00e3o de vari\u00e1veis com as opera\u00e7\u00f5es soma, m\u00ednimo, m\u00e1ximo e m\u00e9dia das\nvari\u00e1veis listadas em feat_num_lista e com a janela de tempo listada em\nlista_janela.\n\n``` python\ntb_publico = 'tb_spine'\ntb_feat = 'tb_feat'\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\nsafra = 'SAFRA'\nfeat_num_lista = ['FEAT_NUM1','FEAT_NUM2']\nlista_janela = [1,2,3]\nquery_final_num_sqlite = sqlite_create_query_num(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)\n```\n\n``` python\ndf_num_sqlite = pd.read_sql(query_final_num_sqlite, engine)\ndf_num_sqlite.head()\n```\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | FEAT_NUM1_SUM_1M | FEAT_NUM1_MIN_1M | FEAT_NUM1_MAX_1M | FEAT_NUM1_AVG_1M | FEAT_NUM2_SUM_1M | FEAT_NUM2_MIN_1M | FEAT_NUM2_MAX_1M | FEAT_NUM2_AVG_1M | FEAT_NUM1_SUM_2M | FEAT_NUM1_MIN_2M | 
FEAT_NUM1_MAX_2M | FEAT_NUM1_AVG_2M | FEAT_NUM2_SUM_2M | FEAT_NUM2_MIN_2M | FEAT_NUM2_MAX_2M | FEAT_NUM2_AVG_2M | FEAT_NUM1_SUM_3M | FEAT_NUM1_MIN_3M | FEAT_NUM1_MAX_3M | FEAT_NUM1_AVG_3M | FEAT_NUM2_SUM_3M | FEAT_NUM2_MIN_3M | FEAT_NUM2_MAX_3M | FEAT_NUM2_AVG_3M |\n|-----|-----|------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|\n| 0 | 4 | 2023-02-01 | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 | -40 | -40 | -40 | -40.0 | 62 | 62 | 62 | 62.0 | -40 | -40 | -40 | -40.0 | 62 | 62 | 62 | 62.0 |\n| 1 | 5 | 2023-02-01 | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 | 41 | 41 | 41 | 41.0 | 62 | 62 | 62 | 62.0 | 41 | 41 | 41 | 41.0 | 62 | 62 | 62 | 62.0 |\n| 2 | 6 | 2023-02-01 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | 36 | 36 | 36 | 36.0 | 63 | 63 | 63 | 63.0 | 36 | 36 | 36 | 36.0 | 63 | 63 | 63 | 63.0 |\n| 3 | 7 | 2023-02-01 | 47.0 | 47.0 | 47.0 | 47.0 | 44.0 | 44.0 | 44.0 | 44.0 | 47 | 47 | 47 | 47.0 | 44 | 44 | 44 | 44.0 | 47 | 47 | 47 | 47.0 | 44 | 44 | 44 | 44.0 |\n| 4 | 10 | 2023-02-01 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | 29 | 29 | 29 | 29.0 | -7 | -7 | -7 | -7.0 | 29 | 29 | 29 | 29.0 | -7 | -7 | -7 | -7.0 |\n\n</div>\n\nAjustar para n\u00e3o necessariamente criar um dataframe por conta do tamanho\n\n``` python\nprint(query_final_num_sqlite)\n```\n\n\n WITH \n tb_public AS (\n SELECT \n *\n FROM tb_spine\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 1M\n tb_janela_1M as(\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a 
partir da coluna FEAT_NUM1 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_1M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_1M\n\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+1 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 2M\n tb_janela_2M as(\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_2M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_2M\n\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+2 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 3M\n tb_janela_3M as(\n 
SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_3M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_3M\n\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n\n tb_join AS (\n SELECT \n *\n FROM tb_public \n \n LEFT JOIN tb_janela_1M\n ON tb_public.ID = tb_janela_1M.ID\n AND tb_public.SAFRA_REF = tb_janela_1M.SAFRA_REF\n \n LEFT JOIN tb_janela_2M\n ON tb_public.ID = tb_janela_2M.ID\n AND tb_public.SAFRA_REF = tb_janela_2M.SAFRA_REF\n \n LEFT JOIN tb_janela_3M\n ON tb_public.ID = tb_janela_3M.ID\n AND tb_public.SAFRA_REF = tb_janela_3M.SAFRA_REF\n \n )\n \n SELECT \n tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.FEAT_NUM1_SUM_1M,\n tb_join.FEAT_NUM1_MIN_1M,\n tb_join.FEAT_NUM1_MAX_1M,\n tb_join.FEAT_NUM1_AVG_1M,\n \n tb_join.FEAT_NUM2_SUM_1M,\n tb_join.FEAT_NUM2_MIN_1M,\n tb_join.FEAT_NUM2_MAX_1M,\n tb_join.FEAT_NUM2_AVG_1M,\n \n tb_join.FEAT_NUM1_SUM_2M,\n tb_join.FEAT_NUM1_MIN_2M,\n tb_join.FEAT_NUM1_MAX_2M,\n tb_join.FEAT_NUM1_AVG_2M,\n \n tb_join.FEAT_NUM2_SUM_2M,\n tb_join.FEAT_NUM2_MIN_2M,\n tb_join.FEAT_NUM2_MAX_2M,\n tb_join.FEAT_NUM2_AVG_2M,\n \n tb_join.FEAT_NUM1_SUM_3M,\n tb_join.FEAT_NUM1_MIN_3M,\n tb_join.FEAT_NUM1_MAX_3M,\n tb_join.FEAT_NUM1_AVG_3M,\n 
\n tb_join.FEAT_NUM2_SUM_3M,\n tb_join.FEAT_NUM2_MIN_3M,\n tb_join.FEAT_NUM2_MAX_3M,\n tb_join.FEAT_NUM2_AVG_3M\n FROM tb_join\n \n\n### Cria\u00e7\u00e3o de vari\u00e1veis categ\u00f3ricas\n\nA fun\u00e7\u00e3o sqlite_create_query_cat() cria um texto com a query para a\ncria\u00e7\u00e3o de vari\u00e1veis com a moda de cada uma das vari\u00e1veis listadas em\nfeat_num_lista na janela de tempo fornecida em lista_janela.\n\n``` python\ntb_publico = 'tb_spine'\ntb_feat = 'tb_feat'\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\nsafra = 'SAFRA'\nfeat_num_lista = ['FEAT_CAT1', 'FEAT_CAT2']\nlista_janela = [1, 3, 6]\nquery_final_cat_sqlite = sqlite_create_query_cat(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)\n```\n\n``` python\ndf_cat_sqlite_sqlite = pd.read_sql(query_final_cat_sqlite, engine)\ndf_cat_sqlite_sqlite.head()\n```\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | FEAT_CAT1_MODA_1M | FEAT_CAT2_MODA_1M | FEAT_CAT1_MODA_3M | FEAT_CAT2_MODA_3M | FEAT_CAT1_MODA_6M | FEAT_CAT2_MODA_6M |\n|-----|-----|------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|\n| 0 | 4 | 2023-02-01 | B | C | B | C | B | C |\n| 1 | 5 | 2023-02-01 | B | B | B | B | B | B |\n| 2 | 6 | 2023-02-01 | A | A | A | A | A | A |\n| 3 | 7 | 2023-02-01 | C | B | C | B | C | B |\n| 4 | 10 | 2023-02-01 | A | B | A | B | A | B |\n\n</div>\n\n``` python\nprint(query_final_cat_sqlite)\n```\n\n\n WITH \n tb_public as (\n SELECT \n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n tb_janela_FEAT_CAT1_1M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT1,\n COUNT(*) AS frequency_FEAT_CAT1\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+1 months')) >= 
tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1\n ),\n\n tb_row_FEAT_CAT1_1M as (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT1 DESC\n ) as row_num_FEAT_CAT1_1M\n FROM tb_janela_FEAT_CAT1_1M\n ),\n \n tb_moda_FEAT_CAT1_1M AS(\n SELECT\n tb_row_FEAT_CAT1_1M .ID,\n tb_row_FEAT_CAT1_1M .SAFRA_REF,\n tb_row_FEAT_CAT1_1M.FEAT_CAT1 AS FEAT_CAT1_MODA_1M\n FROM tb_row_FEAT_CAT1_1M \n WHERE row_num_FEAT_CAT1_1M = 1\n ),\n\n tb_janela_FEAT_CAT2_1M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT2,\n COUNT(*) AS frequency_FEAT_CAT2\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+1 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2\n ),\n\n tb_row_FEAT_CAT2_1M as (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT2 DESC\n ) as row_num_FEAT_CAT2_1M\n FROM tb_janela_FEAT_CAT2_1M\n ),\n \n tb_moda_FEAT_CAT2_1M AS(\n SELECT\n tb_row_FEAT_CAT2_1M .ID,\n tb_row_FEAT_CAT2_1M .SAFRA_REF,\n tb_row_FEAT_CAT2_1M.FEAT_CAT2 AS FEAT_CAT2_MODA_1M\n FROM tb_row_FEAT_CAT2_1M \n WHERE row_num_FEAT_CAT2_1M = 1\n ),\n\n tb_janela_FEAT_CAT1_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT1,\n COUNT(*) AS frequency_FEAT_CAT1\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1\n ),\n\n tb_row_FEAT_CAT1_3M as (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT1 DESC\n ) as row_num_FEAT_CAT1_3M\n FROM tb_janela_FEAT_CAT1_3M\n ),\n \n 
tb_moda_FEAT_CAT1_3M AS(\n SELECT\n tb_row_FEAT_CAT1_3M .ID,\n tb_row_FEAT_CAT1_3M .SAFRA_REF,\n tb_row_FEAT_CAT1_3M.FEAT_CAT1 AS FEAT_CAT1_MODA_3M\n FROM tb_row_FEAT_CAT1_3M \n WHERE row_num_FEAT_CAT1_3M = 1\n ),\n\n tb_janela_FEAT_CAT2_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT2,\n COUNT(*) AS frequency_FEAT_CAT2\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2\n ),\n\n tb_row_FEAT_CAT2_3M as (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT2 DESC\n ) as row_num_FEAT_CAT2_3M\n FROM tb_janela_FEAT_CAT2_3M\n ),\n \n tb_moda_FEAT_CAT2_3M AS(\n SELECT\n tb_row_FEAT_CAT2_3M .ID,\n tb_row_FEAT_CAT2_3M .SAFRA_REF,\n tb_row_FEAT_CAT2_3M.FEAT_CAT2 AS FEAT_CAT2_MODA_3M\n FROM tb_row_FEAT_CAT2_3M \n WHERE row_num_FEAT_CAT2_3M = 1\n ),\n\n tb_janela_FEAT_CAT1_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT1,\n COUNT(*) AS frequency_FEAT_CAT1\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1\n ),\n\n tb_row_FEAT_CAT1_6M as (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT1 DESC\n ) as row_num_FEAT_CAT1_6M\n FROM tb_janela_FEAT_CAT1_6M\n ),\n \n tb_moda_FEAT_CAT1_6M AS(\n SELECT\n tb_row_FEAT_CAT1_6M .ID,\n tb_row_FEAT_CAT1_6M .SAFRA_REF,\n tb_row_FEAT_CAT1_6M.FEAT_CAT1 AS FEAT_CAT1_MODA_6M\n FROM tb_row_FEAT_CAT1_6M \n WHERE row_num_FEAT_CAT1_6M = 1\n ),\n\n tb_janela_FEAT_CAT2_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT2,\n COUNT(*) AS frequency_FEAT_CAT2\n FROM 
tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2\n ),\n\n tb_row_FEAT_CAT2_6M as (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT2 DESC\n ) as row_num_FEAT_CAT2_6M\n FROM tb_janela_FEAT_CAT2_6M\n ),\n \n tb_moda_FEAT_CAT2_6M AS(\n SELECT\n tb_row_FEAT_CAT2_6M .ID,\n tb_row_FEAT_CAT2_6M .SAFRA_REF,\n tb_row_FEAT_CAT2_6M.FEAT_CAT2 AS FEAT_CAT2_MODA_6M\n FROM tb_row_FEAT_CAT2_6M \n WHERE row_num_FEAT_CAT2_6M = 1\n )\n\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n tb_moda_FEAT_CAT1_1M.FEAT_CAT1_MODA_1M,\n \n tb_moda_FEAT_CAT2_1M.FEAT_CAT2_MODA_1M,\n \n tb_moda_FEAT_CAT1_3M.FEAT_CAT1_MODA_3M,\n \n tb_moda_FEAT_CAT2_3M.FEAT_CAT2_MODA_3M,\n \n tb_moda_FEAT_CAT1_6M.FEAT_CAT1_MODA_6M,\n \n tb_moda_FEAT_CAT2_6M.FEAT_CAT2_MODA_6M\n FROM tb_public\n \n LEFT JOIN tb_moda_FEAT_CAT1_1M \n ON tb_moda_FEAT_CAT1_1M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT1_1M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT2_1M \n ON tb_moda_FEAT_CAT2_1M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT2_1M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT1_3M \n ON tb_moda_FEAT_CAT1_3M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT1_3M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT2_3M \n ON tb_moda_FEAT_CAT2_3M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT2_3M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT1_6M \n ON tb_moda_FEAT_CAT1_6M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT1_6M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT2_6M \n ON tb_moda_FEAT_CAT2_6M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT2_6M.SAFRA_REF = tb_public.SAFRA_REF\n\n \n\n### Cria\u00e7\u00e3o de vari\u00e1veis agragadas\n\n``` python\ntb_publico = 'tb_spine'\nlista_janela = [3, 6]\nlista_feat_num = 
['FEAT_NUM1', 'FEAT_NUM2']\nfeat_cat = 'FEAT_CAT1'\nlista_valor_agregador = ['A', 'B']\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\ntb_feat = 'tb_feat'\nsafra = 'SAFRA'\n\nquery = sqlite_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agregador)\n```\n\n``` python\nprint(query)\n```\n\n\n WITH\n tb_public as(\n SELECT\n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n \n tb_agrupada_FEAT_CAT1_A_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_3M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'A'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_A_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
AVG_FEAT_NUM2_FEAT_CAT1_A_6M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'A'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_B_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_B_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_6M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', 
date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n tb_join AS (\n SELECT \n * \n FROM tb_public \n \n LEFT JOIN tb_agrupada_FEAT_CAT1_A_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_A_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_6M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_6M.SAFRA_REF\n \n )\n\n SELECT \n tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n 
tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_6M\n FROM tb_join\n \n\n``` python\ndf_sqlite_agregada = pd.read_sql(query, engine)\ndf_sqlite_agregada.head()\n```\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT1_A_3M | MAX_FEAT_NUM1_FEAT_CAT1_A_3M | MIN_FEAT_NUM1_FEAT_CAT1_A_3M | AVG_FEAT_NUM1_FEAT_CAT1_A_3M | SUM_FEAT_NUM2_FEAT_CAT1_A_3M | MAX_FEAT_NUM2_FEAT_CAT1_A_3M | MIN_FEAT_NUM2_FEAT_CAT1_A_3M | AVG_FEAT_NUM2_FEAT_CAT1_A_3M | SUM_FEAT_NUM1_FEAT_CAT1_A_6M | MAX_FEAT_NUM1_FEAT_CAT1_A_6M | MIN_FEAT_NUM1_FEAT_CAT1_A_6M | AVG_FEAT_NUM1_FEAT_CAT1_A_6M | SUM_FEAT_NUM2_FEAT_CAT1_A_6M | MAX_FEAT_NUM2_FEAT_CAT1_A_6M | MIN_FEAT_NUM2_FEAT_CAT1_A_6M | AVG_FEAT_NUM2_FEAT_CAT1_A_6M | SUM_FEAT_NUM1_FEAT_CAT1_B_3M | MAX_FEAT_NUM1_FEAT_CAT1_B_3M | MIN_FEAT_NUM1_FEAT_CAT1_B_3M | AVG_FEAT_NUM1_FEAT_CAT1_B_3M | SUM_FEAT_NUM2_FEAT_CAT1_B_3M | MAX_FEAT_NUM2_FEAT_CAT1_B_3M | MIN_FEAT_NUM2_FEAT_CAT1_B_3M | AVG_FEAT_NUM2_FEAT_CAT1_B_3M | SUM_FEAT_NUM1_FEAT_CAT1_B_6M | MAX_FEAT_NUM1_FEAT_CAT1_B_6M | MIN_FEAT_NUM1_FEAT_CAT1_B_6M | AVG_FEAT_NUM1_FEAT_CAT1_B_6M | SUM_FEAT_NUM2_FEAT_CAT1_B_6M | MAX_FEAT_NUM2_FEAT_CAT1_B_6M | MIN_FEAT_NUM2_FEAT_CAT1_B_6M | AVG_FEAT_NUM2_FEAT_CAT1_B_6M 
|\n|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|\n| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 |\n| 1 | 5 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 |\n| 2 | 6 | 2023-02-01 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 4 | 10 | 2023-02-01 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 
| -7.0 | -7.0 | -7.0 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n\n</div>\n\n``` python\ntb_publico = 'tb_spine'\nlista_janela = [3, 6]\nlista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']\nfeat_cat = 'FEAT_CAT1'\nlista_valor_agragador = ['A', 'B']\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\ntb_feat = 'tb_feat'\nsafra = 'SAFRA'\n\nquery = sqlite_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador)\n```\n\n``` python\nprint(query)\n```\n\n\n WITH\n tb_public as(\n SELECT\n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n \n tb_agrupada_FEAT_CAT1_A_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_3M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'A'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_A_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n 
SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_6M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'A'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_B_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+3 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_B_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_6M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (strftime('%Y-%m-%d', date(tb_feat.SAFRA, '+6 months')) >= tb_public.SAFRA_REF)\n AND (tb_feat.SAFRA < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n tb_join AS (\n SELECT \n * \n FROM tb_public \n \n LEFT JOIN tb_agrupada_FEAT_CAT1_A_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_A_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_6M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_6M.SAFRA_REF\n \n )\n\n SELECT \n tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n 
tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_6M\n FROM tb_join\n \n\n``` python\ndf_sqlite_agregada = pd.read_sql(query, engine)\ndf_sqlite_agregada.head()\n```\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT1_A_3M | MAX_FEAT_NUM1_FEAT_CAT1_A_3M | MIN_FEAT_NUM1_FEAT_CAT1_A_3M | AVG_FEAT_NUM1_FEAT_CAT1_A_3M | SUM_FEAT_NUM2_FEAT_CAT1_A_3M | MAX_FEAT_NUM2_FEAT_CAT1_A_3M | MIN_FEAT_NUM2_FEAT_CAT1_A_3M | AVG_FEAT_NUM2_FEAT_CAT1_A_3M | SUM_FEAT_NUM1_FEAT_CAT1_A_6M | MAX_FEAT_NUM1_FEAT_CAT1_A_6M | MIN_FEAT_NUM1_FEAT_CAT1_A_6M | AVG_FEAT_NUM1_FEAT_CAT1_A_6M | SUM_FEAT_NUM2_FEAT_CAT1_A_6M | MAX_FEAT_NUM2_FEAT_CAT1_A_6M | MIN_FEAT_NUM2_FEAT_CAT1_A_6M | AVG_FEAT_NUM2_FEAT_CAT1_A_6M | SUM_FEAT_NUM1_FEAT_CAT1_B_3M | MAX_FEAT_NUM1_FEAT_CAT1_B_3M | MIN_FEAT_NUM1_FEAT_CAT1_B_3M | AVG_FEAT_NUM1_FEAT_CAT1_B_3M | SUM_FEAT_NUM2_FEAT_CAT1_B_3M | MAX_FEAT_NUM2_FEAT_CAT1_B_3M | MIN_FEAT_NUM2_FEAT_CAT1_B_3M | AVG_FEAT_NUM2_FEAT_CAT1_B_3M | SUM_FEAT_NUM1_FEAT_CAT1_B_6M | MAX_FEAT_NUM1_FEAT_CAT1_B_6M | MIN_FEAT_NUM1_FEAT_CAT1_B_6M | AVG_FEAT_NUM1_FEAT_CAT1_B_6M | SUM_FEAT_NUM2_FEAT_CAT1_B_6M | MAX_FEAT_NUM2_FEAT_CAT1_B_6M | MIN_FEAT_NUM2_FEAT_CAT1_B_6M | AVG_FEAT_NUM2_FEAT_CAT1_B_6M 
|\n|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|\n| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 | -40.0 | -40.0 | -40.0 | -40.0 | 62.0 | 62.0 | 62.0 | 62.0 |\n| 1 | 5 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 | 41.0 | 41.0 | 41.0 | 41.0 | 62.0 | 62.0 | 62.0 | 62.0 |\n| 2 | 6 | 2023-02-01 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | 36.0 | 36.0 | 36.0 | 36.0 | 63.0 | 63.0 | 63.0 | 63.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 4 | 10 | 2023-02-01 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 
| -7.0 | -7.0 | -7.0 | 29.0 | 29.0 | 29.0 | 29.0 | -7.0 | -7.0 | -7.0 | -7.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n\n</div>\n\n## 2. MySQL\n\n### Configurando conex\u00e3o\n\n``` python\nhost = \"localhost\"\nuser = \"sqluser\"\npassword = \"password\"\ndatabase = \"mydatabase\"\n\n# Conectar ao MySQL\nconnection = mysql.connector.connect(\n host=host,\n user=user,\n password=password,\n database=database\n)\n```\n\n### Vis\u00e3o inicial do p\u00fablico\n\n``` python\ndf_spine = pd.read_sql(\"SELECT * FROM tb_spine\", connection)\ndf_spine.head()\n```\n\n /tmp/ipykernel_4407/661881290.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n df_spine = pd.read_sql(\"SELECT * FROM tb_spine\", connection)\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | Target |\n|-----|-----|------------|--------|\n| 0 | 4 | 2023-02-01 | 1 |\n| 1 | 5 | 2023-02-01 | 0 |\n| 2 | 6 | 2023-02-01 | 0 |\n| 3 | 7 | 2023-02-01 | 0 |\n| 4 | 10 | 2023-02-01 | 0 |\n\n</div>\n\n### Vis\u00e3o inicial da tabela de vari\u00e1veis\n\n``` python\ndf_data = pd.read_sql(\"SELECT * FROM tb_feat\", connection)\ndf_data.head()\n```\n\n /tmp/ipykernel_4407/1780842963.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. 
Please consider using SQLAlchemy.\n df_data = pd.read_sql(\"SELECT * FROM tb_feat\", connection)\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA | FEAT_NUM1 | FEAT_NUM2 | FEAT_CAT1 | FEAT_CAT2 |\n|-----|-----|------------|-----------|-----------|-----------|-----------|\n| 0 | 1 | 2023-01-01 | 73 | 23 | B | B |\n| 1 | 3 | 2023-01-01 | 15 | 1 | B | B |\n| 2 | 5 | 2023-01-01 | 75 | 71 | A | A |\n| 3 | 7 | 2023-01-01 | 73 | 82 | B | C |\n| 4 | 9 | 2023-01-01 | 61 | 8 | C | B |\n\n</div>\n\n### Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas\n\nA fun\u00e7\u00e3o mysql_create_query_num() cria um texto com a query para a\ncria\u00e7\u00e3o de vari\u00e1veis com as opera\u00e7\u00f5es soma, m\u00ednimo, m\u00e1ximo e m\u00e9dia das\nvari\u00e1veis listadas em feat_num_lista e com a janela de tempo listada em\nlista_janela.\n\n``` python\ntb_publico = 'tb_spine'\ntb_feat = 'tb_feat'\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\nsafra = 'SAFRA'\nfeat_num_lista = ['FEAT_NUM1','FEAT_NUM2']\nlista_janela = [1,2,3]\nquery_final_num_mysql = mysql_create_query_num(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)\n```\n\n``` python\ndf_num_mysql = pd.read_sql(query_final_num_mysql, connection)\ndf_num_mysql.head()\n```\n\n /tmp/ipykernel_4407/2119439562.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. 
Please consider using SQLAlchemy.\n df_num_mysql = pd.read_sql(query_final_num_mysql, connection)\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | FEAT_NUM1_SUM_1M | FEAT_NUM1_MIN_1M | FEAT_NUM1_MAX_1M | FEAT_NUM1_AVG_1M | FEAT_NUM2_SUM_1M | FEAT_NUM2_MIN_1M | FEAT_NUM2_MAX_1M | FEAT_NUM2_AVG_1M | FEAT_NUM1_SUM_2M | FEAT_NUM1_MIN_2M | FEAT_NUM1_MAX_2M | FEAT_NUM1_AVG_2M | FEAT_NUM2_SUM_2M | FEAT_NUM2_MIN_2M | FEAT_NUM2_MAX_2M | FEAT_NUM2_AVG_2M | FEAT_NUM1_SUM_3M | FEAT_NUM1_MIN_3M | FEAT_NUM1_MAX_3M | FEAT_NUM1_AVG_3M | FEAT_NUM2_SUM_3M | FEAT_NUM2_MIN_3M | FEAT_NUM2_MAX_3M | FEAT_NUM2_AVG_3M |\n|-----|-----|------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|------------------|\n| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 1 | 5 | 2023-02-01 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 |\n| 2 | 6 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 3 | 7 | 2023-02-01 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 |\n| 4 | 10 
| 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n\n</div>\n\n``` python\nprint(query_final_num_mysql)\n```\n\n\n WITH \n tb_public AS (\n SELECT \n *\n FROM tb_spine\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 1M\n tb_janela_1M AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 1\n SUM(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_1M,\n MIN(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_1M,\n MAX(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_1M,\n AVG(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_1M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 1\n SUM(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_1M,\n MIN(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_1M,\n MAX(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_1M,\n AVG(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_1M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 1 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 2M\n tb_janela_2M AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 2\n SUM(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_2M,\n MIN(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_2M,\n MAX(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_2M,\n AVG(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_2M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 2\n SUM(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_2M,\n MIN(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_2M,\n 
MAX(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_2M,\n AVG(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_2M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 2 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 3M\n tb_janela_3M AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 3\n SUM(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_3M,\n MIN(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_3M,\n MAX(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_3M,\n AVG(IFNULL(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_3M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 3\n SUM(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_3M,\n MIN(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_3M,\n MAX(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_3M,\n AVG(IFNULL(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_3M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n\n tb_join AS (\n SELECT \n tb_public.*,\n \n tb_janela_1M.FEAT_NUM1_SUM_1M,\n tb_janela_1M.FEAT_NUM1_MIN_1M,\n tb_janela_1M.FEAT_NUM1_MAX_1M,\n tb_janela_1M.FEAT_NUM1_AVG_1M,\n \n tb_janela_1M.FEAT_NUM2_SUM_1M,\n tb_janela_1M.FEAT_NUM2_MIN_1M,\n tb_janela_1M.FEAT_NUM2_MAX_1M,\n tb_janela_1M.FEAT_NUM2_AVG_1M,\n \n tb_janela_2M.FEAT_NUM1_SUM_2M,\n tb_janela_2M.FEAT_NUM1_MIN_2M,\n tb_janela_2M.FEAT_NUM1_MAX_2M,\n tb_janela_2M.FEAT_NUM1_AVG_2M,\n \n tb_janela_2M.FEAT_NUM2_SUM_2M,\n tb_janela_2M.FEAT_NUM2_MIN_2M,\n tb_janela_2M.FEAT_NUM2_MAX_2M,\n tb_janela_2M.FEAT_NUM2_AVG_2M,\n \n tb_janela_3M.FEAT_NUM1_SUM_3M,\n 
tb_janela_3M.FEAT_NUM1_MIN_3M,\n tb_janela_3M.FEAT_NUM1_MAX_3M,\n tb_janela_3M.FEAT_NUM1_AVG_3M,\n \n tb_janela_3M.FEAT_NUM2_SUM_3M,\n tb_janela_3M.FEAT_NUM2_MIN_3M,\n tb_janela_3M.FEAT_NUM2_MAX_3M,\n tb_janela_3M.FEAT_NUM2_AVG_3M\n\n FROM tb_public \n \n LEFT JOIN tb_janela_1M\n ON tb_public.ID = tb_janela_1M.ID\n AND tb_public.SAFRA_REF = tb_janela_1M.SAFRA_REF\n \n LEFT JOIN tb_janela_2M\n ON tb_public.ID = tb_janela_2M.ID\n AND tb_public.SAFRA_REF = tb_janela_2M.SAFRA_REF\n \n LEFT JOIN tb_janela_3M\n ON tb_public.ID = tb_janela_3M.ID\n AND tb_public.SAFRA_REF = tb_janela_3M.SAFRA_REF\n \n )\n \n SELECT \n tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.FEAT_NUM1_SUM_1M,\n tb_join.FEAT_NUM1_MIN_1M,\n tb_join.FEAT_NUM1_MAX_1M,\n tb_join.FEAT_NUM1_AVG_1M,\n \n tb_join.FEAT_NUM2_SUM_1M,\n tb_join.FEAT_NUM2_MIN_1M,\n tb_join.FEAT_NUM2_MAX_1M,\n tb_join.FEAT_NUM2_AVG_1M,\n \n tb_join.FEAT_NUM1_SUM_2M,\n tb_join.FEAT_NUM1_MIN_2M,\n tb_join.FEAT_NUM1_MAX_2M,\n tb_join.FEAT_NUM1_AVG_2M,\n \n tb_join.FEAT_NUM2_SUM_2M,\n tb_join.FEAT_NUM2_MIN_2M,\n tb_join.FEAT_NUM2_MAX_2M,\n tb_join.FEAT_NUM2_AVG_2M,\n \n tb_join.FEAT_NUM1_SUM_3M,\n tb_join.FEAT_NUM1_MIN_3M,\n tb_join.FEAT_NUM1_MAX_3M,\n tb_join.FEAT_NUM1_AVG_3M,\n \n tb_join.FEAT_NUM2_SUM_3M,\n tb_join.FEAT_NUM2_MIN_3M,\n tb_join.FEAT_NUM2_MAX_3M,\n tb_join.FEAT_NUM2_AVG_3M\n FROM tb_join\n \n\n### Cria\u00e7\u00e3o de vari\u00e1veis categ\u00f3ricas\n\nA fun\u00e7\u00e3o mysql_create_query_cat() cria um texto com a query para a\ncria\u00e7\u00e3o de vari\u00e1veis com a moda de cada uma das vari\u00e1veis listadas em\nfeat_num_lista na janela de tempo fornecida em lista_janela.\n\n``` python\ntb_publico = 'tb_spine'\ntb_feat = 'tb_feat'\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\nsafra = 'SAFRA'\nfeat_num_lista = ['FEAT_CAT1','FEAT_CAT2']\nlista_janela = [1,2,3]\nquery_final_cat_mysql = mysql_create_query_cat(tb_publico, tb_feat, lista_janela, feat_num_lista, id, safra_ref, safra)\n```\n\n``` python\ndf_cat_sqlite_mysql = 
pd.read_sql(query_final_cat_mysql, connection)\ndf_cat_sqlite_mysql.head()\n```\n\n /tmp/ipykernel_4407/3114069227.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n df_cat_sqlite_mysql = pd.read_sql(query_final_cat_mysql, connection)\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | FEAT_CAT1_MODA_1M | FEAT_CAT2_MODA_1M | FEAT_CAT1_MODA_2M | FEAT_CAT2_MODA_2M | FEAT_CAT1_MODA_3M | FEAT_CAT2_MODA_3M |\n|-----|-----|------------|-------------------|-------------------|-------------------|-------------------|-------------------|-------------------|\n| 0 | 4 | 2023-02-01 | None | None | None | None | None | None |\n| 1 | 5 | 2023-02-01 | A | A | A | A | A | A |\n| 2 | 6 | 2023-02-01 | None | None | None | None | None | None |\n| 3 | 7 | 2023-02-01 | B | C | B | C | B | C |\n| 4 | 10 | 2023-02-01 | None | None | None | None | None | None |\n\n</div>\n\n``` python\nprint(query_final_cat_mysql)\n```\n\n\n WITH \n tb_public AS (\n SELECT \n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n tb_janela_FEAT_CAT1_1M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT1,\n COUNT(*) AS frequency_FEAT_CAT1\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 1 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1\n ),\n\n tb_row_FEAT_CAT1_1M AS (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT1 DESC\n ) AS row_num_FEAT_CAT1_1M\n FROM tb_janela_FEAT_CAT1_1M\n ),\n \n tb_moda_FEAT_CAT1_1M AS (\n SELECT\n tb_row_FEAT_CAT1_1M.ID,\n 
tb_row_FEAT_CAT1_1M.SAFRA_REF,\n tb_row_FEAT_CAT1_1M.FEAT_CAT1 AS FEAT_CAT1_MODA_1M\n FROM tb_row_FEAT_CAT1_1M \n WHERE row_num_FEAT_CAT1_1M = 1\n ),\n\n tb_janela_FEAT_CAT2_1M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT2,\n COUNT(*) AS frequency_FEAT_CAT2\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 1 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2\n ),\n\n tb_row_FEAT_CAT2_1M AS (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT2 DESC\n ) AS row_num_FEAT_CAT2_1M\n FROM tb_janela_FEAT_CAT2_1M\n ),\n \n tb_moda_FEAT_CAT2_1M AS (\n SELECT\n tb_row_FEAT_CAT2_1M.ID,\n tb_row_FEAT_CAT2_1M.SAFRA_REF,\n tb_row_FEAT_CAT2_1M.FEAT_CAT2 AS FEAT_CAT2_MODA_1M\n FROM tb_row_FEAT_CAT2_1M \n WHERE row_num_FEAT_CAT2_1M = 1\n ),\n\n tb_janela_FEAT_CAT1_2M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT1,\n COUNT(*) AS frequency_FEAT_CAT1\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 2 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1\n ),\n\n tb_row_FEAT_CAT1_2M AS (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT1 DESC\n ) AS row_num_FEAT_CAT1_2M\n FROM tb_janela_FEAT_CAT1_2M\n ),\n \n tb_moda_FEAT_CAT1_2M AS (\n SELECT\n tb_row_FEAT_CAT1_2M.ID,\n tb_row_FEAT_CAT1_2M.SAFRA_REF,\n tb_row_FEAT_CAT1_2M.FEAT_CAT1 AS FEAT_CAT1_MODA_2M\n FROM tb_row_FEAT_CAT1_2M \n WHERE row_num_FEAT_CAT1_2M = 1\n ),\n\n tb_janela_FEAT_CAT2_2M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT2,\n COUNT(*) AS frequency_FEAT_CAT2\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 
2 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2\n ),\n\n tb_row_FEAT_CAT2_2M AS (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT2 DESC\n ) AS row_num_FEAT_CAT2_2M\n FROM tb_janela_FEAT_CAT2_2M\n ),\n \n tb_moda_FEAT_CAT2_2M AS (\n SELECT\n tb_row_FEAT_CAT2_2M.ID,\n tb_row_FEAT_CAT2_2M.SAFRA_REF,\n tb_row_FEAT_CAT2_2M.FEAT_CAT2 AS FEAT_CAT2_MODA_2M\n FROM tb_row_FEAT_CAT2_2M \n WHERE row_num_FEAT_CAT2_2M = 1\n ),\n\n tb_janela_FEAT_CAT1_3M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT1,\n COUNT(*) AS frequency_FEAT_CAT1\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT1\n ),\n\n tb_row_FEAT_CAT1_3M AS (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT1 DESC\n ) AS row_num_FEAT_CAT1_3M\n FROM tb_janela_FEAT_CAT1_3M\n ),\n \n tb_moda_FEAT_CAT1_3M AS (\n SELECT\n tb_row_FEAT_CAT1_3M.ID,\n tb_row_FEAT_CAT1_3M.SAFRA_REF,\n tb_row_FEAT_CAT1_3M.FEAT_CAT1 AS FEAT_CAT1_MODA_3M\n FROM tb_row_FEAT_CAT1_3M \n WHERE row_num_FEAT_CAT1_3M = 1\n ),\n\n tb_janela_FEAT_CAT2_3M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n tb_feat.FEAT_CAT2,\n COUNT(*) AS frequency_FEAT_CAT2\n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n GROUP BY tb_public.ID, tb_public.SAFRA_REF, tb_feat.FEAT_CAT2\n ),\n\n tb_row_FEAT_CAT2_3M AS (\n SELECT \n *, \n ROW_NUMBER() OVER (\n PARTITION BY \n ID,\n SAFRA_REF \n ORDER BY frequency_FEAT_CAT2 DESC\n ) AS row_num_FEAT_CAT2_3M\n FROM tb_janela_FEAT_CAT2_3M\n ),\n \n tb_moda_FEAT_CAT2_3M AS (\n SELECT\n 
tb_row_FEAT_CAT2_3M.ID,\n tb_row_FEAT_CAT2_3M.SAFRA_REF,\n tb_row_FEAT_CAT2_3M.FEAT_CAT2 AS FEAT_CAT2_MODA_3M\n FROM tb_row_FEAT_CAT2_3M \n WHERE row_num_FEAT_CAT2_3M = 1\n )\n\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n tb_moda_FEAT_CAT1_1M.FEAT_CAT1_MODA_1M,\n \n tb_moda_FEAT_CAT2_1M.FEAT_CAT2_MODA_1M,\n \n tb_moda_FEAT_CAT1_2M.FEAT_CAT1_MODA_2M,\n \n tb_moda_FEAT_CAT2_2M.FEAT_CAT2_MODA_2M,\n \n tb_moda_FEAT_CAT1_3M.FEAT_CAT1_MODA_3M,\n \n tb_moda_FEAT_CAT2_3M.FEAT_CAT2_MODA_3M\n FROM tb_public\n \n LEFT JOIN tb_moda_FEAT_CAT1_1M \n ON tb_moda_FEAT_CAT1_1M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT1_1M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT2_1M \n ON tb_moda_FEAT_CAT2_1M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT2_1M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT1_2M \n ON tb_moda_FEAT_CAT1_2M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT1_2M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT2_2M \n ON tb_moda_FEAT_CAT2_2M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT2_2M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT1_3M \n ON tb_moda_FEAT_CAT1_3M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT1_3M.SAFRA_REF = tb_public.SAFRA_REF\n\n LEFT JOIN tb_moda_FEAT_CAT2_3M \n ON tb_moda_FEAT_CAT2_3M.ID = tb_public.ID\n AND tb_moda_FEAT_CAT2_3M.SAFRA_REF = tb_public.SAFRA_REF\n\n \n\n### Cria\u00e7\u00e3o de vari\u00e1veis agregadas\n\n``` python\ntb_publico = 'tb_spine'\nlista_janela = [3, 6]\nlista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']\nfeat_cat = 'FEAT_CAT1'\nlista_valor_agragador = ['A', 'B']\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\ntb_feat = 'tb_feat'\nsafra = 'SAFRA'\n\nquery = mysql_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador)\n```\n\n``` python\nprint(query)\n```\n\n\n WITH\n tb_public as(\n SELECT\n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n \n tb_agrupada_FEAT_CAT1_A_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n 
SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_3M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT1 = 'A'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_A_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_A_6M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT1 = 'A'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_B_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n 
MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT1_B_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_6M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n tb_join AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n tb_agrupada_FEAT_CAT1_A_3M.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_agrupada_FEAT_CAT1_A_3M.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_agrupada_FEAT_CAT1_A_3M.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_agrupada_FEAT_CAT1_A_3M.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n tb_agrupada_FEAT_CAT1_A_3M.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n 
tb_agrupada_FEAT_CAT1_A_3M.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_agrupada_FEAT_CAT1_A_3M.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_agrupada_FEAT_CAT1_A_3M.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,\n\n tb_agrupada_FEAT_CAT1_A_6M.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_agrupada_FEAT_CAT1_A_6M.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_agrupada_FEAT_CAT1_A_6M.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_agrupada_FEAT_CAT1_A_6M.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n tb_agrupada_FEAT_CAT1_A_6M.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_agrupada_FEAT_CAT1_A_6M.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_agrupada_FEAT_CAT1_A_6M.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_agrupada_FEAT_CAT1_A_6M.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,\n\n tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,\n\n tb_agrupada_FEAT_CAT1_B_6M.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_agrupada_FEAT_CAT1_B_6M.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_agrupada_FEAT_CAT1_B_6M.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_agrupada_FEAT_CAT1_B_6M.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n tb_agrupada_FEAT_CAT1_B_6M.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_agrupada_FEAT_CAT1_B_6M.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_agrupada_FEAT_CAT1_B_6M.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_agrupada_FEAT_CAT1_B_6M.AVG_FEAT_NUM2_FEAT_CAT1_B_6M \n FROM tb_public \n \n LEFT JOIN tb_agrupada_FEAT_CAT1_A_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_A_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_A_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_A_6M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M\n ON tb_public.ID = 
tb_agrupada_FEAT_CAT1_B_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_6M.SAFRA_REF\n \n )\n\n SELECT \n tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_A_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_A_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_A_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_A_6M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT1_B_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT1_B_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT1_B_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT1_B_6M\n FROM tb_join\n \n\n``` python\ndf_mysql_agregada = pd.read_sql(query, connection)\ndf_mysql_agregada.head()\n```\n\n /tmp/ipykernel_4407/724425866.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. 
Please consider using SQLAlchemy.\n df_mysql_agregada = pd.read_sql(query, connection)\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT1_A_3M | MAX_FEAT_NUM1_FEAT_CAT1_A_3M | MIN_FEAT_NUM1_FEAT_CAT1_A_3M | AVG_FEAT_NUM1_FEAT_CAT1_A_3M | SUM_FEAT_NUM2_FEAT_CAT1_A_3M | MAX_FEAT_NUM2_FEAT_CAT1_A_3M | MIN_FEAT_NUM2_FEAT_CAT1_A_3M | AVG_FEAT_NUM2_FEAT_CAT1_A_3M | SUM_FEAT_NUM1_FEAT_CAT1_A_6M | MAX_FEAT_NUM1_FEAT_CAT1_A_6M | MIN_FEAT_NUM1_FEAT_CAT1_A_6M | AVG_FEAT_NUM1_FEAT_CAT1_A_6M | SUM_FEAT_NUM2_FEAT_CAT1_A_6M | MAX_FEAT_NUM2_FEAT_CAT1_A_6M | MIN_FEAT_NUM2_FEAT_CAT1_A_6M | AVG_FEAT_NUM2_FEAT_CAT1_A_6M | SUM_FEAT_NUM1_FEAT_CAT1_B_3M | MAX_FEAT_NUM1_FEAT_CAT1_B_3M | MIN_FEAT_NUM1_FEAT_CAT1_B_3M | AVG_FEAT_NUM1_FEAT_CAT1_B_3M | SUM_FEAT_NUM2_FEAT_CAT1_B_3M | MAX_FEAT_NUM2_FEAT_CAT1_B_3M | MIN_FEAT_NUM2_FEAT_CAT1_B_3M | AVG_FEAT_NUM2_FEAT_CAT1_B_3M | SUM_FEAT_NUM1_FEAT_CAT1_B_6M | MAX_FEAT_NUM1_FEAT_CAT1_B_6M | MIN_FEAT_NUM1_FEAT_CAT1_B_6M | AVG_FEAT_NUM1_FEAT_CAT1_B_6M | SUM_FEAT_NUM2_FEAT_CAT1_B_6M | MAX_FEAT_NUM2_FEAT_CAT1_B_6M | MIN_FEAT_NUM2_FEAT_CAT1_B_6M | AVG_FEAT_NUM2_FEAT_CAT1_B_6M 
|\n|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|\n| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 1 | 5 | 2023-02-01 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | 75.0 | 75.0 | 75.0 | 75.0 | 71.0 | 71.0 | 71.0 | 71.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 2 | 6 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 |\n| 4 | 10 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n\n</div>\n\n``` python\ntb_publico = 'tb_spine'\nlista_janela = [3, 6]\nlista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']\nfeat_cat = 'FEAT_CAT2'\nlista_valor_agragador = ['B', 'C']\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\ntb_feat = 'tb_feat'\nsafra = 'SAFRA'\n\nquery = mysql_create_query_agregada(tb_publico, tb_feat, lista_janela, lista_feat_num, id, safra_ref, safra, feat_cat, lista_valor_agragador)\n```\n\n``` python\nprint(query)\n```\n\n\n WITH\n tb_public as(\n SELECT\n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n \n tb_agrupada_FEAT_CAT2_B_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_B_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_B_3M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT2 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT2_B_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_B_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
SUM_FEAT_NUM2_FEAT_CAT2_B_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_B_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_B_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_B_6M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT2 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT2_C_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_C_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_C_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_C_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_C_3M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_C_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_C_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT2_C_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_C_3M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 3 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT2 = 'C'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n \n tb_agrupada_FEAT_CAT2_C_6M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT2_C_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT2_C_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT2_C_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT2_C_6M,\n\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT2_C_6M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT2_C_6M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
MIN_FEAT_NUM2_FEAT_CAT2_C_6M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT2_C_6M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND DATE_ADD(tb_feat.SAFRA, INTERVAL 6 MONTH) >= tb_public.SAFRA_REF\n AND tb_feat.SAFRA < tb_public.SAFRA_REF\n AND tb_feat.FEAT_CAT2 = 'C'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF \n ),\n\n \n tb_join AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n tb_agrupada_FEAT_CAT2_B_3M.SUM_FEAT_NUM1_FEAT_CAT2_B_3M,\n tb_agrupada_FEAT_CAT2_B_3M.MAX_FEAT_NUM1_FEAT_CAT2_B_3M,\n tb_agrupada_FEAT_CAT2_B_3M.MIN_FEAT_NUM1_FEAT_CAT2_B_3M,\n tb_agrupada_FEAT_CAT2_B_3M.AVG_FEAT_NUM1_FEAT_CAT2_B_3M,\n\n tb_agrupada_FEAT_CAT2_B_3M.SUM_FEAT_NUM2_FEAT_CAT2_B_3M,\n tb_agrupada_FEAT_CAT2_B_3M.MAX_FEAT_NUM2_FEAT_CAT2_B_3M,\n tb_agrupada_FEAT_CAT2_B_3M.MIN_FEAT_NUM2_FEAT_CAT2_B_3M,\n tb_agrupada_FEAT_CAT2_B_3M.AVG_FEAT_NUM2_FEAT_CAT2_B_3M,\n\n tb_agrupada_FEAT_CAT2_B_6M.SUM_FEAT_NUM1_FEAT_CAT2_B_6M,\n tb_agrupada_FEAT_CAT2_B_6M.MAX_FEAT_NUM1_FEAT_CAT2_B_6M,\n tb_agrupada_FEAT_CAT2_B_6M.MIN_FEAT_NUM1_FEAT_CAT2_B_6M,\n tb_agrupada_FEAT_CAT2_B_6M.AVG_FEAT_NUM1_FEAT_CAT2_B_6M,\n\n tb_agrupada_FEAT_CAT2_B_6M.SUM_FEAT_NUM2_FEAT_CAT2_B_6M,\n tb_agrupada_FEAT_CAT2_B_6M.MAX_FEAT_NUM2_FEAT_CAT2_B_6M,\n tb_agrupada_FEAT_CAT2_B_6M.MIN_FEAT_NUM2_FEAT_CAT2_B_6M,\n tb_agrupada_FEAT_CAT2_B_6M.AVG_FEAT_NUM2_FEAT_CAT2_B_6M,\n\n tb_agrupada_FEAT_CAT2_C_3M.SUM_FEAT_NUM1_FEAT_CAT2_C_3M,\n tb_agrupada_FEAT_CAT2_C_3M.MAX_FEAT_NUM1_FEAT_CAT2_C_3M,\n tb_agrupada_FEAT_CAT2_C_3M.MIN_FEAT_NUM1_FEAT_CAT2_C_3M,\n tb_agrupada_FEAT_CAT2_C_3M.AVG_FEAT_NUM1_FEAT_CAT2_C_3M,\n\n tb_agrupada_FEAT_CAT2_C_3M.SUM_FEAT_NUM2_FEAT_CAT2_C_3M,\n tb_agrupada_FEAT_CAT2_C_3M.MAX_FEAT_NUM2_FEAT_CAT2_C_3M,\n tb_agrupada_FEAT_CAT2_C_3M.MIN_FEAT_NUM2_FEAT_CAT2_C_3M,\n tb_agrupada_FEAT_CAT2_C_3M.AVG_FEAT_NUM2_FEAT_CAT2_C_3M,\n\n tb_agrupada_FEAT_CAT2_C_6M.SUM_FEAT_NUM1_FEAT_CAT2_C_6M,\n tb_agrupada_FEAT_CAT2_C_6M.MAX_FEAT_NUM1_FEAT_CAT2_C_6M,\n 
tb_agrupada_FEAT_CAT2_C_6M.MIN_FEAT_NUM1_FEAT_CAT2_C_6M,\n tb_agrupada_FEAT_CAT2_C_6M.AVG_FEAT_NUM1_FEAT_CAT2_C_6M,\n\n tb_agrupada_FEAT_CAT2_C_6M.SUM_FEAT_NUM2_FEAT_CAT2_C_6M,\n tb_agrupada_FEAT_CAT2_C_6M.MAX_FEAT_NUM2_FEAT_CAT2_C_6M,\n tb_agrupada_FEAT_CAT2_C_6M.MIN_FEAT_NUM2_FEAT_CAT2_C_6M,\n tb_agrupada_FEAT_CAT2_C_6M.AVG_FEAT_NUM2_FEAT_CAT2_C_6M \n FROM tb_public \n \n LEFT JOIN tb_agrupada_FEAT_CAT2_B_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT2_B_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_B_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT2_B_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT2_B_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_B_6M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT2_C_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT2_C_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_C_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT2_C_6M\n ON tb_public.ID = tb_agrupada_FEAT_CAT2_C_6M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT2_C_6M.SAFRA_REF\n \n )\n\n SELECT \n tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.SUM_FEAT_NUM1_FEAT_CAT2_B_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT2_B_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT2_B_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT2_B_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT2_B_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT2_B_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT2_B_3M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT2_B_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT2_B_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT2_B_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT2_B_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT2_B_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT2_B_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT2_B_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT2_B_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT2_B_6M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT2_C_3M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT2_C_3M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT2_C_3M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT2_C_3M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT2_C_3M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT2_C_3M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT2_C_3M,\n 
tb_join.AVG_FEAT_NUM2_FEAT_CAT2_C_3M,\n\n tb_join.SUM_FEAT_NUM1_FEAT_CAT2_C_6M,\n tb_join.MAX_FEAT_NUM1_FEAT_CAT2_C_6M,\n tb_join.MIN_FEAT_NUM1_FEAT_CAT2_C_6M,\n tb_join.AVG_FEAT_NUM1_FEAT_CAT2_C_6M,\n\n tb_join.SUM_FEAT_NUM2_FEAT_CAT2_C_6M,\n tb_join.MAX_FEAT_NUM2_FEAT_CAT2_C_6M,\n tb_join.MIN_FEAT_NUM2_FEAT_CAT2_C_6M,\n tb_join.AVG_FEAT_NUM2_FEAT_CAT2_C_6M\n FROM tb_join\n \n\n``` python\ndf_mysql_agregada = pd.read_sql(query, connection)\ndf_mysql_agregada.head()\n```\n\n /tmp/ipykernel_4407/724425866.py:3: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n df_mysql_agregada = pd.read_sql(query, connection)\n\n<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n .dataframe tbody tr th {\n vertical-align: top;\n }\n .dataframe thead th {\n text-align: right;\n }\n</style>\n\n| | ID | SAFRA_REF | SUM_FEAT_NUM1_FEAT_CAT2_B_3M | MAX_FEAT_NUM1_FEAT_CAT2_B_3M | MIN_FEAT_NUM1_FEAT_CAT2_B_3M | AVG_FEAT_NUM1_FEAT_CAT2_B_3M | SUM_FEAT_NUM2_FEAT_CAT2_B_3M | MAX_FEAT_NUM2_FEAT_CAT2_B_3M | MIN_FEAT_NUM2_FEAT_CAT2_B_3M | AVG_FEAT_NUM2_FEAT_CAT2_B_3M | SUM_FEAT_NUM1_FEAT_CAT2_B_6M | MAX_FEAT_NUM1_FEAT_CAT2_B_6M | MIN_FEAT_NUM1_FEAT_CAT2_B_6M | AVG_FEAT_NUM1_FEAT_CAT2_B_6M | SUM_FEAT_NUM2_FEAT_CAT2_B_6M | MAX_FEAT_NUM2_FEAT_CAT2_B_6M | MIN_FEAT_NUM2_FEAT_CAT2_B_6M | AVG_FEAT_NUM2_FEAT_CAT2_B_6M | SUM_FEAT_NUM1_FEAT_CAT2_C_3M | MAX_FEAT_NUM1_FEAT_CAT2_C_3M | MIN_FEAT_NUM1_FEAT_CAT2_C_3M | AVG_FEAT_NUM1_FEAT_CAT2_C_3M | SUM_FEAT_NUM2_FEAT_CAT2_C_3M | MAX_FEAT_NUM2_FEAT_CAT2_C_3M | MIN_FEAT_NUM2_FEAT_CAT2_C_3M | AVG_FEAT_NUM2_FEAT_CAT2_C_3M | SUM_FEAT_NUM1_FEAT_CAT2_C_6M | MAX_FEAT_NUM1_FEAT_CAT2_C_6M | MIN_FEAT_NUM1_FEAT_CAT2_C_6M | AVG_FEAT_NUM1_FEAT_CAT2_C_6M | SUM_FEAT_NUM2_FEAT_CAT2_C_6M | MAX_FEAT_NUM2_FEAT_CAT2_C_6M | MIN_FEAT_NUM2_FEAT_CAT2_C_6M | AVG_FEAT_NUM2_FEAT_CAT2_C_6M 
|\n|-----|-----|------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|\n| 0 | 4 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 1 | 5 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 2 | 6 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n| 3 | 7 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 | 73.0 | 73.0 | 73.0 | 73.0 | 82.0 | 82.0 | 82.0 | 82.0 |\n| 4 | 10 | 2023-02-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |\n\n</div>\n\n## 3. Snowflake\n\n### Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas\n\nA fun\u00e7\u00e3o snow_create_query_num() cria um texto com a query para a\ncria\u00e7\u00e3o de vari\u00e1veis com as opera\u00e7\u00f5es soma, m\u00ednimo, m\u00e1ximo, m\u00e9dia e\nmediana das vari\u00e1veis listadas em feat_num_lista e com a janela de tempo\nlistada em lista_janela.\n\n``` python\ntb_publico = 'tb_spine'\ntb_feat = 'tb_feat'\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\nsafra = 'SAFRA'\nfeat_num_lista = ['FEAT_NUM1','FEAT_NUM2']\nlista_janela = [1,2,3]\nquery_final_num_snow = snow_create_query_num(tb_publico, tb_feat, lista_janela,feat_num_lista, id, safra_ref, safra)\n```\n\n``` python\nprint(query_final_num_snow)\n```\n\n\n WITH \n tb_public AS (\n SELECT \n *\n FROM tb_spine\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 1M\n tb_janela_1M AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_1M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MEDIAN_1M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_1M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MEDIAN_1M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n 
AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 2M\n tb_janela_2M AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_2M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MEDIAN_2M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_SUM_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_2M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MEDIAN_2M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis de janela de 3M\n tb_janela_3M AS (\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM1 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_SUM_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MIN_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MAX_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_AVG_3M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS FEAT_NUM1_MEDIAN_3M,\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis num\u00e9ricas a partir da coluna FEAT_NUM2 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
FEAT_NUM2_SUM_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MIN_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MAX_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_AVG_3M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS FEAT_NUM2_MEDIAN_3M\n FROM tb_public\n INNER JOIN tb_feat \n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n \n\n tb_join AS (\n SELECT \n tb_public.*,\n \n tb_janela_1M.FEAT_NUM1_SUM_1M,\n tb_janela_1M.FEAT_NUM1_MIN_1M,\n tb_janela_1M.FEAT_NUM1_MAX_1M,\n tb_janela_1M.FEAT_NUM1_AVG_1M,\n tb_janela_1M.FEAT_NUM1_MEDIAN_1M,\n \n tb_janela_1M.FEAT_NUM2_SUM_1M,\n tb_janela_1M.FEAT_NUM2_MIN_1M,\n tb_janela_1M.FEAT_NUM2_MAX_1M,\n tb_janela_1M.FEAT_NUM2_AVG_1M,\n tb_janela_1M.FEAT_NUM2_MEDIAN_1M,\n \n tb_janela_2M.FEAT_NUM1_SUM_2M,\n tb_janela_2M.FEAT_NUM1_MIN_2M,\n tb_janela_2M.FEAT_NUM1_MAX_2M,\n tb_janela_2M.FEAT_NUM1_AVG_2M,\n tb_janela_2M.FEAT_NUM1_MEDIAN_2M,\n \n tb_janela_2M.FEAT_NUM2_SUM_2M,\n tb_janela_2M.FEAT_NUM2_MIN_2M,\n tb_janela_2M.FEAT_NUM2_MAX_2M,\n tb_janela_2M.FEAT_NUM2_AVG_2M,\n tb_janela_2M.FEAT_NUM2_MEDIAN_2M,\n \n tb_janela_3M.FEAT_NUM1_SUM_3M,\n tb_janela_3M.FEAT_NUM1_MIN_3M,\n tb_janela_3M.FEAT_NUM1_MAX_3M,\n tb_janela_3M.FEAT_NUM1_AVG_3M,\n tb_janela_3M.FEAT_NUM1_MEDIAN_3M,\n \n tb_janela_3M.FEAT_NUM2_SUM_3M,\n tb_janela_3M.FEAT_NUM2_MIN_3M,\n tb_janela_3M.FEAT_NUM2_MAX_3M,\n tb_janela_3M.FEAT_NUM2_AVG_3M,\n tb_janela_3M.FEAT_NUM2_MEDIAN_3M\n\n FROM tb_public \n \n LEFT JOIN tb_janela_1M\n ON tb_public.ID = tb_janela_1M.ID\n AND tb_public.SAFRA_REF = tb_janela_1M.SAFRA_REF\n \n LEFT JOIN tb_janela_2M\n ON tb_public.ID = tb_janela_2M.ID\n AND tb_public.SAFRA_REF = tb_janela_2M.SAFRA_REF\n \n LEFT JOIN tb_janela_3M\n ON tb_public.ID = tb_janela_3M.ID\n AND tb_public.SAFRA_REF = tb_janela_3M.SAFRA_REF\n \n )\n \n SELECT \n 
tb_join.ID,\n tb_join.SAFRA_REF,\n \n tb_join.FEAT_NUM1_SUM_1M,\n tb_join.FEAT_NUM1_MIN_1M,\n tb_join.FEAT_NUM1_MAX_1M,\n tb_join.FEAT_NUM1_AVG_1M,\n tb_join.FEAT_NUM1_MEDIAN_1M,\n \n tb_join.FEAT_NUM2_SUM_1M,\n tb_join.FEAT_NUM2_MIN_1M,\n tb_join.FEAT_NUM2_MAX_1M,\n tb_join.FEAT_NUM2_AVG_1M,\n tb_join.FEAT_NUM2_MEDIAN_1M,\n \n tb_join.FEAT_NUM1_SUM_2M,\n tb_join.FEAT_NUM1_MIN_2M,\n tb_join.FEAT_NUM1_MAX_2M,\n tb_join.FEAT_NUM1_AVG_2M,\n tb_join.FEAT_NUM1_MEDIAN_2M,\n \n tb_join.FEAT_NUM2_SUM_2M,\n tb_join.FEAT_NUM2_MIN_2M,\n tb_join.FEAT_NUM2_MAX_2M,\n tb_join.FEAT_NUM2_AVG_2M,\n tb_join.FEAT_NUM2_MEDIAN_2M,\n \n tb_join.FEAT_NUM1_SUM_3M,\n tb_join.FEAT_NUM1_MIN_3M,\n tb_join.FEAT_NUM1_MAX_3M,\n tb_join.FEAT_NUM1_AVG_3M,\n tb_join.FEAT_NUM1_MEDIAN_3M,\n \n tb_join.FEAT_NUM2_SUM_3M,\n tb_join.FEAT_NUM2_MIN_3M,\n tb_join.FEAT_NUM2_MAX_3M,\n tb_join.FEAT_NUM2_AVG_3M,\n tb_join.FEAT_NUM2_MEDIAN_3M\n FROM tb_join\n \n\n### Cria\u00e7\u00e3o de vari\u00e1veis categ\u00f3ricas\n\nA fun\u00e7\u00e3o snow_create_query_cat() cria um texto com a query para a cria\u00e7\u00e3o\nde vari\u00e1veis com a moda de cada uma das vari\u00e1veis listadas em\nfeat_num_lista na janela de tempo fornecida em lista_janela.\n\n``` python\ntb_publico = 'tb_spine'\ntb_feat = 'tb_feat'\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\nsafra = 'SAFRA'\nfeat_num_lista = ['FEAT_CAT1','FEAT_CAT2']\nlista_janela = [1,2,3]\nquery_final_cat_snow = snow_create_query_cat(tb_publico, tb_feat, lista_janela, feat_num_lista, id, safra_ref, safra)\n```\n\n``` python\nprint(query_final_cat_snow)\n```\n\n\n WITH \n tb_public AS (\n SELECT \n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n \n tb_janela_1M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n \n MODE(FEAT_CAT1) AS MODE_FEAT_CAT1_1M,\n MODE(FEAT_CAT2) AS MODE_FEAT_CAT2_1M \n \n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 
'YYYY-MM-DD') < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n tb_janela_2M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n \n MODE(FEAT_CAT1) AS MODE_FEAT_CAT1_2M,\n MODE(FEAT_CAT2) AS MODE_FEAT_CAT2_2M \n \n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n tb_janela_3M AS (\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n \n MODE(FEAT_CAT1) AS MODE_FEAT_CAT1_3M,\n MODE(FEAT_CAT2) AS MODE_FEAT_CAT2_3M \n \n FROM tb_public\n LEFT JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n )\n\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n tb_janela_1M.MODE_FEAT_CAT1_1M,\n tb_janela_1M.MODE_FEAT_CAT2_1M,\n tb_janela_2M.MODE_FEAT_CAT1_2M,\n tb_janela_2M.MODE_FEAT_CAT2_2M,\n tb_janela_3M.MODE_FEAT_CAT1_3M,\n tb_janela_3M.MODE_FEAT_CAT2_3M\n \n FROM tb_public\n \n LEFT JOIN tb_janela_1M\n ON tb_public.ID = JOIN tb_janela_1M.ID\n AND tb_public.SAFRA_REF = JOIN tb_janela_1M.SAFRA_REF\n\n LEFT JOIN tb_janela_2M\n ON tb_public.ID = JOIN tb_janela_2M.ID\n AND tb_public.SAFRA_REF = JOIN tb_janela_2M.SAFRA_REF\n\n LEFT JOIN tb_janela_3M\n ON tb_public.ID = JOIN tb_janela_3M.ID\n AND tb_public.SAFRA_REF = JOIN tb_janela_3M.SAFRA_REF\n\n \n\n### Cria\u00e7\u00e3o de vari\u00e1veis agregadas\n\n``` python\nlista_feat_num = ['FEAT_NUM1', 'FEAT_NUM2']\nfeat_cat = 'FEAT_CAT1'\nlista_valor_agragador = ['B', 'C']\nid = 'ID'\nsafra_ref = 'SAFRA_REF'\ntb_feat = 'tb_feat'\nsafra = 'SAFRA'\njanelas = [1, 2, 3]\ntb_publico = 'tb_spine'\n\n\nprint(snow_create_query_agregada(tb_publico, tb_feat, janelas, lista_feat_num, id, safra_ref, 
safra, feat_cat, lista_valor_agragador))\n```\n\n\n WITH\n tb_public as(\n SELECT\n ID,\n SAFRA_REF\n FROM tb_spine\n ),\n\n \n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas com janela de 1M\n tb_agrupada_FEAT_CAT1_B_1M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_1M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_B_1M,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_1M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_B_1M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n \n \n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas com janela de 2M\n tb_agrupada_FEAT_CAT1_B_2M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_2M,\n 
AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_2M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_B_2M,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_2M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_B_2M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n \n \n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas com janela de 3M\n tb_agrupada_FEAT_CAT1_B_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_B_3M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
MEDIAN_FEAT_NUM2_FEAT_CAT1_B_3M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'B'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n \n \n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas com janela de 1M\n tb_agrupada_FEAT_CAT1_C_1M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_C_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_C_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_C_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_C_1M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_C_1M,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 1\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_C_1M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_C_1M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_C_1M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_C_1M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_C_1M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 1 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'C'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n \n \n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas com janela de 2M\n tb_agrupada_FEAT_CAT1_C_2M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 
2\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_C_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_C_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_C_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_C_2M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_C_2M,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 2\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_C_2M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS MAX_FEAT_NUM2_FEAT_CAT1_C_2M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_C_2M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_C_2M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_C_2M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 2 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'C'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n ),\n\n \n \n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas com janela de 3M\n tb_agrupada_FEAT_CAT1_C_3M as(\n SELECT\n tb_public.ID,\n tb_public.SAFRA_REF,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM1 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM1,0)) AS SUM_FEAT_NUM1_FEAT_CAT1_C_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM1,0)) AS MAX_FEAT_NUM1_FEAT_CAT1_C_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MIN_FEAT_NUM1_FEAT_CAT1_C_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM1,0)) AS AVG_FEAT_NUM1_FEAT_CAT1_C_3M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM1,0)) AS MEDIAN_FEAT_NUM1_FEAT_CAT1_C_3M,\n\n -- Cria\u00e7\u00e3o de vari\u00e1veis agrupadas a partir da coluna FEAT_CAT1 e FEAT_NUM2 para a janela 3\n SUM(COALESCE(tb_feat.FEAT_NUM2,0)) AS SUM_FEAT_NUM2_FEAT_CAT1_C_3M,\n MAX(COALESCE(tb_feat.FEAT_NUM2,0)) AS 
MAX_FEAT_NUM2_FEAT_CAT1_C_3M,\n MIN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MIN_FEAT_NUM2_FEAT_CAT1_C_3M,\n AVG(COALESCE(tb_feat.FEAT_NUM2,0)) AS AVG_FEAT_NUM2_FEAT_CAT1_C_3M,\n MEDIAN(COALESCE(tb_feat.FEAT_NUM2,0)) AS MEDIAN_FEAT_NUM2_FEAT_CAT1_C_3M\n FROM tb_public\n INNER JOIN tb_feat\n ON tb_public.ID = tb_feat.ID\n AND (DATEADD('month', 3 , TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD')) >= tb_public.SAFRA_REF) \n AND (TO_DATE(tb_feat.SAFRA, 'YYYY-MM-DD') < tb_public.SAFRA_REF)\n AND tb_feat.FEAT_CAT1 = 'C'\n GROUP BY tb_public.ID, tb_public.SAFRA_REF\n )\n\n SELECT \n tb_public.ID,\n tb_public.SAFRA_REF,\n \n \n \n tb_agrupada_FEAT_CAT1_B_1M.SUM_FEAT_NUM1_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.MAX_FEAT_NUM1_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.MIN_FEAT_NUM1_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.AVG_FEAT_NUM1_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.MEDIAN_FEAT_NUM1_FEAT_CAT1_B_1M,\n\n tb_agrupada_FEAT_CAT1_B_1M.SUM_FEAT_NUM2_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.MAX_FEAT_NUM2_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.MIN_FEAT_NUM2_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.AVG_FEAT_NUM2_FEAT_CAT1_B_1M,\n tb_agrupada_FEAT_CAT1_B_1M.MEDIAN_FEAT_NUM2_FEAT_CAT1_B_1M,\n\n \n \n tb_agrupada_FEAT_CAT1_B_2M.SUM_FEAT_NUM1_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.MAX_FEAT_NUM1_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.MIN_FEAT_NUM1_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.AVG_FEAT_NUM1_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.MEDIAN_FEAT_NUM1_FEAT_CAT1_B_2M,\n\n tb_agrupada_FEAT_CAT1_B_2M.SUM_FEAT_NUM2_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.MAX_FEAT_NUM2_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.MIN_FEAT_NUM2_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.AVG_FEAT_NUM2_FEAT_CAT1_B_2M,\n tb_agrupada_FEAT_CAT1_B_2M.MEDIAN_FEAT_NUM2_FEAT_CAT1_B_2M,\n\n \n \n tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM1_FEAT_CAT1_B_3M,\n 
tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM1_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MEDIAN_FEAT_NUM1_FEAT_CAT1_B_3M,\n\n tb_agrupada_FEAT_CAT1_B_3M.SUM_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MAX_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MIN_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.AVG_FEAT_NUM2_FEAT_CAT1_B_3M,\n tb_agrupada_FEAT_CAT1_B_3M.MEDIAN_FEAT_NUM2_FEAT_CAT1_B_3M,\n\n \n \n tb_agrupada_FEAT_CAT1_C_1M.SUM_FEAT_NUM1_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.MAX_FEAT_NUM1_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.MIN_FEAT_NUM1_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.AVG_FEAT_NUM1_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.MEDIAN_FEAT_NUM1_FEAT_CAT1_C_1M,\n\n tb_agrupada_FEAT_CAT1_C_1M.SUM_FEAT_NUM2_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.MAX_FEAT_NUM2_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.MIN_FEAT_NUM2_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.AVG_FEAT_NUM2_FEAT_CAT1_C_1M,\n tb_agrupada_FEAT_CAT1_C_1M.MEDIAN_FEAT_NUM2_FEAT_CAT1_C_1M,\n\n \n \n tb_agrupada_FEAT_CAT1_C_2M.SUM_FEAT_NUM1_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.MAX_FEAT_NUM1_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.MIN_FEAT_NUM1_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.AVG_FEAT_NUM1_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.MEDIAN_FEAT_NUM1_FEAT_CAT1_C_2M,\n\n tb_agrupada_FEAT_CAT1_C_2M.SUM_FEAT_NUM2_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.MAX_FEAT_NUM2_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.MIN_FEAT_NUM2_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.AVG_FEAT_NUM2_FEAT_CAT1_C_2M,\n tb_agrupada_FEAT_CAT1_C_2M.MEDIAN_FEAT_NUM2_FEAT_CAT1_C_2M,\n\n \n \n tb_agrupada_FEAT_CAT1_C_3M.SUM_FEAT_NUM1_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.MAX_FEAT_NUM1_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.MIN_FEAT_NUM1_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.AVG_FEAT_NUM1_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.MEDIAN_FEAT_NUM1_FEAT_CAT1_C_3M,\n\n 
tb_agrupada_FEAT_CAT1_C_3M.SUM_FEAT_NUM2_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.MAX_FEAT_NUM2_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.MIN_FEAT_NUM2_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.AVG_FEAT_NUM2_FEAT_CAT1_C_3M,\n tb_agrupada_FEAT_CAT1_C_3M.MEDIAN_FEAT_NUM2_FEAT_CAT1_C_3M\n FROM tb_public\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_1M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_1M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_1M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_2M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_2M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_2M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_B_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_B_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_B_3M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_C_1M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_C_1M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_C_1M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_C_2M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_C_2M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_C_2M.SAFRA_REF\n \n LEFT JOIN tb_agrupada_FEAT_CAT1_C_3M\n ON tb_public.ID = tb_agrupada_FEAT_CAT1_C_3M.ID\n AND tb_public.SAFRA_REF = tb_agrupada_FEAT_CAT1_C_3M.SAFRA_REF\n \n\n \n\n\n",
"bugtrack_url": null,
"license": "Apache Software License 2.0",
"summary": "Create features with sql",
"version": "0.0.1",
"project_urls": {
"Homepage": "https://github.com/ravennaro/featsql"
},
"split_keywords": [
"nbdev",
"jupyter",
"notebook",
"python"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "f8d727ec25c6d55be39f201f384a6e398e0e91d088cb4fe9074f5e1de975afb6",
"md5": "ceffe28dae7336ce92a7df4ae1351744",
"sha256": "d93e59226edb0d48a9654937a9db5ff96f45c9f773c47aa61cde6e463667c1af"
},
"downloads": -1,
"filename": "featsql-0.0.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "ceffe28dae7336ce92a7df4ae1351744",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.7",
"size": 22207,
"upload_time": "2024-02-15T20:29:58",
"upload_time_iso_8601": "2024-02-15T20:29:58.118710Z",
"url": "https://files.pythonhosted.org/packages/f8/d7/27ec25c6d55be39f201f384a6e398e0e91d088cb4fe9074f5e1de975afb6/featsql-0.0.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "d102d0c23ad70a008acfbd1fa5a697889ea53325c1c7cbfa29167dd5886db398",
"md5": "a0d2fbeabd118fe1685934882353f21e",
"sha256": "fdd595772ef0ac1c786c188929d8b4c736f20b83c38d031faa6472bfd077e581"
},
"downloads": -1,
"filename": "featsql-0.0.1.tar.gz",
"has_sig": false,
"md5_digest": "a0d2fbeabd118fe1685934882353f21e",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.7",
"size": 39411,
"upload_time": "2024-02-15T20:30:01",
"upload_time_iso_8601": "2024-02-15T20:30:01.977313Z",
"url": "https://files.pythonhosted.org/packages/d1/02/d0c23ad70a008acfbd1fa5a697889ea53325c1c7cbfa29167dd5886db398/featsql-0.0.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-02-15 20:30:01",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "ravennaro",
"github_project": "featsql",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "featsql"
}