Instalação de pacotes e extração de dados

Criando tabela casos

Filtrando somente casos de covid

# Keep only the rows whose final classification (CLASSI_FIN) is 5, the code
# this report treats as covid.
casos_fil_covid <- casos_fil %>%
  filter(CLASSI_FIN == 5)

Criando a coluna menor data e a coluna epiweek(SEM_DTMIN)

Convertendo a data de nascimento para o formato de data do R e analisando a coluna NU_IDADE_N. Podemos ver que alguns registros possuem idades não condizentes com a realidade, menores ou iguais a 0 ou muito acima da pessoa mais velha registrada no Brasil; portanto, mantenho apenas idades maiores que 0 e menores que 100 anos.

# Parse the birth date from day/month/year text into a Date column.
casos_fil_covid[["DT_NASC"]] <- as_date(
  casos_fil_covid[["DT_NASC"]],
  format = "%d/%m/%Y"
)

# How many records report an age of zero or less?
casos_fil_covid %>%
  count(NU_IDADE_N <= 0)
## # A tibble: 2 x 2
##   `NU_IDADE_N <= 0`       n
##   <lgl>               <int>
## 1 FALSE             2134830
## 2 TRUE                 1175
# How many records report an age of 100 or more?
casos_fil_covid %>%
  count(NU_IDADE_N >= 100)
## # A tibble: 2 x 2
##   `NU_IDADE_N >= 100`       n
##   <lgl>                 <int>
## 1 FALSE               2132815
## 2 TRUE                   3190
# Drop implausible ages: keep only rows with 0 < NU_IDADE_N < 100
# (rows with a missing age are dropped as well, as filter() discards NA).
# NOTE(review): presumably NU_IDADE_N is expressed in years here -- confirm
# that no age-unit column (e.g. TP_IDADE) applies to these records.
casos_fil_covid <- casos_fil_covid %>%
  filter(NU_IDADE_N > 0, NU_IDADE_N < 100)

Summary do df

# Per-column overview (ranges, quartiles and NA counts) of the filtered data.
casos_fil_covid %>%
  summary()
##    DT_NOTIFIC            SEM_NOT       SG_UF_NOT          ID_MUNICIP       
##  Min.   :2020-02-21   Min.   : 1.00   Length:2131640     Length:2131640    
##  1st Qu.:2020-11-11   1st Qu.:12.00   Class :character   Class :character  
##  Median :2021-03-24   Median :21.00   Mode  :character   Mode  :character  
##  Mean   :2021-03-17   Mean   :22.53                                        
##  3rd Qu.:2021-06-15   3rd Qu.:30.00                                        
##  Max.   :2022-12-09   Max.   :53.00                                        
##                                                                            
##    CO_MUN_NOT       CS_SEXO             DT_NASC             CS_GESTANT   
##  Min.   :110001   Length:2131640     Min.   :1920-04-05   Min.   :0.000  
##  1st Qu.:310620   Class :character   1st Qu.:1948-09-13   1st Qu.:5.000  
##  Median :351880   Mode  :character   Median :1961-10-13   Median :6.000  
##  Mean   :344123                      Mean   :1962-10-22   Mean   :5.779  
##  3rd Qu.:410690                      3rd Qu.:1975-09-07   3rd Qu.:6.000  
##  Max.   :530010                      Max.   :2022-11-22   Max.   :9.000  
##                                      NA's   :1839                        
##     CS_RACA        CS_ESCOL_N      PAC_COCBO            CS_ZONA      
##  Min.   :1.00    Min.   :0.0      Length:2131640     Min.   :1.00    
##  1st Qu.:1.00    1st Qu.:2.0      Class :character   1st Qu.:1.00    
##  Median :4.00    Median :4.0      Mode  :character   Median :1.00    
##  Mean   :3.49    Mean   :5.4                         Mean   :1.15    
##  3rd Qu.:4.00    3rd Qu.:9.0                         3rd Qu.:1.00    
##  Max.   :9.00    Max.   :9.0                         Max.   :9.00    
##  NA's   :28517   NA's   :715644                      NA's   :232617  
##    VACINA_COV         VACINA        DT_UT_DOSE           HOSPITAL    
##  Min.   :1.0      Min.   :1.0      Length:2131640     Min.   :1.00   
##  1st Qu.:2.0      1st Qu.:2.0      Class :character   1st Qu.:1.00   
##  Median :2.0      Median :2.0      Mode  :character   Median :1.00   
##  Mean   :2.6      Mean   :5.1                         Mean   :1.04   
##  3rd Qu.:2.0      3rd Qu.:9.0                         3rd Qu.:1.00   
##  Max.   :9.0      Max.   :9.0                         Max.   :9.00   
##  NA's   :325989   NA's   :526559                      NA's   :45140  
##    DT_INTERNA              UTI           DT_ENTUTI         
##  Min.   :2020-01-05   Min.   :1.00     Min.   :2020-01-05  
##  1st Qu.:2020-11-06   1st Qu.:1.00     1st Qu.:2020-11-09  
##  Median :2021-03-21   Median :2.00     Median :2021-03-23  
##  Mean   :2021-03-23   Mean   :1.79     Mean   :2021-03-17  
##  3rd Qu.:2021-06-11   3rd Qu.:2.00     3rd Qu.:2021-06-14  
##  Max.   :9202-09-11   Max.   :9.00     Max.   :4202-05-26  
##                       NA's   :253860                       
##    DT_SAIDUTI           CLASSI_FIN    EVOLUCAO       DT_EVOLUCA        
##  Min.   :2020-02-21   Min.   :5    Min.   :1.0     Min.   :2020-02-21  
##  1st Qu.:2020-11-12   1st Qu.:5    1st Qu.:1.0     1st Qu.:2020-11-17  
##  Median :2021-03-26   Median :5    Median :1.0     Median :2021-03-31  
##  Mean   :2021-03-18   Mean   :5    Mean   :1.5     Mean   :2021-03-23  
##  3rd Qu.:2021-06-16   3rd Qu.:5    3rd Qu.:2.0     3rd Qu.:2021-06-21  
##  Max.   :2121-03-13   Max.   :5    Max.   :9.0     Max.   :2022-12-04  
##                                    NA's   :99271                       
##    NU_IDADE_N      NOSOCOMIAL       AVE_SUINO          FEBRE       
##  Min.   : 1.00   Min.   :1.0      Min.   :1.0      Min.   :1.0     
##  1st Qu.:45.00   1st Qu.:2.0      1st Qu.:2.0      1st Qu.:1.0     
##  Median :59.00   Median :2.0      Median :2.0      Median :1.0     
##  Mean   :57.91   Mean   :2.6      Mean   :3.3      Mean   :1.4     
##  3rd Qu.:72.00   3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :99.00   Max.   :9.0      Max.   :9.0      Max.   :9.0     
##                  NA's   :374777   NA's   :376972   NA's   :340351  
##      TOSSE           GARGANTA         DISPNEIA        DESC_RESP     
##  Min.   :1.0      Min.   :1        Min.   :1.00     Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:1.00     1st Qu.:1.0     
##  Median :1.0      Median :2        Median :1.00     Median :1.0     
##  Mean   :1.3      Mean   :2        Mean   :1.28     Mean   :1.4     
##  3rd Qu.:1.0      3rd Qu.:2        3rd Qu.:1.00     3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.00     Max.   :9.0     
##  NA's   :268697   NA's   :615677   NA's   :265435   NA's   :409211  
##    SATURACAO         DIARREIA          VOMITO          DOR_ABD      
##  Min.   :1.0      Min.   :1        Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:2.0      1st Qu.:2.0     
##  Median :1.0      Median :2        Median :2.0      Median :2.0     
##  Mean   :1.4      Mean   :2        Mean   :2.1      Mean   :2.2     
##  3rd Qu.:2.0      3rd Qu.:2        3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.0      Max.   :9.0     
##  NA's   :341842   NA's   :643986   NA's   :671957   NA's   :873571  
##      FADIGA         PERD_OLFT        PERD_PALA        OUTRO_SIN     
##  Min.   :1.0      Min.   :1.0      Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2.0      1st Qu.:2.0      1st Qu.:1.0     
##  Median :2.0      Median :2.0      Median :2.0      Median :2.0     
##  Mean   :1.9      Mean   :2.1      Mean   :2.1      Mean   :1.8     
##  3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9.0      Max.   :9.0      Max.   :9.0     
##  NA's   :796163   NA's   :852465   NA's   :853816   NA's   :617519  
##   OUTRO_DES          FATOR_RISC           PUERPERA         CARDIOPATI     
##  Length:2131640     Length:2131640     Min.   :1.0       Min.   :1.0      
##  Class :character   Class :character   1st Qu.:2.0       1st Qu.:1.0      
##  Mode  :character   Mode  :character   Median :2.0       Median :1.0      
##                                        Mean   :2.2       Mean   :1.4      
##                                        3rd Qu.:2.0       3rd Qu.:2.0      
##                                        Max.   :9.0       Max.   :9.0      
##                                        NA's   :1352981   NA's   :1099368  
##    HEMATOLOGI        SIND_DOWN          HEPATICA            ASMA        
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0      
##  Median :2.0       Median :2.0       Median :2.0       Median :2.0      
##  Mean   :2.2       Mean   :2.2       Mean   :2.2       Mean   :2.1      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1350649   NA's   :1353343   NA's   :1352960   NA's   :1340898  
##     DIABETES         NEUROLOGIC        PNEUMOPATI        IMUNODEPRE     
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:1.0       1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0      
##  Median :2.0       Median :2.0       Median :2.0       Median :2.0      
##  Mean   :1.6       Mean   :2.1       Mean   :2.1       Mean   :2.1      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1172542   NA's   :1331091   NA's   :1332442   NA's   :1342756  
##      RENAL           OBESIDADE         OBES_IMC           OUT_MORBI      
##  Min.   :1.0       Min.   :1         Length:2131640     Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:2         Class :character   1st Qu.:1.0      
##  Median :2.0       Median :2         Mode  :character   Median :1.0      
##  Mean   :2.1       Mean   :2                            Mean   :1.5      
##  3rd Qu.:2.0       3rd Qu.:2                            3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9                            Max.   :9.0      
##  NA's   :1334452   NA's   :1303592                      NA's   :1173521  
##   MORB_DESC          DOSE_1_COV         DOSE_2_COV          DOSE_REF        
##  Length:2131640     Length:2131640     Length:2131640     Length:2131640    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   FAB_COVREF          ANTIVIRAL        TP_ANTIVIR       OUT_ANTIV        
##  Length:2131640     Min.   :1.0      Min.   :1.0       Length:2131640    
##  Class :character   1st Qu.:2.0      1st Qu.:1.0       Class :character  
##  Mode  :character   Median :2.0      Median :1.0       Mode  :character  
##                     Mean   :3.2      Mean   :1.2                         
##                     3rd Qu.:2.0      3rd Qu.:1.0                         
##                     Max.   :9.0      Max.   :3.0                         
##                     NA's   :347443   NA's   :2008382                     
##   DT_ANTIVIR          SUPORT_VEN       RAIOX_RES       RAIOX_OUT        
##  Length:2131640     Min.   :1.00     Min.   :1.0      Length:2131640    
##  Class :character   1st Qu.:2.00     1st Qu.:3.0      Class :character  
##  Mode  :character   Median :2.00     Median :6.0      Mode  :character  
##                     Mean   :2.26     Mean   :5.3                        
##                     3rd Qu.:2.00     3rd Qu.:6.0                        
##                     Max.   :9.00     Max.   :9.0                        
##                     NA's   :256513   NA's   :857548                     
##     TOMO_RES        TOMO_OUT            AMOSTRA        TP_AMOSTRA    
##  Min.   :1.0      Length:2131640     Min.   :1.00    Min.   :1.00    
##  1st Qu.:1.0      Class :character   1st Qu.:1.00    1st Qu.:1.00    
##  Median :1.0      Mode  :character   Median :1.00    Median :1.00    
##  Mean   :3.1                         Mean   :1.08    Mean   :1.38    
##  3rd Qu.:6.0                         3rd Qu.:1.00    3rd Qu.:1.00    
##  Max.   :9.0                         Max.   :9.00    Max.   :9.00    
##  NA's   :862861                      NA's   :71794   NA's   :184777  
##   OUT_AMOST           PCR_RESUL         DT_PCR            POS_PCRFLU     
##  Length:2131640     Min.   :1.00     Length:2131640     Min.   :1.0      
##  Class :character   1st Qu.:1.00     Class :character   1st Qu.:2.0      
##  Mode  :character   Median :1.00     Mode  :character   Median :2.0      
##                     Mean   :1.89                        Mean   :2.8      
##                     3rd Qu.:2.00                        3rd Qu.:2.0      
##                     Max.   :9.00                        Max.   :9.0      
##                     NA's   :195201                      NA's   :1317296  
##    TP_FLU_PCR        PCR_FLUASU          DT_MIN             ANO_DTMIN   
##  Min.   :1.0       Min.   :1.0       Min.   :2020-01-05   Min.   :2020  
##  1st Qu.:1.0       1st Qu.:2.0       1st Qu.:2020-11-05   1st Qu.:2020  
##  Median :1.0       Median :2.0       Median :2021-03-21   Median :2021  
##  Mean   :1.1       Mean   :2.5       Mean   :2021-03-12   Mean   :2021  
##  3rd Qu.:1.0       3rd Qu.:3.0       3rd Qu.:2021-06-10   3rd Qu.:2021  
##  Max.   :2.0       Max.   :6.0       Max.   :2022-12-04   Max.   :2022  
##  NA's   :2130656   NA's   :2130801                                      
##    SEM_DTMIN     
##  Min.   :  1.00  
##  1st Qu.: 45.00  
##  Median : 65.00  
##  Mean   : 63.42  
##  3rd Qu.: 76.00  
##  Max.   :154.00  
## 

Criando tabelas por tipo (Possuimos muitas colunas, algumas delas com muitos nulos, portanto para verificar quais possuem impacto na mortalidade vou separar por tipo, por exemplo, demográficas, comorbidades, relacionadas a vacinação, etc.)

Demográficas e economicas

# Demographic / socio-economic subset, plus the outcome column (EVOLUCAO).
casos_fil_covid_dem <- casos_fil_covid %>%
  select(
    DT_MIN, SG_UF_NOT, ID_MUNICIP, CO_MUN_NOT,
    CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N,
    PAC_COCBO, CS_ZONA, NU_IDADE_N, EVOLUCAO
  )
casos_fil_covid_dem %>%
  summary()
##      DT_MIN            SG_UF_NOT          ID_MUNICIP          CO_MUN_NOT    
##  Min.   :2020-01-05   Length:2131640     Length:2131640     Min.   :110001  
##  1st Qu.:2020-11-05   Class :character   Class :character   1st Qu.:310620  
##  Median :2021-03-21   Mode  :character   Mode  :character   Median :351880  
##  Mean   :2021-03-12                                         Mean   :344123  
##  3rd Qu.:2021-06-10                                         3rd Qu.:410690  
##  Max.   :2022-12-04                                         Max.   :530010  
##                                                                             
##    CS_SEXO            CS_GESTANT       CS_RACA        CS_ESCOL_N    
##  Length:2131640     Min.   :0.000   Min.   :1.00    Min.   :0.0     
##  Class :character   1st Qu.:5.000   1st Qu.:1.00    1st Qu.:2.0     
##  Mode  :character   Median :6.000   Median :4.00    Median :4.0     
##                     Mean   :5.779   Mean   :3.49    Mean   :5.4     
##                     3rd Qu.:6.000   3rd Qu.:4.00    3rd Qu.:9.0     
##                     Max.   :9.000   Max.   :9.00    Max.   :9.0     
##                                     NA's   :28517   NA's   :715644  
##   PAC_COCBO            CS_ZONA         NU_IDADE_N       EVOLUCAO    
##  Length:2131640     Min.   :1.00     Min.   : 1.00   Min.   :1.0    
##  Class :character   1st Qu.:1.00     1st Qu.:45.00   1st Qu.:1.0    
##  Mode  :character   Median :1.00     Median :59.00   Median :1.0    
##                     Mean   :1.15     Mean   :57.91   Mean   :1.5    
##                     3rd Qu.:1.00     3rd Qu.:72.00   3rd Qu.:2.0    
##                     Max.   :9.00     Max.   :99.00   Max.   :9.0    
##                     NA's   :232617                   NA's   :99271

Estruturando em one hot encoded para poder fazer a regressão logit.

# One-hot encode the categorical demographic columns so that each level can
# enter the logit model as its own 0/1 regressor; the source columns are
# removed afterwards.
casos_fil_covid_dem <- dummy_cols(
  casos_fil_covid_dem,
  select_columns = c("CS_SEXO", "CS_GESTANT", "CS_RACA", "CS_ZONA"),
  remove_selected_columns = TRUE
)

# Binary outcome: 1 = survived (EVOLUCAO == 1), 0 = death.
# Codes 2 and 3 are presumably the death codes and keep mapping to 0; any
# other code (the data goes up to 9, presumably "ignored" -- confirm against
# the data dictionary) now becomes NA instead of being counted as a death.
# glm() then drops those rows together with the already-missing outcomes.
# NOTE: results printed in this document predate this recoding; re-run to
# refresh them.
casos_fil_covid_dem$EVOLUCAO <- case_when(
  casos_fil_covid_dem$EVOLUCAO == 1 ~ 1,
  casos_fil_covid_dem$EVOLUCAO %in% c(2, 3) ~ 0,
  TRUE ~ NA_real_
)

Porcentagem de nulos na coluna EVOLUCAO.

# Fraction of rows whose outcome (EVOLUCAO) is missing.
with(casos_fil_covid, sum(is.na(EVOLUCAO)) / length(EVOLUCAO))
## [1] 0.04657025

Variáveis selecionadas: CS_SEXO_F = se é mulher ou não, CS_RACA_2 = se é preto ou não, CS_ZONA_2 = se é rural ou não, NU_IDADE_N = idade da pessoa. EVOLUCAO = 1 para sobreviveu, 0 para óbito. Recordo também que o comando glm omite as linhas com valores nulos.

# Logistic regression of the binary outcome on sex, race, zone and age
# dummies; rows with missing values in any model variable are dropped by glm().
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~ CS_SEXO_F + CS_RACA_2 + CS_ZONA_2 + NU_IDADE_N,
  family = binomial(link = "logit"),
  data = casos_fil_covid_dem
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ CS_SEXO_F + CS_RACA_2 + CS_ZONA_2 + 
##     NU_IDADE_N, family = binomial(link = "logit"), data = casos_fil_covid_dem)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4794  -1.1620   0.6629   0.9259   1.7909  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.921e+00  6.452e-03  452.77   <2e-16 ***
## CS_SEXO_F    1.441e-01  3.330e-03   43.26   <2e-16 ***
## CS_RACA_2   -3.064e-01  7.815e-03  -39.20   <2e-16 ***
## CS_ZONA_2   -1.410e-01  7.443e-03  -18.95   <2e-16 ***
## NU_IDADE_N  -3.892e-02  9.946e-05 -391.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2322208  on 1795884  degrees of freedom
## Residual deviance: 2140628  on 1795880  degrees of freedom
##   (335755 observations deleted due to missingness)
## AIC: 2140638
## 
## Number of Fisher Scoring iterations: 3

Variáveis relacionadas à vacinação

# The demographic table is no longer needed: remove it and trigger a garbage
# collection before building the next subset.
rm(list = "casos_fil_covid_dem")
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   5652072  301.9   12354841  659.9   5714417  305.2
## Vcells 540287406 4122.1  948513124 7236.6 948401121 7235.8
# Vaccination subset: both vaccine indicators, age and the outcome column.
casos_fil_covid_vacina <- casos_fil_covid %>%
  select(
    DT_MIN, VACINA_COV, VACINA,
    NU_IDADE_N, EVOLUCAO
  )
casos_fil_covid_vacina %>%
  summary()
##      DT_MIN             VACINA_COV         VACINA         NU_IDADE_N   
##  Min.   :2020-01-05   Min.   :1.0      Min.   :1.0      Min.   : 1.00  
##  1st Qu.:2020-11-05   1st Qu.:2.0      1st Qu.:2.0      1st Qu.:45.00  
##  Median :2021-03-21   Median :2.0      Median :2.0      Median :59.00  
##  Mean   :2021-03-12   Mean   :2.6      Mean   :5.1      Mean   :57.91  
##  3rd Qu.:2021-06-10   3rd Qu.:2.0      3rd Qu.:9.0      3rd Qu.:72.00  
##  Max.   :2022-12-04   Max.   :9.0      Max.   :9.0      Max.   :99.00  
##                       NA's   :325989   NA's   :526559                  
##     EVOLUCAO    
##  Min.   :1.0    
##  1st Qu.:1.0    
##  Median :1.0    
##  Mean   :1.5    
##  3rd Qu.:2.0    
##  Max.   :9.0    
##  NA's   :99271

Criando a coluna DUPLA_VACINA (vacina contra a covid e vacina contra a gripe) para entender se a dupla vacinação tem um efeito positivo.

# Flag rows where both vaccine indicators equal 1 (1 = both, 0 = otherwise;
# NA when the combined condition cannot be decided because of missing values).
casos_fil_covid_vacina$DUPLA_VACINA <- with(
  casos_fil_covid_vacina,
  if_else(VACINA_COV == 1 & VACINA == 1, 1, 0)
)

Estruturando em one hot encoded para poder fazer a regressão logit.

# One-hot encode the vaccination indicators so that each level becomes its
# own 0/1 regressor; the source columns are removed afterwards.
casos_fil_covid_vacina <- dummy_cols(
  casos_fil_covid_vacina,
  select_columns = c("VACINA_COV", "VACINA", "DUPLA_VACINA"),
  remove_selected_columns = TRUE
)

# Binary outcome: 1 = survived (EVOLUCAO == 1), 0 = death.
# Codes 2 and 3 are presumably the death codes and keep mapping to 0; any
# other code (the data goes up to 9, presumably "ignored" -- confirm against
# the data dictionary) now becomes NA instead of being counted as a death.
# glm() then drops those rows together with the already-missing outcomes.
# NOTE: results printed in this document predate this recoding; re-run to
# refresh them.
casos_fil_covid_vacina$EVOLUCAO <- case_when(
  casos_fil_covid_vacina$EVOLUCAO == 1 ~ 1,
  casos_fil_covid_vacina$EVOLUCAO %in% c(2, 3) ~ 0,
  TRUE ~ NA_real_
)

Variáveis selecionadas: VACINA_1 = se tomou a vacina contra a gripe ou não, VACINA_COV_1 = se tomou a vacina contra a covid ou não, DUPLA_VACINA_1 = se tomou as duas vacinas ou não, NU_IDADE_N = idade da pessoa. EVOLUCAO = 1 para sobreviveu, 0 para óbito. Recordo também que o comando glm omite as linhas com valores nulos.

# Logistic regression of the binary outcome on the two vaccine dummies, the
# double-vaccination dummy and age; glm() drops rows with missing values.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~ VACINA_1 + VACINA_COV_1 + DUPLA_VACINA_1 + NU_IDADE_N,
  family = binomial(link = "logit"),
  data = casos_fil_covid_vacina
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ VACINA_1 + VACINA_COV_1 + DUPLA_VACINA_1 + 
##     NU_IDADE_N, family = binomial(link = "logit"), data = casos_fil_covid_vacina)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7098  -1.1571   0.6596   0.9119   1.6625  
## 
## Coefficients:
##                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)     3.0445509  0.0074774  407.166   <2e-16 ***
## VACINA_1        0.3968219  0.0068614   57.834   <2e-16 ***
## VACINA_COV_1    0.3610720  0.0052273   69.074   <2e-16 ***
## DUPLA_VACINA_1 -0.1149745  0.0126732   -9.072   <2e-16 ***
## NU_IDADE_N     -0.0417909  0.0001197 -349.236   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1755431  on 1364590  degrees of freedom
## Residual deviance: 1610875  on 1364586  degrees of freedom
##   (767049 observations deleted due to missingness)
## AIC: 1610885
## 
## Number of Fisher Scoring iterations: 4

Análise dos sintomas

# The vaccination table is no longer needed: remove it and trigger a garbage
# collection before building the next subset.
rm(list = "casos_fil_covid_vacina")
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   5221079  278.9   12354841  659.9   5812999  310.5
## Vcells 510958394 3898.4  948513124 7236.6 948401121 7235.8
# Symptom subset: age, outcome and the twelve symptom indicator columns.
casos_fil_covid_sintomas <- casos_fil_covid %>%
  select(
    DT_MIN, NU_IDADE_N, EVOLUCAO,
    FEBRE, TOSSE, GARGANTA, DISPNEIA, DESC_RESP, SATURACAO,
    DIARREIA, VOMITO, DOR_ABD, FADIGA, PERD_OLFT, PERD_PALA
  )
casos_fil_covid_sintomas %>%
  summary()
##      DT_MIN             NU_IDADE_N       EVOLUCAO         FEBRE       
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :1.0     Min.   :1.0     
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:1.0     1st Qu.:1.0     
##  Median :2021-03-21   Median :59.00   Median :1.0     Median :1.0     
##  Mean   :2021-03-12   Mean   :57.91   Mean   :1.5     Mean   :1.4     
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:2.0     3rd Qu.:2.0     
##  Max.   :2022-12-04   Max.   :99.00   Max.   :9.0     Max.   :9.0     
##                                       NA's   :99271   NA's   :340351  
##      TOSSE           GARGANTA         DISPNEIA        DESC_RESP     
##  Min.   :1.0      Min.   :1        Min.   :1.00     Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:1.00     1st Qu.:1.0     
##  Median :1.0      Median :2        Median :1.00     Median :1.0     
##  Mean   :1.3      Mean   :2        Mean   :1.28     Mean   :1.4     
##  3rd Qu.:1.0      3rd Qu.:2        3rd Qu.:1.00     3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.00     Max.   :9.0     
##  NA's   :268697   NA's   :615677   NA's   :265435   NA's   :409211  
##    SATURACAO         DIARREIA          VOMITO          DOR_ABD      
##  Min.   :1.0      Min.   :1        Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:2.0      1st Qu.:2.0     
##  Median :1.0      Median :2        Median :2.0      Median :2.0     
##  Mean   :1.4      Mean   :2        Mean   :2.1      Mean   :2.2     
##  3rd Qu.:2.0      3rd Qu.:2        3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.0      Max.   :9.0     
##  NA's   :341842   NA's   :643986   NA's   :671957   NA's   :873571  
##      FADIGA         PERD_OLFT        PERD_PALA     
##  Min.   :1.0      Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2.0      1st Qu.:2.0     
##  Median :2.0      Median :2.0      Median :2.0     
##  Mean   :1.9      Mean   :2.1      Mean   :2.1     
##  3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9.0      Max.   :9.0     
##  NA's   :796163   NA's   :852465   NA's   :853816

Criando as dummies

# One-hot encode every symptom indicator so that each level becomes its own
# 0/1 regressor; the source columns are removed afterwards.
casos_fil_covid_sintomas <- dummy_cols(
  casos_fil_covid_sintomas,
  select_columns = c(
    "FEBRE", "TOSSE", "GARGANTA", "DISPNEIA", "DESC_RESP", "SATURACAO",
    "DIARREIA", "VOMITO", "DOR_ABD", "FADIGA", "PERD_OLFT", "PERD_PALA"
  ),
  remove_selected_columns = TRUE
)

# Binary outcome: 1 = survived (EVOLUCAO == 1), 0 = death.
# Codes 2 and 3 are presumably the death codes and keep mapping to 0; any
# other code (the data goes up to 9, presumably "ignored" -- confirm against
# the data dictionary) now becomes NA instead of being counted as a death.
# glm() then drops those rows together with the already-missing outcomes.
# NOTE: results printed in this document predate this recoding; re-run to
# refresh them.
casos_fil_covid_sintomas$EVOLUCAO <- case_when(
  casos_fil_covid_sintomas$EVOLUCAO == 1 ~ 1,
  casos_fil_covid_sintomas$EVOLUCAO %in% c(2, 3) ~ 0,
  TRUE ~ NA_real_
)

Logit dos sintomas

# First symptom model: binary outcome on fever, cough, sore-throat and
# dyspnea dummies; glm() drops rows with missing values.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~ FEBRE_1 + TOSSE_1 + GARGANTA_1 + DISPNEIA_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_sintomas
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ FEBRE_1 + TOSSE_1 + GARGANTA_1 + DISPNEIA_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_sintomas)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7954  -1.3919   0.8600   0.9164   1.1066  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.753840   0.004675  161.24   <2e-16 ***
## FEBRE_1      0.159034   0.003825   41.58   <2e-16 ***
## TOSSE_1      0.322382   0.004182   77.09   <2e-16 ***
## GARGANTA_1   0.153917   0.004632   33.23   <2e-16 ***
## DISPNEIA_1  -0.584917   0.004365 -134.00   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1786226  on 1394858  degrees of freedom
## Residual deviance: 1758084  on 1394854  degrees of freedom
##   (736781 observations deleted due to missingness)
## AIC: 1758094
## 
## Number of Fisher Scoring iterations: 4

Tenho de separar em duas regressões, pois o R não imprime todos os valores, tem um limite a analisar.

# Second symptom model: binary outcome on the remaining eight symptom
# dummies; glm() drops rows with missing values.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~ DESC_RESP_1 + SATURACAO_1 + DIARREIA_1 + VOMITO_1 +
    DOR_ABD_1 + FADIGA_1 + PERD_OLFT_1 + PERD_PALA_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_sintomas
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ DESC_RESP_1 + SATURACAO_1 + DIARREIA_1 + 
##     VOMITO_1 + DOR_ABD_1 + FADIGA_1 + PERD_OLFT_1 + PERD_PALA_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_sintomas)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0685  -1.3002   0.7509   0.9405   1.0602  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.121971   0.004279 262.218   <2e-16 ***
## DESC_RESP_1 -0.375743   0.004563 -82.343   <2e-16 ***
## SATURACAO_1 -0.462214   0.004832 -95.653   <2e-16 ***
## DIARREIA_1   0.169312   0.006165  27.464   <2e-16 ***
## VOMITO_1     0.105631   0.007476  14.130   <2e-16 ***
## DOR_ABD_1   -0.002028   0.008309  -0.244    0.807    
## FADIGA_1     0.133151   0.004603  28.929   <2e-16 ***
## PERD_OLFT_1  0.227462   0.010308  22.066   <2e-16 ***
## PERD_PALA_1  0.256625   0.010236  25.072   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1483778  on 1161598  degrees of freedom
## Residual deviance: 1451780  on 1161590  degrees of freedom
##   (970041 observations deleted due to missingness)
## AIC: 1451798
## 
## Number of Fisher Scoring iterations: 4

Análise das comorbidades

# The symptom table is no longer needed: remove it and trigger a garbage
# collection before building the next subset.
rm(list = "casos_fil_covid_sintomas")
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   5019034  268.1   12354841  659.9   5812999  310.5
## Vcells 553028984 4219.3  948513124 7236.6 948486590 7236.4
# Comorbidity subset: age, outcome, the risk-factor flag and the individual
# comorbidity indicator columns.
casos_fil_covid_comorbidades <- casos_fil_covid %>%
  select(
    DT_MIN, NU_IDADE_N, EVOLUCAO, FATOR_RISC,
    PUERPERA, CARDIOPATI, HEMATOLOGI, SIND_DOWN, HEPATICA, ASMA,
    DIABETES, NEUROLOGIC, PNEUMOPATI, IMUNODEPRE, RENAL, OBESIDADE
  )

casos_fil_covid_comorbidades %>%
  summary()
##      DT_MIN             NU_IDADE_N       EVOLUCAO      FATOR_RISC       
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :1.0     Length:2131640    
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:1.0     Class :character  
##  Median :2021-03-21   Median :59.00   Median :1.0     Mode  :character  
##  Mean   :2021-03-12   Mean   :57.91   Mean   :1.5                       
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:2.0                       
##  Max.   :2022-12-04   Max.   :99.00   Max.   :9.0                       
##                                       NA's   :99271                     
##     PUERPERA         CARDIOPATI        HEMATOLOGI        SIND_DOWN      
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:1.0       1st Qu.:2.0       1st Qu.:2.0      
##  Median :2.0       Median :1.0       Median :2.0       Median :2.0      
##  Mean   :2.2       Mean   :1.4       Mean   :2.2       Mean   :2.2      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1352981   NA's   :1099368   NA's   :1350649   NA's   :1353343  
##     HEPATICA            ASMA            DIABETES         NEUROLOGIC     
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:2.0       1st Qu.:1.0       1st Qu.:2.0      
##  Median :2.0       Median :2.0       Median :2.0       Median :2.0      
##  Mean   :2.2       Mean   :2.1       Mean   :1.6       Mean   :2.1      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1352960   NA's   :1340898   NA's   :1172542   NA's   :1331091  
##    PNEUMOPATI        IMUNODEPRE          RENAL           OBESIDADE      
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1        
##  1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2        
##  Median :2.0       Median :2.0       Median :2.0       Median :2        
##  Mean   :2.1       Mean   :2.1       Mean   :2.1       Mean   :2        
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2        
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9        
##  NA's   :1332442   NA's   :1342756   NA's   :1334452   NA's   :1303592

Criando as dummies

# One-hot encode the risk-factor flag and every comorbidity indicator so that
# each level becomes its own 0/1 regressor; the source columns are removed.
casos_fil_covid_comorbidades <- dummy_cols(
  casos_fil_covid_comorbidades,
  select_columns = c(
    "FATOR_RISC", "PUERPERA", "CARDIOPATI", "HEMATOLOGI", "SIND_DOWN",
    "HEPATICA", "ASMA", "DIABETES", "NEUROLOGIC", "PNEUMOPATI",
    "IMUNODEPRE", "RENAL", "OBESIDADE"
  ),
  remove_selected_columns = TRUE
)

# Binary outcome: 1 = survived (EVOLUCAO == 1), 0 = death.
# Codes 2 and 3 are presumably the death codes and keep mapping to 0; any
# other code (the data goes up to 9, presumably "ignored" -- confirm against
# the data dictionary) now becomes NA instead of being counted as a death.
# NOTE: results printed in this document predate this recoding; re-run to
# refresh them.
casos_fil_covid_comorbidades$EVOLUCAO <- case_when(
  casos_fil_covid_comorbidades$EVOLUCAO == 1 ~ 1,
  casos_fil_covid_comorbidades$EVOLUCAO %in% c(2, 3) ~ 0,
  TRUE ~ NA_real_
)

summary(casos_fil_covid_comorbidades)
##      DT_MIN             NU_IDADE_N       EVOLUCAO      FATOR_RISC_1   
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :0.00    Min.   :0.0000  
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:0.00    1st Qu.:0.0000  
##  Median :2021-03-21   Median :59.00   Median :1.00    Median :0.0000  
##  Mean   :2021-03-12   Mean   :57.91   Mean   :0.65    Mean   :0.3899  
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:1.00    3rd Qu.:1.0000  
##  Max.   :2022-12-04   Max.   :99.00   Max.   :1.00    Max.   :1.0000  
##                                       NA's   :99271                   
##   FATOR_RISC_2     FATOR_RISC_N     FATOR_RISC_S      PUERPERA_1     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0        
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0        
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0        
##  Mean   :0.2743   Mean   :0.1229   Mean   :0.2128   Mean   :0        
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0        
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1        
##                                                     NA's   :1352981  
##    PUERPERA_2        PUERPERA_9       PUERPERA_NA      CARDIOPATI_1    
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1         Median :0         Median :1.0000   Median :1.0      
##  Mean   :1         Mean   :0         Mean   :0.6347   Mean   :0.6      
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:1.0      
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1352981   NA's   :1352981                    NA's   :1099368  
##   CARDIOPATI_2      CARDIOPATI_9     CARDIOPATI_NA     HEMATOLOGI_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0        
##  1st Qu.:0.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0        
##  Median :0.0       Median :0         Median :1.0000   Median :0        
##  Mean   :0.3       Mean   :0         Mean   :0.5157   Mean   :0        
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0        
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1        
##  NA's   :1099368   NA's   :1099368                    NA's   :1350649  
##   HEMATOLOGI_2      HEMATOLOGI_9     HEMATOLOGI_NA     SIND_DOWN_1     
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0        
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0        
##  Median :1         Median :0         Median :1.0000   Median :0        
##  Mean   :1         Mean   :0         Mean   :0.6336   Mean   :0        
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0        
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1        
##  NA's   :1350649   NA's   :1350649                    NA's   :1353343  
##   SIND_DOWN_2       SIND_DOWN_9       SIND_DOWN_NA      HEPATICA_1     
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0        
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0        
##  Median :1         Median :0         Median :1.0000   Median :0        
##  Mean   :1         Mean   :0         Mean   :0.6349   Mean   :0        
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0        
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1        
##  NA's   :1353343   NA's   :1353343                    NA's   :1352960  
##    HEPATICA_2        HEPATICA_9       HEPATICA_NA         ASMA_1       
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1         Median :0         Median :1.0000   Median :0.0      
##  Mean   :1         Mean   :0         Mean   :0.6347   Mean   :0.1      
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1352960   NA's   :1352960                    NA's   :1340898  
##      ASMA_2            ASMA_9           ASMA_NA        DIABETES_1     
##  Min.   :0.0       Min.   :0         Min.   :0.000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.629   Mean   :0.5      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.000   3rd Qu.:1.0      
##  Max.   :1.0       Max.   :1         Max.   :1.000   Max.   :1.0      
##  NA's   :1340898   NA's   :1340898                   NA's   :1172542  
##    DIABETES_2        DIABETES_9       DIABETES_NA      NEUROLOGIC_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:0.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :0.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.5       Mean   :0         Mean   :0.5501   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1172542   NA's   :1172542                    NA's   :1331091  
##   NEUROLOGIC_2      NEUROLOGIC_9     NEUROLOGIC_NA     PNEUMOPATI_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.6244   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1331091   NA's   :1331091                    NA's   :1332442  
##   PNEUMOPATI_2      PNEUMOPATI_9     PNEUMOPATI_NA     IMUNODEPRE_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.6251   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1332442   NA's   :1332442                    NA's   :1342756  
##   IMUNODEPRE_2      IMUNODEPRE_9     IMUNODEPRE_NA       RENAL_1       
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.6299   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1342756   NA's   :1342756                    NA's   :1334452  
##     RENAL_2           RENAL_9           RENAL_NA      OBESIDADE_1     
##  Min.   :0.0       Min.   :0         Min.   :0.000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.626   Mean   :0.2      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.000   Max.   :1.0      
##  NA's   :1334452   NA's   :1334452                   NA's   :1303592  
##   OBESIDADE_2       OBESIDADE_9       OBESIDADE_NA   
##  Min.   :0.0       Min.   :0         Min.   :0.0000  
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000  
##  Median :1.0       Median :0         Median :1.0000  
##  Mean   :0.8       Mean   :0         Mean   :0.6115  
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000  
##  Max.   :1.0       Max.   :1         Max.   :1.0000  
##  NA's   :1303592   NA's   :1303592

Logit comorbidades

# Logistic regression of EVOLUCAO on the "_1" dummy of every comorbidity /
# risk-factor field. glm() drops each row that has a missing value in any
# term, so only records with all of these fields filled in are used.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~
    FATOR_RISC_1 + PUERPERA_1 + CARDIOPATI_1 + HEMATOLOGI_1 + SIND_DOWN_1 +
    HEPATICA_1 + ASMA_1 + DIABETES_1 + NEUROLOGIC_1 + PNEUMOPATI_1 +
    IMUNODEPRE_1 + RENAL_1 + OBESIDADE_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_comorbidades
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ FATOR_RISC_1 + PUERPERA_1 + CARDIOPATI_1 + 
##     HEMATOLOGI_1 + SIND_DOWN_1 + HEPATICA_1 + ASMA_1 + DIABETES_1 + 
##     NEUROLOGIC_1 + PNEUMOPATI_1 + IMUNODEPRE_1 + RENAL_1 + OBESIDADE_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_comorbidades)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0496  -1.3077   0.8999   0.9911   2.2419  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.761552   0.005652 134.747  < 2e-16 ***
## FATOR_RISC_1 -0.066832   0.005127 -13.035  < 2e-16 ***
## PUERPERA_1    0.777301   0.041216  18.859  < 2e-16 ***
## CARDIOPATI_1 -0.221110   0.004977 -44.429  < 2e-16 ***
## HEMATOLOGI_1 -0.110694   0.022751  -4.865 1.14e-06 ***
## SIND_DOWN_1  -0.002804   0.034751  -0.081    0.936    
## HEPATICA_1   -0.447586   0.021010 -21.304  < 2e-16 ***
## ASMA_1        0.431097   0.013041  33.056  < 2e-16 ***
## DIABETES_1   -0.239192   0.005109 -46.820  < 2e-16 ***
## NEUROLOGIC_1 -0.564989   0.009985 -56.585  < 2e-16 ***
## PNEUMOPATI_1 -0.502164   0.010495 -47.850  < 2e-16 ***
## IMUNODEPRE_1 -0.404165   0.012380 -32.647  < 2e-16 ***
## RENAL_1      -0.591436   0.010353 -57.130  < 2e-16 ***
## OBESIDADE_1  -0.041856   0.007138  -5.863 4.53e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 954775  on 708893  degrees of freedom
## Residual deviance: 937544  on 708880  degrees of freedom
##   (1422746 observations deleted due to missingness)
## AIC: 937572
## 
## Number of Fisher Scoring iterations: 4

Descobrimos agora quais variáveis impactam a chance de a pessoa internada vir a óbito; portanto, vamos comparar essas variáveis ao longo do tempo, inicialmente estimando o logit com elas em dois grupos: pré e pós-vacina (início da vacinação em 17/01/2021).

As variáveis são: Demográficas = idade e raça; Vacina = vacina covid e vacina gripe; Sintomas = dispneia, desconforto respiratório e saturação; se foi para UTI; Comorbidades = hepática, pneumopatia, neurológica, renal e cardiopatia. Entretanto, como as comorbidades me fazem perder muitas linhas por nulos, vou desconsiderá-las por enquanto.

# The comorbidity table is not used past this point: remove it from the
# workspace and run the garbage collector (gc() prints its memory report).
rm(list = "casos_fil_covid_comorbidades")
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   4566641  243.9   12354841  659.9   5812999  310.5
## Vcells 551752373 4209.6  948513124 7236.6 948486590 7236.4
# Working table for the pre/post-vaccination comparison: date, age, race,
# the two vaccine fields, ICU, the three respiratory symptoms and the outcome.
casos_fil_covid_analise_vacina <- select(
  casos_fil_covid,
  DT_MIN, NU_IDADE_N, CS_RACA,
  VACINA, VACINA_COV,
  UTI, DISPNEIA, DESC_RESP, SATURACAO,
  EVOLUCAO
)

summary(casos_fil_covid_analise_vacina)
##      DT_MIN             NU_IDADE_N       CS_RACA          VACINA      
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :1.00    Min.   :1.0     
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:1.00    1st Qu.:2.0     
##  Median :2021-03-21   Median :59.00   Median :4.00    Median :2.0     
##  Mean   :2021-03-12   Mean   :57.91   Mean   :3.49    Mean   :5.1     
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:4.00    3rd Qu.:9.0     
##  Max.   :2022-12-04   Max.   :99.00   Max.   :9.00    Max.   :9.0     
##                                       NA's   :28517   NA's   :526559  
##    VACINA_COV          UTI            DISPNEIA        DESC_RESP     
##  Min.   :1.0      Min.   :1.00     Min.   :1.00     Min.   :1.0     
##  1st Qu.:2.0      1st Qu.:1.00     1st Qu.:1.00     1st Qu.:1.0     
##  Median :2.0      Median :2.00     Median :1.00     Median :1.0     
##  Mean   :2.6      Mean   :1.79     Mean   :1.28     Mean   :1.4     
##  3rd Qu.:2.0      3rd Qu.:2.00     3rd Qu.:1.00     3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9.00     Max.   :9.00     Max.   :9.0     
##  NA's   :325989   NA's   :253860   NA's   :265435   NA's   :409211  
##    SATURACAO         EVOLUCAO    
##  Min.   :1.0      Min.   :1.0    
##  1st Qu.:1.0      1st Qu.:1.0    
##  Median :1.0      Median :1.0    
##  Mean   :1.4      Mean   :1.5    
##  3rd Qu.:2.0      3rd Qu.:2.0    
##  Max.   :9.0      Max.   :9.0    
##  NA's   :341842   NA's   :99271

Dummies

casos_fil_covid_analise_vacina <- casos_fil_covid_analise_vacina %>%
  # Expand each coded categorical field into one indicator column per code
  # (named <FIELD>_<code>) and drop the original coded column.
  dummy_cols(
    select_columns = c(
      "CS_RACA", "VACINA", "VACINA_COV", "UTI",
      "DISPNEIA", "DESC_RESP", "SATURACAO"
    ),
    remove_selected_columns = TRUE
  ) %>%
  # Binary outcome: 1 when EVOLUCAO == 1, 0 for every other non-missing code;
  # missing values stay missing.
  # NOTE(review): the summary of EVOLUCAO shows codes up to 9, so code 9
  # (labelled "Ignorado" for the other fields) is also counted as 0 here —
  # confirm against the data dictionary whether it should be NA instead.
  mutate(EVOLUCAO = if_else(EVOLUCAO == 1, 1, 0))

Separando em grupos pré e pós vacinação

# Split the working table by DT_MIN: rows before 2021-01-01 go to "pre",
# rows on or after that date go to "pos". Rows with a missing DT_MIN are
# dropped by filter() from both tables.
# NOTE(review): the accompanying text gives 17/01/2021 as the start of
# vaccination, while the cutoff used here is 2021-01-01 — confirm which
# date is intended.
casos_fil_covid_analise_vacina_pre <- filter(
  casos_fil_covid_analise_vacina,
  DT_MIN < "2021-01-01"
)
casos_fil_covid_analise_vacina_pos <- filter(
  casos_fil_covid_analise_vacina,
  DT_MIN >= "2021-01-01"
)

Logit pré vacina

# Pre-vaccination logit: outcome on age, the CS_RACA_2 dummy, flu vaccine,
# ICU and the three respiratory symptom dummies. VACINA_COV is not included
# in this period's model. Overwrites the previous casos_fil_covid.logit.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~
    NU_IDADE_N + CS_RACA_2 + VACINA_1 +
    UTI_1 + DISPNEIA_1 + DESC_RESP_1 + SATURACAO_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_analise_vacina_pre
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ NU_IDADE_N + CS_RACA_2 + VACINA_1 + 
##     UTI_1 + DISPNEIA_1 + DESC_RESP_1 + SATURACAO_1, family = binomial(link = "logit"), 
##     data = casos_fil_covid_analise_vacina_pre)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1604  -0.9013   0.4780   0.8014   2.2124  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.7307670  0.0190467  248.38   <2e-16 ***
## NU_IDADE_N  -0.0465783  0.0002466 -188.88   <2e-16 ***
## CS_RACA_2   -0.3150523  0.0168036  -18.75   <2e-16 ***
## VACINA_1     0.3029572  0.0102442   29.57   <2e-16 ***
## UTI_1       -1.4521395  0.0076773 -189.15   <2e-16 ***
## DISPNEIA_1  -0.2522193  0.0100036  -25.21   <2e-16 ***
## DESC_RESP_1 -0.3289754  0.0090848  -36.21   <2e-16 ***
## SATURACAO_1 -0.3142151  0.0091357  -34.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 521846  on 404498  degrees of freedom
## Residual deviance: 420593  on 404491  degrees of freedom
##   (278120 observations deleted due to missingness)
## AIC: 420609
## 
## Number of Fisher Scoring iterations: 4

Logit pós-vacina. Parece que não há diferença relevante entre as duas regressões, o que indica que os grupos são semelhantes e nos permite utilizar toda a base de dados em uma única regressão, sem precisar separar os períodos pré e pós-vacina.

# Post-vaccination logit: same terms as the pre-vaccination model plus the
# COVID vaccine dummy (VACINA_COV_1). Overwrites casos_fil_covid.logit.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~
    NU_IDADE_N + CS_RACA_2 + VACINA_1 + VACINA_COV_1 +
    UTI_1 + DISPNEIA_1 + DESC_RESP_1 + SATURACAO_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_analise_vacina_pos
)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ NU_IDADE_N + CS_RACA_2 + VACINA_1 + 
##     VACINA_COV_1 + UTI_1 + DISPNEIA_1 + DESC_RESP_1 + SATURACAO_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_analise_vacina_pos)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9564  -0.9074   0.5129   0.7872   2.2449  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   4.1255197  0.0138877  297.06   <2e-16 ***
## NU_IDADE_N   -0.0413358  0.0001874 -220.56   <2e-16 ***
## CS_RACA_2    -0.2732955  0.0139572  -19.58   <2e-16 ***
## VACINA_1      0.1920415  0.0093885   20.45   <2e-16 ***
## VACINA_COV_1  0.4385190  0.0066027   66.42   <2e-16 ***
## UTI_1        -1.6605966  0.0059225 -280.39   <2e-16 ***
## DISPNEIA_1   -0.1945834  0.0079570  -24.45   <2e-16 ***
## DESC_RESP_1  -0.2774648  0.0070134  -39.56   <2e-16 ***
## SATURACAO_1  -0.2285727  0.0077752  -29.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 872348  on 677486  degrees of freedom
## Residual deviance: 713446  on 677478  degrees of freedom
##   (771534 observations deleted due to missingness)
## AIC: 713464
## 
## Number of Fisher Scoring iterations: 4

Criando coluna idade, cortando número de idades iguais ou menores do que zero “erro de input” e excluindo estes valores.

# The three regression tables are no longer needed: remove them and run the
# garbage collector (gc() prints its memory report).
rm(
  casos_fil_covid_analise_vacina,
  casos_fil_covid_analise_vacina_pre,
  casos_fil_covid_analise_vacina_pos
)
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   4535227  242.3   12354841  659.9   5812999  310.5
## Vcells 510324175 3893.5  950007366 7248.0 950007366 7248.0
# Make sure DT_NASC is a Date (day/month/year format when it is still text).
casos_fil_covid$DT_NASC <- as_date(casos_fil_covid$DT_NASC, format = "%d/%m/%Y")

# Age in years at DT_MIN. as.numeric() on a Date yields a day count, so the
# difference is in days. Dividing by 365.25 (mean year length including leap
# days) rather than 365 avoids overstating ages by roughly one day every four
# years, which would push people just under 100 over the exclusion cutoff.
casos_fil_covid$idade <-
  (as.numeric(casos_fil_covid$DT_MIN) - as.numeric(casos_fil_covid$DT_NASC)) / 365.25

# Tabulate non-positive computed ages; NA appears when either date is missing.
count(casos_fil_covid, idade <= 0)
## # A tibble: 3 x 2
##   `idade <= 0`       n
##   <lgl>          <int>
## 1 FALSE        2129657
## 2 TRUE             144
## 3 NA              1839
# Tabulate computed ages of 100 years or more (NA = age could not be computed).
casos_fil_covid %>% count(idade >= 100)
## # A tibble: 3 x 2
##   `idade >= 100`       n
##   <lgl>            <int>
## 1 FALSE          2129663
## 2 TRUE               138
## 3 NA                1839
# Keep only rows whose computed age lies strictly between 0 and 100 years.
# filter() also drops rows where idade is NA.
casos_fil_covid <- casos_fil_covid %>%
  filter(idade > 0, idade < 100)

Devemos fazer mais algumas análises sobre as bases para podermos afirmar se possuem uma distribuição similar ou não. Vamos olhar para as características das pessoas que foram a óbito. A linha azul representa o início da vacinação.

# Mean age (NU_IDADE_N) of the deaths (EVOLUCAO == 2) in each epidemiological
# week (SEM_DTMIN).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  summarise(avg = mean(NU_IDADE_N))


ggplot(data = a, aes(x = SEM_DTMIN, y = avg)) +
  geom_line() +
  ggtitle("Média de idade dos óbitos ao longo das semanas epidemiológicas") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` replaces `lwd`, which ggplot2 >= 3.4.0 reports as a
  # deprecated `size` aesthetic on line geoms.
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.

Distribuição das raças: pode parecer, a priori, que a mortalidade dos pardos no início era maior, mas o que parece ter ocorrido é que, no começo, os brancos também estavam sendo deixados como nulos.

# Weekly number of deaths (EVOLUCAO == 2) per race code.
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(CS_RACA) %>%
  # Replace numeric codes with labels in one step; NA and any code not listed
  # here pass through unchanged (as text).
  mutate(
    CS_RACA = dplyr::recode(
      as.character(CS_RACA),
      "1" = "Branca",
      "2" = "Preta",
      "3" = "Amarela",
      "4" = "Parda",
      "5" = "Indigena",
      "9" = "Ignorado"
    )
  )


ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = CS_RACA)) +
  geom_line() +
  ggtitle("Óbitos por raça ao longo das semanas") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Vacina covid: como esperado, os óbitos entre vacinados se tornam maioria ao longo do tempo; mas, como sabemos que a vacina aumenta a chance de recuperação, podemos dizer que isso ocorre apenas porque os vacinados se tornam maioria.

# Weekly number of deaths (EVOLUCAO == 2) per COVID-vaccine status code.
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(VACINA_COV) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    VACINA_COV = dplyr::recode(
      as.character(VACINA_COV),
      "1" = "Vacinados",
      "2" = "Não Vacinados",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = VACINA_COV)) +
  geom_line() +
  ggtitle("Número de óbitos entre vacinados") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Vacina gripe

# Weekly number of deaths (EVOLUCAO == 2) per flu-vaccine status code.
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(VACINA) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    VACINA = dplyr::recode(
      as.character(VACINA),
      "1" = "Vacinados",
      "2" = "Não Vacinados",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = VACINA)) +
  geom_line() +
  ggtitle("Número de óbitos entre vacinados contra gripe") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Desconforto respiratório: parece estar em paralelo com os casos de covid.

# Weekly number of deaths (EVOLUCAO == 2) per respiratory-distress code
# (DESC_RESP).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(DESC_RESP) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    DESC_RESP = dplyr::recode(
      as.character(DESC_RESP),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = DESC_RESP)) +
  geom_line() +
  ggtitle("Número de óbitos de pessoas com desconforto respiratório") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Dispneia: nada diferente do desconforto respiratório.

# Weekly number of deaths (EVOLUCAO == 2) per dyspnea code (DISPNEIA).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(DISPNEIA) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    DISPNEIA = dplyr::recode(
      as.character(DISPNEIA),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = DISPNEIA)) +
  geom_line() +
  ggtitle("Número de óbitos de pessoas com dispinéia") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Saturação: nada diferente da dispneia.

# Weekly number of deaths (EVOLUCAO == 2) per saturation code (SATURACAO).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(SATURACAO) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    SATURACAO = dplyr::recode(
      as.character(SATURACAO),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = SATURACAO)) +
  geom_line() +
  # Title corrected: it previously repeated the dyspnea plot's title although
  # this chart is broken down by SATURACAO.
  ggtitle("Número de óbitos por saturação") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

UTI

# Weekly number of deaths (EVOLUCAO == 2) per ICU code (UTI).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(UTI) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    UTI = dplyr::recode(
      as.character(UTI),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = UTI)) +
  geom_line() +
  ggtitle("Número de óbitos de pessoas que foram para UTI") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Hepática, numero de nulos alto.

# Weekly number of deaths (EVOLUCAO == 2) per liver-disease code (HEPATICA).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(HEPATICA) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    HEPATICA = dplyr::recode(
      as.character(HEPATICA),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = HEPATICA)) +
  geom_line() +
  ggtitle("Número de óbitos de pessoas hepáticas") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Pneumopatia, número de nulos alto.

# Weekly number of deaths (EVOLUCAO == 2) per lung-disease code (PNEUMOPATI).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(PNEUMOPATI) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    PNEUMOPATI = dplyr::recode(
      as.character(PNEUMOPATI),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = PNEUMOPATI)) +
  geom_line() +
  ggtitle("Número de óbitos de pessoas pneupáticas") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Neurológica, número de nulos alto.

# Weekly number of deaths (EVOLUCAO == 2) per neurological-disease code
# (NEUROLOGIC).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(NEUROLOGIC) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    NEUROLOGIC = dplyr::recode(
      as.character(NEUROLOGIC),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = NEUROLOGIC)) +
  geom_line() +
  # Title corrected: it previously repeated the lung-disease plot's title
  # although this chart is broken down by NEUROLOGIC.
  ggtitle("Número de óbitos de pessoas com doença neurológica") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Renal, numero de nulos alto.

# Weekly number of deaths (EVOLUCAO == 2) per kidney-disease code (RENAL).
# The column is relabelled directly, like the sibling plots, instead of being
# temporarily renamed through a variable called `c` (which shadowed base::c).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(RENAL) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    RENAL = dplyr::recode(
      as.character(RENAL),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = RENAL)) +
  geom_line() +
  ggtitle("Número de óbitos de pessoas com problema renal") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )

Cardiopatia, número de nulos alto.

# Weekly number of deaths (EVOLUCAO == 2) per heart-disease code (CARDIOPATI).
a <- casos_fil_covid %>%
  filter(EVOLUCAO == 2) %>%
  group_by(SEM_DTMIN) %>%
  count(CARDIOPATI) %>%
  # Codes become labels; NA and any unlisted code pass through unchanged.
  mutate(
    CARDIOPATI = dplyr::recode(
      as.character(CARDIOPATI),
      "1" = "Sim",
      "2" = "Não",
      "9" = "Ignorado"
    )
  )

ggplot(data = a, aes(x = SEM_DTMIN, y = n, colour = CARDIOPATI)) +
  geom_line() +
  # Title corrected: it previously repeated the lung-disease plot's title
  # although this chart is broken down by CARDIOPATI.
  ggtitle("Número de óbitos de pessoas cardiopatas") +
  # Blue dotted line at week 55: start of vaccination, per the accompanying
  # text. `linewidth` is the non-deprecated replacement for `lwd` on lines
  # (ggplot2 >= 3.4.0).
  geom_vline(
    xintercept = 55, linewidth = 1, colour = "blue", linetype = "dotted"
  )