Instalação de pacotes e extração de dados

Packages

Library

Criando tabela casos

Filtrando somente casos de covid

casos_fil_covid <- filter(casos_fil, CLASSI_FIN == 5)
rm(casos_fil)
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   1448919   77.4    2705755  144.6   2394751  127.9
## Vcells 167960362 1281.5  632284791 4824.0 660739455 5041.1

Criando a coluna menor data

Summary do df

summary(casos_fil_covid)
##    DT_NOTIFIC            SEM_NOT       SG_UF_NOT          ID_MUNICIP       
##  Min.   :2020-02-21   Min.   : 1.00   Length:2136005     Length:2136005    
##  1st Qu.:2020-11-11   1st Qu.:12.00   Class :character   Class :character  
##  Median :2021-03-24   Median :21.00   Mode  :character   Mode  :character  
##  Mean   :2021-03-17   Mean   :22.53                                        
##  3rd Qu.:2021-06-15   3rd Qu.:30.00                                        
##  Max.   :2022-12-09   Max.   :53.00                                        
##                                                                            
##    CO_MUN_NOT       CS_SEXO            DT_NASC            CS_GESTANT   
##  Min.   :110001   Length:2136005     Length:2136005     Min.   :0.000  
##  1st Qu.:310620   Class :character   Class :character   1st Qu.:5.000  
##  Median :351880   Mode  :character   Mode  :character   Median :6.000  
##  Mean   :344069                                         Mean   :5.779  
##  3rd Qu.:410690                                         3rd Qu.:6.000  
##  Max.   :530010                                         Max.   :9.000  
##                                                                        
##     CS_RACA        CS_ESCOL_N      PAC_COCBO            CS_ZONA      
##  Min.   :1.00    Min.   :0.0      Length:2136005     Min.   :1.00    
##  1st Qu.:1.00    1st Qu.:2.0      Class :character   1st Qu.:1.00    
##  Median :4.00    Median :4.0      Mode  :character   Median :1.00    
##  Mean   :3.49    Mean   :5.4                         Mean   :1.15    
##  3rd Qu.:4.00    3rd Qu.:9.0                         3rd Qu.:1.00    
##  Max.   :9.00    Max.   :9.0                         Max.   :9.00    
##  NA's   :28576   NA's   :716885                      NA's   :233075  
##    VACINA_COV         VACINA        DT_UT_DOSE           HOSPITAL    
##  Min.   :1.0      Min.   :1.0      Length:2136005     Min.   :1.00   
##  1st Qu.:2.0      1st Qu.:2.0      Class :character   1st Qu.:1.00   
##  Median :2.0      Median :2.0      Mode  :character   Median :1.00   
##  Mean   :2.6      Mean   :5.1                         Mean   :1.04   
##  3rd Qu.:2.0      3rd Qu.:9.0                         3rd Qu.:1.00   
##  Max.   :9.0      Max.   :9.0                         Max.   :9.00   
##  NA's   :326476   NA's   :528302                      NA's   :45286  
##    DT_INTERNA              UTI           DT_ENTUTI         
##  Min.   :2020-01-05   Min.   :1.00     Min.   :2020-01-05  
##  1st Qu.:2020-11-06   1st Qu.:1.00     1st Qu.:2020-11-09  
##  Median :2021-03-21   Median :2.00     Median :2021-03-24  
##  Mean   :2021-03-23   Mean   :1.79     Mean   :2021-03-17  
##  3rd Qu.:2021-06-11   3rd Qu.:2.00     3rd Qu.:2021-06-14  
##  Max.   :9202-09-11   Max.   :9.00     Max.   :4202-05-26  
##                       NA's   :254657                       
##    DT_SAIDUTI           CLASSI_FIN    EVOLUCAO       DT_EVOLUCA        
##  Min.   :2020-02-21   Min.   :5    Min.   :1.0     Min.   :2020-02-21  
##  1st Qu.:2020-11-12   1st Qu.:5    1st Qu.:1.0     1st Qu.:2020-11-17  
##  Median :2021-03-26   Median :5    Median :1.0     Median :2021-03-31  
##  Mean   :2021-03-18   Mean   :5    Mean   :1.5     Mean   :2021-03-23  
##  3rd Qu.:2021-06-16   3rd Qu.:5    3rd Qu.:2.0     3rd Qu.:2021-06-21  
##  Max.   :2121-03-13   Max.   :5    Max.   :9.0     Max.   :2022-12-04  
##                                    NA's   :99481                       
##    NU_IDADE_N       NOSOCOMIAL       AVE_SUINO          FEBRE       
##  Min.   : -9.00   Min.   :1.0      Min.   :1.0      Min.   :1.0     
##  1st Qu.: 45.00   1st Qu.:2.0      1st Qu.:2.0      1st Qu.:1.0     
##  Median : 59.00   Median :2.0      Median :2.0      Median :1.0     
##  Mean   : 57.94   Mean   :2.6      Mean   :3.3      Mean   :1.4     
##  3rd Qu.: 72.00   3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :150.00   Max.   :9.0      Max.   :9.0      Max.   :9.0     
##                   NA's   :375647   NA's   :377867   NA's   :341238  
##      TOSSE           GARGANTA         DISPNEIA        DESC_RESP     
##  Min.   :1.0      Min.   :1        Min.   :1.00     Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:1.00     1st Qu.:1.0     
##  Median :1.0      Median :2        Median :1.00     Median :1.0     
##  Mean   :1.3      Mean   :2        Mean   :1.28     Mean   :1.4     
##  3rd Qu.:1.0      3rd Qu.:2        3rd Qu.:1.00     3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.00     Max.   :9.0     
##  NA's   :269458   NA's   :617102   NA's   :266184   NA's   :410120  
##    SATURACAO         DIARREIA          VOMITO          DOR_ABD      
##  Min.   :1.0      Min.   :1        Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:2.0      1st Qu.:2.0     
##  Median :1.0      Median :2        Median :2.0      Median :2.0     
##  Mean   :1.4      Mean   :2        Mean   :2.1      Mean   :2.2     
##  3rd Qu.:2.0      3rd Qu.:2        3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.0      Max.   :9.0     
##  NA's   :342699   NA's   :645448   NA's   :673453   NA's   :875489  
##      FADIGA         PERD_OLFT        PERD_PALA        OUTRO_SIN     
##  Min.   :1.0      Min.   :1.0      Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2.0      1st Qu.:2.0      1st Qu.:1.0     
##  Median :2.0      Median :2.0      Median :2.0      Median :2.0     
##  Mean   :1.9      Mean   :2.1      Mean   :2.1      Mean   :1.8     
##  3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9.0      Max.   :9.0      Max.   :9.0     
##  NA's   :797981   NA's   :854395   NA's   :855749   NA's   :618960  
##   OUTRO_DES          FATOR_RISC           PUERPERA         CARDIOPATI     
##  Length:2136005     Length:2136005     Min.   :1.0       Min.   :1.0      
##  Class :character   Class :character   1st Qu.:2.0       1st Qu.:1.0      
##  Mode  :character   Mode  :character   Median :2.0       Median :1.0      
##                                        Mean   :2.2       Mean   :1.4      
##                                        3rd Qu.:2.0       3rd Qu.:2.0      
##                                        Max.   :9.0       Max.   :9.0      
##                                        NA's   :1355981   NA's   :1101849  
##    HEMATOLOGI        SIND_DOWN          HEPATICA            ASMA        
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0      
##  Median :2.0       Median :2.0       Median :2.0       Median :2.0      
##  Mean   :2.2       Mean   :2.2       Mean   :2.2       Mean   :2.1      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1353650   NA's   :1356345   NA's   :1355968   NA's   :1343897  
##     DIABETES         NEUROLOGIC        PNEUMOPATI        IMUNODEPRE     
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:1.0       1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0      
##  Median :2.0       Median :2.0       Median :2.0       Median :2.0      
##  Mean   :1.6       Mean   :2.1       Mean   :2.1       Mean   :2.1      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1175363   NA's   :1333968   NA's   :1335383   NA's   :1345752  
##      RENAL           OBESIDADE         OBES_IMC           OUT_MORBI      
##  Min.   :1.0       Min.   :1         Length:2136005     Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:2         Class :character   1st Qu.:1.0      
##  Median :2.0       Median :2         Mode  :character   Median :1.0      
##  Mean   :2.1       Mean   :2                            Mean   :1.5      
##  3rd Qu.:2.0       3rd Qu.:2                            3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9                            Max.   :9.0      
##  NA's   :1337435   NA's   :1306587                      NA's   :1176060  
##   MORB_DESC          DOSE_1_COV         DOSE_2_COV          DOSE_REF        
##  Length:2136005     Length:2136005     Length:2136005     Length:2136005    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   FAB_COVREF          ANTIVIRAL        TP_ANTIVIR       OUT_ANTIV        
##  Length:2136005     Min.   :1.0      Min.   :1.0       Length:2136005    
##  Class :character   1st Qu.:2.0      1st Qu.:1.0       Class :character  
##  Mode  :character   Median :2.0      Median :1.0       Mode  :character  
##                     Mean   :3.2      Mean   :1.2                         
##                     3rd Qu.:2.0      3rd Qu.:1.0                         
##                     Max.   :9.0      Max.   :3.0                         
##                     NA's   :348281   NA's   :2012569                     
##   DT_ANTIVIR          SUPORT_VEN       RAIOX_RES       RAIOX_OUT        
##  Length:2136005     Min.   :1.00     Min.   :1.0      Length:2136005    
##  Class :character   1st Qu.:2.00     1st Qu.:3.0      Class :character  
##  Mode  :character   Median :2.00     Median :6.0      Mode  :character  
##                     Mean   :2.26     Mean   :5.3                        
##                     3rd Qu.:2.00     3rd Qu.:6.0                        
##                     Max.   :9.00     Max.   :9.0                        
##                     NA's   :257209   NA's   :859382                     
##     TOMO_RES        TOMO_OUT            AMOSTRA        TP_AMOSTRA    
##  Min.   :1.0      Length:2136005     Min.   :1.00    Min.   :1.00    
##  1st Qu.:1.0      Class :character   1st Qu.:1.00    1st Qu.:1.00    
##  Median :1.0      Mode  :character   Median :1.00    Median :1.00    
##  Mean   :3.1                         Mean   :1.08    Mean   :1.38    
##  3rd Qu.:6.0                         3rd Qu.:1.00    3rd Qu.:1.00    
##  Max.   :9.0                         Max.   :9.00    Max.   :9.00    
##  NA's   :864905                      NA's   :71983   NA's   :185218  
##   OUT_AMOST           PCR_RESUL         DT_PCR            POS_PCRFLU     
##  Length:2136005     Min.   :1.00     Length:2136005     Min.   :1.0      
##  Class :character   1st Qu.:1.00     Class :character   1st Qu.:2.0      
##  Mode  :character   Median :1.00     Mode  :character   Median :2.0      
##                     Mean   :1.89                        Mean   :2.8      
##                     3rd Qu.:2.00                        3rd Qu.:2.0      
##                     Max.   :9.00                        Max.   :9.0      
##                     NA's   :195685                      NA's   :1320192  
##    TP_FLU_PCR        PCR_FLUASU         DT_MIN.DT_MIN    
##  Min.   :1.0       Min.   :1.0       Min.   :2020-01-05  
##  1st Qu.:1.0       1st Qu.:2.0       1st Qu.:2020-11-05  
##  Median :1.0       Median :2.0       Median :2021-03-21  
##  Mean   :1.1       Mean   :2.5       Mean   :2021-03-12  
##  3rd Qu.:1.0       3rd Qu.:3.0       3rd Qu.:2021-06-10  
##  Max.   :2.0       Max.   :6.0       Max.   :2022-12-04  
##  NA's   :2135015   NA's   :2135160

Deixando nascimento em formato de data para o R e analisando a coluna NU_IDADE_N. Podemos ver que algumas pessoas possuem idades não condizentes com a realidade, com idades menores do que 0 ou muito acima da pessoa mais velha registrada no Brasil, portanto limito a 100 anos de idade.

casos_fil_covid$DT_NASC<- as_date(casos_fil_covid$DT_NASC, format= "%d/%m/%Y")

count(casos_fil_covid, NU_IDADE_N <= 0)
## # A tibble: 2 x 2
##   `NU_IDADE_N <= 0`       n
##   <lgl>               <int>
## 1 FALSE             2134830
## 2 TRUE                 1175
count(casos_fil_covid, NU_IDADE_N >= 100)
## # A tibble: 2 x 2
##   `NU_IDADE_N >= 100`       n
##   <lgl>                 <int>
## 1 FALSE               2132815
## 2 TRUE                   3190
casos_fil_covid <- filter(casos_fil_covid, NU_IDADE_N > 0)
casos_fil_covid <- filter(casos_fil_covid, NU_IDADE_N < 100)

Criando tabelas por tipo (Possuimos muitas colunas, algumas delas com muitos nulos, portanto para verificar quais possuem impacto na mortalidade vou separar por tipo, por exemplo, demográficas, comorbidades, relacionadas a vacinação, etc.)

Demográficas e economicas

casos_fil_covid_dem <- casos_fil_covid %>% select(DT_MIN, SG_UF_NOT, ID_MUNICIP, CO_MUN_NOT, CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N, PAC_COCBO, CS_ZONA, NU_IDADE_N, EVOLUCAO)

Summary do df demográfico

summary(casos_fil_covid_dem)
##     DT_MIN.DT_MIN      SG_UF_NOT          ID_MUNICIP          CO_MUN_NOT    
##  Min.   :2020-01-05   Length:2131640     Length:2131640     Min.   :110001  
##  1st Qu.:2020-11-05   Class :character   Class :character   1st Qu.:310620  
##  Median :2021-03-21   Mode  :character   Mode  :character   Median :351880  
##  Mean   :2021-03-12                                         Mean   :344123  
##  3rd Qu.:2021-06-10                                         3rd Qu.:410690  
##  Max.   :2022-12-04                                         Max.   :530010  
##                                                                             
##    CS_SEXO            CS_GESTANT       CS_RACA        CS_ESCOL_N    
##  Length:2131640     Min.   :0.000   Min.   :1.00    Min.   :0.0     
##  Class :character   1st Qu.:5.000   1st Qu.:1.00    1st Qu.:2.0     
##  Mode  :character   Median :6.000   Median :4.00    Median :4.0     
##                     Mean   :5.779   Mean   :3.49    Mean   :5.4     
##                     3rd Qu.:6.000   3rd Qu.:4.00    3rd Qu.:9.0     
##                     Max.   :9.000   Max.   :9.00    Max.   :9.0     
##                                     NA's   :28517   NA's   :715644  
##   PAC_COCBO            CS_ZONA         NU_IDADE_N       EVOLUCAO    
##  Length:2131640     Min.   :1.00     Min.   : 1.00   Min.   :1.0    
##  Class :character   1st Qu.:1.00     1st Qu.:45.00   1st Qu.:1.0    
##  Mode  :character   Median :1.00     Median :59.00   Median :1.0    
##                     Mean   :1.15     Mean   :57.91   Mean   :1.5    
##                     3rd Qu.:1.00     3rd Qu.:72.00   3rd Qu.:2.0    
##                     Max.   :9.00     Max.   :99.00   Max.   :9.0    
##                     NA's   :232617                   NA's   :99271

Estruturando em one hot encoded para poder fazer a regressão logit.

casos_fil_covid_dem <- dummy_cols(casos_fil_covid_dem, select_columns = c('CS_SEXO', 'CS_GESTANT', 'CS_RACA', 'CS_ZONA'),
           remove_selected_columns = TRUE)

Transformando a variável EVOLUCAO em dummy, 1 = sobreviveu, 0 = óbito.

casos_fil_covid_dem$EVOLUCAO <- if_else(casos_fil_covid_dem$EVOLUCAO == 1 , 1, 0)

Porcentagem de nulos na coluna EVOLUCAO.

sum(is.na(casos_fil_covid$EVOLUCAO)) / length(casos_fil_covid$EVOLUCAO)
## [1] 0.04657025

Variáveis selecionadas: CS_SEX_F = se é mulher ou não, CS_RACA_2 = se é preto ou não, CS_ZONA_2 = se é rural ou não, NU_IDADE_N = idade da pessoa. EVOLUCAO = 1 para sobreviveu, 0 para óbito. Recordo também que o comando glm omite as linhas com valores nulos.

casos_fil_covid.logit <- glm(formula = EVOLUCAO ~   CS_SEXO_F + CS_RACA_2  + CS_ZONA_2 + NU_IDADE_N, family = binomial(link = "logit"), 
    data = casos_fil_covid_dem)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ CS_SEXO_F + CS_RACA_2 + CS_ZONA_2 + 
##     NU_IDADE_N, family = binomial(link = "logit"), data = casos_fil_covid_dem)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4794  -1.1620   0.6629   0.9259   1.7909  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.921e+00  6.452e-03  452.77   <2e-16 ***
## CS_SEXO_F    1.441e-01  3.330e-03   43.26   <2e-16 ***
## CS_RACA_2   -3.064e-01  7.815e-03  -39.20   <2e-16 ***
## CS_ZONA_2   -1.410e-01  7.443e-03  -18.95   <2e-16 ***
## NU_IDADE_N  -3.892e-02  9.946e-05 -391.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2322208  on 1795884  degrees of freedom
## Residual deviance: 2140628  on 1795880  degrees of freedom
##   (335755 observations deleted due to missingness)
## AIC: 2140638
## 
## Number of Fisher Scoring iterations: 3

Optimizando o código

rm(casos_fil_covid_dem)
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   3337778  178.3    7418824  396.3   3390814  181.1
## Vcells 247414861 1887.7  632284791 4824.0 660739455 5041.1

Efeito vacina, regresão logit

casos_fil_covid_vacina <- casos_fil_covid %>% select(DT_MIN, VACINA_COV, VACINA, NU_IDADE_N, EVOLUCAO)

Summary do df vacina

summary(casos_fil_covid_vacina)
##     DT_MIN.DT_MIN       VACINA_COV         VACINA         NU_IDADE_N   
##  Min.   :2020-01-05   Min.   :1.0      Min.   :1.0      Min.   : 1.00  
##  1st Qu.:2020-11-05   1st Qu.:2.0      1st Qu.:2.0      1st Qu.:45.00  
##  Median :2021-03-21   Median :2.0      Median :2.0      Median :59.00  
##  Mean   :2021-03-12   Mean   :2.6      Mean   :5.1      Mean   :57.91  
##  3rd Qu.:2021-06-10   3rd Qu.:2.0      3rd Qu.:9.0      3rd Qu.:72.00  
##  Max.   :2022-12-04   Max.   :9.0      Max.   :9.0      Max.   :99.00  
##                       NA's   :325989   NA's   :526559                  
##     EVOLUCAO    
##  Min.   :1.0    
##  1st Qu.:1.0    
##  Median :1.0    
##  Mean   :1.5    
##  3rd Qu.:2.0    
##  Max.   :9.0    
##  NA's   :99271

Estruturando em one hot encoded para poder fazer a regressão logit.

casos_fil_covid_vacina <- dummy_cols(casos_fil_covid_vacina, select_columns = c('VACINA_COV', 'VACINA'),
           remove_selected_columns = TRUE)

Transformando a variável EVOLUCAO em dummy, 1 = sobreviveu, 0 = óbito.

casos_fil_covid_vacina$EVOLUCAO <- if_else(casos_fil_covid_vacina$EVOLUCAO == 1 , 1, 0)

Porcentagem de nulos na coluna EVOLUCAO.

sum(is.na(casos_fil_covid_vacina$EVOLUCAO)) / length(casos_fil_covid_vacina$EVOLUCAO)
## [1] 0.04657025

Variáveis selecionadas: CS_SEX_F = se é mulher ou não, CS_RACA_2 = se é preto ou não, CS_ZONA_2 = se é rural ou não, NU_IDADE_N = idade da pessoa. EVOLUCAO = 1 para sobreviveu, 0 para óbito. Recordo também que o comando glm omite as linhas com valores nulos.

casos_fil_covid.logit <- glm(formula = EVOLUCAO ~   VACINA_COV_1 + VACINA_1 + NU_IDADE_N, family = binomial(link = "logit"), 
    data = casos_fil_covid_vacina)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ VACINA_COV_1 + VACINA_1 + NU_IDADE_N, 
##     family = binomial(link = "logit"), data = casos_fil_covid_vacina)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7335  -1.1587   0.6586   0.9106   1.6606  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.0475995  0.0074691  408.03   <2e-16 ***
## VACINA_COV_1  0.3423209  0.0047937   71.41   <2e-16 ***
## VACINA_1      0.3637069  0.0057864   62.85   <2e-16 ***
## NU_IDADE_N   -0.0417804  0.0001196 -349.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1755431  on 1364590  degrees of freedom
## Residual deviance: 1610957  on 1364587  degrees of freedom
##   (767049 observations deleted due to missingness)
## AIC: 1610965
## 
## Number of Fisher Scoring iterations: 3

Análise dos sintomas

rm(casos_fil_covid_vacina)
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   2907351  155.3    7418824  396.3   3545382  189.4
## Vcells 210745087 1607.9  632284791 4824.0 660739455 5041.1
casos_fil_covid_sintomas <- casos_fil_covid %>% select(DT_MIN, NU_IDADE_N, EVOLUCAO,FEBRE, TOSSE, GARGANTA, DISPNEIA, DESC_RESP, SATURACAO, DIARREIA, VOMITO, DOR_ABD, FADIGA, PERD_OLFT, PERD_PALA)
summary(casos_fil_covid_sintomas)
##     DT_MIN.DT_MIN       NU_IDADE_N       EVOLUCAO         FEBRE       
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :1.0     Min.   :1.0     
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:1.0     1st Qu.:1.0     
##  Median :2021-03-21   Median :59.00   Median :1.0     Median :1.0     
##  Mean   :2021-03-12   Mean   :57.91   Mean   :1.5     Mean   :1.4     
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:2.0     3rd Qu.:2.0     
##  Max.   :2022-12-04   Max.   :99.00   Max.   :9.0     Max.   :9.0     
##                                       NA's   :99271   NA's   :340351  
##      TOSSE           GARGANTA         DISPNEIA        DESC_RESP     
##  Min.   :1.0      Min.   :1        Min.   :1.00     Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:1.00     1st Qu.:1.0     
##  Median :1.0      Median :2        Median :1.00     Median :1.0     
##  Mean   :1.3      Mean   :2        Mean   :1.28     Mean   :1.4     
##  3rd Qu.:1.0      3rd Qu.:2        3rd Qu.:1.00     3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.00     Max.   :9.0     
##  NA's   :268697   NA's   :615677   NA's   :265435   NA's   :409211  
##    SATURACAO         DIARREIA          VOMITO          DOR_ABD      
##  Min.   :1.0      Min.   :1        Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2        1st Qu.:2.0      1st Qu.:2.0     
##  Median :1.0      Median :2        Median :2.0      Median :2.0     
##  Mean   :1.4      Mean   :2        Mean   :2.1      Mean   :2.2     
##  3rd Qu.:2.0      3rd Qu.:2        3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9        Max.   :9.0      Max.   :9.0     
##  NA's   :341842   NA's   :643986   NA's   :671957   NA's   :873571  
##      FADIGA         PERD_OLFT        PERD_PALA     
##  Min.   :1.0      Min.   :1.0      Min.   :1.0     
##  1st Qu.:1.0      1st Qu.:2.0      1st Qu.:2.0     
##  Median :2.0      Median :2.0      Median :2.0     
##  Mean   :1.9      Mean   :2.1      Mean   :2.1     
##  3rd Qu.:2.0      3rd Qu.:2.0      3rd Qu.:2.0     
##  Max.   :9.0      Max.   :9.0      Max.   :9.0     
##  NA's   :796163   NA's   :852465   NA's   :853816

Criando as dummies

casos_fil_covid_sintomas <- dummy_cols(casos_fil_covid_sintomas, select_columns = c("FEBRE" , "TOSSE", "GARGANTA", "DISPNEIA", "DESC_RESP", "SATURACAO",  "DIARREIA", "VOMITO", "DOR_ABD",  "FADIGA", "PERD_OLFT", "PERD_PALA" ),
           remove_selected_columns = TRUE)

Transformando a variável EVOLUCAO em dummy, 1 = sobreviveu, 0 = óbito.

casos_fil_covid_sintomas$EVOLUCAO <- if_else(casos_fil_covid_sintomas$EVOLUCAO == 1 , 1, 0)

Logit dos sintomas

casos_fil_covid.logit <- glm(formula = EVOLUCAO ~  FEBRE_1 + TOSSE_1 + GARGANTA_1 + DISPNEIA_1,
           family = binomial(link = "logit"), 
    data = casos_fil_covid_sintomas)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ FEBRE_1 + TOSSE_1 + GARGANTA_1 + DISPNEIA_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_sintomas)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7954  -1.3919   0.8600   0.9164   1.1066  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.753840   0.004675  161.24   <2e-16 ***
## FEBRE_1      0.159034   0.003825   41.58   <2e-16 ***
## TOSSE_1      0.322382   0.004182   77.09   <2e-16 ***
## GARGANTA_1   0.153917   0.004632   33.23   <2e-16 ***
## DISPNEIA_1  -0.584917   0.004365 -134.00   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1786226  on 1394858  degrees of freedom
## Residual deviance: 1758084  on 1394854  degrees of freedom
##   (736781 observations deleted due to missingness)
## AIC: 1758094
## 
## Number of Fisher Scoring iterations: 4

Tenho de separar em duas regressões, pois o R não imprime todos os valores, tem um limite a analisar.

casos_fil_covid.logit <- glm(formula = EVOLUCAO ~ DESC_RESP_1 + SATURACAO_1 + DIARREIA_1 + VOMITO_1 + DOR_ABD_1 +  FADIGA_1 + PERD_OLFT_1 + PERD_PALA_1,
           family = binomial(link = "logit"), 
    data = casos_fil_covid_sintomas)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ DESC_RESP_1 + SATURACAO_1 + DIARREIA_1 + 
##     VOMITO_1 + DOR_ABD_1 + FADIGA_1 + PERD_OLFT_1 + PERD_PALA_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_sintomas)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0685  -1.3002   0.7509   0.9405   1.0602  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.121971   0.004279 262.218   <2e-16 ***
## DESC_RESP_1 -0.375743   0.004563 -82.343   <2e-16 ***
## SATURACAO_1 -0.462214   0.004832 -95.653   <2e-16 ***
## DIARREIA_1   0.169312   0.006165  27.464   <2e-16 ***
## VOMITO_1     0.105631   0.007476  14.130   <2e-16 ***
## DOR_ABD_1   -0.002028   0.008309  -0.244    0.807    
## FADIGA_1     0.133151   0.004603  28.929   <2e-16 ***
## PERD_OLFT_1  0.227462   0.010308  22.066   <2e-16 ***
## PERD_PALA_1  0.256625   0.010236  25.072   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1483778  on 1161598  degrees of freedom
## Residual deviance: 1451780  on 1161590  degrees of freedom
##   (970041 observations deleted due to missingness)
## AIC: 1451798
## 
## Number of Fisher Scoring iterations: 4

Análise das comorbidades

rm(casos_fil_covid_sintomas)
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells   2704277  144.5    7418824  396.3   3545382  189.4
## Vcells 258063290 1968.9  632612713 4826.5 660739455 5041.1
casos_fil_covid_comorbidades <- casos_fil_covid %>% select(DT_MIN, NU_IDADE_N, EVOLUCAO, FATOR_RISC, PUERPERA, CARDIOPATI, HEMATOLOGI, SIND_DOWN, HEPATICA, ASMA, DIABETES, NEUROLOGIC, PNEUMOPATI, IMUNODEPRE, RENAL, OBESIDADE)

summary(casos_fil_covid_comorbidades)
##     DT_MIN.DT_MIN       NU_IDADE_N       EVOLUCAO      FATOR_RISC       
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :1.0     Length:2131640    
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:1.0     Class :character  
##  Median :2021-03-21   Median :59.00   Median :1.0     Mode  :character  
##  Mean   :2021-03-12   Mean   :57.91   Mean   :1.5                       
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:2.0                       
##  Max.   :2022-12-04   Max.   :99.00   Max.   :9.0                       
##                                       NA's   :99271                     
##     PUERPERA         CARDIOPATI        HEMATOLOGI        SIND_DOWN      
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:1.0       1st Qu.:2.0       1st Qu.:2.0      
##  Median :2.0       Median :1.0       Median :2.0       Median :2.0      
##  Mean   :2.2       Mean   :1.4       Mean   :2.2       Mean   :2.2      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1352981   NA's   :1099368   NA's   :1350649   NA's   :1353343  
##     HEPATICA            ASMA            DIABETES         NEUROLOGIC     
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1.0      
##  1st Qu.:2.0       1st Qu.:2.0       1st Qu.:1.0       1st Qu.:2.0      
##  Median :2.0       Median :2.0       Median :2.0       Median :2.0      
##  Mean   :2.2       Mean   :2.1       Mean   :1.6       Mean   :2.1      
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0      
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9.0      
##  NA's   :1352960   NA's   :1340898   NA's   :1172542   NA's   :1331091  
##    PNEUMOPATI        IMUNODEPRE          RENAL           OBESIDADE      
##  Min.   :1.0       Min.   :1.0       Min.   :1.0       Min.   :1        
##  1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2.0       1st Qu.:2        
##  Median :2.0       Median :2.0       Median :2.0       Median :2        
##  Mean   :2.1       Mean   :2.1       Mean   :2.1       Mean   :2        
##  3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2.0       3rd Qu.:2        
##  Max.   :9.0       Max.   :9.0       Max.   :9.0       Max.   :9        
##  NA's   :1332442   NA's   :1342756   NA's   :1334452   NA's   :1303592

Criando as dummies

casos_fil_covid_comorbidades <- dummy_cols(casos_fil_covid_comorbidades, select_columns = c('FATOR_RISC', 'PUERPERA', 'CARDIOPATI', 'HEMATOLOGI', 'SIND_DOWN', 'HEPATICA', 'ASMA', 'DIABETES', 'NEUROLOGIC', 'PNEUMOPATI', 'IMUNODEPRE', 'RENAL', 'OBESIDADE'),
           remove_selected_columns = TRUE)

casos_fil_covid_comorbidades$EVOLUCAO <- if_else(casos_fil_covid_comorbidades$EVOLUCAO == 1 , 1, 0)

summary(casos_fil_covid_comorbidades)
##      DT_MIN             NU_IDADE_N       EVOLUCAO      FATOR_RISC_1   
##  Min.   :2020-01-05   Min.   : 1.00   Min.   :0.00    Min.   :0.0000  
##  1st Qu.:2020-11-05   1st Qu.:45.00   1st Qu.:0.00    1st Qu.:0.0000  
##  Median :2021-03-21   Median :59.00   Median :1.00    Median :0.0000  
##  Mean   :2021-03-12   Mean   :57.91   Mean   :0.65    Mean   :0.3899  
##  3rd Qu.:2021-06-10   3rd Qu.:72.00   3rd Qu.:1.00    3rd Qu.:1.0000  
##  Max.   :2022-12-04   Max.   :99.00   Max.   :1.00    Max.   :1.0000  
##                                       NA's   :99271                   
##   FATOR_RISC_2     FATOR_RISC_N     FATOR_RISC_S      PUERPERA_1     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0        
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0        
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0        
##  Mean   :0.2743   Mean   :0.1229   Mean   :0.2128   Mean   :0        
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0        
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1        
##                                                     NA's   :1352981  
##    PUERPERA_2        PUERPERA_9       PUERPERA_NA      CARDIOPATI_1    
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1         Median :0         Median :1.0000   Median :1.0      
##  Mean   :1         Mean   :0         Mean   :0.6347   Mean   :0.6      
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:1.0      
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1352981   NA's   :1352981                    NA's   :1099368  
##   CARDIOPATI_2      CARDIOPATI_9     CARDIOPATI_NA     HEMATOLOGI_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0        
##  1st Qu.:0.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0        
##  Median :0.0       Median :0         Median :1.0000   Median :0        
##  Mean   :0.3       Mean   :0         Mean   :0.5157   Mean   :0        
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0        
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1        
##  NA's   :1099368   NA's   :1099368                    NA's   :1350649  
##   HEMATOLOGI_2      HEMATOLOGI_9     HEMATOLOGI_NA     SIND_DOWN_1     
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0        
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0        
##  Median :1         Median :0         Median :1.0000   Median :0        
##  Mean   :1         Mean   :0         Mean   :0.6336   Mean   :0        
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0        
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1        
##  NA's   :1350649   NA's   :1350649                    NA's   :1353343  
##   SIND_DOWN_2       SIND_DOWN_9       SIND_DOWN_NA      HEPATICA_1     
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0        
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0        
##  Median :1         Median :0         Median :1.0000   Median :0        
##  Mean   :1         Mean   :0         Mean   :0.6349   Mean   :0        
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0        
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1        
##  NA's   :1353343   NA's   :1353343                    NA's   :1352960  
##    HEPATICA_2        HEPATICA_9       HEPATICA_NA         ASMA_1       
##  Min.   :0         Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1         1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1         Median :0         Median :1.0000   Median :0.0      
##  Mean   :1         Mean   :0         Mean   :0.6347   Mean   :0.1      
##  3rd Qu.:1         3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1         Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1352960   NA's   :1352960                    NA's   :1340898  
##      ASMA_2            ASMA_9           ASMA_NA        DIABETES_1     
##  Min.   :0.0       Min.   :0         Min.   :0.000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.629   Mean   :0.5      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.000   3rd Qu.:1.0      
##  Max.   :1.0       Max.   :1         Max.   :1.000   Max.   :1.0      
##  NA's   :1340898   NA's   :1340898                   NA's   :1172542  
##    DIABETES_2        DIABETES_9       DIABETES_NA      NEUROLOGIC_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:0.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :0.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.5       Mean   :0         Mean   :0.5501   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1172542   NA's   :1172542                    NA's   :1331091  
##   NEUROLOGIC_2      NEUROLOGIC_9     NEUROLOGIC_NA     PNEUMOPATI_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.6244   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1331091   NA's   :1331091                    NA's   :1332442  
##   PNEUMOPATI_2      PNEUMOPATI_9     PNEUMOPATI_NA     IMUNODEPRE_1    
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.6251   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1332442   NA's   :1332442                    NA's   :1342756  
##   IMUNODEPRE_2      IMUNODEPRE_9     IMUNODEPRE_NA       RENAL_1       
##  Min.   :0.0       Min.   :0         Min.   :0.0000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.0000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.6299   Mean   :0.1      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.0000   Max.   :1.0      
##  NA's   :1342756   NA's   :1342756                    NA's   :1334452  
##     RENAL_2           RENAL_9           RENAL_NA      OBESIDADE_1     
##  Min.   :0.0       Min.   :0         Min.   :0.000   Min.   :0.0      
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.000   1st Qu.:0.0      
##  Median :1.0       Median :0         Median :1.000   Median :0.0      
##  Mean   :0.9       Mean   :0         Mean   :0.626   Mean   :0.2      
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.000   3rd Qu.:0.0      
##  Max.   :1.0       Max.   :1         Max.   :1.000   Max.   :1.0      
##  NA's   :1334452   NA's   :1334452                   NA's   :1303592  
##   OBESIDADE_2       OBESIDADE_9       OBESIDADE_NA   
##  Min.   :0.0       Min.   :0         Min.   :0.0000  
##  1st Qu.:1.0       1st Qu.:0         1st Qu.:0.0000  
##  Median :1.0       Median :0         Median :1.0000  
##  Mean   :0.8       Mean   :0         Mean   :0.6115  
##  3rd Qu.:1.0       3rd Qu.:0         3rd Qu.:1.0000  
##  Max.   :1.0       Max.   :1         Max.   :1.0000  
##  NA's   :1303592   NA's   :1303592

Logit comorbidades

casos_fil_covid.logit <- glm(formula = EVOLUCAO ~ FATOR_RISC_1 + PUERPERA_1 + CARDIOPATI_1 + HEMATOLOGI_1 + SIND_DOWN_1 + HEPATICA_1 + ASMA_1 + DIABETES_1 + NEUROLOGIC_1 + PNEUMOPATI_1 + IMUNODEPRE_1 + RENAL_1 + OBESIDADE_1 ,
           family = binomial(link = "logit"), 
    data = casos_fil_covid_comorbidades)
summary(casos_fil_covid.logit)
## 
## Call:
## glm(formula = EVOLUCAO ~ FATOR_RISC_1 + PUERPERA_1 + CARDIOPATI_1 + 
##     HEMATOLOGI_1 + SIND_DOWN_1 + HEPATICA_1 + ASMA_1 + DIABETES_1 + 
##     NEUROLOGIC_1 + PNEUMOPATI_1 + IMUNODEPRE_1 + RENAL_1 + OBESIDADE_1, 
##     family = binomial(link = "logit"), data = casos_fil_covid_comorbidades)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0496  -1.3077   0.8999   0.9911   2.2419  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.761552   0.005652 134.747  < 2e-16 ***
## FATOR_RISC_1 -0.066832   0.005127 -13.035  < 2e-16 ***
## PUERPERA_1    0.777301   0.041216  18.859  < 2e-16 ***
## CARDIOPATI_1 -0.221110   0.004977 -44.429  < 2e-16 ***
## HEMATOLOGI_1 -0.110694   0.022751  -4.865 1.14e-06 ***
## SIND_DOWN_1  -0.002804   0.034751  -0.081    0.936    
## HEPATICA_1   -0.447586   0.021010 -21.304  < 2e-16 ***
## ASMA_1        0.431097   0.013041  33.056  < 2e-16 ***
## DIABETES_1   -0.239192   0.005109 -46.820  < 2e-16 ***
## NEUROLOGIC_1 -0.564989   0.009985 -56.585  < 2e-16 ***
## PNEUMOPATI_1 -0.502164   0.010495 -47.850  < 2e-16 ***
## IMUNODEPRE_1 -0.404165   0.012380 -32.647  < 2e-16 ***
## RENAL_1      -0.591436   0.010353 -57.130  < 2e-16 ***
## OBESIDADE_1  -0.041856   0.007138  -5.863 4.53e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 954775  on 708893  degrees of freedom
## Residual deviance: 937544  on 708880  degrees of freedom
##   (1422746 observations deleted due to missingness)
## AIC: 937572
## 
## Number of Fisher Scoring iterations: 4