Criando tabela casos
Filtrando somente casos de covid
# Keep only the rows whose final-classification code (CLASSI_FIN) equals 5,
# which the section heading identifies as the COVID cases.
casos_fil_covid <- casos_fil %>%
  filter(CLASSI_FIN == 5)

# Drop the unfiltered table and run the garbage collector so its memory can be
# reclaimed before the heavier steps below.
rm(casos_fil)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1448919 77.4 2705755 144.6 2394751 127.9
## Vcells 167960362 1281.5 632284791 4824.0 660739455 5041.1
Criando a coluna menor data
Summary do df
# Print per-column summary statistics (type, range, quartiles, NA counts) of
# the COVID-only table; useful for spotting out-of-range values and sparsely
# filled columns before modelling.
summary(casos_fil_covid)
## DT_NOTIFIC SEM_NOT SG_UF_NOT ID_MUNICIP
## Min. :2020-02-21 Min. : 1.00 Length:2136005 Length:2136005
## 1st Qu.:2020-11-11 1st Qu.:12.00 Class :character Class :character
## Median :2021-03-24 Median :21.00 Mode :character Mode :character
## Mean :2021-03-17 Mean :22.53
## 3rd Qu.:2021-06-15 3rd Qu.:30.00
## Max. :2022-12-09 Max. :53.00
##
## CO_MUN_NOT CS_SEXO DT_NASC CS_GESTANT
## Min. :110001 Length:2136005 Length:2136005 Min. :0.000
## 1st Qu.:310620 Class :character Class :character 1st Qu.:5.000
## Median :351880 Mode :character Mode :character Median :6.000
## Mean :344069 Mean :5.779
## 3rd Qu.:410690 3rd Qu.:6.000
## Max. :530010 Max. :9.000
##
## CS_RACA CS_ESCOL_N PAC_COCBO CS_ZONA
## Min. :1.00 Min. :0.0 Length:2136005 Min. :1.00
## 1st Qu.:1.00 1st Qu.:2.0 Class :character 1st Qu.:1.00
## Median :4.00 Median :4.0 Mode :character Median :1.00
## Mean :3.49 Mean :5.4 Mean :1.15
## 3rd Qu.:4.00 3rd Qu.:9.0 3rd Qu.:1.00
## Max. :9.00 Max. :9.0 Max. :9.00
## NA's :28576 NA's :716885 NA's :233075
## VACINA_COV VACINA DT_UT_DOSE HOSPITAL
## Min. :1.0 Min. :1.0 Length:2136005 Min. :1.00
## 1st Qu.:2.0 1st Qu.:2.0 Class :character 1st Qu.:1.00
## Median :2.0 Median :2.0 Mode :character Median :1.00
## Mean :2.6 Mean :5.1 Mean :1.04
## 3rd Qu.:2.0 3rd Qu.:9.0 3rd Qu.:1.00
## Max. :9.0 Max. :9.0 Max. :9.00
## NA's :326476 NA's :528302 NA's :45286
## DT_INTERNA UTI DT_ENTUTI
## Min. :2020-01-05 Min. :1.00 Min. :2020-01-05
## 1st Qu.:2020-11-06 1st Qu.:1.00 1st Qu.:2020-11-09
## Median :2021-03-21 Median :2.00 Median :2021-03-24
## Mean :2021-03-23 Mean :1.79 Mean :2021-03-17
## 3rd Qu.:2021-06-11 3rd Qu.:2.00 3rd Qu.:2021-06-14
## Max. :9202-09-11 Max. :9.00 Max. :4202-05-26
## NA's :254657
## DT_SAIDUTI CLASSI_FIN EVOLUCAO DT_EVOLUCA
## Min. :2020-02-21 Min. :5 Min. :1.0 Min. :2020-02-21
## 1st Qu.:2020-11-12 1st Qu.:5 1st Qu.:1.0 1st Qu.:2020-11-17
## Median :2021-03-26 Median :5 Median :1.0 Median :2021-03-31
## Mean :2021-03-18 Mean :5 Mean :1.5 Mean :2021-03-23
## 3rd Qu.:2021-06-16 3rd Qu.:5 3rd Qu.:2.0 3rd Qu.:2021-06-21
## Max. :2121-03-13 Max. :5 Max. :9.0 Max. :2022-12-04
## NA's :99481
## NU_IDADE_N NOSOCOMIAL AVE_SUINO FEBRE
## Min. : -9.00 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.: 45.00 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:1.0
## Median : 59.00 Median :2.0 Median :2.0 Median :1.0
## Mean : 57.94 Mean :2.6 Mean :3.3 Mean :1.4
## 3rd Qu.: 72.00 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :150.00 Max. :9.0 Max. :9.0 Max. :9.0
## NA's :375647 NA's :377867 NA's :341238
## TOSSE GARGANTA DISPNEIA DESC_RESP
## Min. :1.0 Min. :1 Min. :1.00 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2 1st Qu.:1.00 1st Qu.:1.0
## Median :1.0 Median :2 Median :1.00 Median :1.0
## Mean :1.3 Mean :2 Mean :1.28 Mean :1.4
## 3rd Qu.:1.0 3rd Qu.:2 3rd Qu.:1.00 3rd Qu.:2.0
## Max. :9.0 Max. :9 Max. :9.00 Max. :9.0
## NA's :269458 NA's :617102 NA's :266184 NA's :410120
## SATURACAO DIARREIA VOMITO DOR_ABD
## Min. :1.0 Min. :1 Min. :1.0 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2 1st Qu.:2.0 1st Qu.:2.0
## Median :1.0 Median :2 Median :2.0 Median :2.0
## Mean :1.4 Mean :2 Mean :2.1 Mean :2.2
## 3rd Qu.:2.0 3rd Qu.:2 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9 Max. :9.0 Max. :9.0
## NA's :342699 NA's :645448 NA's :673453 NA's :875489
## FADIGA PERD_OLFT PERD_PALA OUTRO_SIN
## Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:1.0
## Median :2.0 Median :2.0 Median :2.0 Median :2.0
## Mean :1.9 Mean :2.1 Mean :2.1 Mean :1.8
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0 Max. :9.0 Max. :9.0
## NA's :797981 NA's :854395 NA's :855749 NA's :618960
## OUTRO_DES FATOR_RISC PUERPERA CARDIOPATI
## Length:2136005 Length:2136005 Min. :1.0 Min. :1.0
## Class :character Class :character 1st Qu.:2.0 1st Qu.:1.0
## Mode :character Mode :character Median :2.0 Median :1.0
## Mean :2.2 Mean :1.4
## 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0
## NA's :1355981 NA's :1101849
## HEMATOLOGI SIND_DOWN HEPATICA ASMA
## Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:2.0
## Median :2.0 Median :2.0 Median :2.0 Median :2.0
## Mean :2.2 Mean :2.2 Mean :2.2 Mean :2.1
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0 Max. :9.0 Max. :9.0
## NA's :1353650 NA's :1356345 NA's :1355968 NA's :1343897
## DIABETES NEUROLOGIC PNEUMOPATI IMUNODEPRE
## Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:2.0
## Median :2.0 Median :2.0 Median :2.0 Median :2.0
## Mean :1.6 Mean :2.1 Mean :2.1 Mean :2.1
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0 Max. :9.0 Max. :9.0
## NA's :1175363 NA's :1333968 NA's :1335383 NA's :1345752
## RENAL OBESIDADE OBES_IMC OUT_MORBI
## Min. :1.0 Min. :1 Length:2136005 Min. :1.0
## 1st Qu.:2.0 1st Qu.:2 Class :character 1st Qu.:1.0
## Median :2.0 Median :2 Mode :character Median :1.0
## Mean :2.1 Mean :2 Mean :1.5
## 3rd Qu.:2.0 3rd Qu.:2 3rd Qu.:2.0
## Max. :9.0 Max. :9 Max. :9.0
## NA's :1337435 NA's :1306587 NA's :1176060
## MORB_DESC DOSE_1_COV DOSE_2_COV DOSE_REF
## Length:2136005 Length:2136005 Length:2136005 Length:2136005
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## FAB_COVREF ANTIVIRAL TP_ANTIVIR OUT_ANTIV
## Length:2136005 Min. :1.0 Min. :1.0 Length:2136005
## Class :character 1st Qu.:2.0 1st Qu.:1.0 Class :character
## Mode :character Median :2.0 Median :1.0 Mode :character
## Mean :3.2 Mean :1.2
## 3rd Qu.:2.0 3rd Qu.:1.0
## Max. :9.0 Max. :3.0
## NA's :348281 NA's :2012569
## DT_ANTIVIR SUPORT_VEN RAIOX_RES RAIOX_OUT
## Length:2136005 Min. :1.00 Min. :1.0 Length:2136005
## Class :character 1st Qu.:2.00 1st Qu.:3.0 Class :character
## Mode :character Median :2.00 Median :6.0 Mode :character
## Mean :2.26 Mean :5.3
## 3rd Qu.:2.00 3rd Qu.:6.0
## Max. :9.00 Max. :9.0
## NA's :257209 NA's :859382
## TOMO_RES TOMO_OUT AMOSTRA TP_AMOSTRA
## Min. :1.0 Length:2136005 Min. :1.00 Min. :1.00
## 1st Qu.:1.0 Class :character 1st Qu.:1.00 1st Qu.:1.00
## Median :1.0 Mode :character Median :1.00 Median :1.00
## Mean :3.1 Mean :1.08 Mean :1.38
## 3rd Qu.:6.0 3rd Qu.:1.00 3rd Qu.:1.00
## Max. :9.0 Max. :9.00 Max. :9.00
## NA's :864905 NA's :71983 NA's :185218
## OUT_AMOST PCR_RESUL DT_PCR POS_PCRFLU
## Length:2136005 Min. :1.00 Length:2136005 Min. :1.0
## Class :character 1st Qu.:1.00 Class :character 1st Qu.:2.0
## Mode :character Median :1.00 Mode :character Median :2.0
## Mean :1.89 Mean :2.8
## 3rd Qu.:2.00 3rd Qu.:2.0
## Max. :9.00 Max. :9.0
## NA's :195685 NA's :1320192
## TP_FLU_PCR PCR_FLUASU DT_MIN.DT_MIN
## Min. :1.0 Min. :1.0 Min. :2020-01-05
## 1st Qu.:1.0 1st Qu.:2.0 1st Qu.:2020-11-05
## Median :1.0 Median :2.0 Median :2021-03-21
## Mean :1.1 Mean :2.5 Mean :2021-03-12
## 3rd Qu.:1.0 3rd Qu.:3.0 3rd Qu.:2021-06-10
## Max. :2.0 Max. :6.0 Max. :2022-12-04
## NA's :2135015 NA's :2135160
Criando tabelas por tipo (Possuímos muitas colunas, algumas delas com muitos nulos; portanto, para verificar quais têm impacto na mortalidade, vou separá-las por tipo: por exemplo, demográficas, comorbidades, relacionadas à vacinação etc.)
Demográficas e econômicas
# Subset with the columns used in the demographic/economic analysis, together
# with DT_MIN, NU_IDADE_N and EVOLUCAO (the column later used as the response
# of the logit models).
casos_fil_covid_dem <- select(
  casos_fil_covid,
  DT_MIN, SG_UF_NOT, ID_MUNICIP, CO_MUN_NOT,
  CS_SEXO, CS_GESTANT, CS_RACA, CS_ESCOL_N, PAC_COCBO, CS_ZONA,
  NU_IDADE_N, EVOLUCAO
)
Summary do df demográfico
# Print per-column summary statistics (range, quartiles, NA counts) of the
# demographic subset.
summary(casos_fil_covid_dem)
## DT_MIN.DT_MIN SG_UF_NOT ID_MUNICIP CO_MUN_NOT
## Min. :2020-01-05 Length:2131640 Length:2131640 Min. :110001
## 1st Qu.:2020-11-05 Class :character Class :character 1st Qu.:310620
## Median :2021-03-21 Mode :character Mode :character Median :351880
## Mean :2021-03-12 Mean :344123
## 3rd Qu.:2021-06-10 3rd Qu.:410690
## Max. :2022-12-04 Max. :530010
##
## CS_SEXO CS_GESTANT CS_RACA CS_ESCOL_N
## Length:2131640 Min. :0.000 Min. :1.00 Min. :0.0
## Class :character 1st Qu.:5.000 1st Qu.:1.00 1st Qu.:2.0
## Mode :character Median :6.000 Median :4.00 Median :4.0
## Mean :5.779 Mean :3.49 Mean :5.4
## 3rd Qu.:6.000 3rd Qu.:4.00 3rd Qu.:9.0
## Max. :9.000 Max. :9.00 Max. :9.0
## NA's :28517 NA's :715644
## PAC_COCBO CS_ZONA NU_IDADE_N EVOLUCAO
## Length:2131640 Min. :1.00 Min. : 1.00 Min. :1.0
## Class :character 1st Qu.:1.00 1st Qu.:45.00 1st Qu.:1.0
## Mode :character Median :1.00 Median :59.00 Median :1.0
## Mean :1.15 Mean :57.91 Mean :1.5
## 3rd Qu.:1.00 3rd Qu.:72.00 3rd Qu.:2.0
## Max. :9.00 Max. :99.00 Max. :9.0
## NA's :232617 NA's :99271
Convertendo as variáveis categóricas em one-hot encoding para poder fazer a regressão logit.
# One-hot encode the listed categorical columns: each is replaced by 0/1
# indicator columns named <column>_<level>, and the source column is dropped
# (remove_selected_columns = TRUE).
casos_fil_covid_dem <- casos_fil_covid_dem %>%
  dummy_cols(
    select_columns = c("CS_SEXO", "CS_GESTANT", "CS_RACA", "CS_ZONA"),
    remove_selected_columns = TRUE
  )
Proporção de nulos na coluna EVOLUCAO.
# Share (0-1 fraction, not a percentage) of rows whose EVOLUCAO is missing.
with(casos_fil_covid, sum(is.na(EVOLUCAO)) / length(EVOLUCAO))
## [1] 0.04657025
Otimizando o código
# The demographic subset is not used further in this section; remove it and
# run the garbage collector to release memory before building the next subset.
rm(casos_fil_covid_dem)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3337778 178.3 7418824 396.3 3390814 181.1
## Vcells 247414861 1887.7 632284791 4824.0 660739455 5041.1
Efeito da vacina, regressão logit
# Subset for the vaccine analysis: the two vaccination columns plus DT_MIN,
# NU_IDADE_N and EVOLUCAO.
casos_fil_covid_vacina <- select(
  casos_fil_covid,
  DT_MIN, VACINA_COV, VACINA, NU_IDADE_N, EVOLUCAO
)
Summary do df vacina
# Print per-column summary statistics (range, quartiles, NA counts) of the
# vaccine subset.
summary(casos_fil_covid_vacina)
## DT_MIN.DT_MIN VACINA_COV VACINA NU_IDADE_N
## Min. :2020-01-05 Min. :1.0 Min. :1.0 Min. : 1.00
## 1st Qu.:2020-11-05 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:45.00
## Median :2021-03-21 Median :2.0 Median :2.0 Median :59.00
## Mean :2021-03-12 Mean :2.6 Mean :5.1 Mean :57.91
## 3rd Qu.:2021-06-10 3rd Qu.:2.0 3rd Qu.:9.0 3rd Qu.:72.00
## Max. :2022-12-04 Max. :9.0 Max. :9.0 Max. :99.00
## NA's :325989 NA's :526559
## EVOLUCAO
## Min. :1.0
## 1st Qu.:1.0
## Median :1.0
## Mean :1.5
## 3rd Qu.:2.0
## Max. :9.0
## NA's :99271
Convertendo as variáveis categóricas em one-hot encoding para poder fazer a regressão logit.
# One-hot encode the two vaccination columns: each is replaced by 0/1
# indicator columns named <column>_<level>, and the source column is dropped.
casos_fil_covid_vacina <- casos_fil_covid_vacina %>%
  dummy_cols(
    select_columns = c("VACINA_COV", "VACINA"),
    remove_selected_columns = TRUE
  )
Proporção de nulos na coluna EVOLUCAO.
# Share (0-1 fraction, not a percentage) of rows whose EVOLUCAO is missing in
# the vaccine subset.
with(casos_fil_covid_vacina, sum(is.na(EVOLUCAO)) / length(EVOLUCAO))
## [1] 0.04657025
Análise dos sintomas
# The vaccine subset is no longer needed; remove it and run the garbage
# collector to release memory before building the symptom subset.
rm(casos_fil_covid_vacina)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2907351 155.3 7418824 396.3 3545382 189.4
## Vcells 210745087 1607.9 632284791 4824.0 660739455 5041.1
# Subset for the symptom analysis: DT_MIN, NU_IDADE_N, EVOLUCAO and the twelve
# symptom columns.
casos_fil_covid_sintomas <- select(
  casos_fil_covid,
  DT_MIN, NU_IDADE_N, EVOLUCAO,
  FEBRE, TOSSE, GARGANTA, DISPNEIA, DESC_RESP, SATURACAO,
  DIARREIA, VOMITO, DOR_ABD, FADIGA, PERD_OLFT, PERD_PALA
)

# Per-column summary statistics (range, quartiles, NA counts) of the subset.
summary(casos_fil_covid_sintomas)
## DT_MIN.DT_MIN NU_IDADE_N EVOLUCAO FEBRE
## Min. :2020-01-05 Min. : 1.00 Min. :1.0 Min. :1.0
## 1st Qu.:2020-11-05 1st Qu.:45.00 1st Qu.:1.0 1st Qu.:1.0
## Median :2021-03-21 Median :59.00 Median :1.0 Median :1.0
## Mean :2021-03-12 Mean :57.91 Mean :1.5 Mean :1.4
## 3rd Qu.:2021-06-10 3rd Qu.:72.00 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :2022-12-04 Max. :99.00 Max. :9.0 Max. :9.0
## NA's :99271 NA's :340351
## TOSSE GARGANTA DISPNEIA DESC_RESP
## Min. :1.0 Min. :1 Min. :1.00 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2 1st Qu.:1.00 1st Qu.:1.0
## Median :1.0 Median :2 Median :1.00 Median :1.0
## Mean :1.3 Mean :2 Mean :1.28 Mean :1.4
## 3rd Qu.:1.0 3rd Qu.:2 3rd Qu.:1.00 3rd Qu.:2.0
## Max. :9.0 Max. :9 Max. :9.00 Max. :9.0
## NA's :268697 NA's :615677 NA's :265435 NA's :409211
## SATURACAO DIARREIA VOMITO DOR_ABD
## Min. :1.0 Min. :1 Min. :1.0 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2 1st Qu.:2.0 1st Qu.:2.0
## Median :1.0 Median :2 Median :2.0 Median :2.0
## Mean :1.4 Mean :2 Mean :2.1 Mean :2.2
## 3rd Qu.:2.0 3rd Qu.:2 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9 Max. :9.0 Max. :9.0
## NA's :341842 NA's :643986 NA's :671957 NA's :873571
## FADIGA PERD_OLFT PERD_PALA
## Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:1.0 1st Qu.:2.0 1st Qu.:2.0
## Median :2.0 Median :2.0 Median :2.0
## Mean :1.9 Mean :2.1 Mean :2.1
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0 Max. :9.0
## NA's :796163 NA's :852465 NA's :853816
Criando as dummies
# One-hot encode every symptom column: each is replaced by 0/1 indicator
# columns named <column>_<level> (the models below use the "_1" indicators),
# and the source column is dropped.
casos_fil_covid_sintomas <- casos_fil_covid_sintomas %>%
  dummy_cols(
    select_columns = c(
      "FEBRE", "TOSSE", "GARGANTA", "DISPNEIA", "DESC_RESP", "SATURACAO",
      "DIARREIA", "VOMITO", "DOR_ABD", "FADIGA", "PERD_OLFT", "PERD_PALA"
    ),
    remove_selected_columns = TRUE
  )
Logit dos sintomas
# Logistic regression of EVOLUCAO on the "_1" indicators of the first four
# symptom columns. Each indicator contrasts level 1 with every other
# non-missing level of its column. glm drops rows with a missing value in any
# model variable, so the fit uses only the complete rows.
# NOTE(review): a binomial glm needs a 0/1 response, but no recode of EVOLUCAO
# is visible for this data frame (its summary above still ranges from 1 to 9)
# - confirm it is recoded before this point.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~ FEBRE_1 + TOSSE_1 + GARGANTA_1 + DISPNEIA_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_sintomas
)

# Coefficient table, deviances and number of rows dropped for missingness.
summary(casos_fil_covid.logit)
##
## Call:
## glm(formula = EVOLUCAO ~ FEBRE_1 + TOSSE_1 + GARGANTA_1 + DISPNEIA_1,
## family = binomial(link = "logit"), data = casos_fil_covid_sintomas)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7954 -1.3919 0.8600 0.9164 1.1066
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.753840 0.004675 161.24 <2e-16 ***
## FEBRE_1 0.159034 0.003825 41.58 <2e-16 ***
## TOSSE_1 0.322382 0.004182 77.09 <2e-16 ***
## GARGANTA_1 0.153917 0.004632 33.23 <2e-16 ***
## DISPNEIA_1 -0.584917 0.004365 -134.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1786226 on 1394858 degrees of freedom
## Residual deviance: 1758084 on 1394854 degrees of freedom
## (736781 observations deleted due to missingness)
## AIC: 1758094
##
## Number of Fisher Scoring iterations: 4
É preciso separar em duas regressões, pois o R não imprime todos os valores: há um limite na quantidade que ele consegue analisar.
# Second symptom model: logistic regression of EVOLUCAO on the "_1" indicators
# of the remaining eight symptom columns. It overwrites the previous
# casos_fil_covid.logit object. Rows with a missing value in any model
# variable are dropped by glm.
# NOTE(review): a binomial glm needs a 0/1 response, but no recode of EVOLUCAO
# is visible for this data frame - confirm it is recoded before this point.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~
    DESC_RESP_1 + SATURACAO_1 +
    DIARREIA_1 + VOMITO_1 + DOR_ABD_1 +
    FADIGA_1 + PERD_OLFT_1 + PERD_PALA_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_sintomas
)

# Coefficient table, deviances and number of rows dropped for missingness.
summary(casos_fil_covid.logit)
##
## Call:
## glm(formula = EVOLUCAO ~ DESC_RESP_1 + SATURACAO_1 + DIARREIA_1 +
## VOMITO_1 + DOR_ABD_1 + FADIGA_1 + PERD_OLFT_1 + PERD_PALA_1,
## family = binomial(link = "logit"), data = casos_fil_covid_sintomas)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0685 -1.3002 0.7509 0.9405 1.0602
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.121971 0.004279 262.218 <2e-16 ***
## DESC_RESP_1 -0.375743 0.004563 -82.343 <2e-16 ***
## SATURACAO_1 -0.462214 0.004832 -95.653 <2e-16 ***
## DIARREIA_1 0.169312 0.006165 27.464 <2e-16 ***
## VOMITO_1 0.105631 0.007476 14.130 <2e-16 ***
## DOR_ABD_1 -0.002028 0.008309 -0.244 0.807
## FADIGA_1 0.133151 0.004603 28.929 <2e-16 ***
## PERD_OLFT_1 0.227462 0.010308 22.066 <2e-16 ***
## PERD_PALA_1 0.256625 0.010236 25.072 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1483778 on 1161598 degrees of freedom
## Residual deviance: 1451780 on 1161590 degrees of freedom
## (970041 observations deleted due to missingness)
## AIC: 1451798
##
## Number of Fisher Scoring iterations: 4
Análise das comorbidades
# The symptom subset is no longer needed; remove it and run the garbage
# collector to release memory before building the comorbidity subset.
rm(casos_fil_covid_sintomas)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2704277 144.5 7418824 396.3 3545382 189.4
## Vcells 258063290 1968.9 632612713 4826.5 660739455 5041.1
# Subset for the comorbidity analysis: DT_MIN, NU_IDADE_N, EVOLUCAO, the
# overall FATOR_RISC column and the individual comorbidity columns.
casos_fil_covid_comorbidades <- select(
  casos_fil_covid,
  DT_MIN, NU_IDADE_N, EVOLUCAO,
  FATOR_RISC, PUERPERA, CARDIOPATI, HEMATOLOGI, SIND_DOWN, HEPATICA,
  ASMA, DIABETES, NEUROLOGIC, PNEUMOPATI, IMUNODEPRE, RENAL, OBESIDADE
)

# Per-column summary statistics (range, quartiles, NA counts) of the subset.
summary(casos_fil_covid_comorbidades)
## DT_MIN.DT_MIN NU_IDADE_N EVOLUCAO FATOR_RISC
## Min. :2020-01-05 Min. : 1.00 Min. :1.0 Length:2131640
## 1st Qu.:2020-11-05 1st Qu.:45.00 1st Qu.:1.0 Class :character
## Median :2021-03-21 Median :59.00 Median :1.0 Mode :character
## Mean :2021-03-12 Mean :57.91 Mean :1.5
## 3rd Qu.:2021-06-10 3rd Qu.:72.00 3rd Qu.:2.0
## Max. :2022-12-04 Max. :99.00 Max. :9.0
## NA's :99271
## PUERPERA CARDIOPATI HEMATOLOGI SIND_DOWN
## Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:2.0 1st Qu.:1.0 1st Qu.:2.0 1st Qu.:2.0
## Median :2.0 Median :1.0 Median :2.0 Median :2.0
## Mean :2.2 Mean :1.4 Mean :2.2 Mean :2.2
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0 Max. :9.0 Max. :9.0
## NA's :1352981 NA's :1099368 NA's :1350649 NA's :1353343
## HEPATICA ASMA DIABETES NEUROLOGIC
## Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:1.0 1st Qu.:2.0
## Median :2.0 Median :2.0 Median :2.0 Median :2.0
## Mean :2.2 Mean :2.1 Mean :1.6 Mean :2.1
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0
## Max. :9.0 Max. :9.0 Max. :9.0 Max. :9.0
## NA's :1352960 NA's :1340898 NA's :1172542 NA's :1331091
## PNEUMOPATI IMUNODEPRE RENAL OBESIDADE
## Min. :1.0 Min. :1.0 Min. :1.0 Min. :1
## 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:2
## Median :2.0 Median :2.0 Median :2.0 Median :2
## Mean :2.1 Mean :2.1 Mean :2.1 Mean :2
## 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2.0 3rd Qu.:2
## Max. :9.0 Max. :9.0 Max. :9.0 Max. :9
## NA's :1332442 NA's :1342756 NA's :1334452 NA's :1303592
Criando as dummies
# One-hot encode the risk-factor / comorbidity columns: each is replaced by
# 0/1 indicator columns named <column>_<level>, and the source column is
# dropped. Columns containing missing values also get a <column>_NA indicator.
casos_fil_covid_comorbidades <- dummy_cols(
  casos_fil_covid_comorbidades,
  select_columns = c(
    "FATOR_RISC", "PUERPERA", "CARDIOPATI", "HEMATOLOGI", "SIND_DOWN",
    "HEPATICA", "ASMA", "DIABETES", "NEUROLOGIC", "PNEUMOPATI",
    "IMUNODEPRE", "RENAL", "OBESIDADE"
  ),
  remove_selected_columns = TRUE
)

# Build the binary response for the logit: 1 when EVOLUCAO is code 1, 0 when
# it is code 2 or 3 (same mapping as before for those codes).
# Any other code - in particular 9, the "ignored/unknown" code of the
# SIVEP-Gripe data dictionary - and missing values become NA, so cases with an
# unknown outcome are excluded from the model instead of being counted as 0.
casos_fil_covid_comorbidades$EVOLUCAO <- case_when(
  casos_fil_covid_comorbidades$EVOLUCAO == 1 ~ 1,
  casos_fil_covid_comorbidades$EVOLUCAO %in% c(2, 3) ~ 0
)

# Check the recoded response and the generated indicator columns.
summary(casos_fil_covid_comorbidades)
## DT_MIN NU_IDADE_N EVOLUCAO FATOR_RISC_1
## Min. :2020-01-05 Min. : 1.00 Min. :0.00 Min. :0.0000
## 1st Qu.:2020-11-05 1st Qu.:45.00 1st Qu.:0.00 1st Qu.:0.0000
## Median :2021-03-21 Median :59.00 Median :1.00 Median :0.0000
## Mean :2021-03-12 Mean :57.91 Mean :0.65 Mean :0.3899
## 3rd Qu.:2021-06-10 3rd Qu.:72.00 3rd Qu.:1.00 3rd Qu.:1.0000
## Max. :2022-12-04 Max. :99.00 Max. :1.00 Max. :1.0000
## NA's :99271
## FATOR_RISC_2 FATOR_RISC_N FATOR_RISC_S PUERPERA_1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0
## Mean :0.2743 Mean :0.1229 Mean :0.2128 Mean :0
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1
## NA's :1352981
## PUERPERA_2 PUERPERA_9 PUERPERA_NA CARDIOPATI_1
## Min. :0 Min. :0 Min. :0.0000 Min. :0.0
## 1st Qu.:1 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0
## Median :1 Median :0 Median :1.0000 Median :1.0
## Mean :1 Mean :0 Mean :0.6347 Mean :0.6
## 3rd Qu.:1 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:1.0
## Max. :1 Max. :1 Max. :1.0000 Max. :1.0
## NA's :1352981 NA's :1352981 NA's :1099368
## CARDIOPATI_2 CARDIOPATI_9 CARDIOPATI_NA HEMATOLOGI_1
## Min. :0.0 Min. :0 Min. :0.0000 Min. :0
## 1st Qu.:0.0 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0
## Median :0.0 Median :0 Median :1.0000 Median :0
## Mean :0.3 Mean :0 Mean :0.5157 Mean :0
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0
## Max. :1.0 Max. :1 Max. :1.0000 Max. :1
## NA's :1099368 NA's :1099368 NA's :1350649
## HEMATOLOGI_2 HEMATOLOGI_9 HEMATOLOGI_NA SIND_DOWN_1
## Min. :0 Min. :0 Min. :0.0000 Min. :0
## 1st Qu.:1 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0
## Median :1 Median :0 Median :1.0000 Median :0
## Mean :1 Mean :0 Mean :0.6336 Mean :0
## 3rd Qu.:1 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0
## Max. :1 Max. :1 Max. :1.0000 Max. :1
## NA's :1350649 NA's :1350649 NA's :1353343
## SIND_DOWN_2 SIND_DOWN_9 SIND_DOWN_NA HEPATICA_1
## Min. :0 Min. :0 Min. :0.0000 Min. :0
## 1st Qu.:1 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0
## Median :1 Median :0 Median :1.0000 Median :0
## Mean :1 Mean :0 Mean :0.6349 Mean :0
## 3rd Qu.:1 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0
## Max. :1 Max. :1 Max. :1.0000 Max. :1
## NA's :1353343 NA's :1353343 NA's :1352960
## HEPATICA_2 HEPATICA_9 HEPATICA_NA ASMA_1
## Min. :0 Min. :0 Min. :0.0000 Min. :0.0
## 1st Qu.:1 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0
## Median :1 Median :0 Median :1.0000 Median :0.0
## Mean :1 Mean :0 Mean :0.6347 Mean :0.1
## 3rd Qu.:1 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0.0
## Max. :1 Max. :1 Max. :1.0000 Max. :1.0
## NA's :1352960 NA's :1352960 NA's :1340898
## ASMA_2 ASMA_9 ASMA_NA DIABETES_1
## Min. :0.0 Min. :0 Min. :0.000 Min. :0.0
## 1st Qu.:1.0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.0
## Median :1.0 Median :0 Median :1.000 Median :0.0
## Mean :0.9 Mean :0 Mean :0.629 Mean :0.5
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:1.0
## Max. :1.0 Max. :1 Max. :1.000 Max. :1.0
## NA's :1340898 NA's :1340898 NA's :1172542
## DIABETES_2 DIABETES_9 DIABETES_NA NEUROLOGIC_1
## Min. :0.0 Min. :0 Min. :0.0000 Min. :0.0
## 1st Qu.:0.0 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0
## Median :0.0 Median :0 Median :1.0000 Median :0.0
## Mean :0.5 Mean :0 Mean :0.5501 Mean :0.1
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0.0
## Max. :1.0 Max. :1 Max. :1.0000 Max. :1.0
## NA's :1172542 NA's :1172542 NA's :1331091
## NEUROLOGIC_2 NEUROLOGIC_9 NEUROLOGIC_NA PNEUMOPATI_1
## Min. :0.0 Min. :0 Min. :0.0000 Min. :0.0
## 1st Qu.:1.0 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0
## Median :1.0 Median :0 Median :1.0000 Median :0.0
## Mean :0.9 Mean :0 Mean :0.6244 Mean :0.1
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0.0
## Max. :1.0 Max. :1 Max. :1.0000 Max. :1.0
## NA's :1331091 NA's :1331091 NA's :1332442
## PNEUMOPATI_2 PNEUMOPATI_9 PNEUMOPATI_NA IMUNODEPRE_1
## Min. :0.0 Min. :0 Min. :0.0000 Min. :0.0
## 1st Qu.:1.0 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0
## Median :1.0 Median :0 Median :1.0000 Median :0.0
## Mean :0.9 Mean :0 Mean :0.6251 Mean :0.1
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0.0
## Max. :1.0 Max. :1 Max. :1.0000 Max. :1.0
## NA's :1332442 NA's :1332442 NA's :1342756
## IMUNODEPRE_2 IMUNODEPRE_9 IMUNODEPRE_NA RENAL_1
## Min. :0.0 Min. :0 Min. :0.0000 Min. :0.0
## 1st Qu.:1.0 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0
## Median :1.0 Median :0 Median :1.0000 Median :0.0
## Mean :0.9 Mean :0 Mean :0.6299 Mean :0.1
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0.0
## Max. :1.0 Max. :1 Max. :1.0000 Max. :1.0
## NA's :1342756 NA's :1342756 NA's :1334452
## RENAL_2 RENAL_9 RENAL_NA OBESIDADE_1
## Min. :0.0 Min. :0 Min. :0.000 Min. :0.0
## 1st Qu.:1.0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.0
## Median :1.0 Median :0 Median :1.000 Median :0.0
## Mean :0.9 Mean :0 Mean :0.626 Mean :0.2
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.0
## Max. :1.0 Max. :1 Max. :1.000 Max. :1.0
## NA's :1334452 NA's :1334452 NA's :1303592
## OBESIDADE_2 OBESIDADE_9 OBESIDADE_NA
## Min. :0.0 Min. :0 Min. :0.0000
## 1st Qu.:1.0 1st Qu.:0 1st Qu.:0.0000
## Median :1.0 Median :0 Median :1.0000
## Mean :0.8 Mean :0 Mean :0.6115
## 3rd Qu.:1.0 3rd Qu.:0 3rd Qu.:1.0000
## Max. :1.0 Max. :1 Max. :1.0000
## NA's :1303592 NA's :1303592
Logit comorbidades
# Logistic regression of the recoded EVOLUCAO (1 = original code 1) on the
# "_1" indicators of the risk-factor and comorbidity columns. It overwrites
# the previous casos_fil_covid.logit object. glm drops rows with a missing
# value in any model variable, so only rows complete on every indicator are
# used.
# NOTE(review): FATOR_RISC also produced FATOR_RISC_S and FATOR_RISC_N
# indicators (see the summary above). If "S" carries the same meaning as
# code 1, FATOR_RISC_1 alone undercounts - confirm against the data
# dictionary.
casos_fil_covid.logit <- glm(
  formula = EVOLUCAO ~
    FATOR_RISC_1 + PUERPERA_1 + CARDIOPATI_1 + HEMATOLOGI_1 +
    SIND_DOWN_1 + HEPATICA_1 + ASMA_1 + DIABETES_1 + NEUROLOGIC_1 +
    PNEUMOPATI_1 + IMUNODEPRE_1 + RENAL_1 + OBESIDADE_1,
  family = binomial(link = "logit"),
  data = casos_fil_covid_comorbidades
)

# Coefficient table, deviances and number of rows dropped for missingness.
summary(casos_fil_covid.logit)
##
## Call:
## glm(formula = EVOLUCAO ~ FATOR_RISC_1 + PUERPERA_1 + CARDIOPATI_1 +
## HEMATOLOGI_1 + SIND_DOWN_1 + HEPATICA_1 + ASMA_1 + DIABETES_1 +
## NEUROLOGIC_1 + PNEUMOPATI_1 + IMUNODEPRE_1 + RENAL_1 + OBESIDADE_1,
## family = binomial(link = "logit"), data = casos_fil_covid_comorbidades)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0496 -1.3077 0.8999 0.9911 2.2419
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.761552 0.005652 134.747 < 2e-16 ***
## FATOR_RISC_1 -0.066832 0.005127 -13.035 < 2e-16 ***
## PUERPERA_1 0.777301 0.041216 18.859 < 2e-16 ***
## CARDIOPATI_1 -0.221110 0.004977 -44.429 < 2e-16 ***
## HEMATOLOGI_1 -0.110694 0.022751 -4.865 1.14e-06 ***
## SIND_DOWN_1 -0.002804 0.034751 -0.081 0.936
## HEPATICA_1 -0.447586 0.021010 -21.304 < 2e-16 ***
## ASMA_1 0.431097 0.013041 33.056 < 2e-16 ***
## DIABETES_1 -0.239192 0.005109 -46.820 < 2e-16 ***
## NEUROLOGIC_1 -0.564989 0.009985 -56.585 < 2e-16 ***
## PNEUMOPATI_1 -0.502164 0.010495 -47.850 < 2e-16 ***
## IMUNODEPRE_1 -0.404165 0.012380 -32.647 < 2e-16 ***
## RENAL_1 -0.591436 0.010353 -57.130 < 2e-16 ***
## OBESIDADE_1 -0.041856 0.007138 -5.863 4.53e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 954775 on 708893 degrees of freedom
## Residual deviance: 937544 on 708880 degrees of freedom
## (1422746 observations deleted due to missingness)
## AIC: 937572
##
## Number of Fisher Scoring iterations: 4