output: html_document
# Cargar la base desde el archivo subido a Posit Cloud
file_path <- "C:/Users/PCE/Downloads/ParcialBigData/2. covid_example_data.xlsx"
sheets <- excel_sheets(file_path)
print(sheets)
## [1] "in"
# Leer hoja 'in'
covid_data <- read_excel(file_path, sheet = "in")
# Visualizar las primeras filas
head(covid_data)
## # A tibble: 6 x 31
## PID reprt_creationdt_FALSE case_dob_FALSE case_age case_gender
## <chr> <dttm> <dttm> <dbl> <chr>
## 1 3a85e6992a5ac~ 2020-03-22 00:00:00 2004-11-08 00:00:00 16 Male
## 2 c6b5281d5fc50~ 2020-02-01 00:00:00 1964-06-07 00:00:00 57 Male
## 3 53495ad0dca4e~ 2020-02-10 00:00:00 1944-04-06 00:00:00 77 Female
## 4 2948a265da0d0~ 2020-03-20 00:00:00 1964-06-25 00:00:00 57 Female
## 5 a5524aadd1ca0~ 2020-02-26 00:00:00 1964-12-21 00:00:00 56 Male
## 6 db14eeabe531f~ 2020-02-11 00:00:00 1956-06-21 00:00:00 65 Male
## # i 26 more variables: case_race <chr>, case_eth <chr>, case_zip <dbl>,
## # Contact_id <chr>, sym_startdt_FALSE <dttm>, sym_fever <chr>,
## # sym_subjfever <chr>, sym_myalgia <chr>, sym_losstastesmell <chr>,
## # sym_sorethroat <chr>, sym_cough <chr>, sym_headache <chr>,
## # sym_resolved <chr>, sym_resolveddt_FALSE <dttm>, contact_household <chr>,
## # hospitalized <chr>, hosp_admidt_FALSE <dttm>, hosp_dischdt_FALSE <dttm>,
## # died <chr>, died_covid <chr>, died_dt_FALSE <dttm>, ...
# Exploración general
glimpse(covid_data)
## Rows: 82,101
## Columns: 31
## $ PID <chr> "3a85e6992a5ac52f", "c6b5281d5fc50b96", "53495a~
## $ reprt_creationdt_FALSE <dttm> 2020-03-22, 2020-02-01, 2020-02-10, 2020-03-20~
## $ case_dob_FALSE <dttm> 2004-11-08, 1964-06-07, 1944-04-06, 1964-06-25~
## $ case_age <dbl> 16, 57, 77, 57, 56, 65, 47, 61, 36, 42, 74, 27,~
## $ case_gender <chr> "Male", "Male", "Female", "Female", "Male", "Ma~
## $ case_race <chr> "WHITE", "WHITE", "BLACK", "BLACK", "WHITE", "B~
## $ case_eth <chr> "NON-HISPANIC/LATINO", "NON-HISPANIC/LATINO", "~
## $ case_zip <dbl> 30308, 30308, 30315, 30213, 30004, 30314, 30313~
## $ Contact_id <chr> "Yes-Symptomatic", "Yes-Symptomatic", "Yes-Symp~
## $ sym_startdt_FALSE <dttm> 2020-03-20, 2020-01-28, 2020-02-10, 2021-05-19~
## $ sym_fever <chr> "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "~
## $ sym_subjfever <chr> "Yes", "No", NA, "Yes", "Yes", "Yes", "No", "Ye~
## $ sym_myalgia <chr> "No", "Yes", "Yes", "Yes", "Yes", "No", "Unk", ~
## $ sym_losstastesmell <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "Yes", NA, ~
## $ sym_sorethroat <chr> "Yes", "No", "Yes", "Yes", "No", "Unk", "Yes", ~
## $ sym_cough <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"~
## $ sym_headache <chr> "Yes", "No", NA, "Yes", "No", "Unk", "Yes", "No~
## $ sym_resolved <chr> "No, still symptomatic", "No, still symptomatic~
## $ sym_resolveddt_FALSE <dttm> NA, NA, NA, NA, NA, 2020-02-21, NA, NA, NA, NA~
## $ contact_household <chr> "Yes", "No", NA, "No", "No", "No", "No", "No", ~
## $ hospitalized <chr> "No", "No", "Yes", NA, "Yes", "Yes", "Yes", "No~
## $ hosp_admidt_FALSE <dttm> NA, NA, 2020-02-08, NA, 2020-02-26, 2020-01-27~
## $ hosp_dischdt_FALSE <dttm> NA, NA, NA, NA, NA, 2020-02-21, NA, NA, NA, 20~
## $ died <chr> "No", "No", "No", "No", NA, "Yes", "No", NA, "N~
## $ died_covid <chr> "No", "No", "No", "No", NA, "Yes", "No", NA, "N~
## $ died_dt_FALSE <dttm> NA, NA, NA, NA, NA, 2020-02-21, NA, NA, NA, NA~
## $ confirmed_case <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"~
## $ covid_dx <chr> "Confirmed", "Confirmed", "Confirmed", "Confirm~
## $ pos_sampledt_FALSE <dttm> 2020-03-22, 2020-02-01, 2020-02-10, 2021-01-17~
## $ latitude_JITT <dbl> 33.776645460, 33.780510140, 33.730233310, 33.55~
## $ longitude_JITT <dbl> -84.385685230, -84.389474740, -84.384251890, -8~
skim(covid_data)
| Name | covid_data |
| Number of rows | 82101 |
| Number of columns | 31 |
| _______________________ | |
| Column type frequency: | |
| character | 19 |
| numeric | 4 |
| POSIXct | 8 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| PID | 0 | 1.00 | 8 | 22 | 0 | 82087 | 0 |
| case_gender | 63 | 1.00 | 4 | 7 | 0 | 3 | 0 |
| case_race | 2630 | 0.97 | 5 | 32 | 0 | 7 | 0 |
| case_eth | 2574 | 0.97 | 13 | 19 | 0 | 3 | 0 |
| Contact_id | 32205 | 0.61 | 7 | 15 | 0 | 3 | 0 |
| sym_fever | 31577 | 0.62 | 2 | 3 | 0 | 3 | 0 |
| sym_subjfever | 37908 | 0.54 | 2 | 3 | 0 | 3 | 0 |
| sym_myalgia | 32137 | 0.61 | 2 | 3 | 0 | 4 | 0 |
| sym_losstastesmell | 50724 | 0.38 | 2 | 3 | 0 | 3 | 0 |
| sym_sorethroat | 32241 | 0.61 | 2 | 3 | 0 | 3 | 0 |
| sym_cough | 31630 | 0.61 | 2 | 3 | 0 | 3 | 0 |
| sym_headache | 32018 | 0.61 | 2 | 3 | 0 | 3 | 0 |
| sym_resolved | 42294 | 0.48 | 17 | 25 | 0 | 4 | 0 |
| contact_household | 36737 | 0.55 | 2 | 3 | 0 | 3 | 0 |
| hospitalized | 32482 | 0.60 | 2 | 3 | 0 | 2 | 0 |
| died | 36832 | 0.55 | 2 | 7 | 0 | 3 | 0 |
| died_covid | 42302 | 0.48 | 2 | 12 | 0 | 3 | 0 |
| confirmed_case | 9 | 1.00 | 2 | 7 | 0 | 3 | 0 |
| covid_dx | 0 | 1.00 | 9 | 9 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| case_age | 48 | 1 | 39.69 | 19.16 | -20.0 | 25.00 | 37.00 | 53.00 | 106.00 | <U+2581><U+2587><U+2587><U+2583><U+2581> |
| case_zip | 13 | 1 | 30249.72 | 123.83 | 30000.0 | 30213.00 | 30312.00 | 30331.00 | 31707.00 | <U+2587><U+2582><U+2581><U+2581><U+2581> |
| latitude_JITT | 94 | 1 | 32.58 | 6.38 | 0.0 | 33.69 | 33.78 | 33.98 | 34.19 | <U+2581><U+2581><U+2581><U+2581><U+2587> |
| longitude_JITT | 200 | 1 | -81.39 | 15.65 | -84.8 | -84.46 | -84.38 | -84.35 | 0.00 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| reprt_creationdt_FALSE | 0 | 1.00 | 2019-12-27 | 2021-07-27 | 2020-12-04 | 533 |
| case_dob_FALSE | 48 | 1.00 | 1914-12-28 | 2042-06-07 | 1984-05-13 | 26773 |
| sym_startdt_FALSE | 37480 | 0.54 | 1941-03-08 | 2030-12-03 | 2020-11-22 | 601 |
| sym_resolveddt_FALSE | 65799 | 0.20 | 2001-11-08 | 2102-02-03 | 2020-11-27 | 612 |
| hosp_admidt_FALSE | 77115 | 0.06 | 2019-12-26 | 2032-05-22 | 2020-10-14 | 517 |
| hosp_dischdt_FALSE | 78600 | 0.04 | 2019-12-04 | 2029-12-29 | 2020-10-24 | 504 |
| died_dt_FALSE | 80394 | 0.02 | 2020-02-21 | 2021-07-01 | 2020-12-11 | 429 |
| pos_sampledt_FALSE | 122 | 1.00 | 2020-02-01 | 2021-07-23 | 2020-12-03 | 526 |
# Revisión de valores faltantes por variable
colSums(is.na(covid_data))
## PID reprt_creationdt_FALSE case_dob_FALSE
## 0 0 48
## case_age case_gender case_race
## 48 63 2630
## case_eth case_zip Contact_id
## 2574 13 32205
## sym_startdt_FALSE sym_fever sym_subjfever
## 37480 31577 37908
## sym_myalgia sym_losstastesmell sym_sorethroat
## 32137 50724 32241
## sym_cough sym_headache sym_resolved
## 31630 32018 42294
## sym_resolveddt_FALSE contact_household hospitalized
## 65799 36737 32482
## hosp_admidt_FALSE hosp_dischdt_FALSE died
## 77115 78600 36832
## died_covid died_dt_FALSE confirmed_case
## 42302 80394 9
## covid_dx pos_sampledt_FALSE latitude_JITT
## 0 122 94
## longitude_JITT
## 200
covid_data <- covid_data %>%
rename(
id = PID,
fecha_reporte = reprt_creationdt_FALSE,
edad = case_age,
sexo = case_gender,
raza = case_race,
etnia = case_eth,
fecha_inicio_sintomas = sym_startdt_FALSE,
hospitalizado = hospitalized,
fallecido = died,
confirmado = confirmed_case,
latitud = latitude_JITT,
longitud = longitude_JITT
) %>%
mutate(
fecha_reporte = as.Date(fecha_reporte, format = "%d/%m/%Y"),
fecha_inicio_sintomas = as.Date(fecha_inicio_sintomas, format = "%d/%m/%Y"),
edad = as.numeric(edad),
fallecido = ifelse(fallecido == "Yes", 1, 0),
confirmado = ifelse(confirmado == "Yes", 1, 0)
)
# Confirmar estructura limpia
glimpse(covid_data)
## Rows: 82,101
## Columns: 31
## $ id <chr> "3a85e6992a5ac52f", "c6b5281d5fc50b96", "53495ad~
## $ fecha_reporte <date> 2020-03-22, 2020-02-01, 2020-02-10, 2020-03-20,~
## $ case_dob_FALSE <dttm> 2004-11-08, 1964-06-07, 1944-04-06, 1964-06-25,~
## $ edad <dbl> 16, 57, 77, 57, 56, 65, 47, 61, 36, 42, 74, 27, ~
## $ sexo <chr> "Male", "Male", "Female", "Female", "Male", "Mal~
## $ raza <chr> "WHITE", "WHITE", "BLACK", "BLACK", "WHITE", "BL~
## $ etnia <chr> "NON-HISPANIC/LATINO", "NON-HISPANIC/LATINO", "N~
## $ case_zip <dbl> 30308, 30308, 30315, 30213, 30004, 30314, 30313,~
## $ Contact_id <chr> "Yes-Symptomatic", "Yes-Symptomatic", "Yes-Sympt~
## $ fecha_inicio_sintomas <date> 2020-03-20, 2020-01-28, 2020-02-10, 2021-05-19,~
## $ sym_fever <chr> "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Y~
## $ sym_subjfever <chr> "Yes", "No", NA, "Yes", "Yes", "Yes", "No", "Yes~
## $ sym_myalgia <chr> "No", "Yes", "Yes", "Yes", "Yes", "No", "Unk", "~
## $ sym_losstastesmell <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "Yes", NA, N~
## $ sym_sorethroat <chr> "Yes", "No", "Yes", "Yes", "No", "Unk", "Yes", "~
## $ sym_cough <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",~
## $ sym_headache <chr> "Yes", "No", NA, "Yes", "No", "Unk", "Yes", "No"~
## $ sym_resolved <chr> "No, still symptomatic", "No, still symptomatic"~
## $ sym_resolveddt_FALSE <dttm> NA, NA, NA, NA, NA, 2020-02-21, NA, NA, NA, NA,~
## $ contact_household <chr> "Yes", "No", NA, "No", "No", "No", "No", "No", "~
## $ hospitalizado <chr> "No", "No", "Yes", NA, "Yes", "Yes", "Yes", "No"~
## $ hosp_admidt_FALSE <dttm> NA, NA, 2020-02-08, NA, 2020-02-26, 2020-01-27,~
## $ hosp_dischdt_FALSE <dttm> NA, NA, NA, NA, NA, 2020-02-21, NA, NA, NA, 202~
## $ fallecido <dbl> 0, 0, 0, 0, NA, 1, 0, NA, 0, 0, NA, 0, 1, 0, 0, ~
## $ died_covid <chr> "No", "No", "No", "No", NA, "Yes", "No", NA, "No~
## $ died_dt_FALSE <dttm> NA, NA, NA, NA, NA, 2020-02-21, NA, NA, NA, NA,~
## $ confirmado <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ covid_dx <chr> "Confirmed", "Confirmed", "Confirmed", "Confirme~
## $ pos_sampledt_FALSE <dttm> 2020-03-22, 2020-02-01, 2020-02-10, 2021-01-17,~
## $ latitud <dbl> 33.776645460, 33.780510140, 33.730233310, 33.555~
## $ longitud <dbl> -84.385685230, -84.389474740, -84.384251890, -84~
# Estadísticas básicas
summary(select(covid_data, edad, fallecido, confirmado))
## edad fallecido confirmado
## Min. :-20.00 Min. :0.000 Min. :0.0000
## 1st Qu.: 25.00 1st Qu.:0.000 1st Qu.:1.0000
## Median : 37.00 Median :0.000 Median :1.0000
## Mean : 39.69 Mean :0.038 Mean :0.9996
## 3rd Qu.: 53.00 3rd Qu.:0.000 3rd Qu.:1.0000
## Max. :106.00 Max. :1.000 Max. :1.0000
## NA's :48 NA's :36832 NA's :9
# Tablas de frecuencia para variables categóricas
table(covid_data$sexo)
##
## Female Male Unknown
## 43299 38393 346
table(covid_data$raza)
##
## AMERICAN INDIAN/ALASKA NATIVE ASIAN
## 84 3075
## BLACK NATIVE HAWAIIAN/PACIFIC ISLANDER
## 35048 79
## OTHER UNKNOWN
## 5863 3723
## WHITE
## 31599
table(covid_data$etnia)
##
## HISPANIC/LATINO NON-HISPANIC/LATINO NOT SPECIFIED
## 8625 62677 8225
ggplot(covid_data, aes(x = edad)) +
geom_histogram(binwidth = 5, fill = "steelblue", color = "white") +
labs(title = "Distribución de edades", x = "Edad", y = "Frecuencia")
## Warning: Removed 48 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggplot(covid_data, aes(x = sexo, fill = as.factor(fallecido))) +
geom_bar(position = "fill") +
labs(title = "Proporción de fallecidos por sexo",
y = "Proporción", fill = "Fallecido")
num_data <- select(covid_data, edad, fallecido, confirmado, latitud, longitud)
corrplot(cor(num_data, use = "pairwise.complete.obs"), method = "number")
El objetivo es estimar la probabilidad de fallecer en función de la edad, sexo, raza, etnia y hospitalización.
modelo_logit <- glm(fallecido ~ edad + sexo + raza + etnia + hospitalizado,
data = covid_data, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(modelo_logit)
##
## Call:
## glm(formula = fallecido ~ edad + sexo + raza + etnia + hospitalizado,
## family = binomial, data = covid_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.21762 785.16423 -0.030 0.97641
## edad 0.09161 0.00252 36.349 < 2e-16 ***
## sexoMale 0.44187 0.07225 6.116 9.62e-10 ***
## sexoUnknown -14.92653 745.35887 -0.020 0.98402
## razaASIAN 13.15723 785.16423 0.017 0.98663
## razaBLACK 13.32844 785.16419 0.017 0.98646
## razaNATIVE HAWAIIAN/PACIFIC ISLANDER 0.77895 1241.00545 0.001 0.99950
## razaOTHER 12.70708 785.16424 0.016 0.98709
## razaUNKNOWN 0.51511 810.42217 0.001 0.99949
## razaWHITE 13.00273 785.16419 0.017 0.98679
## etniaNON-HISPANIC/LATINO 0.05683 0.18847 0.302 0.76302
## etniaNOT SPECIFIED -2.98037 1.02528 -2.907 0.00365 **
## hospitalizadoYes 2.35121 0.07784 30.205 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11300.2 on 43580 degrees of freedom
## Residual deviance: 5783.8 on 43568 degrees of freedom
## (38520 observations deleted due to missingness)
## AIC: 5809.8
##
## Number of Fisher Scoring iterations: 17
Se muestran los odds ratios (razones de momios) e intervalos de confianza.
exp(cbind(OddsRatio = coef(modelo_logit), confint(modelo_logit)))
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## OddsRatio 2.5 % 97.5 %
## (Intercept) 8.254957e-11 9.224029e-125 1.566003e+04
## edad 1.095938e+00 1.090597e+00 1.101427e+00
## sexoMale 1.555614e+00 1.350526e+00 1.792804e+00
## sexoUnknown 3.292216e-07 2.929620e-107 3.579320e+08
## razaASIAN 5.177420e+05 7.571023e+279 Inf
## razaBLACK 6.144227e+05 3.377594e-09 6.920154e+119
## razaNATIVE HAWAIIAN/PACIFIC ISLANDER 2.179178e+00 3.100013e-02 1.496621e+02
## razaOTHER 3.300781e+05 1.077409e+282 Inf
## razaUNKNOWN 1.673829e+00 1.095426e-01 2.512491e+01
## razaWHITE 4.436214e+05 2.495939e-09 5.674435e+119
## etniaNON-HISPANIC/LATINO 1.058473e+00 7.383495e-01 1.547002e+00
## etniaNOT SPECIFIED 5.077396e-02 2.826165e-03 2.426766e-01
## hospitalizadoYes 1.049821e+01 9.023954e+00 1.224502e+01
Análisis: - Una edad mayor aumenta la probabilidad de fallecimiento (coeficiente positivo en el modelo). - Los pacientes hospitalizados muestran mayor probabilidad de fallecer. - Las variables de raza y etnia permiten observar diferencias demográficas relevantes. - Los gráficos previos ayudan a identificar los grupos más vulnerables.
write.csv(covid_data, "covid_clean.csv", row.names = FALSE)