Content
- Business task
- Data overview
- R packages
- Data cleaning & manipulation
- Descriptive analysis
- Visualizations or Outputs
- Q & A
- Extra descriptive analysis
Discovering insights from the Mexico COVID dataset through descriptive analysis
# Per-column summary of the cleaned dataset
summary(object = covid_df_cln)
## Answer to question 1
# getMode function
# Returns the most frequent value of a vector.
#   x      vector whose mode is wanted.
#   na.rm  if TRUE, missing values are dropped before counting. The default
#          (FALSE) keeps the previous behaviour, where NA is counted like any
#          other value and can itself be returned as the mode.
# Ties are resolved in favour of the value that appears first in x.
# Defined ahead of its first use so the script runs top to bottom.
getMode <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  ux <- unique(x)
  # match() maps every element to its index in ux; tabulate() counts them
  ux[which.max(tabulate(match(x, ux)))]
}

# calculate the summary of people who died
# Keeps rows with a non-missing date_died and summarises their age.
# Missing ages are ignored by every statistic, including the mode.
died_smry <- covid_df_cln %>%
  filter(!is.na(date_died)) %>%
  summarise(
    died_mean = mean(age, na.rm = TRUE),
    died_median = median(age, na.rm = TRUE),
    died_mode = getMode(age, na.rm = TRUE),
    died_range = max(age, na.rm = TRUE) - min(age, na.rm = TRUE)
  )
## Answer to question 3
# calculating comorbidities for died
# Builds a one-row table of comorbidity counts for rows with died_yn == "Yes".
# Each count is sum(col == "Yes", na.rm = TRUE): the earlier
# length(col[col == "Yes"]) form also counted NA entries, because subsetting
# with an NA index keeps an NA element.
# NOTE(review): assumes covid_df_cln has a `copd` column like the other
# comorbidity flags -- confirm against the cleaning step.
dying_comorbidities <- covid_df_cln %>%
  filter(died_yn == "Yes") %>%
  group_by(ICU_died = died_yn) %>%
  summarise(
    n_of_patients = n(),
    diabetes = sum(diabetes == "Yes", na.rm = TRUE),
    # previously computed from the asthma column (copy-paste slip)
    copd = sum(copd == "Yes", na.rm = TRUE),
    asthma = sum(asthma == "Yes", na.rm = TRUE),
    inmsupr = sum(inmsupr == "Yes", na.rm = TRUE),
    hypertension = sum(hypertension == "Yes", na.rm = TRUE),
    cardiovascular = sum(cardiovascular == "Yes", na.rm = TRUE),
    tobacco = sum(tobacco == "Yes", na.rm = TRUE),
    renal_chronic = sum(renal_chronic == "Yes", na.rm = TRUE),
    obesity = sum(obesity == "Yes", na.rm = TRUE)
  ) %>%
  # only the "Yes" group survives the filter, so the label is always "Died"
  mutate(ICU_died = "Died")
# calculating comorbidities for ICU
# Builds a one-row table of comorbidity counts for rows with icu == "Yes".
# Each count is sum(col == "Yes", na.rm = TRUE): the earlier
# length(col[col == "Yes"]) form also counted NA entries, because subsetting
# with an NA index keeps an NA element.
# NOTE(review): assumes covid_df_cln has a `copd` column like the other
# comorbidity flags -- confirm against the cleaning step.
icu_comorbidities <- covid_df_cln %>%
  filter(icu == "Yes") %>%
  group_by(ICU_died = icu) %>%
  summarise(
    n_of_patients = n(),
    diabetes = sum(diabetes == "Yes", na.rm = TRUE),
    # previously computed from the asthma column (copy-paste slip)
    copd = sum(copd == "Yes", na.rm = TRUE),
    asthma = sum(asthma == "Yes", na.rm = TRUE),
    inmsupr = sum(inmsupr == "Yes", na.rm = TRUE),
    hypertension = sum(hypertension == "Yes", na.rm = TRUE),
    cardiovascular = sum(cardiovascular == "Yes", na.rm = TRUE),
    tobacco = sum(tobacco == "Yes", na.rm = TRUE),
    renal_chronic = sum(renal_chronic == "Yes", na.rm = TRUE),
    obesity = sum(obesity == "Yes", na.rm = TRUE)
  ) %>%
  # only the "Yes" group survives the filter, so the label is always "ICU"
  mutate(ICU_died = "ICU")
# Merging both ICU and Died into one table (one row per ICU_died label)
icu_died_associates <- bind_rows(dying_comorbidities, icu_comorbidities)

# Reshaping the data to long format, to make it more suitable for
# visualization: every column except ICU_died becomes an
# (Associates, Patients) name/value pair. n_of_patients is dropped first so
# that it is not treated as a comorbidity.
# pivot_longer() replaces the superseded gather(); the rows come out in a
# different order, but the set of rows is the same.
icu_died_associates_lng <- icu_died_associates %>%
  select(-n_of_patients) %>%
  pivot_longer(
    cols = -ICU_died,
    names_to = "Associates",
    values_to = "Patients"
  )
Mean, Median, Mode and Range of age of patients who died
## # A tibble: 1 x 4
## died_mean died_median died_mode died_range
## <dbl> <dbl> <dbl> <dbl>
## 1 61.2 63 65 99
ICU admission and Sex relationship
# Bar chart of n_of_patients by sex, taken from icu_sex_rel
ggplot(icu_sex_rel, aes(x = sex, y = n_of_patients, fill = sex)) +
  geom_col(position = "dodge") +
  labs(
    title = "ICU and Sex relationship",
    x = "Sex",
    y = "# of patients in ICU",
    fill = "Sex"
  )
Comorbidities associated with ICU admission and death
# Dodged bars: one bar per ICU_died label for each entry of Associates;
# x tick labels are rotated 90 degrees
ggplot(
  icu_died_associates_lng,
  aes(x = Associates, y = Patients, fill = ICU_died)
) +
  geom_col(position = "dodge") +
  labs(
    title = "ICU or Died associates",
    y = "# of patients",
    fill = "ICU or Died"
  ) +
  theme(axis.text.x = element_text(angle = 90))
Relationship between sex and COVID positive result
# Bar chart of n_of_patients by sex, taken from sex_res_rel
ggplot(sex_res_rel, aes(x = sex, y = n_of_patients, fill = sex)) +
  geom_col(position = "dodge") +
  labs(
    title = "Sex and Positive result relationship",
    x = "Sex",
    y = "# of patient",
    fill = "Sex"
  )
# Add a labelled month column derived from date_symptoms
covid_df_cln[["symptoms_month"]] <- month(
  covid_df_cln[["date_symptoms"]],
  label = TRUE
)

# Bars of delayed_cure (coerced to numeric) per symptom month, drawn from the
# raw rows of covid_df_cln
ggplot(
  covid_df_cln,
  aes(x = symptoms_month, y = as.numeric(delayed_cure), fill = symptoms_month)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Entry admission after symptoms",
    x = "Months",
    y = "Days after symptoms",
    fill = "Months"
  )
# Row counts per covid_res ("Positive" / "Negative") for rows where
# contact_other_covid is "Yes"
covid_cont_covid_res <- covid_df_cln %>%
  filter(
    covid_res %in% c("Positive", "Negative"),
    contact_other_covid == "Yes"
  ) %>%
  group_by(contact_other_covid, covid_res) %>%
  summarise(n_value = n())
# Bar chart of n_value by covid_res, taken from covid_cont_covid_res
ggplot(
  covid_cont_covid_res,
  aes(x = covid_res, y = n_value, fill = covid_res)
) +
  geom_col(position = "dodge") +
  labs(
    title = "COVID result by COVID contact",
    x = "COVID Result",
    y = "# of patient",
    fill = "COVID Result"
  )