First, we will build a single dataset from the available data that contains all gender-related attributes.
The first dataset contains information for 141 countries. The available variables are:
# List the column names of the first dataset.
names(covid_data)
## [1] "iso_code" "continent"
## [3] "location" "date"
## [5] "total_cases" "new_cases"
## [7] "new_cases_smoothed" "total_deaths"
## [9] "new_deaths" "new_deaths_smoothed"
## [11] "total_cases_per_million" "new_cases_per_million"
## [13] "new_cases_smoothed_per_million" "total_deaths_per_million"
## [15] "new_deaths_per_million" "new_deaths_smoothed_per_million"
## [17] "icu_patients" "icu_patients_per_million"
## [19] "hosp_patients" "hosp_patients_per_million"
## [21] "weekly_icu_admissions" "weekly_icu_admissions_per_million"
## [23] "weekly_hosp_admissions" "weekly_hosp_admissions_per_million"
## [25] "total_tests" "new_tests"
## [27] "total_tests_per_thousand" "new_tests_per_thousand"
## [29] "new_tests_smoothed" "new_tests_smoothed_per_thousand"
## [31] "tests_per_case" "positive_rate"
## [33] "tests_units" "stringency_index"
## [35] "population" "population_density"
## [37] "median_age" "aged_65_older"
## [39] "aged_70_older" "gdp_per_capita"
## [41] "extreme_poverty" "cardiovasc_death_rate"
## [43] "diabetes_prevalence" "female_smokers"
## [45] "male_smokers" "handwashing_facilities"
## [47] "hospital_beds_per_thousand" "life_expectancy"
## [49] "human_development_index"
Let’s keep only the gender-related information and the countries with complete data:
# Remove rows with NA: flag the rows where a smoking rate is missing.
# is.na() is vectorised, so no row-wise apply() is needed.
female_smokers_has_na <- is.na(covid_data[["female_smokers"]])
male_smokers_has_na <- is.na(covid_data[["male_smokers"]])
# Keep a row only when BOTH rates are present (`|`, not `&`). With `&`, a row
# missing just one of the two rates would survive the filter and mean() below
# would return NA for that country.
filtered_covid <-
  covid_data[!(female_smokers_has_na | male_smokers_has_na), ]
# Group by country: average each smoking rate per location.
# The name `data_by_coutnry` (sic) is kept because later code refers to it.
data_by_coutnry <- filtered_covid %>%
  group_by(location) %>%
  summarise(
    female_smokers = mean(female_smokers),
    male_smokers = mean(male_smokers)
  )
# Preview the first rows of the per-country table.
rmarkdown::paged_table(head(data_by_coutnry))
Source for the second dataset: https://globalhealth5050.org/the-sex-gender-and-covid-19-project/dataset/
# Load the second dataset from a local Excel workbook.
# NOTE(review): this is an absolute, machine-specific path; the line only
# runs on a machine where that file exists at that location.
Dataset <- read_excel("C:/Users/bakka/Downloads/Dataset.xlsx")
The second dataset contains information for 183 countries. The available variables are:
# List the column names of the second dataset.
names(Dataset)
## [1] "Country code"
## [2] "Country"
## [3] "Case & death data by sex?"
## [4] "Cases date"
## [5] "Cases where sex-disaggregated data is available"
## [6] "Cases (% male)"
## [7] "Cases (% female)"
## [8] "Deaths date"
## [9] "Deaths where sex-disaggregated data is available"
## [10] "Deaths (% male)"
## [11] "Deaths (% female)"
## [12] "Deaths in confirmed cases date"
## [13] "Proportion of deaths in confirmed cases (male)"
## [14] "Proportion of deaths in confirmed cases (female)"
## [15] "Proportion of deaths in confirmed cases (Male:female ratio)"
## [16] "Source"
As before, we keep only the gender-related information and the countries with complete data:
# Keep only the useful columns, selected by their position in `Dataset`.
Dataset_useful <- Dataset[c(2, 6, 7, 10, 11, 13, 14, 15)]
# TRUE for every row that contains at least one missing value.
raw_has_na <- rowSums(is.na(Dataset_useful)) > 0
# Drop the incomplete rows.
filtered_covid2 <- Dataset_useful[!raw_has_na, ]
# Rename `location` to `Country` in the first table so that both tables
# carry the same key column name.
names(data_by_coutnry)[names(data_by_coutnry) == "location"] <- "Country"
# Preview the first rows of the filtered second dataset.
rmarkdown::paged_table(head(filtered_covid2))
After merging, we have complete information for 61 countries.
# Join the two per-country tables on `Country`; merge() keeps only the
# countries present in both.
data_all <- merge(data_by_coutnry, filtered_covid2, by = "Country")
rmarkdown::paged_table(head(data_all))

# Strip "%" from columns 2-10 and convert them to numeric.
# lapply() works column by column. apply() would first coerce the data frame
# to a character matrix, which format()s the already-numeric columns to 7
# significant digits and silently loses precision.
data_all[2:10] <- lapply(data_all[2:10], function(y) {
  as.numeric(gsub("%", "", y))
})

# Short column names, in the positional order of the merged table.
names(data_all) <-
  c(
    "Country",
    "Smokers_F",
    "Smokers_M",
    "Cases%M",
    "Cases%F",
    "Deaths%M",
    "Deaths%F",
    "ProportionDeathsInConfirmedCases_M",
    "ProportionDeathsInConfirmedCases_F",
    "ProportionDeathsInConfirmedCases_Ratio"
  )

# Male:female ratios; a value above 1 means the male figure is the larger one.
data_with_ratio <- data_all %>%
  mutate(
    ratio_deaths = `Deaths%M` / `Deaths%F`,
    ratio_cases = `Cases%M` / `Cases%F`,
    ratio_Proportion =
      ProportionDeathsInConfirmedCases_M / ProportionDeathsInConfirmedCases_F,
    ratio_smoking = Smokers_M / Smokers_F
  )

# Split the countries by which sex dominates cases and deaths.
# Countries with a ratio of exactly 1 fall into none of the groups.
Group1 <-
  data_with_ratio %>% filter(ratio_cases > 1 & ratio_deaths < 1)
Group2 <-
  data_with_ratio %>% filter(ratio_cases < 1 & ratio_deaths > 1)
Group3 <-
  data_with_ratio %>% filter(ratio_cases < 1 & ratio_deaths < 1)
Group4 <-
  data_with_ratio %>% filter(ratio_cases > 1 & ratio_deaths > 1)

# Turn Country into a factor whose levels follow increasing ratio_deaths, so
# plots draw the countries in that order. Only this ordering is applied: a
# preceding ordering by ratio_smoking would be overwritten immediately.
# Group3 is ordered as well, so that all groups are treated consistently.
Group1$Country <-
  factor(Group1$Country, levels = Group1$Country[order(Group1$ratio_deaths)])
Group2$Country <-
  factor(Group2$Country, levels = Group2$Country[order(Group2$ratio_deaths)])
Group3$Country <-
  factor(Group3$Country, levels = Group3$Country[order(Group3$ratio_deaths)])
Group4$Country <-
  factor(Group4$Country, levels = Group4$Country[order(Group4$ratio_deaths)])
# Horizontal bar chart of log(<ratio>) per country for one group.
#
# data      : data frame with a `Country` column and the ratio column.
# ratio     : unquoted name of the ratio column to plot (e.g. ratio_deaths).
# y_limits  : length-2 numeric vector passed to ylim(); bars whose value lies
#             outside these limits are dropped by ggplot2 with a warning.
# text_size : base text size for the plot theme.
# Returns the ggplot object.
plot_log_ratio <- function(data, ratio, y_limits, text_size = 7) {
  ggplot(data, aes(
    y = log({{ ratio }}),
    x = Country,
    fill = Country
  )) +
    coord_flip() +
    geom_col(width = .90) + # same as geom_bar(stat = "identity")
    xlab("") + # Set axis labels
    ylab("") +
    ylim(y_limits) +
    # "none" replaces the deprecated `fill = FALSE` for hiding the legend.
    guides(fill = "none") +
    theme(text = element_text(size = text_size))
}

# Group 2
g1 <- plot_log_ratio(Group2, ratio_deaths, c(0, 4.5))
g2 <- plot_log_ratio(Group2, ratio_smoking, c(-0.05, 4.5))
ggpubr::ggarrange(g1, g2)

# Group 3
g1 <- plot_log_ratio(Group3, ratio_deaths, c(-0.4, 4.5))
g2 <- plot_log_ratio(Group3, ratio_smoking, c(-0.4, 4.5))
ggpubr::ggarrange(g1, g2)

# Group 4
# NOTE(review): this panel uses text size 9 while every other panel uses 7;
# kept as is, but confirm whether the difference is intentional.
g1 <- plot_log_ratio(Group4, ratio_deaths, c(0, 4.5), text_size = 9)
g2 <- plot_log_ratio(Group4, ratio_smoking, c(0, 4.5))
ggpubr::ggarrange(g1, g2)