Infant mortality is primarily influenced by the quality of healthcare. In this project, however, I explore how healthcare expenditure, air quality (represented here by gas emissions), and the birth rate per woman relate to infant mortality rates.
Load necessary libraries
library(dplyr)
library(countrycode)
library(readxl)
library(arules)
library(arulesViz)
Declare the function used to standardise country names across the four datasets.
#' Standardise the country names of a data frame.
#'
#' Passes the column named by `country_col` through `countrycode()` with
#' "country.name" as both origin and destination, and stores the result in a
#' column called `Country` (created, or overwritten if it already exists).
#'
#' @param df A data frame containing the country-name column.
#' @param country_col String; name of the column in `df` holding the names.
#' @return `df` with the `Country` column set to the converted names.
standardize_countries <- function(df, country_col) {
  # `.data[[...]]` restricts the lookup to columns of `df`; `!!sym(...)` would
  # silently fall back to a same-named variable in the calling environment
  # when the column is absent.
  # NOTE(review): names that countrycode cannot match presumably come back as
  # NA (with a warning) - confirm against the countrycode documentation.
  df %>%
    mutate(
      Country = countrycode(
        .data[[country_col]],
        origin = "country.name",
        destination = "country.name"
      )
    )
}
Load and standardise the gas emissions, mortality, birth rate, and healthcare expenditure datasets. Each numeric variable is then split at its 1/3 and 2/3 quantiles into three categories: low, medium, and high.
The links to datasets: https://genderdata.worldbank.org/en/indicator/sp-dyn-tfrt-in?year=2020 - Birth Rates
https://apps.who.int/nha/database/Select/Indicators/en - healthcare expenditure data
https://www.who.int/data/gho/data/indicators/indicator-details/GHO/under-five-mortality-rate-(probability-of-dying-by-age-5-per-1000-live-births) - mortality data (the linked indicator is the under-five mortality rate, used here as the infant mortality measure)
# Emissions: keep the country name and the 2020 value, drop incomplete rows,
# and standardise the country names.
gas_data <- read.csv("historical_emissions.csv") %>%
  select(Country, Emissions_2020 = X2020) %>%
  na.omit() %>%
  standardize_countries("Country")

# Tertile cut points (1/3 and 2/3 quantiles) of the 2020 emissions
quantiles_gas <- quantile(
  gas_data$Emissions_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# case_when() uses the first matching branch, so the "Medium" branch only
# needs the upper bound: anything <= the first cut point is already "Low".
gas_data <- gas_data %>%
  mutate(
    Emissions = case_when(
      Emissions_2020 <= quantiles_gas[1] ~ "Low",
      Emissions_2020 <= quantiles_gas[2] ~ "Medium",
      Emissions_2020 > quantiles_gas[2] ~ "High"
    )
  )
# Mortality: keep the 2020 rows reported for both sexes, rename the columns,
# drop incomplete rows, and standardise the country names.
mort_data <- read.csv("mortality_data.csv") %>%
  filter(Period == 2020 & Dim1 == "Both sexes") %>%
  select(Country = Location, Mortality_2020 = FactValueNumeric) %>%
  na.omit() %>%
  standardize_countries("Country")

# Tertile cut points (1/3 and 2/3 quantiles) of the 2020 mortality values
quantiles_mort <- quantile(
  mort_data$Mortality_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# case_when() uses the first matching branch, so the "Medium" branch only
# needs the upper bound: anything <= the first cut point is already "Low".
mort_data <- mort_data %>%
  mutate(
    Mortality = case_when(
      Mortality_2020 <= quantiles_mort[1] ~ "Low",
      Mortality_2020 <= quantiles_mort[2] ~ "Medium",
      Mortality_2020 > quantiles_mort[2] ~ "High"
    )
  )
# Birth rate: load the fertility figures, rename the columns, drop incomplete
# rows, and standardise the country names.
birth_dataset <- read.csv("births_per_woman.csv") %>%
  select(Country = Economy, Birth_rate_2020 = Fertility) %>%
  na.omit() %>%
  standardize_countries("Country")

# Tertile cut points (1/3 and 2/3 quantiles) of the birth rate
quantiles_birth <- quantile(
  birth_dataset$Birth_rate_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# Label each country Low/Medium/High, then keep only the name and the label.
# case_when() uses the first matching branch, so "Medium" only needs the
# upper bound.
birth_dataset <- birth_dataset %>%
  mutate(
    Birth_rate = case_when(
      Birth_rate_2020 <= quantiles_birth[1] ~ "Low",
      Birth_rate_2020 <= quantiles_birth[2] ~ "Medium",
      Birth_rate_2020 > quantiles_birth[2] ~ "High"
    )
  ) %>%
  select(Country, Birth_rate)
# Health expenditure: read the 2020 column of the "Table" sheet, standardise
# the country names, and bin the values into tertiles.
money_dataset <- read_excel("NHA indicators.xlsx", sheet = "Table") %>%
  select(Country = Countries, Expenditure_2020 = `2020`) %>%
  # Coerce before na.omit() so that values which cannot be parsed as numbers
  # are removed together with the genuinely missing ones, instead of surviving
  # as NA rows (with an NA category) until the final join.
  mutate(Expenditure_2020 = as.numeric(Expenditure_2020)) %>%
  na.omit()
money_dataset <- standardize_countries(money_dataset, "Country")

# Tertile cut points (1/3 and 2/3 quantiles) of the 2020 expenditure
quantiles_money <- quantile(
  money_dataset$Expenditure_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# case_when() uses the first matching branch, so the "Medium" branch only
# needs the upper bound: anything <= the first cut point is already "Low".
money_dataset <- money_dataset %>%
  mutate(
    Health_Expenditure = case_when(
      Expenditure_2020 <= quantiles_money[1] ~ "Low",
      Expenditure_2020 <= quantiles_money[2] ~ "Medium",
      Expenditure_2020 > quantiles_money[2] ~ "High"
    )
  )
# Merge the four per-country tables on the standardised country name (left
# fold of full joins, in the order listed), keep only rows with no missing
# value in any column, and drop the raw "_2020" figures so that Country and
# the categorical columns remain.
final_data <- Reduce(
  function(left, right) full_join(left, right, by = "Country"),
  list(gas_data, mort_data, money_dataset, birth_dataset)
) %>%
  na.omit() %>%
  select(-contains("_2020"))
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
# Reshape to long format: one row per country/variable pair
library(reshape2)
final_data_long <- melt(final_data, id.vars = "Country")

# Dodged bar chart: number of countries in each category, one bar per variable
ggplot(final_data_long) +
  geom_bar(aes(x = value, fill = variable), position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Frequency of Categorized Data", x = "Category", y = "Count") +
  theme_minimal()
Next, we transform the data into transaction data:
# Drop the first column (Country) so that only the categorical attributes
# become items, then coerce the remaining columns to arules transactions.
trans_data <- final_data[, -1] %>%
  as("transactions")
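Now we mine association rules with the Apriori algorithm, requiring a minimum support of 0.10, a minimum confidence of 0.7, and at least two items per rule: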
# Mine rules with at least 10% support, 70% confidence and two items per rule
# (parameter names spelled out in full instead of the supp/conf abbreviations).
rules <- apriori(
  trans_data,
  parameter = list(support = 0.10, confidence = 0.7, minlen = 2)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.7 0.1 1 none FALSE TRUE 5 0.1 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 18
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[12 item(s), 186 transaction(s)] done [0.00s].
## sorting and recoding items ... [12 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [33 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print the mined rules ordered by lift
rules %>%
  sort(by = "lift") %>%
  inspect()
## lhs rhs support confidence coverage lift count
## [1] {Emissions=High,
## Health_Expenditure=High} => {Mortality=Low} 0.1075269 0.9090909 0.1182796 2.865948 20
## [2] {Health_Expenditure=High,
## Birth_rate=Low} => {Mortality=Low} 0.1989247 0.8604651 0.2311828 2.712653 37
## [3] {Mortality=Low,
## Birth_rate=Low} => {Health_Expenditure=High} 0.1989247 0.8409091 0.2365591 2.651002 37
## [4] {Mortality=Low} => {Health_Expenditure=High} 0.2580645 0.8135593 0.3172043 2.564780 48
## [5] {Health_Expenditure=High} => {Mortality=Low} 0.2580645 0.8135593 0.3172043 2.564780 48
## [6] {Emissions=Medium,
## Health_Expenditure=Low,
## Birth_rate=High} => {Mortality=High} 0.1021505 0.9047619 0.1129032 2.549784 19
## [7] {Emissions=High,
## Mortality=Low} => {Health_Expenditure=High} 0.1075269 0.8000000 0.1344086 2.522034 20
## [8] {Health_Expenditure=Low,
## Birth_rate=High} => {Mortality=High} 0.2473118 0.8846154 0.2795699 2.493007 46
## [9] {Mortality=High,
## Birth_rate=High} => {Health_Expenditure=Low} 0.2473118 0.8679245 0.2849462 2.483599 46
## [10] {Mortality=Low,
## Health_Expenditure=High} => {Birth_rate=Low} 0.1989247 0.7708333 0.2580645 2.471983 37
## [11] {Emissions=Medium,
## Mortality=High,
## Birth_rate=High} => {Health_Expenditure=Low} 0.1021505 0.8636364 0.1182796 2.471329 19
## [12] {Emissions=Medium,
## Mortality=High,
## Health_Expenditure=Low} => {Birth_rate=High} 0.1021505 0.9047619 0.1129032 2.438923 19
## [13] {Birth_rate=Low} => {Mortality=Low} 0.2365591 0.7586207 0.3118280 2.391584 44
## [14] {Mortality=Low} => {Birth_rate=Low} 0.2365591 0.7457627 0.3172043 2.391584 44
## [15] {Emissions=Medium,
## Birth_rate=High} => {Mortality=High} 0.1182796 0.8461538 0.1397849 2.384615 22
## [16] {Mortality=High,
## Health_Expenditure=Low} => {Birth_rate=High} 0.2473118 0.8846154 0.2795699 2.384615 46
## [17] {Birth_rate=Low} => {Health_Expenditure=High} 0.2311828 0.7413793 0.3118280 2.337230 43
## [18] {Health_Expenditure=High} => {Birth_rate=Low} 0.2311828 0.7288136 0.3172043 2.337230 43
## [19] {Emissions=Low,
## Mortality=High} => {Birth_rate=High} 0.1021505 0.8636364 0.1182796 2.328063 19
## [20] {Emissions=Medium,
## Mortality=High} => {Health_Expenditure=Low} 0.1129032 0.8076923 0.1397849 2.311243 21
## [21] {Emissions=Medium,
## Birth_rate=High} => {Health_Expenditure=Low} 0.1129032 0.8076923 0.1397849 2.311243 21
## [22] {Emissions=Medium,
## Mortality=High} => {Birth_rate=High} 0.1182796 0.8461538 0.1397849 2.280936 22
## [23] {Emissions=Medium,
## Health_Expenditure=Low} => {Mortality=High} 0.1129032 0.8076923 0.1397849 2.276224 21
## [24] {Mortality=Medium,
## Birth_rate=Medium} => {Health_Expenditure=Medium} 0.1344086 0.7575758 0.1774194 2.272727 25
## [25] {Health_Expenditure=Low} => {Mortality=High} 0.2795699 0.8000000 0.3494624 2.254545 52
## [26] {Mortality=High} => {Health_Expenditure=Low} 0.2795699 0.7878788 0.3548387 2.254545 52
## [27] {Health_Expenditure=Medium,
## Birth_rate=Medium} => {Mortality=Medium} 0.1344086 0.7352941 0.1827957 2.242044 25
## [28] {Emissions=Medium,
## Health_Expenditure=Low} => {Birth_rate=High} 0.1129032 0.8076923 0.1397849 2.177258 21
## [29] {Birth_rate=High} => {Mortality=High} 0.2849462 0.7681159 0.3709677 2.164690 53
## [30] {Mortality=High} => {Birth_rate=High} 0.2849462 0.8030303 0.3548387 2.164690 53
## [31] {Health_Expenditure=Low} => {Birth_rate=High} 0.2795699 0.8000000 0.3494624 2.156522 52
## [32] {Birth_rate=High} => {Health_Expenditure=Low} 0.2795699 0.7536232 0.3709677 2.156522 52
## [33] {Emissions=Low,
## Birth_rate=High} => {Mortality=High} 0.1021505 0.7600000 0.1344086 2.141818 19
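The above rules provide some insight into how a country's mortality rate is associated with its gas emissions, healthcare expenditure, and fertility rate. For example, high health expenditure co-occurs with low mortality (rule 5: confidence 0.81, lift 2.56), while low health expenditure combined with a high birth rate co-occurs with high mortality (rule 8: confidence 0.88, lift 2.49). Note that association rules describe co-occurrence between categories, not causation.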