Infant mortality is primarily influenced by the quality of healthcare. In this project, however, I would like to explore how healthcare expenditure, air quality (proxied here by greenhouse gas emissions), and the number of births per woman are linked to infant mortality rates.

Load necessary libraries

library(dplyr)
library(countrycode)
library(readxl)
library(arules)
library(arulesViz)

Define a function that standardises country names so that the four datasets can be joined on a common country key.

# Add (or overwrite) a `Country` column holding standardised country names.
#
# Arguments:
#   df          A data frame containing a column of country names.
#   country_col Name of that column, given as a string.
#
# Returns `df` with `Country` set to the result of converting the values of
# `country_col` from "country.name" to "country.name" with countrycode().
#
# The column is looked up through the `.data` pronoun so that only columns of
# `df` are considered: a misspelt `country_col` raises an error instead of
# silently picking up an unrelated variable of the same name from the calling
# environment.
standardize_countries <- function(df, country_col) {
  df %>%
    mutate(
      Country = countrycode(
        .data[[country_col]],
        origin = "country.name",
        destination = "country.name"
      )
    )
}

Load and standardise the gas emissions, infant mortality, birth rate, and healthcare expenditure datasets. Each variable is then split at its 1/3 and 2/3 quantiles into three categories: low, medium, and high.

The links to datasets: https://genderdata.worldbank.org/en/indicator/sp-dyn-tfrt-in?year=2020 - Birth Rates

https://www.climatewatchdata.org/data-explorer/historical-emissions?historical-emissions-data-sources=climate-watch&historical-emissions-gases=all-ghg&historical-emissions-regions=All%20Selected&historical-emissions-sectors=total-including-lucf%2Ctotal-including-lucf&page=1 - gas emissions data

https://apps.who.int/nha/database/Select/Indicators/en - healthcare expenditure data

https://www.who.int/data/gho/data/indicators/indicator-details/GHO/under-five-mortality-rate-(probability-of-dying-by-age-5-per-1000-live-births) - mortality data (the under-five mortality rate, used here as the infant mortality measure)

# Emissions: read the CSV, keep the country name and the X2020 column, drop
# incomplete rows, and standardise the country names.
gas_data <- read.csv("historical_emissions.csv") %>%
  select(Country, Emissions_2020 = X2020) %>%
  na.omit() %>%
  standardize_countries("Country")

# Cut points at the 1/3 and 2/3 quantiles of the 2020 emissions.
quantiles_gas <- quantile(
  gas_data$Emissions_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# Label each row by tercile. The conditions are tested top-down, so a value
# reaching the second line is already known not to exceed the upper cut point.
gas_data <- mutate(
  gas_data,
  Emissions = case_when(
    Emissions_2020 > quantiles_gas[[2]] ~ "High",
    Emissions_2020 > quantiles_gas[[1]] ~ "Medium",
    Emissions_2020 <= quantiles_gas[[1]] ~ "Low"
  )
)

# Mortality: keep the 2020 rows recorded for both sexes, retain the location
# and the numeric value, drop incomplete rows, and standardise country names.
mort_data <- read.csv("mortality_data.csv") %>%
  filter(Period == 2020, Dim1 == "Both sexes") %>%
  select(Country = Location, Mortality_2020 = FactValueNumeric) %>%
  na.omit() %>%
  standardize_countries("Country")

# Cut points at the 1/3 and 2/3 quantiles of the 2020 mortality values.
quantiles_mort <- quantile(
  mort_data$Mortality_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# Label each row by tercile. The conditions are tested top-down, so a value
# reaching the second line is already known not to exceed the upper cut point.
mort_data <- mutate(
  mort_data,
  Mortality = case_when(
    Mortality_2020 > quantiles_mort[[2]] ~ "High",
    Mortality_2020 > quantiles_mort[[1]] ~ "Medium",
    Mortality_2020 <= quantiles_mort[[1]] ~ "Low"
  )
)

# Birth rate: keep the economy name and the fertility value, drop incomplete
# rows, and standardise the country names.
birth_dataset <- read.csv("births_per_woman.csv") %>%
  select(Country = Economy, Birth_rate_2020 = Fertility) %>%
  na.omit() %>%
  standardize_countries("Country")

# Cut points at the 1/3 and 2/3 quantiles of the birth rate.
quantiles_birth <- quantile(
  birth_dataset$Birth_rate_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# Label each row by tercile (conditions are tested top-down), then keep only
# the country and its category; the numeric column is not needed afterwards.
birth_dataset <- birth_dataset %>%
  mutate(
    Birth_rate = case_when(
      Birth_rate_2020 > quantiles_birth[[2]] ~ "High",
      Birth_rate_2020 > quantiles_birth[[1]] ~ "Medium",
      Birth_rate_2020 <= quantiles_birth[[1]] ~ "Low"
    )
  ) %>%
  select(Country, Birth_rate)



# Health expenditure: read the "Table" sheet, keep the country name and the
# 2020 column, drop incomplete rows, standardise the country names, and coerce
# the 2020 values to numeric.
# NOTE(review): the explicit as.numeric() suggests the column may be read as
# text; any entries that cannot be parsed would become NA here - confirm.
money_dataset <- read_excel("NHA indicators.xlsx", sheet = "Table") %>%
  select(Country = Countries, Expenditure_2020 = `2020`) %>%
  na.omit() %>%
  standardize_countries("Country") %>%
  mutate(Expenditure_2020 = as.numeric(Expenditure_2020))

# Cut points at the 1/3 and 2/3 quantiles of the 2020 expenditure.
quantiles_money <- quantile(
  money_dataset$Expenditure_2020,
  probs = c(1/3, 2/3),
  na.rm = TRUE
)

# Label each row by tercile. The conditions are tested top-down, so a value
# reaching the second line is already known not to exceed the upper cut point.
money_dataset <- mutate(
  money_dataset,
  Health_Expenditure = case_when(
    Expenditure_2020 > quantiles_money[[2]] ~ "High",
    Expenditure_2020 > quantiles_money[[1]] ~ "Medium",
    Expenditure_2020 <= quantiles_money[[1]] ~ "Low"
  )
)

# Combine the four tables on the standardised country name. The tables are
# full-joined left to right, rows with any missing value are then removed, and
# the numeric "_2020" columns are dropped so only the categories remain.
final_data <- Reduce(
  function(joined, next_table) full_join(joined, next_table, by = "Country"),
  list(gas_data, mort_data, money_dataset, birth_dataset)
) %>%
  na.omit() %>%
  select(-contains("_2020"))
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(reshape2)

# Reshape to long form, keeping Country as the identifier, so that every
# category column contributes rows of (Country, variable, value).
final_data_long <- melt(final_data, id.vars = "Country")

# Dodged bar chart: one bar per variable within each category value.
category_plot <- ggplot(final_data_long, aes(x = value, fill = variable)) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Frequency of Categorized Data", x = "Category", y = "Count") +
  theme_minimal()

# Evaluating the plot object at top level displays it.
category_plot

Next, we transform the data into transaction data:

# Build the transaction set from the category columns only.
# - Country is dropped by name rather than by position, so the result does not
#   depend on the column order of final_data.
# - The category columns are converted to factors explicitly: the coercion to
#   "transactions" is defined for logical/factor columns, and relying on an
#   implicit conversion of character columns is not portable across arules
#   versions. factor() uses its default (sorted) level order.
item_columns <- final_data[, setdiff(names(final_data), "Country"), drop = FALSE]
item_columns[] <- lapply(item_columns, factor)
trans_data <- as(item_columns, "transactions")

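Next, we mine association rules with the Apriori algorithm, requiring a minimum support of 0.10, a minimum confidence of 0.7, and at least two items per rule: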

# Mine association rules: minimum support 0.10, minimum confidence 0.7, and at
# least two items per rule. Parameter names are written out in full.
rules <- apriori(
  trans_data,
  parameter = list(support = 0.10, confidence = 0.7, minlen = 2)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.7    0.1    1 none FALSE            TRUE       5     0.1      2
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 18 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[12 item(s), 186 transaction(s)] done [0.00s].
## sorting and recoding items ... [12 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [33 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Order the rules by lift, then print them.
rules_by_lift <- sort(rules, by = "lift")
inspect(rules_by_lift)
##      lhs                             rhs                           support confidence  coverage     lift count
## [1]  {Emissions=High,                                                                                         
##       Health_Expenditure=High}    => {Mortality=Low}             0.1075269  0.9090909 0.1182796 2.865948    20
## [2]  {Health_Expenditure=High,                                                                                
##       Birth_rate=Low}             => {Mortality=Low}             0.1989247  0.8604651 0.2311828 2.712653    37
## [3]  {Mortality=Low,                                                                                          
##       Birth_rate=Low}             => {Health_Expenditure=High}   0.1989247  0.8409091 0.2365591 2.651002    37
## [4]  {Mortality=Low}              => {Health_Expenditure=High}   0.2580645  0.8135593 0.3172043 2.564780    48
## [5]  {Health_Expenditure=High}    => {Mortality=Low}             0.2580645  0.8135593 0.3172043 2.564780    48
## [6]  {Emissions=Medium,                                                                                       
##       Health_Expenditure=Low,                                                                                 
##       Birth_rate=High}            => {Mortality=High}            0.1021505  0.9047619 0.1129032 2.549784    19
## [7]  {Emissions=High,                                                                                         
##       Mortality=Low}              => {Health_Expenditure=High}   0.1075269  0.8000000 0.1344086 2.522034    20
## [8]  {Health_Expenditure=Low,                                                                                 
##       Birth_rate=High}            => {Mortality=High}            0.2473118  0.8846154 0.2795699 2.493007    46
## [9]  {Mortality=High,                                                                                         
##       Birth_rate=High}            => {Health_Expenditure=Low}    0.2473118  0.8679245 0.2849462 2.483599    46
## [10] {Mortality=Low,                                                                                          
##       Health_Expenditure=High}    => {Birth_rate=Low}            0.1989247  0.7708333 0.2580645 2.471983    37
## [11] {Emissions=Medium,                                                                                       
##       Mortality=High,                                                                                         
##       Birth_rate=High}            => {Health_Expenditure=Low}    0.1021505  0.8636364 0.1182796 2.471329    19
## [12] {Emissions=Medium,                                                                                       
##       Mortality=High,                                                                                         
##       Health_Expenditure=Low}     => {Birth_rate=High}           0.1021505  0.9047619 0.1129032 2.438923    19
## [13] {Birth_rate=Low}             => {Mortality=Low}             0.2365591  0.7586207 0.3118280 2.391584    44
## [14] {Mortality=Low}              => {Birth_rate=Low}            0.2365591  0.7457627 0.3172043 2.391584    44
## [15] {Emissions=Medium,                                                                                       
##       Birth_rate=High}            => {Mortality=High}            0.1182796  0.8461538 0.1397849 2.384615    22
## [16] {Mortality=High,                                                                                         
##       Health_Expenditure=Low}     => {Birth_rate=High}           0.2473118  0.8846154 0.2795699 2.384615    46
## [17] {Birth_rate=Low}             => {Health_Expenditure=High}   0.2311828  0.7413793 0.3118280 2.337230    43
## [18] {Health_Expenditure=High}    => {Birth_rate=Low}            0.2311828  0.7288136 0.3172043 2.337230    43
## [19] {Emissions=Low,                                                                                          
##       Mortality=High}             => {Birth_rate=High}           0.1021505  0.8636364 0.1182796 2.328063    19
## [20] {Emissions=Medium,                                                                                       
##       Mortality=High}             => {Health_Expenditure=Low}    0.1129032  0.8076923 0.1397849 2.311243    21
## [21] {Emissions=Medium,                                                                                       
##       Birth_rate=High}            => {Health_Expenditure=Low}    0.1129032  0.8076923 0.1397849 2.311243    21
## [22] {Emissions=Medium,                                                                                       
##       Mortality=High}             => {Birth_rate=High}           0.1182796  0.8461538 0.1397849 2.280936    22
## [23] {Emissions=Medium,                                                                                       
##       Health_Expenditure=Low}     => {Mortality=High}            0.1129032  0.8076923 0.1397849 2.276224    21
## [24] {Mortality=Medium,                                                                                       
##       Birth_rate=Medium}          => {Health_Expenditure=Medium} 0.1344086  0.7575758 0.1774194 2.272727    25
## [25] {Health_Expenditure=Low}     => {Mortality=High}            0.2795699  0.8000000 0.3494624 2.254545    52
## [26] {Mortality=High}             => {Health_Expenditure=Low}    0.2795699  0.7878788 0.3548387 2.254545    52
## [27] {Health_Expenditure=Medium,                                                                              
##       Birth_rate=Medium}          => {Mortality=Medium}          0.1344086  0.7352941 0.1827957 2.242044    25
## [28] {Emissions=Medium,                                                                                       
##       Health_Expenditure=Low}     => {Birth_rate=High}           0.1129032  0.8076923 0.1397849 2.177258    21
## [29] {Birth_rate=High}            => {Mortality=High}            0.2849462  0.7681159 0.3709677 2.164690    53
## [30] {Mortality=High}             => {Birth_rate=High}           0.2849462  0.8030303 0.3548387 2.164690    53
## [31] {Health_Expenditure=Low}     => {Birth_rate=High}           0.2795699  0.8000000 0.3494624 2.156522    52
## [32] {Birth_rate=High}            => {Health_Expenditure=Low}    0.2795699  0.7536232 0.3709677 2.156522    52
## [33] {Emissions=Low,                                                                                          
##       Birth_rate=High}            => {Mortality=High}            0.1021505  0.7600000 0.1344086 2.141818    19

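The above rules provide some insight into how a country's mortality rate is associated with its gas emissions, healthcare expenditure, and fertility rate. For example, high healthcare expenditure implies low mortality with a confidence of about 0.81 and a lift of about 2.56 (rule 5), while low healthcare expenditure implies high mortality with a confidence of 0.80 and a lift of about 2.25 (rule 25). Note that association rules describe which categories tend to occur together; they do not by themselves show that one factor causes another.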