Install & Load Required Packages
# Load all sheets from Excel file
excel_path <- "C:/Users/Khetiwe Dlamini/Downloads/Financing Schemes and UHC (1).xlsx"
# Load individual sheets
metadata <- read_excel(excel_path, sheet = "CountryMetadata")
che <- read_excel(excel_path, sheet = "CHE")
schemes <- read_excel(excel_path, sheet = "FinancingSchemes")
uhc <- read_excel(excel_path, sheet = "UHC Index")
Clean & Prepare Data
# Reshape CHE and schemes to long format
che_long <- che %>%
pivot_longer(cols = starts_with("20"), names_to = "year", values_to = "che_value") %>%
mutate(year = as.integer(year))
head(che_long)
## # A tibble: 6 × 6
## country iso3 indicators unit year che_value
## <chr> <chr> <chr> <chr> <int> <dbl>
## 1 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2010 226.
## 2 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2011 237.
## 3 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2012 278.
## 4 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2013 280.
## 5 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2014 304.
## 6 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2015 324.
schemes_long <- schemes %>%
pivot_longer(cols = starts_with("20"), names_to = "year", values_to = "scheme_value") %>%
mutate(year = as.integer(year))
head(che_long)
## # A tibble: 6 × 6
## country iso3 indicators unit year che_value
## <chr> <chr> <chr> <chr> <int> <dbl>
## 1 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2010 226.
## 2 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2011 237.
## 3 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2012 278.
## 4 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2013 280.
## 5 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2014 304.
## 6 Algeria DZA Current Health Expenditure (CHE) In constant (2… 2015 324.
# Pivot financing schemes wider by 'indicators'
schemes_wide <- schemes_long %>%
pivot_wider(names_from = indicators, values_from = scheme_value)
head(schemes_wide)
## # A tibble: 6 × 17
## country iso3 unit year `Government schemes` Social health insura…¹
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 Algeria DZA % Current hea… 2010 43.1 26.4
## 2 Algeria DZA % Current hea… 2011 46.0 24.6
## 3 Algeria DZA % Current hea… 2012 49.3 22.2
## 4 Algeria DZA % Current hea… 2013 44.4 25.0
## 5 Algeria DZA % Current hea… 2014 44.3 25.9
## 6 Algeria DZA % Current hea… 2015 41.3 27.2
## # ℹ abbreviated name: ¹`Social health insurance schemes`
## # ℹ 11 more variables: `Compulsory private insurance schemes` <dbl>,
## # `Compulsory Medical Saving Accounts (CMSA)` <dbl>,
## # `Voluntary health insurance schemes` <dbl>,
## # `NPISH financing schemes (including development agencies)` <dbl>,
## # `Enterprise financing schemes` <dbl>,
## # `Household out-of-pocket payments (OOPS)` <dbl>, …
# Clean UHC data
uhc_clean <- uhc %>%
select(iso3, year, uhc_index = value) %>%
filter(!is.na(uhc_index))
# Clean metadata
meta_clean <- metadata %>%
rename(iso3 = code)
# Merge all datasets
merged_data <- uhc_clean %>%
left_join(schemes_wide, by = c("iso3", "year")) %>%
left_join(che_long %>% select(iso3, year, indicator = indicators, che_value), by = c("iso3", "year")) %>%
left_join(meta_clean, by = "iso3")
head(merged_data)
## # A tibble: 6 × 23
## iso3 year uhc_index country.x unit `Government schemes`
## <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 AUS 2021 87 Australia % Current health expendi… 73.5
## 2 AUT 2021 85 Austria % Current health expendi… 37.7
## 3 BEL 2021 86 Belgium % Current health expendi… 22.4
## 4 BRA 2021 80 Brazil % Current health expendi… 44.3
## 5 CAN 2021 91 Canada % Current health expendi… 71.6
## 6 CHL 2021 82 Chile % Current health expendi… 3.94
## # ℹ 17 more variables: `Social health insurance schemes` <dbl>,
## # `Compulsory private insurance schemes` <dbl>,
## # `Compulsory Medical Saving Accounts (CMSA)` <dbl>,
## # `Voluntary health insurance schemes` <dbl>,
## # `NPISH financing schemes (including development agencies)` <dbl>,
## # `Enterprise financing schemes` <dbl>,
## # `Household out-of-pocket payments (OOPS)` <dbl>, …
Generate Trend Plots of Financing Schemes Over Time
# Reshape back to long for plotting trends
scheme_cols <- c(
"Government schemes",
"Social health insurance schemes",
"Compulsory private insurance schemes",
"Voluntary health insurance schemes",
"Household out-of-pocket payments (OOPS)"
)
# Convert column names to clean, consistent format
names(merged_data) <- make.names(names(merged_data))
# Update scheme column names after cleaning
scheme_cols_clean <- make.names(scheme_cols)
scheme_trends <- merged_data %>%
select(year, all_of(scheme_cols_clean)) %>%
pivot_longer(cols = all_of(scheme_cols_clean),
names_to = "scheme",
values_to = "value") %>%
group_by(year, scheme) %>%
summarise(avg_value = mean(value, na.rm = TRUE), .groups = "drop")
# Plot
ggplot(scheme_trends, aes(x = year, y = avg_value, color = scheme)) +
geom_line(size = 1.2) +
labs(
title = "Global Trends in Health Financing Schemes (2010–2023)",
y = "Average % Share of Total Health Financing",
x = "Year"
) +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

Regression on Merged Data
options(scipen = 5)
# Clean column names
merged_data <- merged_data %>% clean_names()
colnames(merged_data)
## [1] "iso3"
## [2] "year"
## [3] "uhc_index"
## [4] "country_x"
## [5] "unit"
## [6] "government_schemes"
## [7] "social_health_insurance_schemes"
## [8] "compulsory_private_insurance_schemes"
## [9] "compulsory_medical_saving_accounts_cmsa"
## [10] "voluntary_health_insurance_schemes"
## [11] "npish_financing_schemes_including_development_agencies"
## [12] "enterprise_financing_schemes"
## [13] "household_out_of_pocket_payments_oops"
## [14] "unspecified_financing_schemes_n_e_c"
## [15] "unspecified_compulsory_contributory_health_insurance_schemes_n_e_c"
## [16] "unspecified_government_schemes_and_compulsory_contributory_schemes_n_e_c"
## [17] "rest_of_the_world_financing_schemes_non_resident"
## [18] "unspecified_voluntary_health_care_payment_schemes_n_e_c"
## [19] "indicator"
## [20] "che_value"
## [21] "country_y"
## [22] "region"
## [23] "income"
regression_data <- merged_data %>%
select(
uhc_index,
che_value, # Current health expenditure per capita
government_schemes,
social_health_insurance_schemes,
compulsory_private_insurance_schemes,
voluntary_health_insurance_schemes,
household_out_of_pocket_payments_oops,
income
) %>%
drop_na()
model <- lm(uhc_index ~ ., data = regression_data)
summary(model)
##
## Call:
## lm(formula = uhc_index ~ ., data = regression_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.6615 -5.2960 0.5867 5.6869 21.5754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.6935628 3.8317971 12.447 < 2e-16
## che_value 0.0018797 0.0002475 7.593 9.11e-14
## government_schemes 0.2313953 0.0396532 5.835 7.92e-09
## social_health_insurance_schemes 0.3345995 0.0401195 8.340 3.45e-16
## compulsory_private_insurance_schemes 0.1847369 0.0692284 2.669 0.00778
## voluntary_health_insurance_schemes 0.4047990 0.0600010 6.747 2.99e-11
## household_out_of_pocket_payments_oops 0.2312535 0.0373602 6.190 9.82e-10
## incomeLow -28.8707753 1.5558640 -18.556 < 2e-16
## incomeLower-middle -15.7519711 1.1204382 -14.059 < 2e-16
## incomeUpper-middle -5.0243634 0.9827592 -5.113 4.02e-07
##
## (Intercept) ***
## che_value ***
## government_schemes ***
## social_health_insurance_schemes ***
## compulsory_private_insurance_schemes **
## voluntary_health_insurance_schemes ***
## household_out_of_pocket_payments_oops ***
## incomeLow ***
## incomeLower-middle ***
## incomeUpper-middle ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.343 on 764 degrees of freedom
## Multiple R-squared: 0.7575, Adjusted R-squared: 0.7546
## F-statistic: 265.2 on 9 and 764 DF, p-value: < 2.2e-16
#model_fix <- model
#tidy(model_fix, conf.int = TRUE, exponentiate = FALSE)
regression_data$predicted <- predict(model)
ggplot(regression_data, aes(x = uhc_index, y = predicted)) +
geom_point(alpha = 0.6) +
geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
labs(title = "Predicted vs. Actual UHC Index",
x = "Actual UHC Index",
y = "Predicted UHC Index") +
theme_light()

ggplot(merged_data, aes(x = voluntary_health_insurance_schemes, y = uhc_index, color = income)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = "Relationship Between Voluntary Health Insurance and UHC Index",
x = "Voluntary Health Insurance (% of CHE)",
y = "UHC Index Score",
color = "Income Group"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 475 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 475 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Check multicollinearity
vif(model) # Variance Inflation Factor: values > 5–10 indicate collinearity
## GVIF Df GVIF^(1/(2*Df))
## che_value 2.421911 1 1.556249
## government_schemes 9.917468 1 3.149201
## social_health_insurance_schemes 8.702413 1 2.949985
## compulsory_private_insurance_schemes 2.036454 1 1.427044
## voluntary_health_insurance_schemes 1.853541 1 1.361448
## household_out_of_pocket_payments_oops 5.314037 1 2.305220
## income 3.896816 3 1.254445
# Diagnostic plots
par(mfrow = c(2, 2))
plot(model)

Explore relationships
library(corrplot)
corrplot(cor(regression_data[, c("government_schemes", "social_health_insurance_schemes",
"voluntary_health_insurance_schemes",
"household_out_of_pocket_payments_oops",
"che_value")]), method = "circle")
