Install & Load Required Packages

# Load all sheets from Excel file
excel_path <- "C:/Users/Khetiwe Dlamini/Downloads/Financing Schemes and UHC (1).xlsx" 

# Load individual sheets
metadata <- read_excel(excel_path, sheet = "CountryMetadata")
che <- read_excel(excel_path, sheet = "CHE")
schemes <- read_excel(excel_path, sheet = "FinancingSchemes")
uhc <- read_excel(excel_path, sheet = "UHC Index")

Clean & Prepare Data

# Reshape CHE and schemes to long format
che_long <- che %>%
  pivot_longer(cols = starts_with("20"), names_to = "year", values_to = "che_value") %>%
  mutate(year = as.integer(year))

head(che_long)
## # A tibble: 6 × 6
##   country iso3  indicators                       unit             year che_value
##   <chr>   <chr> <chr>                            <chr>           <int>     <dbl>
## 1 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2010      226.
## 2 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2011      237.
## 3 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2012      278.
## 4 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2013      280.
## 5 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2014      304.
## 6 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2015      324.
schemes_long <- schemes %>%
  pivot_longer(cols = starts_with("20"), names_to = "year", values_to = "scheme_value") %>%
  mutate(year = as.integer(year))

head(che_long)
## # A tibble: 6 × 6
##   country iso3  indicators                       unit             year che_value
##   <chr>   <chr> <chr>                            <chr>           <int>     <dbl>
## 1 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2010      226.
## 2 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2011      237.
## 3 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2012      278.
## 4 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2013      280.
## 5 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2014      304.
## 6 Algeria DZA   Current Health Expenditure (CHE) In constant (2…  2015      324.
# Pivot financing schemes wider by 'indicators'
schemes_wide <- schemes_long %>%
  pivot_wider(names_from = indicators, values_from = scheme_value)

head(schemes_wide)
## # A tibble: 6 × 17
##   country iso3  unit            year `Government schemes` Social health insura…¹
##   <chr>   <chr> <chr>          <int>                <dbl>                  <dbl>
## 1 Algeria DZA   % Current hea…  2010                 43.1                   26.4
## 2 Algeria DZA   % Current hea…  2011                 46.0                   24.6
## 3 Algeria DZA   % Current hea…  2012                 49.3                   22.2
## 4 Algeria DZA   % Current hea…  2013                 44.4                   25.0
## 5 Algeria DZA   % Current hea…  2014                 44.3                   25.9
## 6 Algeria DZA   % Current hea…  2015                 41.3                   27.2
## # ℹ abbreviated name: ¹​`Social health insurance schemes`
## # ℹ 11 more variables: `Compulsory private insurance schemes` <dbl>,
## #   `Compulsory Medical Saving Accounts (CMSA)` <dbl>,
## #   `Voluntary health insurance schemes` <dbl>,
## #   `NPISH financing schemes (including development agencies)` <dbl>,
## #   `Enterprise financing schemes` <dbl>,
## #   `Household out-of-pocket payments (OOPS)` <dbl>, …
# Clean UHC data
uhc_clean <- uhc %>%
  select(iso3, year, uhc_index = value) %>%
  filter(!is.na(uhc_index))

# Clean metadata
meta_clean <- metadata %>%
  rename(iso3 = code)

# Merge all datasets
merged_data <- uhc_clean %>%
  left_join(schemes_wide, by = c("iso3", "year")) %>%
  left_join(che_long %>% select(iso3, year, indicator = indicators, che_value), by = c("iso3", "year")) %>%
  left_join(meta_clean, by = "iso3")

head(merged_data)
## # A tibble: 6 × 23
##   iso3   year uhc_index country.x unit                      `Government schemes`
##   <chr> <dbl>     <dbl> <chr>     <chr>                                    <dbl>
## 1 AUS    2021        87 Australia % Current health expendi…                73.5 
## 2 AUT    2021        85 Austria   % Current health expendi…                37.7 
## 3 BEL    2021        86 Belgium   % Current health expendi…                22.4 
## 4 BRA    2021        80 Brazil    % Current health expendi…                44.3 
## 5 CAN    2021        91 Canada    % Current health expendi…                71.6 
## 6 CHL    2021        82 Chile     % Current health expendi…                 3.94
## # ℹ 17 more variables: `Social health insurance schemes` <dbl>,
## #   `Compulsory private insurance schemes` <dbl>,
## #   `Compulsory Medical Saving Accounts (CMSA)` <dbl>,
## #   `Voluntary health insurance schemes` <dbl>,
## #   `NPISH financing schemes (including development agencies)` <dbl>,
## #   `Enterprise financing schemes` <dbl>,
## #   `Household out-of-pocket payments (OOPS)` <dbl>, …

Generate Trend Plots of Financing Schemes Over Time

# Reshape back to long for plotting trends
scheme_cols <- c(
  "Government schemes",
  "Social health insurance schemes",
  "Compulsory private insurance schemes",
  "Voluntary health insurance schemes",
  "Household out-of-pocket payments (OOPS)"
)

# Convert column names to clean, consistent format
names(merged_data) <- make.names(names(merged_data))

# Update scheme column names after cleaning
scheme_cols_clean <- make.names(scheme_cols)

scheme_trends <- merged_data %>%
  select(year, all_of(scheme_cols_clean)) %>%
  pivot_longer(cols = all_of(scheme_cols_clean),
               names_to = "scheme",
               values_to = "value") %>%
  group_by(year, scheme) %>%
  summarise(avg_value = mean(value, na.rm = TRUE), .groups = "drop")

# Plot
ggplot(scheme_trends, aes(x = year, y = avg_value, color = scheme)) +
  geom_line(size = 1.2) +
  labs(
    title = "Global Trends in Health Financing Schemes (2010–2023)",
    y = "Average % Share of Total Health Financing",
    x = "Year"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

Regression on Merged Data

options(scipen = 5)

# Clean column names
merged_data <- merged_data %>% clean_names()

colnames(merged_data)
##  [1] "iso3"                                                                    
##  [2] "year"                                                                    
##  [3] "uhc_index"                                                               
##  [4] "country_x"                                                               
##  [5] "unit"                                                                    
##  [6] "government_schemes"                                                      
##  [7] "social_health_insurance_schemes"                                         
##  [8] "compulsory_private_insurance_schemes"                                    
##  [9] "compulsory_medical_saving_accounts_cmsa"                                 
## [10] "voluntary_health_insurance_schemes"                                      
## [11] "npish_financing_schemes_including_development_agencies"                  
## [12] "enterprise_financing_schemes"                                            
## [13] "household_out_of_pocket_payments_oops"                                   
## [14] "unspecified_financing_schemes_n_e_c"                                     
## [15] "unspecified_compulsory_contributory_health_insurance_schemes_n_e_c"      
## [16] "unspecified_government_schemes_and_compulsory_contributory_schemes_n_e_c"
## [17] "rest_of_the_world_financing_schemes_non_resident"                        
## [18] "unspecified_voluntary_health_care_payment_schemes_n_e_c"                 
## [19] "indicator"                                                               
## [20] "che_value"                                                               
## [21] "country_y"                                                               
## [22] "region"                                                                  
## [23] "income"
regression_data <- merged_data %>%
  select(
    uhc_index,
    che_value,  # Current health expenditure per capita
    government_schemes,
    social_health_insurance_schemes,
    compulsory_private_insurance_schemes,
    voluntary_health_insurance_schemes,
    household_out_of_pocket_payments_oops,
    income
  ) %>%
  drop_na()
model <- lm(uhc_index ~ ., data = regression_data)
summary(model)
## 
## Call:
## lm(formula = uhc_index ~ ., data = regression_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.6615  -5.2960   0.5867   5.6869  21.5754 
## 
## Coefficients:
##                                          Estimate  Std. Error t value Pr(>|t|)
## (Intercept)                            47.6935628   3.8317971  12.447  < 2e-16
## che_value                               0.0018797   0.0002475   7.593 9.11e-14
## government_schemes                      0.2313953   0.0396532   5.835 7.92e-09
## social_health_insurance_schemes         0.3345995   0.0401195   8.340 3.45e-16
## compulsory_private_insurance_schemes    0.1847369   0.0692284   2.669  0.00778
## voluntary_health_insurance_schemes      0.4047990   0.0600010   6.747 2.99e-11
## household_out_of_pocket_payments_oops   0.2312535   0.0373602   6.190 9.82e-10
## incomeLow                             -28.8707753   1.5558640 -18.556  < 2e-16
## incomeLower-middle                    -15.7519711   1.1204382 -14.059  < 2e-16
## incomeUpper-middle                     -5.0243634   0.9827592  -5.113 4.02e-07
##                                          
## (Intercept)                           ***
## che_value                             ***
## government_schemes                    ***
## social_health_insurance_schemes       ***
## compulsory_private_insurance_schemes  ** 
## voluntary_health_insurance_schemes    ***
## household_out_of_pocket_payments_oops ***
## incomeLow                             ***
## incomeLower-middle                    ***
## incomeUpper-middle                    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.343 on 764 degrees of freedom
## Multiple R-squared:  0.7575, Adjusted R-squared:  0.7546 
## F-statistic: 265.2 on 9 and 764 DF,  p-value: < 2.2e-16
#model_fix <- model
#tidy(model_fix, conf.int = TRUE, exponentiate = FALSE)
regression_data$predicted <- predict(model)

ggplot(regression_data, aes(x = uhc_index, y = predicted)) +
  geom_point(alpha = 0.6) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  labs(title = "Predicted vs. Actual UHC Index",
       x = "Actual UHC Index",
       y = "Predicted UHC Index") +
  theme_light()

ggplot(merged_data, aes(x = voluntary_health_insurance_schemes, y = uhc_index, color = income)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relationship Between Voluntary Health Insurance and UHC Index",
    x = "Voluntary Health Insurance (% of CHE)",
    y = "UHC Index Score",
    color = "Income Group"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 475 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 475 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Check multicollinearity
vif(model)  # Variance Inflation Factor: values > 5–10 indicate collinearity
##                                           GVIF Df GVIF^(1/(2*Df))
## che_value                             2.421911  1        1.556249
## government_schemes                    9.917468  1        3.149201
## social_health_insurance_schemes       8.702413  1        2.949985
## compulsory_private_insurance_schemes  2.036454  1        1.427044
## voluntary_health_insurance_schemes    1.853541  1        1.361448
## household_out_of_pocket_payments_oops 5.314037  1        2.305220
## income                                3.896816  3        1.254445
# Diagnostic plots
par(mfrow = c(2, 2))
plot(model)

Explore relationships

library(corrplot)
corrplot(cor(regression_data[, c("government_schemes", "social_health_insurance_schemes", 
                                 "voluntary_health_insurance_schemes", 
                                 "household_out_of_pocket_payments_oops", 
                                 "che_value")]), method = "circle")