R Markdown

Importing GHED Data

ghed_raw <- read_xlsx("GHED_data.xlsx", sheet = "Data") %>%
  select(country, code, year, 
         gghed_usd, gghed_usd2021, gghed_usd2021_pc,
         gdp_usd2021_pc, gghed_gdp, gge_gdp) %>%
  mutate(est_type = "observed")

# View the resulting data frame
#print(ghed_raw)
## Add years to 2030 and convert to long format
temp <- expand.grid(unique(ghed_raw$code), seq(2000, 2030, 1)) %>%
  rename("code" = "Var1", "year" = "Var2") %>%
  left_join(ghed_raw, by = c("code", "year")) %>%
  #convert to long format
  pivot_longer(cols = starts_with("gghed"), 
               names_to = "ind_name", values_to = "ind_value")

ghed_long <- ghed_raw %>%
  pivot_longer(cols = starts_with("gghed"), 
               names_to = "ind_name", values_to = "ind_value")

#Regression using Panel data for beginners
#print(ghed_long)

Exclude certain countries and years and take logs

ghed_filtered <- ghed_raw %>%
    mutate(
    log_gdp_pc = log(gdp_usd2021_pc),
    log_gghed_pc = log(gghed_usd2021_pc),
    log_gge_gdp = log(gge_gdp),
    contract = ifelse((log_gdp_pc-dplyr::lag(log_gdp_pc))<0,1,0)
    ) %>%
    filter(!(country %in% c("Cuba", "South Sudan", "Venezuela", "Yemen",
                          "Zimbabwe"))) %>%
    filter(!(year %in% c("2020", "2021", "2022"))) %>%
    na.omit()  # Remove any rows with NA values



model1 <- plm(log_gghed_pc ~ log_gdp_pc, data = ghed_filtered,index =
                     c("country", "year"), model = "within")
model2 <- plm(log_gghed_pc ~ log_gdp_pc + contract +log_gdp_pc*contract, 
              data = ghed_filtered,index = c("country", "year"), model = "within")
model3 <- plm(log_gghed_pc ~ log_gdp_pc + contract + log_gdp_pc*contract + 
                log_gge_gdp + log_gge_gdp*contract,data = ghed_filtered,
                index = c("country", "year"), model = "within")

# Use stargazer to display the model summary
stargazer(model1, model2, model3, type="text",align = TRUE)
## 
## ========================================================================================================
##                                                      Dependent variable:                                
##                      -----------------------------------------------------------------------------------
##                                                         log_gghed_pc                                    
##                                  (1)                         (2)                         (3)            
## --------------------------------------------------------------------------------------------------------
## log_gdp_pc                    1.269***                    1.276***                    1.161***          
##                                (0.022)                     (0.022)                     (0.020)          
##                                                                                                         
## contract                                                  -0.134**                     -0.064           
##                                                            (0.057)                     (0.066)          
##                                                                                                         
## log_gge_gdp                                                                           0.617***          
##                                                                                        (0.023)          
##                                                                                                         
## log_gdp_pc:contract                                       0.021***                     -0.005           
##                                                            (0.006)                     (0.006)          
##                                                                                                         
## contract:log_gge_gdp                                                                   0.041**          
##                                                                                        (0.019)          
##                                                                                                         
## --------------------------------------------------------------------------------------------------------
## Observations                    3,698                       3,698                       3,698           
## R2                              0.493                       0.498                       0.593           
## Adjusted R2                     0.466                       0.471                       0.570           
## F Statistic          3,415.132*** (df = 1; 3509) 1,159.300*** (df = 3; 3507) 1,019.727*** (df = 5; 3505)
## ========================================================================================================
## Note:                                                                        *p<0.1; **p<0.05; ***p<0.01

Make Predictions for 2020-2022

# Prepare the new data for the years 2020, 2021, and 2022
ghed_new_data <- ghed_raw %>%
    mutate(
        log_gdp_pc = log(gdp_usd2021_pc),
        log_gghed_pc = log(gghed_usd2021_pc),
        log_gge_gdp = log(gge_gdp),
        contract = ifelse((log_gdp_pc - dplyr::lag(log_gdp_pc)) < 0, 1, 0)
    ) %>%
    filter(country %in% c("South Africa", "Kenya", "Namibia")) %>%
    filter(year %in% c("2020", "2021", "2022")) %>%
    na.omit()  # Remove any rows with NA values

# Make predictions with all three models
predictions_model1 <- predict(model1, newdata = ghed_new_data)
## Warning in predict.plm(model1, newdata = ghed_new_data): Data supplied in
## argument 'newdata' is not a pdata.frame; weighted mean of fixed effects as in
## original model used for prediction, see ?predict.plm.
predictions_model2 <- predict(model2, newdata = ghed_new_data)
## Warning in predict.plm(model2, newdata = ghed_new_data): Data supplied in
## argument 'newdata' is not a pdata.frame; weighted mean of fixed effects as in
## original model used for prediction, see ?predict.plm.
predictions_model3 <- predict(model3, newdata = ghed_new_data)
## Warning in predict.plm(model3, newdata = ghed_new_data): Data supplied in
## argument 'newdata' is not a pdata.frame; weighted mean of fixed effects as in
## original model used for prediction, see ?predict.plm.
# Combine predictions with ghed_new_data for easy interpretation
results <- ghed_new_data %>%
    mutate(pred_model1 = predictions_model1,
           pred_model2 = predictions_model2,
           pred_model3 = predictions_model3)

# Visualize the results
ggplot(results, aes(x = as.factor(year), group = country)) +
    geom_line(aes(y = log_gghed_pc, color = "Actual")) +
    geom_line(aes(y = pred_model1, color = "Model 1 Prediction"), linetype = "dashed") +
    geom_line(aes(y = pred_model2, color = "Model 2 Prediction"), linetype = "dotted") +
    geom_line(aes(y = pred_model3, color = "Model 3 Prediction"), linetype = "twodash") +
    facet_wrap(~country, scales = "free_y") +
    labs(title = "Predictions vs Actual Log GGHE per Capita",
         x = "Year",
         y = "Log GGHE per Capita",
         color = "Legend") +
    theme_minimal()

Make predictions for South Africa and plot against orginal

# Create new data for South Africa for the years 2020-2029
sa_data <- data.frame(log_gdp_pc = rep(mean(ghed_filtered$log_gdp_pc), 10),
                      year = 2020:2029,
                      contract = 0,
                      log_gge_gdp = rep(mean(ghed_filtered$log_gge_gdp), 10))

# Make predictions using each model
sa_data$model1_pred <- predict(model1, newdata = sa_data)
## Warning in predict.plm(model1, newdata = sa_data): Data supplied in argument
## 'newdata' is not a pdata.frame; weighted mean of fixed effects as in original
## model used for prediction, see ?predict.plm.
sa_data$model2_pred <- predict(model2, newdata = sa_data)
## Warning in predict.plm(model2, newdata = sa_data): Data supplied in argument
## 'newdata' is not a pdata.frame; weighted mean of fixed effects as in original
## model used for prediction, see ?predict.plm.
sa_data$model3_pred <- predict(model3, newdata = sa_data)
## Warning in predict.plm(model3, newdata = sa_data): Data supplied in argument
## 'newdata' is not a pdata.frame; weighted mean of fixed effects as in original
## model used for prediction, see ?predict.plm.
# Plotting
sa_original <- ghed_filtered %>% filter(country == "South Africa")

ggplot() +
    geom_line(data = sa_original, aes(x = year, y = exp(log_gghed_pc), color = "Original Data"), linewidth = 1) +
    geom_line(data = sa_data, aes(x = year, y = exp(model1_pred), color = "Model 1 Prediction"), linetype = "dashed", linewidth = 1) +
    geom_line(data = sa_data, aes(x = year, y = exp(model2_pred), color = "Model 2 Prediction"), linetype = "dashed", linewidth = 1) +
    geom_line(data = sa_data, aes(x = year, y = exp(model3_pred), color = "Model 3 Prediction"), linetype = "dashed", linewidth = 1) +
    labs(title = "Comparison of Predictions for South Africa (2020-2029)",
         x = "Year",
         y = "GGHED per capita",
         color = "Data/Model") +
    scale_color_manual(values = c("Original Data" = "blue", "Model 1 Prediction" = "green", "Model 2 Prediction" = "red", "Model 3 Prediction" = "purple")) +
    theme_minimal()