library(foreign)
library(Zelig)
library(ggplot2)
library(dplyr)
library(tidyr)
library(Hmisc)

Load dataset

world.data <- read.dta("http://uclspp.github.io/PUBLG100/data/QoG2012.dta")

Remove observations with missing values

world.data <- world.data %>%
  filter(!is.na(undp_hdi), !is.na(former_col))

Estimate a model using Zelig

model <- zelig(undp_hdi ~ former_col, model = "ls", data = world.data, cite = FALSE)
summary(model)
## Model: 
## 
## Call:
## z5$zelig(formula = undp_hdi ~ former_col, data = world.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46595 -0.11869  0.02805  0.11755  0.27156 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.82495    0.01983  41.607  < 2e-16
## former_col  -0.19451    0.02457  -7.918 2.79e-13
## 
## Residual standard error: 0.1549 on 173 degrees of freedom
## Multiple R-squared:  0.266,  Adjusted R-squared:  0.2618 
## F-statistic:  62.7 on 1 and 173 DF,  p-value: 2.791e-13
## 
## Next step: Use 'setx' method

Set explanatory variables

model_matrix <- setx(model, former_col = range(world.data$former_col))

Run simulation

sim_obj <- sim(model, model_matrix)
summary(sim_obj)
## 
## [1] 0
## 
## 
##  sim range :
##  -----
## ev
##        mean         sd       50%      2.5%     97.5%
## 1 0.8255723 0.01989683 0.8261939 0.7866444 0.8627066
## pv
##           mean        sd       50%      2.5%    97.5%
## [1,] 0.8261087 0.1579578 0.8207204 0.5174259 1.141889
## 
## 
## [1] 1
## 
## 
##  sim range :
##  -----
## ev
##        mean         sd       50%     2.5%     97.5%
## 1 0.6306817 0.01479409 0.6308304 0.600907 0.6599257
## pv
##           mean        sd       50%      2.5%     97.5%
## [1,] 0.6310538 0.1550079 0.6350994 0.3258246 0.9211023

Extract quantities of interest from the simulations

NOTE: Colony_Status is the second column in the dataframe returned from zelig_qi_to_df, hence Colony_Status = 2 below:

sim_matrix <- zelig_qi_to_df(sim_obj) %>% 
  select(Colony_Status = 2, Prediction = expected_value) %>% 
  mutate(Colony_Status = recode(Colony_Status, `0` = "Not_Former_Colony", `1` = "Former_Colony"))

Create box plot with ggplot

ggplot(sim_matrix, aes(x = Colony_Status, y = Prediction)) +
  geom_boxplot(fill = "grey", colour = "black") + 
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", colour = "red") + 
  stat_summary(fun.y = mean, geom = "point", colour = "red") +
  xlab("Colonial Past")