library(foreign)
library(Zelig)
library(ggplot2)
library(dplyr)
library(tidyr)
library(Hmisc)
Load dataset
world.data <- read.dta("http://uclspp.github.io/PUBLG100/data/QoG2012.dta")
Remove observations with missing values
world.data <- world.data %>%
filter(!is.na(undp_hdi), !is.na(former_col))
Estimate a model using Zelig
model <- zelig(undp_hdi ~ former_col, model = "ls", data = world.data, cite = FALSE)
summary(model)
## Model:
##
## Call:
## z5$zelig(formula = undp_hdi ~ former_col, data = world.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46595 -0.11869 0.02805 0.11755 0.27156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.82495 0.01983 41.607 < 2e-16
## former_col -0.19451 0.02457 -7.918 2.79e-13
##
## Residual standard error: 0.1549 on 173 degrees of freedom
## Multiple R-squared: 0.266, Adjusted R-squared: 0.2618
## F-statistic: 62.7 on 1 and 173 DF, p-value: 2.791e-13
##
## Next step: Use 'setx' method
Set explanatory variables
model_matrix <- setx(model, former_col = range(world.data$former_col))
Run simulation
sim_obj <- sim(model, model_matrix)
summary(sim_obj)
##
## [1] 0
##
##
## sim range :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 0.8255723 0.01989683 0.8261939 0.7866444 0.8627066
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 0.8261087 0.1579578 0.8207204 0.5174259 1.141889
##
##
## [1] 1
##
##
## sim range :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## 1 0.6306817 0.01479409 0.6308304 0.600907 0.6599257
## pv
## mean sd 50% 2.5% 97.5%
## [1,] 0.6310538 0.1550079 0.6350994 0.3258246 0.9211023
Extract quantities of interest from the simulations
NOTE: Colony_Status is the second column in the dataframe returned from zelig_qi_to_df, hence Colony_Status = 2 below:
sim_matrix <- zelig_qi_to_df(sim_obj) %>%
select(Colony_Status = 2, Prediction = expected_value) %>%
mutate(Colony_Status = recode(Colony_Status, `0` = "Not_Former_Colony", `1` = "Former_Colony"))
Create box plot with ggplot
ggplot(sim_matrix, aes(x = Colony_Status, y = Prediction)) +
geom_boxplot(fill = "grey", colour = "black") +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", colour = "red") +
stat_summary(fun.y = mean, geom = "point", colour = "red") +
xlab("Colonial Past")