Introduction

This is an attempted R analytic replication of analyses reported in Carl (2017) subbmited to OpenPsych.

Setup

Load packages and data.

library(pacman)
p_load(kirkegaard, readxl, dplyr, grid, readr)
options(digits = 2, scipen = 2)

#data
d = read_excel("data/Global Terrorism Data.xlsx")
mega = read_csv("data/Megadataset_v2.0p.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   EthnicHeterogenityVanhanen2012 = col_integer(),
##   EthnicConflictVanhanen2012 = col_integer(),
##   SlowTimePrefWangetal2011 = col_integer(),
##   Math00Mean = col_integer(),
##   Math00SD = col_integer(),
##   Read00Mean = col_integer(),
##   Read00SD = col_integer(),
##   Sci00Mean = col_integer(),
##   Sci00SD = col_integer(),
##   Math03Mean = col_integer(),
##   Math03SD = col_integer(),
##   Read03Mean = col_integer(),
##   Read03SD = col_integer(),
##   Sci03Mean = col_integer(),
##   Sci03SD = col_integer(),
##   PS03Mean = col_integer(),
##   PS03SD = col_integer(),
##   Read09.Native = col_integer(),
##   Read09.1g = col_integer()
##   # ... with 101 more columns
## )
## See spec(...) for full column specifications.
#merge
d = dplyr::left_join(d, mega[c("X1", "lat", "lon")], by = c("code" = "X1"))

Transform variables

Calculate the population residualized outcome measures.

#redoce regions
d$region = d$reglab %>% fct_recode(Asia = "asi", Eurasia = "eur", "MENA" = "mid", Africa = "afr", "Latin_Carib" = "lat", West = "wes")

#resid func
resider = function(var) {
  .model = sprintf("%s ~ log_pop", var)
  lm(as.formula(.model), data = d) %>% 
    resid() %>% 
    standardize()
}

#some derived vars
d %<>% mutate(
  #basic
  pop = pop10,
  anti_isis = part %>% as.logical,
  casualties = deaths + injuries,
  pct_muslim = per10,
  #this variable is given as 6 columns but the options are mutually exclusive, so we recode as a factor
  #there are probably better ways to do this...
  legal = df_subset_by_pattern(d, "legor") %>% apply(MARGIN = 1, function(row) {
    names(row)[row %>% as.logical %>% which] %>% str_replace("legor_", "")
  }) %>% factor,
  
  #log versions
  log_pop = log(pop),
  log_pct_muslim = log(pct_muslim + 1),
  log_attacks = log(attacks + 1),
  log_casualties = log(casualties + 1),
  log_land = log(area),
  log_gdp = ln_gdppc2000,
  log_milldeaths = log(mildeaths + 1)
)

#model derived vars
d %<>% mutate(
  log_attacks_resid = resider("log_attacks"),
  log_casualties_resid = resider("log_casualties"),
  log_pct_muslim_resid = resider("log_pct_muslim")
)

Figure 1-2

#figs 1
ggplot(d, aes(log_attacks_resid, log_casualties_resid)) +
  geom_smooth(method = lm, se = F, color = "black", linetype = "dotted") +
  geom_point(aes(size = pop, color = region), shape = 1) +
  scale_size_continuous(guide = F) +
  theme_bw() 

ggsave("figures/figure 1.png")
## Saving 7 x 5 in image
#figs 2
fig2a = ggplot(d, aes(log_pct_muslim_resid, log_attacks_resid)) +
  geom_smooth(method = lm, se = F) +
  geom_point(aes(size = pop), shape = 1) +
  scale_size_continuous(guide = F) +
  theme_bw()

fig2b = ggplot(d, aes(log_pct_muslim_resid, log_casualties_resid)) +
  geom_smooth(method = lm, se = F) +
  geom_point(aes(size = pop), shape = 1) +
  scale_size_continuous(guide = F) +
  theme_bw() 

#combine
grid.newpage()
grid.draw(cbind(ggplotGrob(fig2a), ggplotGrob(fig2a), size = "last"))

ggsave("figures/figure 2.png")
## Saving 7 x 5 in image

The plots replicated. I added colors for improved interpretation.

The subplots show that there are probably modeling problems with the errors for the left cluster of cases.

Section 3.2 - Within-region associations

#combinations of region and outcome
model_combos = expand.grid(region = levels(d$region),
                           y = c("log_attacks", "log_casualties"),
                           stringsAsFactors = F)

#model and plot combinations
reg_models = map(seq_along_rows(model_combos), function(row_) {
  row_ = model_combos[row_, ]

  #subdata
  dsub = d %>% filter(region == row_$region)
  
  #build model
  model_ = sprintf("%s ~ log_pct_muslim + log_pop", row_$y)
  
  #fit
  fit = lm(model_, data = dsub) %>% MOD_summary(kfold = F)
  
  #plot
  gg = ggplot(dsub, aes_string("log_pct_muslim_resid", row_$y + "_resid")) + 
    geom_point(aes(size = pop), shape = 1) +
    geom_smooth(method = lm, se = F) +
    scale_size_continuous(guide = F) +
    theme_bw() 
  
  list(
    fit = fit,
    plot = gg
  )
})

#extract model info
reg_models_fits = map(reg_models, "fit")

data_frame(beta = map_dbl(reg_models_fits, ~.$coefs[1, 1]),
           region = model_combos$region,
           n = map_int(reg_models_fits, ~.$meta$N)
           )
## # A tibble: 12 × 3
##      beta      region     n
##     <dbl>       <chr> <int>
## 1   0.347      Africa    46
## 2   0.495        Asia    21
## 3   0.307     Eurasia    20
## 4   0.062 Latin_Carib    30
## 5  -0.215        MENA    20
## 6   0.430        West    31
## 7   0.380      Africa    46
## 8   0.491        Asia    21
## 9   0.365     Eurasia    20
## 10  0.131 Latin_Carib    30
## 11 -0.180        MENA    20
## 12  0.329        West    31
#plots
#not sure how to easily add these together to 2 compound 2x3 figures
walk(reg_models, ~.$plot %>% print)