Note we have missing data here. Seven missing langs/countries for dplace.

# netlm ci int
get_netlm_cis <- function(raw_predictors_mat, model_summary){
  raw_predictors <- raw_predictors_mat %>% 
    as.vector()  %>%
    .[!is.na(.)]
  
  mse <- mean(model_summary$residuals^2)
  mean_predictor <- mean(raw_predictors)
  crit_t <- qt(0.975, length(raw_predictors) - 2) # critical value of t
  
  ci_int <- tibble(diff =  raw_predictors - mean_predictor) %>%
    mutate(diff2 = diff^2) %>%
    summarize(denom = sum(diff2)) %>%
    mutate(ci_int = crit_t*sqrt(mse/denom)) %>%
    pull(ci_int)
  
  ci_int
}

get_regression_mat <- function(this_df){
  mat <- this_df %>%
    spread(lang1_ets, value) %>%
    select(-lang2_ets) %>%
    as.matrix()
  
  mat[lower.tri(mat, diag = TRUE)] <- NA 
  mat
}

make_dv_mat <- function(mat_df, predictors){
  
  target_mats <- mat_df %>%
    filter(measure %in% predictors)
  
  mat_size <- dim(target_mats$mat[[1]])[1]
  
  out_array <- array(0, dim = c(nrow(target_mats), mat_size, mat_size))
  for (i in 1:nrow(target_mats)){
    out_array[i,,] <- filter(mat_df, measure == predictors[i])$mat[[1]]
  }
  out_array
}

plot_coefficients <- function(var_df, predictor_names){
  all_mats <-  var_df %>%
    mutate_if(is.numeric, scale) %>%
    gather("measure", "value", -1:-2) %>%
    group_by(measure) %>%
    nest() %>%
    mutate(mat = map(data, get_regression_mat)) %>%
    select(-data)
  
  dv_mat <- make_dv_mat(all_mats, predictor_names)
  iv_mat <- filter(all_mats, measure == "semantic_dist")$mat[[1]]
  
  # run the model
  qap_model <- netlm(iv_mat, 
                     dv_mat, 
                     nullhyp = "qap",
                     reps = 100) 
  # tidy the model
  model_params <- tibble(term = c("intercept", predictor_names),
                    qap_p = summary(qap_model)$pgreqabs,
                    estimate = summary(qap_model)$coefficients,
                    tstat = summary(qap_model)$tstat,
                    n =  summary(qap_model)$n) %>%
    left_join(all_mats, by = c("term" = "measure")) %>%
    mutate(ci = map_dbl(mat, get_netlm_cis, summary(qap_model)),
           ci_lower = estimate - ci,
           ci_upper = estimate + ci) %>%
    select(-ci, -mat) %>%
    mutate(model = "full") %>%
    filter(term != "intercept")
  
  plot <- ggplot(model_params, aes(x = term, color = model, y = estimate, 
                         ymin = ci_lower, ymax = ci_upper)) +
    geom_pointrange(size = 1) +
    geom_hline(aes(yintercept = 0), linetype = 2) +
    coord_flip() +
    theme_classic(base_size = 20) +
    ylim(-.95, 1) +
    xlab("Predictor") +
    ylab("Estimate") +
    theme(legend.position = "none",
          axis.line = element_line(size = 1.2),
          axis.ticks = element_line(size = 1))
  
  table <- kable(model_params)
  list(plot, table)
}
INFILE <- "data/language_all_vars_distance.csv"
all_dists <- read_csv(INFILE)  

All predictors

Cultural subset of data

all vars

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("eco_dist",
                    "physical_dist",
                    "wals_lang_dist",
                    "asjp_lang_dist",
                    "cultural_distances")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                  qap_p    estimate      tstat     n     ci_lower    ci_upper  model 
## -------------------  ------  ----------  ---------  ----  -----------  ----------  ------
## eco_dist               0.25   0.0544046   1.005859   378   -0.0127642   0.1215734  full  
## physical_dist          0.06   0.1062135   1.848197   378    0.0405807   0.1718464  full  
## wals_lang_dist         0.09   0.1018967   2.019937   378    0.0290979   0.1746955  full  
## asjp_lang_dist         0.00   0.4425779   5.013834   378    0.3163943   0.5687615  full  
## cultural_distances     0.00   0.2281461   3.533408   378    0.1240839   0.3322082  full

no cultural

map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), 
                  c("eco_dist",
                    "physical_dist",
                    "wals_lang_dist",
                    "asjp_lang_dist")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term              qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## ---------------  ------  ----------  ---------  ----  ----------  ----------  ------
## eco_dist           0.01   0.1116405   2.270914   378   0.0302966   0.1929844  full  
## physical_dist      0.00   0.1171417   2.227873   378   0.0386681   0.1956153  full  
## wals_lang_dist     0.11   0.0922570   1.809889   378   0.0050598   0.1794541  full  
## asjp_lang_dist     0.00   0.5461737   6.176683   378   0.3929427   0.6994046  full

cultural only

map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "cultural_distances"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                  qap_p    estimate      tstat     n   ci_lower    ci_upper  model 
## -------------------  ------  ----------  ---------  ----  ---------  ----------  ------
## cultural_distances        0   0.4321532   8.096371   378   0.327478   0.5368283  full

Eco only

map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "eco_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term        qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## ---------  ------  ----------  ---------  ----  ----------  ----------  ------
## eco_dist        0   0.2862488   6.291347   378   0.1970218   0.3754758  full

physical only

map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "physical_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term             qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## --------------  ------  ----------  ---------  ----  ----------  ----------  ------
## physical_dist        0   0.3275063   7.626395   378   0.2432899   0.4117227  full

wals only

map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "wals_lang_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term              qap_p    estimate      tstat     n    ci_lower   ci_upper  model 
## ---------------  ------  ----------  ---------  ----  ----------  ---------  ------
## wals_lang_dist        0   0.2866301   5.837821   378   0.1903432   0.382917  full

asjp only

map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "asjp_lang_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term              qap_p    estimate      tstat     n    ci_lower   ci_upper  model 
## ---------------  ------  ----------  ---------  ----  ----------  ---------  ------
## asjp_lang_dist        0   0.7614965   9.389798   378   0.6024559   0.920537  full

All data

all vars

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("eco_dist",
                    "physical_dist",
                    "wals_lang_dist",
                    "asjp_lang_dist")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term              qap_p    estimate       tstat     n     ci_lower    ci_upper  model 
## ---------------  ------  ----------  ----------  ----  -----------  ----------  ------
## eco_dist           0.00   0.1828193   4.8081533   595    0.1189864   0.2466522  full  
## physical_dist      0.00   0.1512714   3.7537372   595    0.0888981   0.2136447  full  
## wals_lang_dist     0.36   0.0300911   0.7752059   595   -0.0390922   0.0992744  full  
## asjp_lang_dist     0.00   0.4477126   6.4547659   595    0.3277957   0.5676294  full

Eco only

map(1:2, ~pluck(plot_coefficients(all_dists , "eco_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term        qap_p    estimate      tstat     n    ci_lower   ci_upper  model 
## ---------  ------  ----------  ---------  ----  ----------  ---------  ------
## eco_dist        0   0.3419801   9.792337   595   0.2735072   0.410453  full

Physical only

map(1:2, ~pluck(plot_coefficients(all_dists , "physical_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term             qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## --------------  ------  ----------  ---------  ----  ----------  ----------  ------
## physical_dist        0   0.3490575   10.30511   595   0.2826452   0.4154697  full

wals only

map(1:2, ~pluck(plot_coefficients(all_dists , "wals_lang_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term              qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## ---------------  ------  ----------  ---------  ----  ----------  ----------  ------
## wals_lang_dist        0   0.2129886   5.345091   595   0.1348608   0.2911163  full

asjp only

map(1:2, ~pluck(plot_coefficients(all_dists , "asjp_lang_dist"), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term              qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## ---------------  ------  ----------  ---------  ----  ----------  ----------  ------
## asjp_lang_dist        0   0.6965851   10.77162   595   0.5697914   0.8233788  full

Cultural variables

All vars

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("agriculture_and_vegetation",
                    "basic_actions_and_technology",
                    "emotions_and_values",
                    "kinship",
                    "law", 
                    "possession",
                    "religion_and_belief",
                    "social_and_political_relations",
                    "the_house", 
                    "the_physical_world")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                              qap_p     estimate       tstat     n     ci_lower     ci_upper  model 
## -------------------------------  ------  -----------  ----------  ----  -----------  -----------  ------
## agriculture_and_vegetation         0.10   -0.1255285   -1.829369   378   -0.2084230   -0.0426341  full  
## basic_actions_and_technology       0.03    0.3738688    2.141551   378    0.2790195    0.4687181  full  
## emotions_and_values                0.00   -0.7764731   -6.044932   378   -0.8758539   -0.6770923  full  
## kinship                            0.00    0.3474032    4.915761   378    0.2554386    0.4393679  full  
## law                                0.00    0.5385601    3.317070   378    0.4512989    0.6258213  full  
## possession                         0.00    0.5402290    2.880950   378    0.4424306    0.6380274  full  
## religion_and_belief                0.01    0.2057709    2.767753   378    0.1124045    0.2991373  full  
## social_and_political_relations     0.00   -0.8083459   -3.083431   378   -0.9042143   -0.7124774  full  
## the_house                          0.08    0.2643000    1.732752   378    0.1707621    0.3578379  full  
## the_physical_world                 0.69   -0.0363138   -0.533854   378   -0.1273253    0.0546977  full

agriculture_and_vegetation

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("agriculture_and_vegetation")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                          qap_p    estimate      tstat     n     ci_lower    ci_upper  model 
## ---------------------------  ------  ----------  ---------  ----  -----------  ----------  ------
## agriculture_and_vegetation     0.15   0.0660781   1.316577   378   -0.0323474   0.1645035  full

basic_actions_and_technology

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("basic_actions_and_technology")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                            qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## -----------------------------  ------  ----------  ---------  ----  ----------  ----------  ------
## basic_actions_and_technology        0   0.3252578   5.907181   378   0.2172777   0.4332379  full

emotions_and_values

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("emotions_and_values")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                   qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## --------------------  ------  ----------  ---------  ----  ----------  ----------  ------
## emotions_and_values        0   0.2246805   3.796166   378   0.1086114   0.3407495  full

kinship

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("kinship")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term       qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## --------  ------  ----------  ---------  ----  ----------  ----------  ------
## kinship        0   0.4217395   8.205573   378   0.3209462   0.5225329  full

law

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("law")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term    qap_p    estimate     tstat     n    ci_lower    ci_upper  model 
## -----  ------  ----------  --------  ----  ----------  ----------  ------
## law         0   0.2859437   5.62215   378   0.1862026   0.3856849  full

possession

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("possession")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term          qap_p    estimate      tstat     n    ci_lower   ci_upper  model 
## -----------  ------  ----------  ---------  ----  ----------  ---------  ------
## possession        0   0.3806043   6.795328   378   0.2707645   0.490444  full

religion_and_belief

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("religion_and_belief")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                   qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## --------------------  ------  ----------  ---------  ----  ----------  ----------  ------
## religion_and_belief        0   0.3032731   5.568886   378   0.1964754   0.4100708  full

social_and_political_relations

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("social_and_political_relations")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                              qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## -------------------------------  ------  ----------  ---------  ----  ----------  ----------  ------
## social_and_political_relations        0   0.2670548   4.724576   378   0.1562053   0.3779043  full

the_house

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("the_house")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term         qap_p    estimate      tstat     n    ci_lower    ci_upper  model 
## ----------  ------  ----------  ---------  ----  ----------  ----------  ------
## the_house     0.07   0.1224486   2.170629   378   0.0118207   0.2330765  full

the_physical_world

map(1:2, ~pluck(plot_coefficients(all_dists, 
                  c("the_physical_world")), .x))
## [[1]]

## 
## [[2]]
## 
## 
## term                  qap_p    estimate      tstat     n     ci_lower    ci_upper  model 
## -------------------  ------  ----------  ---------  ----  -----------  ----------  ------
## the_physical_world     0.15   0.0816674   1.482976   378   -0.0263294   0.1896642  full