Note we have missing data here. Seven missing langs/countries for dplace.
# netlm ci int
get_netlm_cis <- function(raw_predictors_mat, model_summary){
raw_predictors <- raw_predictors_mat %>%
as.vector() %>%
.[!is.na(.)]
mse <- mean(model_summary$residuals^2)
mean_predictor <- mean(raw_predictors)
crit_t <- qt(0.975, length(raw_predictors) - 2) # critical value of t
ci_int <- tibble(diff = raw_predictors - mean_predictor) %>%
mutate(diff2 = diff^2) %>%
summarize(denom = sum(diff2)) %>%
mutate(ci_int = crit_t*sqrt(mse/denom)) %>%
pull(ci_int)
ci_int
}
get_regression_mat <- function(this_df){
mat <- this_df %>%
spread(lang1_ets, value) %>%
select(-lang2_ets) %>%
as.matrix()
mat[lower.tri(mat, diag = TRUE)] <- NA
mat
}
make_dv_mat <- function(mat_df, predictors){
target_mats <- mat_df %>%
filter(measure %in% predictors)
mat_size <- dim(target_mats$mat[[1]])[1]
out_array <- array(0, dim = c(nrow(target_mats), mat_size, mat_size))
for (i in 1:nrow(target_mats)){
out_array[i,,] <- filter(mat_df, measure == predictors[i])$mat[[1]]
}
out_array
}
plot_coefficients <- function(var_df, predictor_names){
all_mats <- var_df %>%
mutate_if(is.numeric, scale) %>%
gather("measure", "value", -1:-2) %>%
group_by(measure) %>%
nest() %>%
mutate(mat = map(data, get_regression_mat)) %>%
select(-data)
dv_mat <- make_dv_mat(all_mats, predictor_names)
iv_mat <- filter(all_mats, measure == "semantic_dist")$mat[[1]]
# run the model
qap_model <- netlm(iv_mat,
dv_mat,
nullhyp = "qap",
reps = 100)
# tidy the model
model_params <- tibble(term = c("intercept", predictor_names),
qap_p = summary(qap_model)$pgreqabs,
estimate = summary(qap_model)$coefficients,
tstat = summary(qap_model)$tstat,
n = summary(qap_model)$n) %>%
left_join(all_mats, by = c("term" = "measure")) %>%
mutate(ci = map_dbl(mat, get_netlm_cis, summary(qap_model)),
ci_lower = estimate - ci,
ci_upper = estimate + ci) %>%
select(-ci, -mat) %>%
mutate(model = "full") %>%
filter(term != "intercept")
plot <- ggplot(model_params, aes(x = term, color = model, y = estimate,
ymin = ci_lower, ymax = ci_upper)) +
geom_pointrange(size = 1) +
geom_hline(aes(yintercept = 0), linetype = 2) +
coord_flip() +
theme_classic(base_size = 20) +
ylim(-.95, 1) +
xlab("Predictor") +
ylab("Estimate") +
theme(legend.position = "none",
axis.line = element_line(size = 1.2),
axis.ticks = element_line(size = 1))
table <- kable(model_params)
list(plot, table)
}
INFILE <- "data/language_all_vars_distance.csv"
all_dists <- read_csv(INFILE)
map(1:2, ~pluck(plot_coefficients(all_dists,
c("eco_dist",
"physical_dist",
"wals_lang_dist",
"asjp_lang_dist",
"cultural_distances")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ------------------- ------ ---------- --------- ---- ----------- ---------- ------
## eco_dist 0.25 0.0544046 1.005859 378 -0.0127642 0.1215734 full
## physical_dist 0.06 0.1062135 1.848197 378 0.0405807 0.1718464 full
## wals_lang_dist 0.09 0.1018967 2.019937 378 0.0290979 0.1746955 full
## asjp_lang_dist 0.00 0.4425779 5.013834 378 0.3163943 0.5687615 full
## cultural_distances 0.00 0.2281461 3.533408 378 0.1240839 0.3322082 full
map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)),
c("eco_dist",
"physical_dist",
"wals_lang_dist",
"asjp_lang_dist")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------- ------ ---------- --------- ---- ---------- ---------- ------
## eco_dist 0.01 0.1116405 2.270914 378 0.0302966 0.1929844 full
## physical_dist 0.00 0.1171417 2.227873 378 0.0386681 0.1956153 full
## wals_lang_dist 0.11 0.0922570 1.809889 378 0.0050598 0.1794541 full
## asjp_lang_dist 0.00 0.5461737 6.176683 378 0.3929427 0.6994046 full
map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "cultural_distances"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ------------------- ------ ---------- --------- ---- --------- ---------- ------
## cultural_distances 0 0.4321532 8.096371 378 0.327478 0.5368283 full
map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "eco_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------- ------ ---------- --------- ---- ---------- ---------- ------
## eco_dist 0 0.2862488 6.291347 378 0.1970218 0.3754758 full
map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "physical_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## -------------- ------ ---------- --------- ---- ---------- ---------- ------
## physical_dist 0 0.3275063 7.626395 378 0.2432899 0.4117227 full
map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "wals_lang_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------- ------ ---------- --------- ---- ---------- --------- ------
## wals_lang_dist 0 0.2866301 5.837821 378 0.1903432 0.382917 full
map(1:2, ~pluck(plot_coefficients(all_dists %>% filter(!is.na(cultural_distances)), "asjp_lang_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------- ------ ---------- --------- ---- ---------- --------- ------
## asjp_lang_dist 0 0.7614965 9.389798 378 0.6024559 0.920537 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("eco_dist",
"physical_dist",
"wals_lang_dist",
"asjp_lang_dist")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------- ------ ---------- ---------- ---- ----------- ---------- ------
## eco_dist 0.00 0.1828193 4.8081533 595 0.1189864 0.2466522 full
## physical_dist 0.00 0.1512714 3.7537372 595 0.0888981 0.2136447 full
## wals_lang_dist 0.36 0.0300911 0.7752059 595 -0.0390922 0.0992744 full
## asjp_lang_dist 0.00 0.4477126 6.4547659 595 0.3277957 0.5676294 full
map(1:2, ~pluck(plot_coefficients(all_dists , "eco_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------- ------ ---------- --------- ---- ---------- --------- ------
## eco_dist 0 0.3419801 9.792337 595 0.2735072 0.410453 full
map(1:2, ~pluck(plot_coefficients(all_dists , "physical_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## -------------- ------ ---------- --------- ---- ---------- ---------- ------
## physical_dist 0 0.3490575 10.30511 595 0.2826452 0.4154697 full
map(1:2, ~pluck(plot_coefficients(all_dists , "wals_lang_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------- ------ ---------- --------- ---- ---------- ---------- ------
## wals_lang_dist 0 0.2129886 5.345091 595 0.1348608 0.2911163 full
map(1:2, ~pluck(plot_coefficients(all_dists , "asjp_lang_dist"), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------- ------ ---------- --------- ---- ---------- ---------- ------
## asjp_lang_dist 0 0.6965851 10.77162 595 0.5697914 0.8233788 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("agriculture_and_vegetation",
"basic_actions_and_technology",
"emotions_and_values",
"kinship",
"law",
"possession",
"religion_and_belief",
"social_and_political_relations",
"the_house",
"the_physical_world")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ------------------------------- ------ ----------- ---------- ---- ----------- ----------- ------
## agriculture_and_vegetation 0.10 -0.1255285 -1.829369 378 -0.2084230 -0.0426341 full
## basic_actions_and_technology 0.03 0.3738688 2.141551 378 0.2790195 0.4687181 full
## emotions_and_values 0.00 -0.7764731 -6.044932 378 -0.8758539 -0.6770923 full
## kinship 0.00 0.3474032 4.915761 378 0.2554386 0.4393679 full
## law 0.00 0.5385601 3.317070 378 0.4512989 0.6258213 full
## possession 0.00 0.5402290 2.880950 378 0.4424306 0.6380274 full
## religion_and_belief 0.01 0.2057709 2.767753 378 0.1124045 0.2991373 full
## social_and_political_relations 0.00 -0.8083459 -3.083431 378 -0.9042143 -0.7124774 full
## the_house 0.08 0.2643000 1.732752 378 0.1707621 0.3578379 full
## the_physical_world 0.69 -0.0363138 -0.533854 378 -0.1273253 0.0546977 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("agriculture_and_vegetation")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## --------------------------- ------ ---------- --------- ---- ----------- ---------- ------
## agriculture_and_vegetation 0.15 0.0660781 1.316577 378 -0.0323474 0.1645035 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("basic_actions_and_technology")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ----------------------------- ------ ---------- --------- ---- ---------- ---------- ------
## basic_actions_and_technology 0 0.3252578 5.907181 378 0.2172777 0.4332379 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("emotions_and_values")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## -------------------- ------ ---------- --------- ---- ---------- ---------- ------
## emotions_and_values 0 0.2246805 3.796166 378 0.1086114 0.3407495 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("kinship")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## -------- ------ ---------- --------- ---- ---------- ---------- ------
## kinship 0 0.4217395 8.205573 378 0.3209462 0.5225329 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("law")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ----- ------ ---------- -------- ---- ---------- ---------- ------
## law 0 0.2859437 5.62215 378 0.1862026 0.3856849 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("possession")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ----------- ------ ---------- --------- ---- ---------- --------- ------
## possession 0 0.3806043 6.795328 378 0.2707645 0.490444 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("religion_and_belief")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## -------------------- ------ ---------- --------- ---- ---------- ---------- ------
## religion_and_belief 0 0.3032731 5.568886 378 0.1964754 0.4100708 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("the_house")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ---------- ------ ---------- --------- ---- ---------- ---------- ------
## the_house 0.07 0.1224486 2.170629 378 0.0118207 0.2330765 full
map(1:2, ~pluck(plot_coefficients(all_dists,
c("the_physical_world")), .x))
## [[1]]
##
## [[2]]
##
##
## term qap_p estimate tstat n ci_lower ci_upper model
## ------------------- ------ ---------- --------- ---- ----------- ---------- ------
## the_physical_world 0.15 0.0816674 1.482976 378 -0.0263294 0.1896642 full
social_and_political_relations