# Input files: per-item pairwise correlations and pairwise physical distances.
PAIRWISE_WORD_DIST_CORRS <- "data/language_pairwise_swadesh_correlations_by_item.csv"
PAIRWISE_LANG_DISTS <- "../lang_dists/lang_distance_metrics/physical/data/physical_language_distance.csv"

# Load both tables.
pairwise_physical_dists <- read_csv(PAIRWISE_LANG_DISTS)
corr_df <- read_csv(PAIRWISE_WORD_DIST_CORRS)

# Full join on whichever columns the two tables have in common (no explicit
# `by`), keeping unmatched rows from either side.
corr_geo <- corr_df %>%
  full_join(pairwise_physical_dists)
# Scatter of physical distance against the pairwise correlation, one panel per
# item, with a per-item linear fit drawn on top of the points.
corr_geo %>%
  ggplot(aes(x = r, y = physical_dist)) +
  facet_wrap(~item) +
  geom_point(size = .1) +
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently turn the confidence band back on.
  geom_smooth(aes(color = item), se = FALSE, method = "lm") +
  xlab("swadesh pairwise distance correlation") +
  ylab("distance (meters)") +
  theme_classic()
#pdf("../figs/swadesh_plot1.pdf", width = 9)
# Correlation against physical distance: one translucent linear fit per item,
# a black linear fit over all rows, and a rug for the rows with r > 0.
ggplot(corr_geo, aes(x = physical_dist, y = r)) +
  xlab("Distance (meters)") +
  ylab("Word-Pairwise Correlation \n(Pearson's r)") +
  # The per-item fits go through geom_line(stat = "smooth") so that alpha can
  # be set on the fitted lines.
  geom_line(
    aes(color = item),
    stat = "smooth",
    method = "lm",
    size = .8,
    alpha = 0.5
  ) +
  geom_smooth(method = "lm", size = 1.5, color = "black") +
  geom_rug(data = corr_geo %>% filter(r > 0), size = .005) +
  ylim(0, 1) +
  scale_color_discrete(name = "Swadesh word") +
  guides(color = guide_legend(override.aes = list(size = 2))) +
  theme_classic(base_size = 15)
#dev.off()
#theme(legend.position = c(0.8, 0.7))
#pdf("../figs/swadesh_plot3.pdf", width = 10)
# Larger-format version of the correlation-by-distance figure: translucent
# per-item linear fits plus a black linear fit over all rows, with heavier
# axis lines and ticks. No y-axis limits are applied here.
ggplot(corr_geo, aes(x = physical_dist, y = r)) +
  xlab("Distance (meters)") +
  ylab("Pairwise Word Correlation across Languages \n(Pearson's r)") +
  # The per-item fits go through geom_line(stat = "smooth") so that alpha can
  # be set on the fitted lines.
  geom_line(
    aes(color = item),
    stat = "smooth",
    method = "lm",
    size = .9,
    alpha = 0.5
  ) +
  geom_smooth(method = "lm", size = 1.5, color = "black", alpha = .9) +
  # xlim(0, 1.65 * 10000000) +
  scale_color_discrete(name = "Swadesh word") +
  guides(color = guide_legend(override.aes = list(size = 2))) +
  theme_classic(base_size = 20) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1)
  )
# ggrepel::geom_text_repel(data = label_data, aes(label = item, x = x_val, y = r, color = item), segment.color = "grey", size = 6, xlim = c((1.3 * 10000000), (1.7 * 10000000)))
#dev.off()
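Z-transformed: mean and SD of the pairwise correlations, computed on Fisher z values and converted back to the r scale.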
# Mean and SD of the correlations, computed on the Fisher z scale and then
# mapped back with fisherz2r before being rendered as a table.
corr_geo %>%
  # Keep rows where lang1 sorts before lang2 -- presumably so that each
  # unordered language pair is counted once; confirm against the input data.
  filter(lang1 < lang2) %>%
  summarise(
    mean = mean(fisherz(r)),
    sd = sd(fisherz(r))
  ) %>%
  mutate_all(fisherz2r) %>%
  kable()
| mean | sd |
|---|---|
| 0.5505672 | 0.3740164 |
Z-transformed: per-item QAP results for physical distance, ordered by absolute estimate.
# Stored per-item QAP results for physical distance.
PHYSICAL_DISTANCE_PVALS <- "data/qap_p_values_asym_z_physical.csv"

# Show the rows with the largest absolute estimate first.
PHYSICAL_DISTANCE_PVALS %>%
  read_csv() %>%
  arrange(-abs(estimate)) %>%
  kable()
| item | qap_p | estimate | tstat | n |
|---|---|---|---|---|
| mountain | 0.000 | -2825749.1 | -6.527855 | 561 |
| earth | 0.000 | -2397131.1 | -6.953374 | 595 |
| dust | 0.000 | -2243129.7 | -5.799924 | 528 |
| fire | 0.000 | -2165472.5 | -6.911889 | 561 |
| stone | 0.000 | -2093437.8 | -5.421962 | 561 |
| sand | 0.000 | -2076848.1 | -5.678202 | 496 |
| smoke | 0.000 | -1974560.7 | -6.355182 | 528 |
| water | 0.000 | -1915211.4 | -6.102433 | 561 |
| sun | 0.000 | -1493137.5 | -5.747840 | 561 |
| river | 0.005 | -1489060.2 | -4.148784 | 561 |
| day | 0.000 | -1463891.7 | -4.570918 | 595 |
| sea | 0.003 | -1397519.1 | -3.332816 | 595 |
| night | 0.000 | -1353659.1 | -4.032551 | 465 |
| moon | 0.000 | -1282983.2 | -4.871707 | 528 |
| ash | 0.000 | -1207001.5 | -3.927133 | 528 |
| salt | 0.000 | -1153702.0 | -3.920307 | 561 |
| year | 0.005 | -1121038.0 | -3.375470 | 595 |
| wind | 0.003 | -1006740.1 | -3.184747 | 528 |
| sky | 0.000 | -994977.3 | -3.482458 | 561 |
| cloud | 0.006 | -865113.0 | -3.271573 | 561 |
| star | 0.058 | -714136.4 | -2.278606 | 561 |
| lake | 0.083 | -712085.2 | -1.921121 | 528 |
# Input files. The physical-distance table is read again below, but only for
# its language-code columns.
PAIRWISE_WORD_DIST_CORRS <- "data/language_pairwise_swadesh_correlations_by_item.csv"
PAIRWISE_LANG_DISTS <- "../lang_dists/lang_distance_metrics/ecological/data/eco_language_distance.csv"
PAIRWISE_LANG_DISTS_LANG_CODES <- "../lang_dists/lang_distance_metrics/physical/data/physical_language_distance.csv"

# Every column whose name contains "lang" -- presumably a lookup between the
# lang1/lang2 codes and the *_ETS codes; confirm against the file.
lang_codes <- PAIRWISE_LANG_DISTS_LANG_CODES %>%
  read_csv() %>%
  select(contains("lang"))

# Attach the lookup columns to the correlations, then drop lang1/lang2.
corr_df <- PAIRWISE_WORD_DIST_CORRS %>%
  read_csv() %>%
  left_join(lang_codes) %>%
  select(-c(lang1, lang2))

# NOTE: despite its name, this object now holds the ecological distances.
pairwise_physical_dists <- PAIRWISE_LANG_DISTS %>%
  read_csv() %>%
  arrange(lang1_ETS, lang2_ETS)

# Full join on whichever columns the two tables have in common (no explicit
# `by`), keeping unmatched rows from either side.
corr_geo <- corr_df %>%
  full_join(pairwise_physical_dists)
# Scatter of ecological distance against the pairwise correlation, one panel
# per item, with a per-item linear fit drawn on top of the points.
corr_geo %>%
  ggplot(aes(x = r, y = eco_dist)) +
  facet_wrap(~item) +
  geom_point(size = .1) +
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently turn the confidence band back on.
  geom_smooth(aes(color = item), se = FALSE, method = "lm") +
  xlab("swadesh pairwise distance correlation") +
  # The y axis is eco_dist, not a physical distance, so the "(meters)" label
  # carried over from the physical-distance plot was wrong here.
  ylab("ecological distance") +
  theme_classic()
Z-transformed: per-item QAP results for ecological distance, ordered by absolute estimate.
# Stored per-item QAP results for ecological distance. The constant keeps its
# "PHYSICAL" name from the earlier chunk even though it now points at the
# eco file.
PHYSICAL_DISTANCE_PVALS <- "data/qap_p_values_asym_z_eco.csv"

# Show the rows with the largest absolute estimate first.
PHYSICAL_DISTANCE_PVALS %>%
  read_csv() %>%
  arrange(-abs(estimate)) %>%
  kable()
| item | qap_p | estimate | tstat | n |
|---|---|---|---|---|
| smoke | 0.000 | -0.7446401 | -5.9530375 | 528 |
| mountain | 0.000 | -0.7210152 | -4.1372637 | 561 |
| fire | 0.000 | -0.6689288 | -5.3312855 | 561 |
| wind | 0.000 | -0.5576390 | -4.4401131 | 528 |
| dust | 0.003 | -0.5320889 | -3.4083017 | 528 |
| earth | 0.003 | -0.5029236 | -3.6067660 | 595 |
| year | 0.004 | -0.4236519 | -3.2420952 | 595 |
| stone | 0.023 | -0.4222807 | -2.7519627 | 561 |
| ash | 0.005 | -0.4218673 | -3.4124309 | 528 |
| sky | 0.005 | -0.3836457 | -3.3262714 | 561 |
| sand | 0.040 | -0.3728497 | -2.5253549 | 496 |
| moon | 0.003 | -0.3704383 | -3.4068907 | 528 |
| cloud | 0.003 | -0.3369127 | -3.1570371 | 561 |
| water | 0.042 | -0.3298983 | -2.5863249 | 561 |
| sea | 0.117 | -0.3167361 | -1.9092911 | 595 |
| day | 0.053 | -0.2888066 | -2.2639741 | 595 |
| night | 0.175 | -0.2194666 | -1.5960683 | 465 |
| sun | 0.129 | -0.2131704 | -2.0153614 | 561 |
| salt | 0.146 | -0.2091020 | -1.7988737 | 561 |
| river | 0.462 | -0.1487705 | -1.0372717 | 561 |
| lake | 0.607 | 0.0809121 | 0.5534958 | 528 |
| star | 0.694 | -0.0603273 | -0.4914063 | 561 |
```