Physical distance

# Input files (descriptions inferred from the file names): per-item pairwise
# Swadesh word-distance correlations, and pairwise physical distances
# between languages.
PAIRWISE_WORD_DIST_CORRS <- "data/language_pairwise_swadesh_correlations_by_item.csv"
PAIRWISE_LANG_DISTS <- "../lang_dists/lang_distance_metrics/physical/data/physical_language_distance.csv"

# Read both tables.
corr_df <- read_csv(PAIRWISE_WORD_DIST_CORRS)
pairwise_physical_dists <- read_csv(PAIRWISE_LANG_DISTS)

# Keep every row from either table. No `by` is supplied, so the join keys are
# whichever columns the two tables share.
corr_geo <- corr_df %>%
  full_join(pairwise_physical_dists)

Plots

# Scatter of word-distance correlation (x) against physical distance (y),
# one facet per item, each with its own linear fit and no confidence band.
corr_geo %>%
  ggplot(aes(x = r, y = physical_dist)) +
  facet_wrap(~item) +
  geom_point(size = .1) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_smooth(aes(color = item), se = FALSE, method = "lm") +
  xlab("swadesh pairwise distance correlation") +
  ylab("distance (meters)") +
  theme_classic()

#pdf("../figs/swadesh_plot1.pdf", width = 9)
# Correlation (y) against physical distance (x): one translucent linear fit
# per item, a black linear fit over the pooled rows, and a rug drawn from the
# rows with r > 0.
# NOTE(review): ylim() discards rows outside [0, 1] before the fits are
# computed; coord_cartesian(ylim = c(0, 1)) would only zoom -- confirm intent.
ggplot(corr_geo, aes(x = physical_dist, y = r)) +
  labs(
    x = "Distance (meters)",
    y = "Word-Pairwise Correlation \n(Pearson's r)"
  ) +
  # geom_line with stat = "smooth" (instead of geom_smooth) so that alpha can
  # be set on the per-item lines
  geom_line(
    aes(color = item),
    stat = "smooth",
    method = "lm",
    size = .8,
    alpha = 0.5
  ) +
  geom_smooth(method = "lm", size = 1.5, color = "black") +
  ylim(0, 1) +
  scale_color_discrete(name = "Swadesh word") +
  geom_rug(data = filter(corr_geo, r > 0), size = .005) +
  theme_classic(base_size = 15) +
  guides(color = guide_legend(override.aes = list(size = 2)))

#dev.off()
  #theme(legend.position = c(0.8, 0.7))



#pdf("../figs/swadesh_plot3.pdf", width = 10)
# Correlation (y) against physical distance (x) with larger base text and
# heavier axes: one translucent linear fit per item plus a black linear fit
# over the pooled rows. No axis limits are applied here.
ggplot(corr_geo, aes(x = physical_dist, y = r)) +
  labs(
    x = "Distance (meters)",
    y = "Pairwise Word Correlation across Languages \n(Pearson's r)"
  ) +
  # geom_line with stat = "smooth" (instead of geom_smooth) so that alpha can
  # be set on the per-item lines
  geom_line(
    aes(color = item),
    stat = "smooth",
    method = "lm",
    size = .9,
    alpha = 0.5
  ) +
  geom_smooth(method = "lm", size = 1.5, color = "black", alpha = .9) +
  # xlim(0, 1.65 * 10000000) +
  scale_color_discrete(name = "Swadesh word") +
  theme_classic(base_size = 20) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1)
  ) +
  guides(color = guide_legend(override.aes = list(size = 2)))

#  ggrepel::geom_text_repel(data = label_data, aes(label = item, x = x_val, y = r,  color = item), segment.color = "grey", size = 6, xlim = c((1.3 * 10000000), (1.7 * 10000000)))

#dev.off()

Descriptive stats

Z-transformed.

# Mean and SD of r computed on the Fisher z scale, then mapped back with
# fisherz2r and printed as a table.
# The filter keeps rows where lang1 sorts before lang2, so a pair listed in
# both orders is counted once and self-pairs are dropped.
corr_geo %>%
  filter(lang1 < lang2) %>%
  summarize(
    mean = mean(fisherz(r)),
    sd = sd(fisherz(r))
  ) %>%
  mutate_all(fisherz2r) %>%
  kable()
mean sd
0.5505672 0.3740164

QAP p-values

Z-transformed

# Print the stored QAP results for physical distance, ordered so the rows
# with the largest absolute estimate come first.
PHYSICAL_DISTANCE_PVALS <- "data/qap_p_values_asym_z_physical.csv"
PHYSICAL_DISTANCE_PVALS %>%
  read_csv() %>%
  arrange(desc(abs(estimate))) %>%
  kable()
item qap_p estimate tstat n
mountain 0.000 -2825749.1 -6.527855 561
earth 0.000 -2397131.1 -6.953374 595
dust 0.000 -2243129.7 -5.799924 528
fire 0.000 -2165472.5 -6.911889 561
stone 0.000 -2093437.8 -5.421962 561
sand 0.000 -2076848.1 -5.678202 496
smoke 0.000 -1974560.7 -6.355182 528
water 0.000 -1915211.4 -6.102433 561
sun 0.000 -1493137.5 -5.747840 561
river 0.005 -1489060.2 -4.148784 561
day 0.000 -1463891.7 -4.570918 595
sea 0.003 -1397519.1 -3.332816 595
night 0.000 -1353659.1 -4.032551 465
moon 0.000 -1282983.2 -4.871707 528
ash 0.000 -1207001.5 -3.927133 528
salt 0.000 -1153702.0 -3.920307 561
year 0.005 -1121038.0 -3.375470 595
wind 0.003 -1006740.1 -3.184747 528
sky 0.000 -994977.3 -3.482458 561
cloud 0.006 -865113.0 -3.271573 561
star 0.058 -714136.4 -2.278606 561
lake 0.083 -712085.2 -1.921121 528

Ecological distance

# Input files (descriptions inferred from the file names): per-item pairwise
# Swadesh correlations and pairwise ecological distances between languages.
PAIRWISE_WORD_DIST_CORRS <- "data/language_pairwise_swadesh_correlations_by_item.csv"
PAIRWISE_LANG_DISTS <- "../lang_dists/lang_distance_metrics/ecological/data/eco_language_distance.csv"
# The physical-distance file is read here only for its "lang" columns.
PAIRWISE_LANG_DISTS_LANG_CODES <- "../lang_dists/lang_distance_metrics/physical/data/physical_language_distance.csv"

# Every column whose name contains "lang".
lang_codes <- select(read_csv(PAIRWISE_LANG_DISTS_LANG_CODES), contains("lang"))

# Attach those columns to the correlations (implicit join on shared columns),
# then drop lang1/lang2 so the remaining language columns carry the pairing.
corr_df <- PAIRWISE_WORD_DIST_CORRS %>%
  read_csv() %>%
  left_join(lang_codes) %>%
  select(-c(lang1, lang2))

# NOTE(review): despite its name, this variable now holds the table read from
# the ecological distance file.
pairwise_physical_dists <- arrange(
  read_csv(PAIRWISE_LANG_DISTS),
  lang1_ETS,
  lang2_ETS
)

# Keep every row from either table; join keys are the shared columns.
corr_geo <- corr_df %>%
  full_join(pairwise_physical_dists)

Plots

# Scatter of word-distance correlation (x) against ecological distance (y),
# one facet per item, each with its own linear fit and no confidence band.
corr_geo %>%
  ggplot(aes(x = r, y = eco_dist)) +
  facet_wrap(~item) +
  geom_point(size = .1) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_smooth(aes(color = item), se = FALSE, method = "lm") +
  xlab("swadesh pairwise distance correlation") +
  # The y aesthetic is eco_dist, so the "distance (meters)" label carried over
  # from the physical-distance plot did not describe it.
  # NOTE(review): add the unit of eco_dist here if it has one.
  ylab("ecological distance") +
  theme_classic()

QAP p-values

Z-transformed

# Print the stored QAP results for ecological distance, ordered so the rows
# with the largest absolute estimate come first.
# NOTE(review): the variable name says "physical" but the path points at the
# eco results file; the name is kept in case other code refers to it.
PHYSICAL_DISTANCE_PVALS <- "data/qap_p_values_asym_z_eco.csv"
PHYSICAL_DISTANCE_PVALS %>%
  read_csv() %>%
  arrange(desc(abs(estimate))) %>%
  kable()
item qap_p estimate tstat n
smoke 0.000 -0.7446401 -5.9530375 528
mountain 0.000 -0.7210152 -4.1372637 561
fire 0.000 -0.6689288 -5.3312855 561
wind 0.000 -0.5576390 -4.4401131 528
dust 0.003 -0.5320889 -3.4083017 528
earth 0.003 -0.5029236 -3.6067660 595
year 0.004 -0.4236519 -3.2420952 595
stone 0.023 -0.4222807 -2.7519627 561
ash 0.005 -0.4218673 -3.4124309 528
sky 0.005 -0.3836457 -3.3262714 561
sand 0.040 -0.3728497 -2.5253549 496
moon 0.003 -0.3704383 -3.4068907 528
cloud 0.003 -0.3369127 -3.1570371 561
water 0.042 -0.3298983 -2.5863249 561
sea 0.117 -0.3167361 -1.9092911 595
day 0.053 -0.2888066 -2.2639741 595
night 0.175 -0.2194666 -1.5960683 465
sun 0.129 -0.2131704 -2.0153614 561
salt 0.146 -0.2091020 -1.7988737 561
river 0.462 -0.1487705 -1.0372717 561
lake 0.607 0.0809121 0.5534958 528
star 0.694 -0.0603273 -0.4914063 561

```