Read in files
# Load the three pairwise measure tables (file names: geo, lang, event dists).
# capital_dist_meters is dropped from the geo table right after reading.
geo <- select(
  read_csv("../../data/supplementary_data/cultural_sim_measures/geo/all_google_geo_dists.csv"),
  -capital_dist_meters
)
lang <- read_csv(
  "../../data/supplementary_data/cultural_sim_measures/lang/all_google_lang_dists.csv"
)
events <- read_csv(
  "../../data/supplementary_data/cultural_sim_measures/events/all_google_event_dists.csv"
)
These are our variables: “centroid_dist_meters”,
“wals_euclidean_dist”/“asjp_dist”,
“log_normalized_n_events_all”, and “log_normalized_mean_imports_dollars”.
# Build an order-independent identifier for a pair of codes.
#
# The two codes are sorted and pasted together with no separator, so
# (x, y) and (y, x) produce the same id. A missing code sorts last and is
# pasted as the text "NA".
#
# x, y: the two codes to combine (one value each).
# Returns a single character string.
get_unique_relation_id <- function(x, y) {
  sorted_codes <- sort(c(x, y), na.last = TRUE)
  paste0(sorted_codes[1], sorted_codes[2])
}
# Combine the event, geo and language tables into one table of country pairs.
# No `by` is given, so each full join matches on every column name the two
# tables share; pairs missing from one table are kept with NA measures.
# Each row then gets an order-independent pair id (all_codes), which is moved
# to the first column.
all_dyadic2 <- full_join(full_join(events, geo), lang) %>%
  rowwise() %>%
  mutate(
    all_codes = get_unique_relation_id(country_code_1, country_code_2)
  ) %>%
  ungroup() %>%
  select(all_codes, everything())
# Workaround for an unexplained bug: the joined table is re-read from a CSV
# cache instead of using the in-memory result of the joins.
# NOTE(review): the write below is commented out, so the cached file can be
# out of date relative to the inputs -- confirm it is regenerated when needed.
#write_csv(all_dyadic2, "../../data/supplementary_data/cultural_sim_measures/events/all_dyadic2_temp.csv")
all_dyadic2 <- mutate(
  read_csv("../../data/supplementary_data/cultural_sim_measures/events/all_dyadic2_temp.csv"),
  all_codes = as.factor(all_codes)
)

# Turn every infinite value into NA: build a rows-by-columns logical mask of
# infinite cells and assign NA through it.
is.na(all_dyadic2) <- vapply(
  all_dyadic2,
  is.infinite,
  logical(nrow(all_dyadic2))
)
# Keep a single row per pair id: rows are grouped by all_codes and only the
# first row of each group is retained, then the grouping is removed.
dyadic_clean <- ungroup(
  slice(
    group_by(all_dyadic2, all_codes),
    1
  )
)
Summary of dyadic measures:
# Per-column summary (ranges, quartiles, NA counts) of the de-duplicated pairs.
dyadic_clean %>%
  summary()
## all_codes country_code_2 country_code_1
## AEAE : 1 Length:2556 Length:2556
## AEAR : 1 Class :character Class :character
## AEAT : 1 Mode :character Mode :character
## AEAU : 1
## AEBA : 1
## AEBE : 1
## (Other):2550
## log_normalized_n_events_all log_normalized_mean_imports_dollars
## Min. :-20.258 Min. :-31.90
## 1st Qu.:-16.251 1st Qu.:-24.07
## Median :-15.175 Median :-22.90
## Mean :-15.126 Mean :-22.98
## 3rd Qu.:-13.986 3rd Qu.:-21.78
## Max. : -9.761 Max. :-17.43
## NA's :690 NA's :315
## centroid_dist_meters wals_euclidean_dist asjp_dist
## Min. : 0 Min. : 0.00 Min. :0.0000
## 1st Qu.: 2108497 1st Qu.: 82.39 1st Qu.:0.7687
## Median : 5328696 Median :141.73 Median :0.8321
## Mean : 6286402 Mean :117.52 Mean :0.7175
## 3rd Qu.: 9655098 3rd Qu.:156.59 3rd Qu.:0.8755
## Max. :19758598 Max. :216.48 Max. :0.9458
## NA's :71 NA's :712 NA's :835
# Histogram of each dyadic measure, one facet per measure.
# The measure columns are selected by name rather than by position (4:8), so
# the plot keeps working if columns are added or reordered upstream. They are
# listed in their current column order.
dyadic_clean %>%
  gather("measure", "value",
         log_normalized_n_events_all,
         log_normalized_mean_imports_dollars,
         centroid_dist_meters,
         wals_euclidean_dist,
         asjp_dist) %>%
  ggplot(aes(x = value, fill = measure)) +
  geom_histogram() +
  # Free scales: the measures span very different numeric ranges.
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  theme(legend.position = "none")
# Get continents.
# Add readable country names to every row of all_dyadic2 and order the name
# factors by continent, so plots that use them group countries by continent.
#
# countrycode() is vectorized, so it is applied to whole columns instead of
# once per row under rowwise(); this avoids thousands of separate lookups.
# fct_inorder() creates the name factors with levels in order of first
# appearance, before they are reordered by continent below.
dist_by_continent <- all_dyadic2 %>%
  mutate(
    country_name_1 = fct_inorder(
      countrycode(country_code_1, "iso2c", "country.name")
    ),
    country_name_2 = fct_inorder(
      countrycode(country_code_2, "iso2c", "country.name")
    ),
    continent_name_1 = countrycode(country_code_1, 'iso2c', 'continent'),
    continent_name_2 = countrycode(country_code_2, 'iso2c', 'continent')
  ) %>%
  # Integer code per continent (alphabetical factor levels), used only as a
  # sort key for the country-name factors.
  mutate(
    cont_order_1 = as.factor(continent_name_1) %>% as.numeric,
    cont_order_2 = as.factor(continent_name_2) %>% as.numeric
  ) %>%
  select(-continent_name_1, -continent_name_2) %>%
  # Reorder country levels so countries from the same continent are adjacent.
  mutate(
    country_name_1 = fct_reorder(country_name_1, cont_order_1),
    country_name_2 = fct_reorder(country_name_2, cont_order_2)
  )
# Draw a country-by-country heatmap of one dyadic measure.
#
# data:     data frame with country_name_1 and country_name_2 columns
# fill_aes: an aes() mapping that sets `fill` to the measure column to show
# title:    plot title
# Returns the ggplot object.
plot_dyadic_raster <- function(data, fill_aes, title) {
  ggplot(data, aes(x = country_name_1, y = country_name_2)) +
    geom_raster(fill_aes) +
    scale_fill_continuous(low = "#ffffcc", high = "#800026") +
    ggtitle(title) +
    theme_bw() +
    # Small, angled labels so the many country names stay legible.
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 6),
      axis.text.y = element_text(size = 6),
      axis.title = element_blank()
    )
}

# One heatmap per measure; only the fill column and the title differ.
plot_dyadic_raster(dist_by_continent,
                   aes(fill = centroid_dist_meters),
                   "Geographical distance")
plot_dyadic_raster(dist_by_continent,
                   aes(fill = wals_euclidean_dist),
                   "WALS distance")
plot_dyadic_raster(dist_by_continent,
                   aes(fill = asjp_dist),
                   "ASJP distance")
plot_dyadic_raster(dist_by_continent,
                   aes(fill = log_normalized_n_events_all),
                   "events")
plot_dyadic_raster(dist_by_continent,
                   aes(fill = log_normalized_mean_imports_dollars),
                   "mean_imports_dollars")
# Pairwise correlations between the measure columns. The first three columns
# (the pair id and the two country codes) are dropped first. With
# use = "complete.obs", only rows with no missing measure contribute.
# shave() blanks the upper triangle so each pair appears once.
corrs <- all_dyadic2 %>%
  select(-(1:3)) %>%
  correlate(use = "complete.obs") %>%
  shave()

# Correlation plot with a blue-white-red colour scale and angled x labels.
rplot(
  corrs,
  legend = TRUE,
  colours = c("skyblue1", "white", "indianred2")
) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Long-format table of the correlations, sorted from most positive to most
# negative; the NA cells left by shave() are removed before printing.
corrs %>%
  gather("variable1", "value", -1) %>%
  ungroup() %>%
  arrange(desc(value)) %>%
  as.data.frame() %>%
  filter(!is.na(value)) %>%
  kable()
rowname | variable1 | value |
---|---|---|
asjp_dist | wals_euclidean_dist | 0.7704572 |
log_normalized_mean_imports_dollars | log_normalized_n_events_all | 0.4849369 |
wals_euclidean_dist | centroid_dist_meters | 0.3330267 |
asjp_dist | centroid_dist_meters | 0.1832161 |
wals_euclidean_dist | log_normalized_mean_imports_dollars | -0.1558068 |
asjp_dist | log_normalized_mean_imports_dollars | -0.1669058 |
wals_euclidean_dist | log_normalized_n_events_all | -0.2923945 |
asjp_dist | log_normalized_n_events_all | -0.3193012 |
centroid_dist_meters | log_normalized_n_events_all | -0.4088289 |
centroid_dist_meters | log_normalized_mean_imports_dollars | -0.4438697 |
# Scatter plot of two dyadic measures with a fitted linear trend line.
#
# data:    data frame holding the measure columns
# mapping: an aes() mapping that sets `x` and `y`
# Returns the ggplot object.
plot_dyadic_scatter <- function(data, mapping) {
  ggplot(data, mapping) +
    geom_point() +
    geom_smooth(method = "lm") +
    theme_minimal()
}

# One plot per pair of measures; only the x/y mapping differs.
plot_dyadic_scatter(all_dyadic2,
                    aes(x = centroid_dist_meters,
                        y = log_normalized_mean_imports_dollars))
plot_dyadic_scatter(all_dyadic2,
                    aes(x = centroid_dist_meters,
                        y = log_normalized_n_events_all))
plot_dyadic_scatter(all_dyadic2,
                    aes(x = log_normalized_n_events_all,
                        y = log_normalized_mean_imports_dollars))
plot_dyadic_scatter(all_dyadic2,
                    aes(x = centroid_dist_meters,
                        y = asjp_dist))
plot_dyadic_scatter(all_dyadic2,
                    aes(x = log_normalized_n_events_all,
                        y = asjp_dist))
plot_dyadic_scatter(all_dyadic2,
                    aes(x = log_normalized_mean_imports_dollars,
                        y = asjp_dist))
# Save the table with country names and continent order codes to disk.
dist_by_continent %>%
  write_csv("../../data/supplementary_data/cultural_sim_measures/all_dyadic_vars.csv")