library(dplyr)
library(tidyr)
library(readr)
library(printr)
Calculate Jensen–Shannon divergence between two distributions
# Jensen-Shannon divergence between two discrete distributions.
#
# Args:
#   p, q: numeric vectors of probabilities over the same bins (same length,
#         each expected to sum to 1).
#
# Returns:
#   A single number: 0.5 * KL(p || m) + 0.5 * KL(q || m), where m is the
#   element-wise average of p and q. Natural logarithms are used.
#
# Bins where a distribution has zero mass contribute 0 to its KL term (the
# usual 0 * log(0) = 0 convention). Without this, 0 * log(0) evaluates to NaN
# and a single empty histogram bin would turn the whole result into NaN.
jsdivergence <- function(p, q) {
  # Guard against silent recycling of a shorter vector.
  stopifnot(length(p) == length(q))

  m <- 0.5 * (p + q)

  # KL(a || m) restricted to the support of `a`; m > 0 wherever a > 0.
  kl_to_mixture <- function(a) {
    support <- a > 0
    sum(a[support] * log(a[support] / m[support]))
  }

  0.5 * (kl_to_mixture(p) + kl_to_mixture(q))
}
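Load sample data for one country, keep only grid cells where both population density and nightlight are positive, and compute a weighted nightlight measure (nightlight divided by population density)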
# Country code used to build the input file name.
country <- "AFG"
filename <- file.path("~/Projects/inequality/data", paste0(country, ".CSV"))

# Read the country's CSV, keep rows where population density and nightlight
# are both present and strictly positive, then add nightlight divided by
# population density as `weighted_nightlight`.
nightlight <- read.csv(filename) %>%
  filter(!is.na(population_density) & population_density > 0) %>%
  filter(!is.na(nightlight) & nightlight > 0) %>%
  mutate(weighted_nightlight = nightlight / population_density)
Summarize all the groups
# One row per distinct (group_id, group) pair found in the nightlight data.
# summarize() only peels off the last grouping level, so the result would
# otherwise stay grouped by group_id; ungroup() returns a plain table so that
# later row indexing is not affected by leftover grouping.
groups <- nightlight %>%
  group_by(group_id, group) %>%
  summarize() %>%
  ungroup()
groups
| group_id | group |
|---|---|
| 4000 | Baloch |
| 6000 | Aimaq |
| 7000 | Hazara |
| 10000 | Pamir Tajiks |
| 11000 | Pashai |
| 12000 | Pashtuns |
| 14000 | Tajiks |
| 15000 | Turkmen |
| 16000 | Uzbeks |
Transform nightlight grid data into a binned distribution (the share of values falling in each equal-width bin between 0 and the maximum)
# Convert a numeric vector into a discrete distribution over equal-width bins.
#
# Args:
#   x:    numeric vector of observations.
#   bins: number of equal-width bins spanning [0, max(x)] (default 10).
#
# Returns:
#   A numeric vector of length `bins` holding the share of observations in
#   each bin; the shares sum to 1. Values outside [0, max(x)] fall in no bin
#   and are not counted.
get_distribution <- function(x, bins = 10) {
  upper <- if (length(x) > 0) max(x) else NA_real_
  if (!is.finite(upper) || upper == 0) {
    stop("`x` must be non-empty with a finite, non-zero maximum.",
         call. = FALSE)
  }

  # length.out pins the last break to exactly max(x); stepping by max(x) / bins
  # can land a hair off the maximum through floating-point rounding.
  breaks <- seq(0, upper, length.out = bins + 1)

  # include.lowest = TRUE closes the first bin on the left so values equal to
  # 0 are counted instead of being silently dropped.
  counts <- table(cut(x, breaks = breaks, include.lowest = TRUE))
  as.vector(counts / sum(counts))
}
Create a matrix for group differences
# Square, initially all-NA table with one row and one column per entry of
# `groups`, labelled on both axes with the group names.
group_differences <- matrix(nrow = nrow(groups), ncol = nrow(groups)) %>%
  as.data.frame()
dimnames(group_differences) <- list(groups$group, groups$group)
Calculate the differences between each group
# Fill group_differences with the Jensen-Shannon divergence between every
# pair of groups' weighted-nightlight distributions (4 bins each).
n_groups <- nrow(groups)
group_ids <- as.integer(groups$group_id)

# Bin each group's values once up front; a group's distribution does not
# depend on which other group it is compared with, so recomputing it inside
# the pair loop is wasted work.
# NOTE(review): get_distribution() scales its bins to each group's own
# maximum, so bin edges differ between groups -- confirm this is intended.
distributions <- lapply(group_ids, function(id) {
  members <- nightlight %>%
    filter(group_id == id)
  get_distribution(members$weighted_nightlight, 4)
})

# seq_len() yields an empty sequence when there are no groups, whereas
# 1:nrow(groups) would iterate over c(1, 0).
for (row in seq_len(n_groups)) {
  for (col in seq_len(n_groups)) {
    # A group compared with itself has zero divergence by definition.
    group_differences[row, col] <- if (row == col) {
      0
    } else {
      jsdivergence(distributions[[row]], distributions[[col]])
    }
  }
}
group_differences
| | Baloch | Aimaq | Hazara | Pamir Tajiks | Pashai | Pashtuns | Tajiks | Turkmen | Uzbeks |
|---|---|---|---|---|---|---|---|---|---|
| Baloch | 0.0000000 | 0.0193532 | 0.0375731 | 0.0140459 | 0.0352403 | 0.0797279 | 0.0799163 | 0.0102992 | 0.0348592 |
| Aimaq | 0.0193532 | 0.0000000 | 0.0099827 | 0.0315829 | 0.0633202 | 0.0354329 | 0.0363693 | 0.0043443 | 0.0033841 |
| Hazara | 0.0375731 | 0.0099827 | 0.0000000 | 0.0284572 | 0.0536547 | 0.0527505 | 0.0538679 | 0.0097213 | 0.0074158 |
| Pamir Tajiks | 0.0140459 | 0.0315829 | 0.0284572 | 0.0000000 | 0.0120722 | 0.1134309 | 0.1138424 | 0.0130309 | 0.0451943 |
| Pashai | 0.0352403 | 0.0633202 | 0.0536547 | 0.0120722 | 0.0000000 | 0.1485424 | 0.1484836 | 0.0388901 | 0.0755221 |
| Pashtuns | 0.0797279 | 0.0354329 | 0.0527505 | 0.1134309 | 0.1485424 | 0.0000000 | 0.0001947 | 0.0584617 | 0.0253501 |
| Tajiks | 0.0799163 | 0.0363693 | 0.0538679 | 0.1138424 | 0.1484836 | 0.0001947 | 0.0000000 | 0.0591159 | 0.0263258 |
| Turkmen | 0.0102992 | 0.0043443 | 0.0097213 | 0.0130309 | 0.0388901 | 0.0584617 | 0.0591159 | 0.0000000 | 0.0117500 |
| Uzbeks | 0.0348592 | 0.0033841 | 0.0074158 | 0.0451943 | 0.0755221 | 0.0253501 | 0.0263258 | 0.0117500 | 0.0000000 |