library(dplyr)
library(tidyr)
library(readr)
library(printr)

Calculate Jensen–Shannon divergence between two distributions

#' Jensen-Shannon divergence between two discrete distributions.
#'
#' Uses the natural logarithm, so the result is 0 for identical distributions
#' and log(2) for distributions with disjoint support.
#'
#' @param p,q Numeric vectors of probabilities over the same bins (equal
#'   length, each expected to sum to 1).
#' @return A single non-negative number: the divergence in nats.
jsdivergence <- function(p, q) {
  # Mismatched lengths would otherwise be recycled silently into a
  # meaningless result.
  stopifnot(length(p) == length(q))

  m <- 0.5 * (p + q)

  # Kullback-Leibler term restricted to bins where `a` has mass: by
  # convention 0 * log(0) contributes 0, whereas evaluating it directly
  # yields NaN and poisons the whole sum.
  kl_term <- function(a, b) {
    keep <- a > 0
    sum(a[keep] * log(a[keep] / b[keep]))
  }

  0.5 * (kl_term(p, m) + kl_term(q, m))
}

Load sample data for one country and compute the weighted nightlight measure (nightlight divided by population density)

# Country code used to build the name of the per-country data file.
country <- "AFG"
filename <- file.path(
  "~/Projects/inequality/data",
  paste0(country, ".CSV")
)

# Keep only rows where both measures are present and strictly positive, then
# derive nightlight per unit of population density.
nightlight <- read.csv(filename) %>%
  filter(
    !is.na(population_density),
    population_density > 0,
    !is.na(nightlight),
    nightlight > 0
  ) %>%
  mutate(weighted_nightlight = nightlight / population_density)

Summarize all the groups

# One row per distinct (group_id, group) pair found in the data.
# summarize() only peels off the last grouping level, so ungroup() explicitly
# to hand downstream code a plain tibble rather than one that is still
# grouped by group_id.
groups <- nightlight %>%
  group_by(group_id, group) %>%
  summarize() %>%
  ungroup()
groups
group_id group
4000 Baloch
6000 Aimaq
7000 Hazara
10000 Pamir Tajiks
11000 Pashai
12000 Pashtuns
14000 Tajiks
15000 Turkmen
16000 Uzbeks

Transform nightlight grid data to a distribution

#' Bin a numeric vector into equal-width bins and return the bin proportions.
#'
#' The bins partition the closed range from 0 to max(x). Values outside that
#' range (i.e. negative values) are not counted.
#'
#' @param x Numeric vector whose maximum is finite and strictly positive.
#' @param bins Number of equal-width bins.
#' @return Numeric vector of length `bins` holding the share of counted
#'   values that fall in each bin; the shares sum to 1.
get_distribution <- function(x, bins = 10) {
  upper <- if (length(x) > 0) max(x) else NA_real_
  if (!is.finite(upper) || upper <= 0) {
    stop("`x` must have a finite, strictly positive maximum", call. = FALSE)
  }

  # length.out pins the last edge to exactly max(x); stepping with `by` can
  # land a hair short of it through rounding and drop the largest value.
  edges <- seq(0, upper, length.out = bins + 1)

  # include.lowest closes the first bin on the left so exact zeros are
  # counted instead of silently falling outside every bin.
  counts <- table(cut(x, breaks = edges, include.lowest = TRUE))
  as.vector(counts / sum(counts))
}

Create a matrix for group differences

# Square, NA-filled table with one row and one column per group; the loop
# further down fills in each pairwise value.
group_differences <- as.data.frame(
  matrix(nrow = nrow(groups), ncol = nrow(groups))
)
dimnames(group_differences) <- list(groups$group, groups$group)

Calculate the divergence between each pair of groups

# Bin each group's weighted nightlight values once, up front, instead of
# re-filtering and re-binning both groups for every (row, col) pair.
# which() drops NA comparisons, matching dplyr::filter() semantics.
# NOTE(review): get_distribution() bins each group over its own range
# (0 to that group's maximum), so bin edges differ between groups —
# confirm this is the intended basis for comparison.
distributions <- lapply(groups$group_id, function(id) {
  members <- which(nightlight$group_id == id)
  get_distribution(nightlight$weighted_nightlight[members], 4)
})

# seq_len() yields an empty sequence when there are no groups, whereas
# 1:nrow(groups) would iterate over c(1, 0).
for (row in seq_len(nrow(groups))) {
  for (col in seq_len(nrow(groups))) {
    group_differences[row, col] <- if (row == col) {
      # A group's divergence from itself is zero by definition.
      0
    } else {
      jsdivergence(distributions[[row]], distributions[[col]])
    }
  }
}
group_differences
Baloch Aimaq Hazara Pamir Tajiks Pashai Pashtuns Tajiks Turkmen Uzbeks
Baloch 0.0000000 0.0193532 0.0375731 0.0140459 0.0352403 0.0797279 0.0799163 0.0102992 0.0348592
Aimaq 0.0193532 0.0000000 0.0099827 0.0315829 0.0633202 0.0354329 0.0363693 0.0043443 0.0033841
Hazara 0.0375731 0.0099827 0.0000000 0.0284572 0.0536547 0.0527505 0.0538679 0.0097213 0.0074158
Pamir Tajiks 0.0140459 0.0315829 0.0284572 0.0000000 0.0120722 0.1134309 0.1138424 0.0130309 0.0451943
Pashai 0.0352403 0.0633202 0.0536547 0.0120722 0.0000000 0.1485424 0.1484836 0.0388901 0.0755221
Pashtuns 0.0797279 0.0354329 0.0527505 0.1134309 0.1485424 0.0000000 0.0001947 0.0584617 0.0253501
Tajiks 0.0799163 0.0363693 0.0538679 0.1138424 0.1484836 0.0001947 0.0000000 0.0591159 0.0263258
Turkmen 0.0102992 0.0043443 0.0097213 0.0130309 0.0388901 0.0584617 0.0591159 0.0000000 0.0117500
Uzbeks 0.0348592 0.0033841 0.0074158 0.0451943 0.0755221 0.0253501 0.0263258 0.0117500 0.0000000