Quantifying Transnational Climate Impact Exposure: Adaptation Without Borders
1 Objectives
- The main task is to reproduce the quantification of transnational climate impact (TCI) exposure described in Hedlund et al. (2018).
- All R code shown here was downloaded from https://github.com/sei-international/TCI/tree/master/code.
- We only show the code and graphical outputs here.
- For interpretation of the analysis, please refer to Hedlund et al. (2018).
2 View assumptions / parameters
dname : chr "run_20191130"
indicators : List of 9
$ Transboundary_Water:List of 1
$ FDI :List of 1
$ Remittances :List of 1
$ Asylum :List of 1
$ Migration :List of 1
$ Trade_Openness :List of 1
$ Cereal_Imports :List of 2
$ Embedded_Water :List of 1
$ Globalization :List of 1
min.num.inds : num 6
now : POSIXct[1:1], format: "2019-11-30 00:39:21"
num.quantiles : num 10
outdir : chr "outputs/run_20191130"
percent.error : num 15
score : function (values, bins)
score2 : function (values, bins)
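The bodies of the score and score2 functions are not printed by str() above. As a rough sketch only (an assumption about the repository code, not the authors' exact implementation), a decile-binning scorer consistent with the call score(d[, 3], num.quantiles) in 0_getData.R could look like:
# Hypothetical sketch: assign each value to one of `bins` quantile classes
# (1 = lowest, `bins` = highest); NA inputs stay NA. Ties may collapse bins.
score <- function(values, bins) {
    breaks <- quantile(values, probs = seq(0, 1, length.out = bins + 1), na.rm = TRUE)
    as.integer(cut(values, breaks = unique(breaks), include.lowest = TRUE))
}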
3 Compile data and calculate TCI scores
3.1 code/0_getData.R
# combine data
D <- list()
for (ind in names(indicators)) {
    print(ind)
    # read dataset
    d <- read.csv(file.path("data", indicators[[ind]]$fname), stringsAsFactors = FALSE,
        check.names = FALSE)
    names(d)[3] <- paste0(ind, "_value")
    s <- score(d[, 3], num.quantiles)
    d[, paste0(ind, "_score")] <- s
    D[[ind]] <- d
}
df <- Reduce(function(...) merge(..., all = TRUE, by = c("Country code", "Country name")),
    D)
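# Note: Reduce() folds merge() over the list of per-indicator data frames, i.e.
# effectively merge(merge(D[[1]], D[[2]], ...), D[[3]], ...) and so on, as a
# full (outer) join on the two country key columns each time.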
# include covariates
gdp <- read.csv(file.path("data", "gdp.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, gdp, by = c("Country code", "Country name"), all = TRUE)
hdi <- read.csv(file.path("data", "hdi.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, hdi, by = c("Country code", "Country name"), all = TRUE)
pop <- read.csv(file.path("data", "pop.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, pop, by = c("Country code", "Country name"), all = TRUE)
gain <- read.csv(file.path("data", "ndgain.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, gain, by = c("Country code", "Country name"), all = TRUE)
# calculate overall TCI score
g <- df[, grep("score", names(df))]
number.ok <- apply(g, 1, function(x) sum(!is.na(x)))
df$TCI <- ifelse(number.ok >= min.num.inds, rowMeans(g, na.rm = TRUE), NA)
# write data to output file
write.csv(df, file.path(outdir, "data.csv"), row.names = FALSE)
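To make the aggregation rule above concrete, here is a toy illustration with hypothetical decile scores (not real data): the TCI is the mean of the available indicator scores, reported only when at least min.num.inds (= 6) of the 9 scores are non-missing.
# Toy example (hypothetical scores for one country):
scores <- c(7, 3, NA, 9, 5, 6, NA, 8, 4)                          # 7 of 9 available
if (sum(!is.na(scores)) >= 6) mean(scores, na.rm = TRUE) else NA  # TCI = 6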
3.2 code/1_uncertainty.R
## Sensitivity and uncertainty analysis (Steve Fick, 2017-08-29)
################################################################ Effect of removing each indicator in turn on the index calculation
set.seed(1984)
d <- read.csv(file.path(outdir, "data.csv"), stringsAsFactors = FALSE)
g <- d[, grep("score", names(d))]
ss <- list()
for (i in names(indicators)) {
    # recalculate index without this indicator
    j <- grep(i, names(g))
    nas <- apply(g[, -j], 1, function(x) sum(!is.na(x)) >= min.num.inds)
    v <- ifelse(nas, rowMeans(g[, -j], na.rm = TRUE), NA)
    ss[[i]]$spearman <- cor(v, d$TCI, method = "spearman", use = "complete.obs")
}
s <- as.data.frame(cbind(names(ss), unlist(ss)))
row.names(s) <- NULL
names(s) <- c("indicator", "Spearman Correlation")
s$`Spearman Correlation` <- round(as.numeric(as.character(s[, 2])), 4)
dir.create(file.path(outdir, "tables"))
dir.create(file.path(outdir, "figures"))
write.csv(s, file.path(outdir, "tables", "indicator_removal_effect.csv"), row.names = F)
################################################################ effect of different bin numbers on index
v <- d[, grep("value", names(d))]
bb <- list()
for (bin in 1:15) {
    b <- apply(v, 2, score, bin)
    nas <- apply(b, 1, function(x) sum(!is.na(x)) >= min.num.inds)
    x <- ifelse(nas, rowMeans(b, na.rm = TRUE), NA)
    bb[[as.character(bin)]] <- cor(x, d$TCI, use = "complete.obs", method = "spearman")
}
sb <- as.data.frame(cbind(names(bb), unlist(bb)))
row.names(sb) <- NULL
names(sb) <- c("bins", "Spearman Correlation")
sb$`Spearman Correlation` <- round(as.numeric(as.character(sb[, 2])), 4)
write.csv(sb, file.path(outdir, "tables", "bin_number_effect.csv"), row.names = F)
######################################################## indicator data uncertainty
noise <- function(vect, percent.error) {
    # generate deviations from a zero-mean normal distribution with sd equal to
    # 2 * percent.error% of the observed variable range
    n <- rnorm(length(vect), 0, diff(range(vect, na.rm = TRUE)) * percent.error/100 * 2)
    # cap the result to the observed variable range
    pmax(min(vect, na.rm = TRUE), pmin(vect + n, max(vect, na.rm = TRUE)))
}
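# Quick sanity check of the noise model (hypothetical input): with
# percent.error = 15 the noise sd is 2 * 15% = 30% of the variable's range,
# and perturbed values are clipped back into the observed range, e.g.
# summary(noise(1:100, 15))   # min >= 1 and max <= 100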
# for calculating change in rank
Rbar <- function(a, b) {
    mean(abs(rank(a) - rank(b)))
}
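# Worked example: Rbar(c(1, 2, 3), c(30, 20, 10)) compares ranks (1, 2, 3)
# against (3, 2, 1) and returns mean(2, 0, 2) = 4/3, the average rank shift.
# (Rbar is kept for reference; the simulations below use Spearman's rho.)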
nsim <- 999
sims.out <- list()
for (ind in names(indicators)) {
    cat(" running simulations for ", ind, "\n")
    flush.console()
    sims <- c()
    j <- grep(ind, names(v))
    for (i in 1:nsim) {
        # add noise
        vs <- v
        vs[, j] <- noise(vs[, j], percent.error)
        # recalculate index
        b <- apply(vs, 2, score, 10)
        nas <- apply(b, 1, function(x) sum(!is.na(x)) >= min.num.inds)
        x <- ifelse(nas, rowMeans(b, na.rm = TRUE), NA)
        # compare correlations
        sims <- c(sims, cor(x, d$TCI, use = "complete.obs", method = "spearman"))
        # sims <- c(sims, Rbar(x, d$TCI))
    }
    sims.out[[ind]] <- sims
}
## create summary table
sumry <- lapply(sims.out, function(x) c(mean(x) - sd(x), mean(x), mean(x) + sd(x)))
smry <- as.data.frame(do.call(rbind, sumry))
names(smry) <- c("mean - sd", "avg spearman rho", "mean + sd")
# keep row names so the indicator labels are written to the table
write.csv(smry, file.path(outdir, "tables", "indicator_data_permutation.csv"), row.names = TRUE)
## create summary figure
f.out <- file.path(outdir, "figures", "uncertainty_indicator_permutation.svg")
svg(f.out)
dotchart(smry[, 2], xlim = c(min(unlist(sims.out)), max(unlist(sims.out))), labels = row.names(smry),
xlab = "Spearman rank correlation")
for (i in 1:length(sims.out)) {
    den <- density(sims.out[[i]])
    den$x <- c(par("usr")[1], den$x, par("usr")[2])
    den$y <- c(0, den$y, 0)
    den$y <- den$y/max(den$y) * 0.5
    den$y <- den$y + i
    keep <- which(den$x >= par("usr")[1] & den$x <= par("usr")[2])
    den$x <- den$x[keep]
    den$y <- den$y[keep]
    polygon(den, col = "#e2edff")
}
segments(smry[, 1], 1:nrow(smry), smry[, 3], 1:nrow(smry), lwd = 3)
points(smry[, 2], 1:nrow(smry), pch = 16, cex = 2)
points(s$`Spearman Correlation`, 1:nrow(s), pch = 4, cex = 1.5, lwd = 3)
dev.off()
# calculate pseudo P values
# generate all possible combinations of two indicators
e <- combn(names(sims.out), 2)
# object to store results
ps <- matrix(NA, length(sims.out), length(sims.out))
row.names(ps) <- names(sims.out)
colnames(ps) <- names(sims.out)
# for each combo
for (i in 1:ncol(e)) {
    # difference between correlations
    a <- (sum((sims.out[[e[1, i]]] - sims.out[[e[2, i]]]) > 0) + 1)/(length(sims.out[[1]]) + 1)
    # allow for the inverse if the difference is negative
    b <- pmin(a, 1 - a)
    ps[e[2, i], e[1, i]] <- pmin(1, b)
}
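# Note on the construction above: a = (#{differences > 0} + 1) / (nsim + 1) is a
# one-sided permutation-style pseudo p-value, and min(a, 1 - a) makes it
# two-sided before the Bonferroni threshold is applied below.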
# bonferroni correction
ps.sig <- ps < 0.05/ncol(e)
ps.sig[ps.sig] <- "*"
ps.sig[ps.sig == "FALSE"] <- ""
ps.sig[is.na(ps.sig)] <- ""
out <- matrix(paste(round(ps, 3), ps.sig), nrow(ps), ncol(ps))
out[out == "NA "] <- ""
row.names(out) <- row.names(ps)
colnames(out) <- colnames(ps)
write.csv(out, file.path(outdir, "tables", "pairwise_indicator_comparisons.csv"),
    row.names = TRUE)
3.3 code/2_spatial_autocorrelation.R
## spatial autocorrelation
# Steve Fick
# Aug 29 2017
# This script develops a global adjacency matrix for countries of interest and
# evaluates levels of spatial autocorrelation using Moran's I for a given variable.
# make adjacency / distance matrix for world countries
library(raster)
library(deldir)
library(maps)
# setup
d <- read.csv(file.path(outdir, 'data.csv'), stringsAsFactors = FALSE, check.names=F)
sh <- shapefile('data/global_flows_map/COUNTRY_CENTROIDS.shp')
cc <- d$`Country code`
goog <- "+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs"
# filter to only countries in the dataset
s <- sh[which(sh$ISO %in% cc), ]
d <- d[d$`Country code` %in% s$ISO, ]
# rearrange
s <- s[match(d$`Country code`, s$ISO), ]
xg1 <- spTransform(s, CRS(goog))
xg2 <- xg1@data
# to create links across the Pacific
spin <- function(cr) {
    x <- cr[, 1]
    y <- cr[, 2]
    # shift x by half the Mercator world width, re-centring the map on the antimeridian
    x[which(x > 0)] <- x[which(x > 0)] - 20026376.39
    x[which(x <= 0)] <- x[which(x <= 0)] + 20026376.39
    # y[which(y > 0)] <- y[which(y > 0)] - 20048966.10
    # y[which(y <= 0)] <- y[which(y <= 0)] + 20048966.10
    return(cbind(x, y))
}
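# Quick check with hypothetical points on opposite sides of the antimeridian:
# spin(cbind(c(1.9e7, -1.9e7), c(0, 0)))   # x becomes ~ -1e6 and ~ +1e6
# i.e. the two points end up adjacent, so a Delaunay triangulation of the spun
# coordinates can link countries across the Pacific.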
coordinates(xg2) <- spin(coordinates(xg1))
# find Delaunay connections from one perspective
dela <- deldir(coordinates(xg1)[, 1], coordinates(xg1)[, 2])
del1 <- dela$delsgs
# rotate the earth and find connections from the other perspective
# (these are not included below because keeping links across the Pacific is not necessary(?))
delb <- deldir(coordinates(xg2)[, 1], coordinates(xg2)[, 2])
del2 <- delb$delsgs
# create a (sparse) adjacency matrix based on the Delaunay triangulation
dis1 <- matrix(0, nrow(s), nrow(s))
dis1[cbind(del1$ind1, del1$ind2)] <- 1
dis1[cbind(del1$ind2, del1$ind1)] <- 1
# dis1[cbind(del2$ind1, del2$ind2)] <- 1
# dis1[cbind(del2$ind2, del2$ind1)] <- 1
row.names(dis1) <- s$ISO
colnames(dis1) <- s$ISO
# double check
map('world')
title('Spatial Adjacency Matrix')
points(s, cex = 0.4, col = 'red')
invisible(lapply(1:nrow(dis1), function(v) {
    adj <- which(dis1[v, ] > 0)
    for (a in adj) {
        lines(coordinates(s)[c(v, a), 1], coordinates(s)[c(v, a), 2], col = rgb(0, 0, 0, 0.2))
    }
}))
# some ad hoc corrections to the adjacency matrix
dadj <- dis1
# remove small islands
tooSmall <- c(
    'Bouvet Island' = 'BVT',
    'Kiribati' = 'KIR'
)
ts <- sapply(tooSmall, grep, colnames(dadj))
dadj <- dadj[-ts, -ts]
# remove these connections
# all of Bermuda's connections to Europe and Africa
bmu <- c('ISL', 'IRL', 'PRT', 'CPV')
dadj['BMU', bmu] <- 0
dadj[bmu, 'BMU'] <- 0
# all of Cape Verde's connections to America
cpv <- c('PRT', 'ATG', 'BRB')
dadj['CPV', cpv] <- 0
dadj[cpv, 'CPV'] <- 0
# Uruguay - Namibia
dadj['URY', 'NAM'] <- 0
dadj['NAM', 'URY'] <- 0
# Kazakhstan - eastern Baltic states
eb <- c('FIN', 'EST', 'LVA', 'BLR', 'UKR', 'GEO')
dadj['KAZ', eb] <- dadj[eb, 'KAZ'] <- 0
# Mauritius - Australia / Singapore
dadj['MUS', c('AUS', 'SGP')] <- dadj[c('AUS', 'SGP'), 'MUS'] <- 0
# Namibia - Liberia / São Tomé and Príncipe
dadj['NAM', c('STP', 'LBR')] <- dadj[c('LBR', 'STP'), 'NAM'] <- 0
# add these connections
# Tonga - New Zealand / Vanuatu / Marshall Islands
dadj['TON', c('NZL', 'VUT', 'MHL')] <- dadj[c('NZL', 'VUT', 'MHL'), 'TON'] <- 1
# Russia - eastern Baltic states and China
eb <- c('CHN', 'FIN', 'EST', 'LVA', 'BLR', 'UKR', 'GEO')
dadj['RUS', eb] <- dadj[eb, 'RUS'] <- 1
# China <-> Vietnam, Tajikistan, Pakistan, India, Kazakhstan, Nepal
cn <- c('VNM', 'TJK', 'PAK', 'IND', 'KAZ', 'NPL')
dadj['CHN', cn] <- dadj[cn, 'CHN'] <- 1
# move the USA centroid (to the contiguous-US interior)
usa <- grep('USA', s$ISO)
cr <- coordinates(s)
cr[usa, ] <- c(-97.99, 39.38)
x <- s@data
coordinates(x) <- cr
projection(x) <- projection(s)
s <- x
# print output
svg(file.path(outdir, 'figures', 'spatial_adjacency.svg'))
map('world')
for (v in 1:nrow(dadj)) {
    adj <- which(dadj[v, ] > 0)
    for (a in adj) {
        ind <- which(s$ISO %in% row.names(dadj)[c(v, a)])
        lines(coordinates(s)[ind, 1], coordinates(s)[ind, 2], col = rgb(0, 0, 0, 0.2))
    }
}
points(s, cex = 0.4, col = 'red')
dev.off()
dis1 <- dadj
# The choice of spatial adjacency may have a large impact on estimates of
# autocorrelation. Here I used Delaunay triangulation, which connects points in
# space without 'overconnecting' and creating 'sliver triangles'. Note that I
# did not make any connections across the Pacific, which may not be reasonable.
# raw distance matrix
dis2 <- pointDistance(coordinates(s), coordinates(s),lonlat = TRUE, allpairs = TRUE)
# function for calculating Moran's I
my.m <- function(v) {
    # v must be a named vector with ISO codes
    require(spdep)
    i <- names(v)[which(!is.na(v))]
    w1 <- mat2listw(dis1[i, i])
    v <- v[i]
    # moran.mc(v, w1, 999)
    corr <- sp.correlogram(w1$neighbours, v, order = 5, method = 'I',
        randomisation = FALSE, zero.policy = TRUE)
    # plot(corr)
    print(length(v))
    return(corr)
}
# add names
g <- d$GaIN
tt <- d$TCI
names(g) <- names(tt) <- d$`Country code`
gain <- my.m(g)
tc <- my.m(tt)
svg(file.path(outdir, 'figures', 'moransI.svg'))
plot(0, 0, xlim = c(0.7, 5.3), ylim = c(-0.3, 0.8), type = 'n', xlab = 'Spatial Lag',
    ylab = "Moran's I", xaxt = 'n')
axis(1, at = 1:5, labels = 1:5)
# error bars of +/- 2 standard deviations around each estimate
dtc <- sqrt(tc$res[, 3]) * 2
itc <- tc$res[, 1]
dg <- sqrt(gain$res[, 3]) * 2
ig <- gain$res[, 1]
arrows(1:5 + 0.1, itc + dtc, 1:5 + 0.1, itc - dtc, code = 3, angle = 90, length = 0.1)
# points(1:5 + 0.1, itc, pch = 21, lwd = 1, bg = 'grey')
points(1:5 + 0.1, itc, pch = 15, lwd = 2, col = 'black')
arrows(1:5 - 0.1, ig + dg, 1:5 - 0.1, ig - dg, code = 3, angle = 90, length = 0.1)
points(1:5 - 0.1, ig, pch = 16, lwd = 2, col = 'black')
abline(0, 0)
legend('topright', legend = c('GaIN', 'TCI'), pch = c(16, 15), lwd = 2)
dev.off()
# High 'I' values indicate that the variable is strongly spatially clustered, or
# autocorrelated. Clearly GaIN is much more spatially autocorrelated than TCI
# across spatial lags (orders of neighbors).
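# For reference, Moran's I for values x_i with spatial weights w_ij is
#   I = (n / sum_ij(w_ij)) * sum_ij(w_ij * (x_i - xbar) * (x_j - xbar)) / sum_i((x_i - xbar)^2),
# ranging roughly from -1 (dispersed) through 0 (random) to +1 (clustered).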
# calculate whether the I values are statistically different
ii <- list()
for (i in 1:5) {
    # difference in estimates
    dif <- gain$res[i, 1] - tc$res[i, 1]
    # pooled standard deviation
    vrsd <- sqrt(gain$res[i, 3] + tc$res[i, 3])
    # probability of drawing this difference from a normal distribution with
    # mean 0 and sd vrsd (a one-sided z-test)
    p <- pnorm(0, dif, vrsd)  # very small
    cat('\nspatial lag ', i, ' : ', round(p, 10))
    ii[[paste0('lag ', i)]] <- p
}
out <- as.data.frame(do.call(rbind, ii))
names(out) <- c('p-value')
write.csv(out, file.path(outdir, 'tables', 'moransI_comparison.csv'), row.names=T)
# The TCI and GaIN scores are significantly different up to the 5th spatial lag.
# It may be useful to examine patterns of autocorrelation in other variables, and to
# measure how sensitive global index autocorrelation is to perturbations of the
# index components (weights, choice of variables etc.)
3.4 code/3_cluster.R
# cluster analysis for TCI scores (Steve Fick, 2017-08-29)
library(vegan)
library(raster)
set.seed(1985)
d <- read.csv(file.path(outdir, "data.csv"), stringsAsFactors = FALSE, check.names = F)
d$HDI <- as.numeric(d$HDI)
row.names(d) <- d$`Country name`
s <- grep("score", names(d))
sh <- shapefile("data/global_flows_map/COUNTRY_CENTROIDS.shp")
d$area <- sh$SQKM[match(d$`Country code`, sh$ISO)]
d$logArea <- log(d$area + 1)
# find top 30%
t70 <- d$TCI > quantile(d$TCI, 0.7, na.rm = TRUE)
nt70 <- !t70
# omit NAs to allow for distance matrix calculation
x <- na.omit(d[t70, s])
i <- attributes(x)$na.action
################################################# Perform Ward's clustering on Euclidean distance
nclust <- 7
# di <- vegdist(d[t70, s], method = 'bray', na.rm = TRUE)
di <- dist(x, method = "euclidean")
tree <- hclust(di, method = "ward.D")  # "ward" was renamed to "ward.D" in newer R
clh <- cutree(tree, k = nclust)
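# cutree() cuts the dendrogram so that k = nclust groups remain, returning a
# named integer vector of cluster memberships (names = country names, values
# 1..nclust), which the aggregation below averages over.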
s2 <- c(s, grep("GaIN", names(d)), grep("HDI", names(d)), grep("logArea", names(d)))
a <- aggregate(d[t70, s2][-i, ], list(clh), mean, na.rm = TRUE)
aver <- apply(d[, s2], 2, function(x) mean(as.numeric(x), na.rm = TRUE))
colz = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#a65628", "#f781bf",
"#999999")
# make plot
svg(file.path(outdir, "figures", "cluster_dendrogram.svg"), width = 10, height = 10)
layout(matrix(c(2, 3, 4, 5, 6, 1, 1, 7, 8, 1, 1, 9, 10, 11, 12, 13), 4, 4))
plot(tree, labels = row.names(x), xlab = "", main = "Upper 30% TCI", ylab = "", yaxt = "n",
xaxt = "n", sub = "")
# j <- rect.hclust(tree, k=nclust, border= rainbow(nclust))
j <- rect.hclust(tree, k = nclust, border = colz[1:nclust])
grps <- do.call(c, lapply(1:length(j), function(x) rep(x, length(j[[x]]))))
ids <- do.call(c, j)
# labels
xs <- aggregate(1:length(grps), by = list(grps), FUN = mean)$x
text(xs, par("usr")[3], letters[1:nclust], pos = 3, offset = 0.3)
cols <- grps[match(names(clh), names(ids))]
cols <- aggregate(clh, by = list(cols), unique)
# cols <- rainbow(nclust)[order(cols$x)]
for (i in names(d)[s2]) {
    # for (i in names(d)[s]) {
    name <- gsub("_score", "", i)
    name <- gsub("_", " ", name)
    # barplot(c(a[cols$x, i]), names = letters[1:nclust], main = i, col = rainbow(nclust))
    yl <- c(0, max(c(aver[i], max(a[cols$x, i]))))
    if (grepl("log", i)) {
        k <- c(aver[i], a[cols$x, i])
        barplot(c(a[cols$x, i]), names = letters[1:nclust], main = name, col = colz[1:nclust],
            ylim = c(min(k) - 0.5, max(k)), xpd = FALSE)
    } else {
        barplot(c(a[cols$x, i]), names = letters[1:nclust], main = name, col = colz[1:nclust],
            ylim = yl)
    }
    abline(h = aver[i], lty = 2)
}
dev.off()
######################################## NMDS
layout(matrix(1, 1, 1))
row.names(d) <- d$`Country name`
y <- na.omit(d[, s])
jj <- attributes(y)$na.action
yt70 <- which(row.names(y) %in% d$`Country name`[t70])
nyt70 <- which(!row.names(y) %in% d$`Country name`[t70])
m <- metaMDS(y, try = 1000)
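# metaMDS() repeats the NMDS from many random starts and keeps the best
# solution; as a common rule of thumb, stress < 0.2 indicates a usable 2-D
# ordination (< 0.1 is good). The stress value is printed on the plot below.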
# m$points['Austria', ] <- m$points['Austria', ] + 0.05
# m$points['Netherlands', 2] <- m$points['Netherlands', 2] - 0.01
svg(file.path(outdir, "figures", "nmds.svg"), height = 10, width = 12)
nnames <- gsub("_score", "", names(d)[s])
nnames <- gsub("_", " ", nnames)
ordiplot(m, type = "n", xlim = c(-0.42, 0.35), ylim = c(-0.41, 0.35))
# ordiplot(m,type='n')
orditorp(m, display = "species", labels = nnames, col = "deepskyblue2", air = 0.01,
cex = 1.1)
orditorp(m, display = "sites", select = yt70, col = colz[grps[match(as.character(row.names(y)[yt70]),
names(unlist(j)))]], air = 0.01, cex = (d$TCI[-jj]/max(d$TCI[-jj]))[yt70])
orditorp(m, display = "sites", select = nyt70, col = rgb(0, 0, 0, 0.5), air = 0.01,
cex = (d$TCI[-jj]/max(d$TCI[-jj]))[nyt70])
text(-0.4, -0.4, paste0("stress = ", round(m$stress, 3)))
dev.off()