Quantifying Transnational Climate Impact Exposure: Adaptation Without Borders
1 Objectives
- The main task is to reproduce the quantification of transnational climate impact (TCI) exposure described in Hedlund et al. (2018).
- All R code shown here was downloaded from https://github.com/sei-international/TCI/tree/master/code.
- We only show the code and graphical outputs here.
- For interpretation of the analysis, please refer to Hedlund et al. (2018).
2 View assumptions / parameters
dname : chr "run_20191130"
indicators : List of 9
$ Transboundary_Water:List of 1
$ FDI :List of 1
$ Remittances :List of 1
$ Asylum :List of 1
$ Migration :List of 1
$ Trade_Openness :List of 1
$ Cereal_Imports :List of 2
$ Embedded_Water :List of 1
$ Globalization :List of 1
min.num.inds : num 6
now : POSIXct[1:1], format: "2019-11-30 00:39:21"
num.quantiles : num 10
outdir : chr "outputs/run_20191130"
percent.error : num 15
score : function (values, bins)
score2 : function (values, bins)
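The bodies of the score and score2 functions are not printed by str() above. As a rough sketch only (an assumption about the repository code, not the authors' exact implementation), a decile-binning scorer consistent with the call score(d[, 3], num.quantiles) in 0_getData.R could look like:
# Hypothetical sketch: assign each value to one of `bins` quantile classes
# (1 = lowest, `bins` = highest); NA inputs stay NA. Ties may collapse bins.
score <- function(values, bins) {
    breaks <- quantile(values, probs = seq(0, 1, length.out = bins + 1), na.rm = TRUE)
    as.integer(cut(values, breaks = unique(breaks), include.lowest = TRUE))
}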
3 Compile data and calculate TCI scores
3.1 code/0_getData.R
# combine data
D <- list()
for (ind in names(indicators)) {
    print(ind)
    # read dataset
    d <- read.csv(file.path("data", indicators[[ind]]$fname), stringsAsFactors = FALSE,
        check.names = FALSE)
    names(d)[3] <- paste0(ind, "_value")
    s <- score(d[, 3], num.quantiles)
    d[, paste0(ind, "_score")] <- s
    D[[ind]] <- d
}
df <- Reduce(function(...) merge(..., all = TRUE, by = c("Country code", "Country name")),
    D)
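# Note: Reduce() folds merge() over the list of per-indicator data frames, i.e.
# effectively merge(merge(D[[1]], D[[2]], ...), D[[3]], ...) and so on, as a
# full (outer) join on the two country key columns each time.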
# include covariates
gdp <- read.csv(file.path("data", "gdp.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, gdp, by = c("Country code", "Country name"), all = TRUE)
hdi <- read.csv(file.path("data", "hdi.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, hdi, by = c("Country code", "Country name"), all = TRUE)
pop <- read.csv(file.path("data", "pop.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, pop, by = c("Country code", "Country name"), all = TRUE)
gain <- read.csv(file.path("data", "ndgain.csv"), check.names = FALSE, stringsAsFactors = FALSE)
df <- merge(df, gain, by = c("Country code", "Country name"), all = TRUE)
# calculate overall TCI score
g <- df[, grep("score", names(df))]
number.ok <- apply(g, 1, function(x) sum(!is.na(x)))
df$TCI <- ifelse(number.ok >= min.num.inds, rowMeans(g, na.rm = TRUE), NA)
# write data to output file
write.csv(df, file.path(outdir, "data.csv"), row.names = FALSE)
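To make the aggregation rule above concrete, here is a toy illustration with hypothetical decile scores (not real data): the TCI is the mean of the available indicator scores, reported only when at least min.num.inds (= 6) of the 9 scores are non-missing.
# Toy example (hypothetical scores for one country):
scores <- c(7, 3, NA, 9, 5, 6, NA, 8, 4)                          # 7 of 9 available
if (sum(!is.na(scores)) >= 6) mean(scores, na.rm = TRUE) else NA  # TCI = 6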
3.2 code/1_uncertainty.R
## Sensitivity and uncertainty analysis (Steve Fick, 2017-08-29)
################################################################ Effect of removing each indicator in turn on the index calculation
set.seed(1984)
d <- read.csv(file.path(outdir, "data.csv"), stringsAsFactors = FALSE)
g <- d[, grep("score", names(d))]
ss <- list()
for (i in names(indicators)) {
    # recalculate index without this indicator
    j <- grep(i, names(g))
    nas <- apply(g[, -j], 1, function(x) sum(!is.na(x)) >= min.num.inds)
    v <- ifelse(nas, rowMeans(g[, -j], na.rm = TRUE), NA)
    ss[[i]]$spearman <- cor(v, d$TCI, method = "spearman", use = "complete.obs")
}
s <- as.data.frame(cbind(names(ss), unlist(ss)))
row.names(s) <- NULL
names(s) <- c("indicator", "Spearman Correlation")
s$`Spearman Correlation` <- round(as.numeric(as.character(s[, 2])), 4)
dir.create(file.path(outdir, "tables"))
dir.create(file.path(outdir, "figures"))
write.csv(s, file.path(outdir, "tables", "indicator_removal_effect.csv"), row.names = F)
################################################################ effect of different bin numbers on index
v <- d[, grep("value", names(d))]
bb <- list()
for (bin in 1:15) {
    b <- apply(v, 2, score, bin)
    nas <- apply(b, 1, function(x) sum(!is.na(x)) >= min.num.inds)
    x <- ifelse(nas, rowMeans(b, na.rm = TRUE), NA)
    bb[[as.character(bin)]] <- cor(x, d$TCI, use = "complete.obs", method = "spearman")
}
sb <- as.data.frame(cbind(names(bb), unlist(bb)))
row.names(sb) <- NULL
names(sb) <- c("bins", "Spearman Correlation")
sb$`Spearman Correlation` <- round(as.numeric(as.character(sb[, 2])), 4)
write.csv(sb, file.path(outdir, "tables", "bin_number_effect.csv"), row.names = F)
######################################################## indicator data uncertainty
noise <- function(vect, percent.error) {
    # generate deviations from a zero-mean normal distribution with sd equal to
    # 2 * percent.error% of the observed variable range
    n <- rnorm(length(vect), 0, diff(range(vect, na.rm = TRUE)) * percent.error/100 * 2)
    # cap the result to the observed variable range
    pmax(min(vect, na.rm = TRUE), pmin(vect + n, max(vect, na.rm = TRUE)))
}
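# Quick sanity check of the noise model (hypothetical input): with
# percent.error = 15 the noise sd is 2 * 15% = 30% of the variable's range,
# and perturbed values are clipped back into the observed range, e.g.
# summary(noise(1:100, 15))   # min >= 1 and max <= 100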
# for calculating change in rank
Rbar <- function(a, b) {
    mean(abs(rank(a) - rank(b)))
}
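# Worked example: Rbar(c(1, 2, 3), c(30, 20, 10)) compares ranks (1, 2, 3)
# against (3, 2, 1) and returns mean(2, 0, 2) = 4/3, the average rank shift.
# (Rbar is kept for reference; the simulations below use Spearman's rho.)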
nsim <- 999
sims.out <- list()
for (ind in names(indicators)) {
    cat(" running simulations for ", ind, "\n")
    flush.console()
    sims <- c()
    j <- grep(ind, names(v))
    for (i in 1:nsim) {
        # add noise
        vs <- v
        vs[, j] <- noise(vs[, j], percent.error)
        # recalculate index
        b <- apply(vs, 2, score, 10)
        nas <- apply(b, 1, function(x) sum(!is.na(x)) >= min.num.inds)
        x <- ifelse(nas, rowMeans(b, na.rm = TRUE), NA)
        # compare correlations
        sims <- c(sims, cor(x, d$TCI, use = "complete.obs", method = "spearman"))
        # sims <- c(sims, Rbar(x, d$TCI))
    }
    sims.out[[ind]] <- sims
}
## create summary table
sumry <- lapply(sims.out, function(x) c(mean(x) - sd(x), mean(x), mean(x) + sd(x)))
smry <- as.data.frame(do.call(rbind, sumry))
names(smry) <- c("mean - sd", "avg spearman rho", "mean + sd")
# keep row names so the indicator labels are written to the table
write.csv(smry, file.path(outdir, "tables", "indicator_data_permutation.csv"), row.names = TRUE)
## create summary figure
f.out <- file.path(outdir, "figures", "uncertainty_indicator_permutation.svg")
svg(f.out)
dotchart(smry[, 2], xlim = c(min(unlist(sims.out)), max(unlist(sims.out))), labels = row.names(smry),
xlab = "Spearman rank correlation")
for (i in 1:length(sims.out)) {
    den <- density(sims.out[[i]])
    den$x <- c(par("usr")[1], den$x, par("usr")[2])
    den$y <- c(0, den$y, 0)
    den$y <- den$y/max(den$y) * 0.5
    den$y <- den$y + i
    keep <- which(den$x >= par("usr")[1] & den$x <= par("usr")[2])
    den$x <- den$x[keep]
    den$y <- den$y[keep]
    polygon(den, col = "#e2edff")
}
segments(smry[, 1], 1:nrow(smry), smry[, 3], 1:nrow(smry), lwd = 3)
points(smry[, 2], 1:nrow(smry), pch = 16, cex = 2)
points(s$`Spearman Correlation`, 1:nrow(s), pch = 4, cex = 1.5, lwd = 3)
dev.off()
# calculate pseudo P values
# generate all possible combinations of two indicators
e <- combn(names(sims.out), 2)
# object to store results
ps <- matrix(NA, length(sims.out), length(sims.out))
row.names(ps) <- names(sims.out)
colnames(ps) <- names(sims.out)
# for each combo
for (i in 1:ncol(e)) {
    # difference between correlations
    a <- (sum((sims.out[[e[1, i]]] - sims.out[[e[2, i]]]) > 0) + 1)/(length(sims.out[[1]]) + 1)
    # allow for the inverse if the difference is negative
    b <- pmin(a, 1 - a)
    ps[e[2, i], e[1, i]] <- pmin(1, b)
}
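# Note on the construction above: a = (#{differences > 0} + 1) / (nsim + 1) is a
# one-sided permutation-style pseudo p-value, and min(a, 1 - a) makes it
# two-sided before the Bonferroni threshold is applied below.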
# bonferroni correction
ps.sig <- ps < 0.05/ncol(e)
ps.sig[ps.sig] <- "*"
ps.sig[ps.sig == "FALSE"] <- ""
ps.sig[is.na(ps.sig)] <- ""
out <- matrix(paste(round(ps, 3), ps.sig), nrow(ps), ncol(ps))
out[out == "NA "] <- ""
row.names(out) <- row.names(ps)
colnames(out) <- colnames(ps)
write.csv(out, file.path(outdir, "tables", "pairwise_indicator_comparisons.csv"),
    row.names = TRUE)
3.3 code/2_spatial_autocorrelation.R
## spatial autocorrelation
# Steve Fick
# Aug 29 2017
# This script develops a global adjacency matrix for countries of interest and
# evaluates levels of spatial autocorrelation using Moran's I for a given variable.
# make adjacency / distance matrix for world countries
library(raster)
library(deldir)
library(maps)
# setup
d <- read.csv(file.path(outdir, 'data.csv'), stringsAsFactors = FALSE, check.names=F)
sh <- shapefile('data/global_flows_map/COUNTRY_CENTROIDS.shp')
cc <- d$`Country code`
goog <- "+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs"
# filter to only countries in the dataset
s <- sh[which(sh$ISO %in% cc), ]
d <- d[d$`Country code` %in% s$ISO, ]
# rearrange
s <- s[match(d$`Country code`, s$ISO), ]
xg1 <- spTransform(s, CRS(goog))
xg2 <- xg1@data
# to create links across the Pacific
spin <- function(cr) {
    x <- cr[, 1]
    y <- cr[, 2]
    # shift x by half the Mercator world width, re-centring the map on the antimeridian
    x[which(x > 0)] <- x[which(x > 0)] - 20026376.39
    x[which(x <= 0)] <- x[which(x <= 0)] + 20026376.39
    # y[which(y > 0)] <- y[which(y > 0)] - 20048966.10
    # y[which(y <= 0)] <- y[which(y <= 0)] + 20048966.10
    return(cbind(x, y))
}
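# Quick check with hypothetical points on opposite sides of the antimeridian:
# spin(cbind(c(1.9e7, -1.9e7), c(0, 0)))   # x becomes ~ -1e6 and ~ +1e6
# i.e. the two points end up adjacent, so a Delaunay triangulation of the spun
# coordinates can link countries across the Pacific.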
coordinates(xg2) <- spin(coordinates(xg1))
# find Delaunay connections from one perspective
dela <- deldir(coordinates(xg1)[, 1], coordinates(xg1)[, 2])
del1 <- dela$delsgs
# rotate the earth and find connections from the other perspective
# (these are not included below because keeping links across the Pacific is not necessary(?))
delb <- deldir(coordinates(xg2)[, 1], coordinates(xg2)[, 2])
del2 <- delb$delsgs
# create a (sparse) adjacency matrix based on the Delaunay triangulation
dis1 <- matrix(0, nrow(s), nrow(s))
dis1[cbind(del1$ind1, del1$ind2)] <- 1
dis1[cbind(del1$ind2, del1$ind1)] <- 1
# dis1[cbind(del2$ind1, del2$ind2)] <- 1
# dis1[cbind(del2$ind2, del2$ind1)] <- 1
row.names(dis1) <- s$ISO
colnames(dis1) <- s$ISO
# double check
map('world')
title('Spatial Adjacency Matrix')
points(s, cex = 0.4, col = 'red')
invisible(lapply(1:nrow(dis1), function(v) {
    adj <- which(dis1[v, ] > 0)
    for (a in adj) {
        lines(coordinates(s)[c(v, a), 1], coordinates(s)[c(v, a), 2], col = rgb(0, 0, 0, 0.2))
    }
}))
# some ad hoc corrections to the adjacency matrix
dadj <- dis1
# remove small islands
tooSmall <- c(
    'Bouvet Island' = 'BVT',
    'Kiribati' = 'KIR'
)
ts <- sapply(tooSmall, grep, colnames(dadj))
dadj <- dadj[-ts, -ts]
# remove these connections
# all of Bermuda's connections to Europe and Africa
bmu <- c('ISL', 'IRL', 'PRT', 'CPV')
dadj['BMU', bmu] <- 0
dadj[bmu, 'BMU'] <- 0
# all of Cape Verde's connections to America
cpv <- c('PRT', 'ATG', 'BRB')
dadj['CPV', cpv] <- 0
dadj[cpv, 'CPV'] <- 0
# Uruguay - Namibia
dadj['URY', 'NAM'] <- 0
dadj['NAM', 'URY'] <- 0
# Kazakhstan - eastern Baltic states
eb <- c('FIN', 'EST', 'LVA', 'BLR', 'UKR', 'GEO')
dadj['KAZ', eb] <- dadj[eb, 'KAZ'] <- 0
# Mauritius - Australia / Singapore
dadj['MUS', c('AUS', 'SGP')] <- dadj[c('AUS', 'SGP'), 'MUS'] <- 0
# Namibia - Liberia / São Tomé and Príncipe
dadj['NAM', c('STP', 'LBR')] <- dadj[c('LBR', 'STP'), 'NAM'] <- 0
# add these connections
# Tonga - New Zealand / Vanuatu / Marshall Islands
dadj['TON', c('NZL', 'VUT', 'MHL')] <- dadj[c('NZL', 'VUT', 'MHL'), 'TON'] <- 1
# Russia - eastern Baltic states and China
eb <- c('CHN', 'FIN', 'EST', 'LVA', 'BLR', 'UKR', 'GEO')
dadj['RUS', eb] <- dadj[eb, 'RUS'] <- 1
# China <-> Vietnam, Tajikistan, Pakistan, India, Kazakhstan, Nepal
cn <- c('VNM', 'TJK', 'PAK', 'IND', 'KAZ', 'NPL')
dadj['CHN', cn] <- dadj[cn, 'CHN'] <- 1
# move the USA centroid (to the contiguous-US interior)
usa <- grep('USA', s$ISO)
cr <- coordinates(s)
cr[usa, ] <- c(-97.99, 39.38)
x <- s@data
coordinates(x) <- cr
projection(x) <- projection(s)
s <- x
# print output
svg(file.path(outdir, 'figures', 'spatial_adjacency.svg'))
map('world')
for (v in 1:nrow(dadj)) {
    adj <- which(dadj[v, ] > 0)
    for (a in adj) {
        ind <- which(s$ISO %in% row.names(dadj)[c(v, a)])
        lines(coordinates(s)[ind, 1], coordinates(s)[ind, 2], col = rgb(0, 0, 0, 0.2))
    }
}
points(s, cex = 0.4, col = 'red')
dev.off()
dis1 <- dadj
# The choice of spatial adjacency may have a large impact on estimates of
# autocorrelation. Here I used Delaunay triangulation, which connects points in
# space without 'overconnecting' and creating 'sliver triangles'. Note that I
# did not make any connections across the Pacific, which may not be reasonable.
# raw distance matrix
dis2 <- pointDistance(coordinates(s), coordinates(s),lonlat = TRUE, allpairs = TRUE)
# function for calculating Moran's I
my.m <- function(v) {
    # v must be a named vector with ISO codes
    require(spdep)
    i <- names(v)[which(!is.na(v))]
    w1 <- mat2listw(dis1[i, i])
    v <- v[i]
    # moran.mc(v, w1, 999)
    corr <- sp.correlogram(w1$neighbours, v, order = 5, method = 'I',
        randomisation = FALSE, zero.policy = TRUE)
    # plot(corr)
    print(length(v))
    return(corr)
}
# add names
g <- d$GaIN
tt <- d$TCI
names(g) <- names(tt) <- d$`Country code`
gain <- my.m(g)
tc <- my.m(tt)
svg(file.path(outdir, 'figures', 'moransI.svg'))
plot(0, 0, xlim = c(0.7, 5.3), ylim = c(-0.3, 0.8), type = 'n', xlab = 'Spatial Lag',
    ylab = "Moran's I", xaxt = 'n')
axis(1, at = 1:5, labels = 1:5)
# error bars of +/- 2 standard deviations around each estimate
dtc <- sqrt(tc$res[, 3]) * 2
itc <- tc$res[, 1]
dg <- sqrt(gain$res[, 3]) * 2
ig <- gain$res[, 1]
arrows(1:5 + 0.1, itc + dtc, 1:5 + 0.1, itc - dtc, code = 3, angle = 90, length = 0.1)
# points(1:5 + 0.1, itc, pch = 21, lwd = 1, bg = 'grey')
points(1:5 + 0.1, itc, pch = 15, lwd = 2, col = 'black')
arrows(1:5 - 0.1, ig + dg, 1:5 - 0.1, ig - dg, code = 3, angle = 90, length = 0.1)
points(1:5 - 0.1, ig, pch = 16, lwd = 2, col = 'black')
abline(0, 0)
legend('topright', legend = c('GaIN', 'TCI'), pch = c(16, 15), lwd = 2)
dev.off()
# High 'I' values indicate that the variable is strongly spatially clustered, or
# autocorrelated. Clearly GaIN is much more spatially autocorrelated than TCI
# across spatial lags (orders of neighbors).
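# For reference, Moran's I for values x_i with spatial weights w_ij is
#   I = (n / sum_ij(w_ij)) * sum_ij(w_ij * (x_i - xbar) * (x_j - xbar)) / sum_i((x_i - xbar)^2),
# ranging roughly from -1 (dispersed) through 0 (random) to +1 (clustered).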
# calculate whether the I values are statistically different
ii <- list()
for (i in 1:5) {
    # difference in estimates
    dif <- gain$res[i, 1] - tc$res[i, 1]
    # pooled standard deviation
    vrsd <- sqrt(gain$res[i, 3] + tc$res[i, 3])
    # probability of drawing this difference from a normal distribution with
    # mean 0 and sd vrsd (a one-sided z-test)
    p <- pnorm(0, dif, vrsd)  # very small
    cat('\nspatial lag ', i, ' : ', round(p, 10))
    ii[[paste0('lag ', i)]] <- p
}
out <- as.data.frame(do.call(rbind, ii))
names(out) <- c('p-value')
write.csv(out, file.path(outdir, 'tables', 'moransI_comparison.csv'), row.names=T)
# The TCI and GaIN scores are significantly different up to the 5th spatial lag.
# It may be useful to examine patterns of autocorrelation in other variables, and to
# measure how sensitive global index autocorrelation is to perturbations of the
# index components (weights, choice of variables etc.)
3.4 code/3_cluster.R
# cluster analysis for TCI scores (Steve Fick, 2017-08-29)
library(vegan)
library(raster)
set.seed(1985)
d <- read.csv(file.path(outdir, "data.csv"), stringsAsFactors = FALSE, check.names = F)
d$HDI <- as.numeric(d$HDI)
row.names(d) <- d$`Country name`
s <- grep("score", names(d))
sh <- shapefile("data/global_flows_map/COUNTRY_CENTROIDS.shp")
d$area <- sh$SQKM[match(d$`Country code`, sh$ISO)]
d$logArea <- log(d$area + 1)
# find top 30%
t70 <- d$TCI > quantile(d$TCI, 0.7, na.rm = TRUE)
nt70 <- !t70
# omit NAs to allow for distance matrix calculation
x <- na.omit(d[t70, s])
i <- attributes(x)$na.action
################################################# Perform Ward's clustering on Euclidean distance
nclust <- 7
# di <- vegdist(d[t70, s], method = 'bray', na.rm = TRUE)
di <- dist(x, method = "euclidean")
tree <- hclust(di, method = "ward.D")  # "ward" was renamed to "ward.D" in newer R
clh <- cutree(tree, k = nclust)
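# cutree() cuts the dendrogram so that k = nclust groups remain, returning a
# named integer vector of cluster memberships (names = country names, values
# 1..nclust), which the aggregation below averages over.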
s2 <- c(s, grep("GaIN", names(d)), grep("HDI", names(d)), grep("logArea", names(d)))
a <- aggregate(d[t70, s2][-i, ], list(clh), mean, na.rm = TRUE)
aver <- apply(d[, s2], 2, function(x) mean(as.numeric(x), na.rm = TRUE))
colz = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#a65628", "#f781bf",
"#999999")
# make plot
svg(file.path(outdir, "figures", "cluster_dendrogram.svg"), width = 10, height = 10)
layout(matrix(c(2, 3, 4, 5, 6, 1, 1, 7, 8, 1, 1, 9, 10, 11, 12, 13), 4, 4))
plot(tree, labels = row.names(x), xlab = "", main = "Upper 30% TCI", ylab = "", yaxt = "n",
xaxt = "n", sub = "")
# j <- rect.hclust(tree, k=nclust, border= rainbow(nclust))
j <- rect.hclust(tree, k = nclust, border = colz[1:nclust])
grps <- do.call(c, lapply(1:length(j), function(x) rep(x, length(j[[x]]))))
ids <- do.call(c, j)
# labels
xs <- aggregate(1:length(grps), by = list(grps), FUN = mean)$x
text(xs, par("usr")[3], letters[1:nclust], pos = 3, offset = 0.3)
cols <- grps[match(names(clh), names(ids))]
cols <- aggregate(clh, by = list(cols), unique)
# cols <- rainbow(nclust)[order(cols$x)]
for (i in names(d)[s2]) {
    # for (i in names(d)[s]) {
    name <- gsub("_score", "", i)
    name <- gsub("_", " ", name)
    # barplot(c(a[cols$x, i]), names = letters[1:nclust], main = i, col = rainbow(nclust))
    yl <- c(0, max(c(aver[i], max(a[cols$x, i]))))
    if (grepl("log", i)) {
        k <- c(aver[i], a[cols$x, i])
        barplot(c(a[cols$x, i]), names = letters[1:nclust], main = name, col = colz[1:nclust],
            ylim = c(min(k) - 0.5, max(k)), xpd = FALSE)
    } else {
        barplot(c(a[cols$x, i]), names = letters[1:nclust], main = name, col = colz[1:nclust],
            ylim = yl)
    }
    abline(h = aver[i], lty = 2)
}
dev.off()
######################################## NMDS
layout(matrix(1, 1, 1))
row.names(d) <- d$`Country name`
y <- na.omit(d[, s])
jj <- attributes(y)$na.action
yt70 <- which(row.names(y) %in% d$`Country name`[t70])
nyt70 <- which(!row.names(y) %in% d$`Country name`[t70])
m <- metaMDS(y, try = 1000)
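# metaMDS() repeats the NMDS from many random starts and keeps the best
# solution; as a common rule of thumb, stress < 0.2 indicates a usable 2-D
# ordination (< 0.1 is good). The stress value is printed on the plot below.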
# m$points['Austria', ] <- m$points['Austria', ] + 0.05
# m$points['Netherlands', 2] <- m$points['Netherlands', 2] - 0.01
svg(file.path(outdir, "figures", "nmds.svg"), height = 10, width = 12)
nnames <- gsub("_score", "", names(d)[s])
nnames <- gsub("_", " ", nnames)
ordiplot(m, type = "n", xlim = c(-0.42, 0.35), ylim = c(-0.41, 0.35))
# ordiplot(m,type='n')
orditorp(m, display = "species", labels = nnames, col = "deepskyblue2", air = 0.01,
cex = 1.1)
orditorp(m, display = "sites", select = yt70, col = colz[grps[match(as.character(row.names(y)[yt70]),
names(unlist(j)))]], air = 0.01, cex = (d$TCI[-jj]/max(d$TCI[-jj]))[yt70])
orditorp(m, display = "sites", select = nyt70, col = rgb(0, 0, 0, 0.5), air = 0.01,
cex = (d$TCI[-jj]/max(d$TCI[-jj]))[nyt70])
text(-0.4, -0.4, paste0("stress = ", round(m$stress, 3)))
dev.off()