1 Setup

source("FUNMyChordPlot.R") 

# Function for pretty density plots
theme_dviz_hgrid <- function(font_size = 14, font_family = "") {
        color = "grey90"
        line_size = 0.5
        # Starts with theme_cowplot and then modify some parts
        cowplot::theme_cowplot(font_size = font_size, 
                               font_family = font_family) %+replace%
                theme(# make horizontal grid lines
                        panel.grid.major = element_line(colour = color,
                                                          size = line_size),
                        panel.grid.major.x = element_blank(),
                        # adjust axis tickmarks
                        axis.ticks = element_line(colour = color, 
                                                  size = line_size),
                        # adjust x axis
                        axis.line.x = element_line(colour = color, 
                                                   size = line_size),
                        # no y axis line
                        axis.line.y = element_blank()
                )
}

library(openxlsx)
library(ggplot2)
library(grid)
library(tidyverse)
library(reshape2)
# library(FactoMineR)
# library(nFactors)
# library(psych)
# library(missMDA)
# library(qgraph)
library(knitr)
library(chorddiag)
library(corrplot)
# library(Hmisc)
library(circlize)
library(corpcor)
# library(mgm)
# library(rworldmap)
library(RColorBrewer)
library(arm)
library(GGally)
library(bestNormalize)

2 Performance

# load Indicator Data by Country, including GDP values, 
# with NA's ReMoved (complete final set)
performance <- read.csv("Data/ModelOutput/data_indicator-by-country-gdp-narm.csv")
perf.meta <- read.csv("Data/ModelOutput/metadata_state-narm.csv", stringsAsFactors = F)
# get vector of SDG indicator column names
vars <- grep("X", names(performance), value = T)

2.1 Scatterplots

ggpairs.perf <- ggpairs(performance[, vars])
ggsave("Figures-simplified/ggpairs-performance", device = "pdf", plot = ggpairs.perf,
       width = 100, height = 100, units = "cm")

ggpairs.perf.sc <- ggpairs(as.data.frame(scale(performance[, vars])))
ggsave("Figures-simplified/ggpairs-performance.scaled", device = "pdf", plot = ggpairs.perf.sc,
       width = 100, height = 100, units = "cm")

2.2 Density distributions

Original

tmp <- performance

tmp <- gather(as.data.frame(scale(tmp[, vars])), "Indicator.myID", "value")
tmp$Indicator.myID <- gsub("X", "", tmp$Indicator.myID)

tmp <- merge(tmp, perf.meta[, c("Indicator.myID", "Shortname")])
tmp$Shortname <- factor(tmp$Shortname, 
                        labels = unique(tmp$Shortname), 
                        levels = unique(tmp$Shortname))

ggplot(tmp, aes(x = value, y = ..scaled..)) + 
        geom_density(aes(fill = Shortname), trim = T) +
        scale_y_continuous(name = "density") +
        # scale_fill_manual(values = perf.meta$Colour) +
        facet_wrap(~Shortname, nrow = 7) +
        theme_minimal() +
        theme_dviz_hgrid() +
        theme(legend.position = "none",
              legend.justification = "right",
              legend.title = element_blank(),
              legend.margin = margin(0, 0, 0.2, 0, "cm"),
              legend.spacing.x = grid::unit(0.2, "cm"),
              strip.text = element_text(margin = margin(0.2,0,0.2,0, "cm"),
                                        size = 7),
              text = element_text(size=7),
              # panel.border = element_rect(fill = "transparent"),
              strip.background = element_rect(),
              axis.ticks.y = element_blank(),
              axis.text.y = element_blank(),
              axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

Best Transformation with BestNormalize package

perf.t <- performance

# perf.t[, "GDP"] <- log10(perf.t[, "GDP"])
perf.t[, c(vars, "GDP")] <- apply(perf.t[, c(vars, "GDP")], 2, 
                                  FUN = function(x) bestNormalize(x)$x.t)

tmp2 <- gather(as.data.frame(perf.t[, vars]), "Indicator.myID", "value")
tmp2$Indicator.myID <- gsub("X", "", tmp2$Indicator.myID)

tmp2 <- merge(tmp2, perf.meta[, c("Indicator.myID", "Shortname")])
tmp2$Shortname <- factor(tmp2$Shortname, 
                        labels = unique(tmp2$Shortname), 
                        levels = unique(tmp2$Shortname))

ggplot(tmp2, aes(x = value, y = ..scaled..)) + 
        geom_density(aes(fill = Shortname), trim = T) +
        scale_y_continuous(name = "density") +
        # scale_fill_manual(values = perf.meta$Colour) +
        facet_wrap(~Shortname, nrow = 7) +
        theme_minimal() +
        theme_dviz_hgrid() +
        theme(legend.position = "none",
              legend.justification = "right",
              legend.title = element_blank(),
              legend.margin = margin(0, 0, 0.2, 0, "cm"),
              legend.spacing.x = grid::unit(0.2, "cm"),
              strip.text = element_text(margin = margin(0.2,0,0.2,0, "cm"),
                                        size = 7),
              text = element_text(size=7),
              # panel.border = element_rect(fill = "transparent"),
              strip.background = element_rect(),
              axis.ticks.y = element_blank(),
              axis.text.y = element_blank(),
              axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

2.3 Models without GDP

# create empty matrix to fill with coefficients, p-values, and R^2
perf.coefs <- matrix(nrow = length(vars), ncol = length(vars),
                dimnames = list(vars, vars))
perf.pvals <- perf.coefs
perf.rsq <- perf.coefs

# fill the matrices with model outputs
for (y in vars) {
        for (x in vars[-which(vars == y)]) {
                # form <- formula(paste0("scale(", y, ") ~ ", "1 + scale(", x, ")"))
                # message(y, "~", x)
                # model <- lm(form, data = performance)
                
                # with normalized variables:
                model <- lm(perf.t[[y]] ~ perf.t[[x]])
                
                perf.coefs[x, y] <- summary(model)$coefficients[2, 1]
                perf.pvals[x, y] <- summary(model)$coefficients[2, 4]
                perf.rsq[x, y] <- summary(model)$r.squared
        }
}        
        
mods <- list(perf.coefs, perf.pvals, perf.rsq)
names(mods) <- c("perf.coefs", "perf.pvals", "perf.rsq")

MyChordPlotFun(data = mods$perf.coefs, 
               pvals = mods$perf.pvals,
               md = perf.meta,
          alpha=0.05, BH = T, # calculate Benjamini-Hochberg corrected p-values
          type = "NvsP", 
          title.left = "Negative relationships",
          title.right = "Positive relationships")

2.4 Models with GDP

# create empty matrices (for GDP and SDG coefficients)
perf.coefs.gdp <- matrix(nrow = length(vars), ncol = length(vars),
                dimnames = list(vars, vars))
perf.pvals.gdp <- perf.coefs.gdp
perf.rsq.gdp <- perf.coefs.gdp
perf.coefs.sdg <- perf.coefs.gdp
perf.pvals.sdg <- perf.coefs.gdp

# this writes coefficien
for (y in vars) {
        for (x in vars[-which(vars == y)]) {
                # form <- formula(paste0("scale(", y, ") ~ ", "scale(", x,                                        ") * ", "scale(log(GDP))"))
                # message(y, "~", x)
                # model <- lm(form, data = performance)
                
                # with normalized variables:
                model <- lm(perf.t[[y]] ~ perf.t[[x]] * perf.t[["GDP"]])
                
                # note that this writes results of 'y~gdp' on diagonal
                perf.coefs.sdg[x, y] <- summary(model)$coefficients[2, 1]
                perf.pvals.sdg[x, y] <- summary(model)$coefficients[2, 4]
                perf.coefs.gdp[x, y] <- summary(model)$coefficients[3, 1]
                perf.pvals.gdp[x, y] <- summary(model)$coefficients[3, 4]
                perf.rsq.gdp[x, y] <- summary(model)$r.squared
        }
}        
        
mods.gdp <- list(perf.coefs.sdg, perf.pvals.sdg, perf.rsq.gdp, perf.coefs.gdp, perf.pvals.gdp)
names(mods.gdp) <- c("perf.coefs", "perf.pvals", "perf.rsq", "gdp.perf.coefs", "gdp.perf.pvals")

MyChordPlotFun(data = mods.gdp$perf.coefs,
               pvals = mods.gdp$perf.pvals,
               md = perf.meta,
          alpha=0.05, type = "NvsP", BH = T, 
          title.left = "Negative relationships",
          title.right = "Positive relationships")

3 Progress

# load Indicator Data by Country, including GDP values, 
# with NA's ReMoved (complete final set)
progress <- read.csv("Data/ModelOutput/data_mac-by-country-gdp-narm.csv")
prog.meta <- read.csv("Data/ModelOutput/metadata_change-narm.csv", stringsAsFactors = F)

# get vector of SDG indicator column names
vars <- grep("X", names(progress), value = T)

3.1 Scatterplots

ggpairs.prog <- ggpairs(progress[, vars])
ggsave("Figures-simplified/ggpairs-progress", device = "pdf", plot = ggpairs.prog,
       width = 100, height = 100, units = "cm")

ggpairs.prog.sc <- ggpairs(as.data.frame(scale(progress[, vars])))
ggsave("Figures-simplified/ggpairs-progress.scaled", device = "pdf", plot = ggpairs.prog.sc,
       width = 100, height = 100, units = "cm")

3.2 Density distributions

3.2.1 Original

tmp <- progress

tmp <- gather(as.data.frame(scale(tmp[, vars])), "Indicator.myID", "value")
tmp$Indicator.myID <- gsub("X", "", tmp$Indicator.myID)

tmp <- merge(tmp, perf.meta[, c("Indicator.myID", "Shortname")])
tmp$Shortname <- factor(tmp$Shortname, 
                        labels = unique(tmp$Shortname), 
                        levels = unique(tmp$Shortname))

ggplot(tmp, aes(x = value, y = ..scaled..)) + 
        geom_density(aes(fill = Shortname), trim = T) +
        scale_y_continuous(name = "density") +
        # scale_fill_manual(values = perf.meta$Colour) +
        facet_wrap(~Shortname, nrow = 7) +
        theme_minimal() +
        theme_dviz_hgrid() +
        theme(legend.position = "none",
              legend.justification = "right",
              legend.title = element_blank(),
              legend.margin = margin(0, 0, 0.2, 0, "cm"),
              legend.spacing.x = grid::unit(0.2, "cm"),
              strip.text = element_text(margin = margin(0.2,0,0.2,0, "cm"),
                                        size = 7),
              text = element_text(size=7),
              # panel.border = element_rect(fill = "transparent"),
              strip.background = element_rect(),
              axis.ticks.y = element_blank(),
              axis.text.y = element_blank(),
              axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

3.2.2 Best Transformation with `BestNormalize` package

prog.t <- progress

# prog.t[, "GDP"] <- log10(prog.t[, "GDP"])
prog.t[, c(vars, "GDP")] <- apply(prog.t[, c(vars, "GDP")], 2, 
                                  FUN = function(x) bestNormalize(x)$x.t)

tmp2 <- gather(as.data.frame(prog.t[, vars]), "Indicator.myID", "value")
tmp2$Indicator.myID <- gsub("X", "", tmp2$Indicator.myID)

tmp2 <- merge(tmp2, perf.meta[, c("Indicator.myID", "Shortname")])
tmp2$Shortname <- factor(tmp2$Shortname, 
                        labels = unique(tmp2$Shortname), 
                        levels = unique(tmp2$Shortname))

ggplot(tmp2, aes(x = value, y = ..scaled..)) + 
        geom_density(aes(fill = Shortname), trim = T) +
        scale_y_continuous(name = "density") +
        # scale_fill_manual(values = perf.meta$Colour) +
        facet_wrap(~Shortname, nrow = 7) +
        theme_minimal() +
        theme_dviz_hgrid() +
        theme(legend.position = "none",
              legend.justification = "right",
              legend.title = element_blank(),
              legend.margin = margin(0, 0, 0.2, 0, "cm"),
              legend.spacing.x = grid::unit(0.2, "cm"),
              strip.text = element_text(margin = margin(0.2,0,0.2,0, "cm"),
                                        size = 7),
              text = element_text(size=7),
              # panel.border = element_rect(fill = "transparent"),
              strip.background = element_rect(),
              axis.ticks.y = element_blank(),
              axis.text.y = element_blank(),
              axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

3.3 Models without GDP

# create empty matrix to fill with coefficients, p-values, and R^2
prog.coefs <- matrix(nrow = length(vars), ncol = length(vars),
                dimnames = list(vars, vars))
prog.pvals <- prog.coefs
prog.rsq <- prog.coefs

# fill the matrices with model outputs
for (y in vars) {
        for (x in vars[-which(vars == y)]) {
                # form <- formula(paste0("scale(", y, ") ~ ", "1 + scale(", x, ")"))
                # # message(y, "~", x)
                # model <- lm(form, data = progress)
                
                # with normalized variables:
                model <- lm(prog.t[[y]] ~ prog.t[[x]])
                
                prog.coefs[x, y] <- summary(model)$coefficients[2, 1]
                prog.pvals[x, y] <- summary(model)$coefficients[2, 4]
                prog.rsq[x, y] <- summary(model)$r.squared
        }
}        
        
mods <- list(prog.coefs, prog.pvals, prog.rsq)
names(mods) <- c("prog.coefs", "prog.pvals", "prog.rsq")

MyChordPlotFun(data = mods$prog.coefs, 
               pvals = mods$prog.pvals,
               md = prog.meta,
          alpha=0.05, BH = T, # calculate Benjamini-Hochberg corrected p-values
          type = "NvsP", 
          title.left = "Negative relationships",
          title.right = "Positive relationships")

3.4 Models with GDP

# create empty matrices (for GDP and SDG coefficients)
prog.coefs.gdp <- matrix(nrow = length(vars), ncol = length(vars),
                dimnames = list(vars, vars))
prog.pvals.gdp <- prog.coefs.gdp
prog.rsq.gdp <- prog.coefs.gdp
prog.coefs.sdg <- prog.coefs.gdp
prog.pvals.sdg <- prog.coefs.gdp

# this writes coefficien
for (y in vars) {
        for (x in vars[-which(vars == y)]) {
                # form <- formula(paste0("scale(", y, ") ~ ", "scale(", x,                                        ") * ", "scale(log(GDP))"))
                # # message(y, "~", x)
                # model <- lm(form, data = progress)
                
                # with normalized variables:
                model <- lm(prog.t[[y]] ~ prog.t[[x]] * prog.t[["GDP"]])
                
                # note that this writes results of 'y~gdp' on diagonal
                prog.coefs.sdg[x, y] <- summary(model)$coefficients[2, 1]
                prog.pvals.sdg[x, y] <- summary(model)$coefficients[2, 4]
                prog.coefs.gdp[x, y] <- summary(model)$coefficients[3, 1]
                prog.pvals.gdp[x, y] <- summary(model)$coefficients[3, 4]
                prog.rsq.gdp[x, y] <- summary(model)$r.squared
        }
}        
        
mods.gdp <- list(prog.coefs.sdg, prog.pvals.sdg, prog.rsq.gdp, prog.coefs.gdp, prog.pvals.gdp)
names(mods.gdp) <- c("prog.coefs", "prog.pvals", "prog.rsq", "gdp.prog.coefs", "gdp.prog.pvals")

MyChordPlotFun(data = mods.gdp$prog.coefs,
               pvals = mods.gdp$prog.pvals,
               md= prog.meta,
          alpha=0.05, type = "NvsP", BH = T, 
          title.left = "Negative relationships",
          title.right = "Positive relationships")

SDG simplified markdown

Judith Ament

Monday 15 April 2019

1 Setup

2 Performance

2.1 Scatterplots

2.2 Density distributions

2.3 Models without GDP

2.4 Models with GDP

3 Progress

3.1 Scatterplots

3.2 Density distributions

3.2.1 Original

3.2.2 Best Transformation with `BestNormalize` package

3.3 Models without GDP

3.4 Models with GDP

SDG simplified markdown

Judith Ament

Monday 15 April 2019

1 Setup

2 Performance

2.1 Scatterplots

2.2 Density distributions

2.3 Models without GDP

2.4 Models with GDP

3 Progress

3.1 Scatterplots

3.2 Density distributions

3.2.1 Original

3.2.2 Best Transformation with BestNormalize package

3.3 Models without GDP

3.4 Models with GDP

3.2.2 Best Transformation with `BestNormalize` package