Re-create The Economist's graphic "Corruption and human development", which plots the Corruption Perceptions Index (CPI) against the Human Development Index (HDI) for 2011
Install and load packages
# Install the CRAN packages this script depends on.
# install.packages() evaluates its first argument, so package names must be
# quoted strings; a bare name such as ggplot2 fails with
# "object 'ggplot2' not found".
# grid is intentionally not listed: it is a base package that ships with R
# and cannot be installed from CRAN.
install.packages(
  c("ggplot2", "GGally", "ggthemes", "ggpubr", "ggrepel", "cowplot")
)
library(ggplot2)
library(GGally)
library(ggthemes)
library(ggpubr)
library(ggrepel)
library(grid)
library(cowplot)
Import/read data, adjust and select the data used
# Load the raw Economist data set from the working directory.
# read.csv() already defaults to a header row and comma separator.
read_data <- read.csv("EconomistData.csv")
# Preview the first rows to check that the import worked.
head(read_data)
## X Country HDI.Rank HDI CPI Region
## 1 1 Afghanistan 172 0.398 1.5 Asia Pacific
## 2 2 Albania 70 0.739 3.1 East EU Cemt Asia
## 3 3 Algeria 96 0.698 2.9 MENA
## 4 4 Angola 148 0.486 2.0 SSA
## 5 5 Argentina 45 0.797 3.0 Americas
## 6 6 Armenia 86 0.716 2.6 East EU Cemt Asia
# Inspect the column types of the raw data; the printed result follows as `##` lines.
str(read_data)
## 'data.frame': 173 obs. of 6 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Country : Factor w/ 173 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ HDI.Rank: int 172 70 96 148 45 86 2 19 91 53 ...
## $ HDI : num 0.398 0.739 0.698 0.486 0.797 0.716 0.929 0.885 0.7 0.771 ...
## $ CPI : num 1.5 3.1 2.9 2 3 2.6 8.8 7.8 2.4 7.3 ...
## $ Region : Factor w/ 6 levels "Americas","Asia Pacific",..: 2 3 5 6 1 3 2 4 3 1 ...
# Recode the raw region codes into an ordered factor with display labels.
# The order of `levels` fixes the legend order, and `labels` supplies the
# (partly two-line) text shown for each region.
read_data[["Region"]] <- factor(
  read_data[["Region"]],
  levels = c(
    "EU W. Europe",
    "Americas",
    "Asia Pacific",
    "East EU Cemt Asia",
    "MENA",
    "SSA"
  ),
  labels = c(
    "OECD",
    "Americas",
    "Asia &\nOceania",
    "Central &\nEastern Europe",
    "Middle East &\nNorth Africa",
    "Sub-Saharan\nAfrica"
  )
)

# Keep only the four columns the plot uses.
data <- read_data[c("Country", "HDI", "CPI", "Region")]
str(data)
## 'data.frame': 173 obs. of 4 variables:
## $ Country: Factor w/ 173 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ HDI : num 0.398 0.739 0.698 0.486 0.797 0.716 0.929 0.885 0.7 0.771 ...
## $ CPI : num 1.5 3.1 2.9 2 3 2.6 8.8 7.8 2.4 7.3 ...
## $ Region : Factor w/ 6 levels "OECD","Americas",..: 3 4 5 6 2 4 3 1 4 2 ...
# Preview the first rows of the trimmed data frame.
head(data)
## Country HDI CPI Region
## 1 Afghanistan 0.398 1.5 Asia &\nOceania
## 2 Albania 0.739 3.1 Central &\nEastern Europe
## 3 Algeria 0.698 2.9 Middle East &\nNorth Africa
## 4 Angola 0.486 2.0 Sub-Saharan\nAfrica
## 5 Argentina 0.797 3.0 Americas
## 6 Armenia 0.716 2.6 Central &\nEastern Europe
Compute the R-squared value
# R-squared of the linear model HDI ~ CPI + log(CPI), the same formula the
# trend line uses.
r_square1 <- summary(lm(HDI ~ CPI + log(CPI), data = data))[["r.squared"]]
# Express it as a whole-number percentage ...
r_square2 <- round(100 * r_square1)
# ... and format it as a label string such as "56%" for the legend.
r_square <- paste0(format(r_square2), "%")
Create plot between CPI (X) and HDI (Y)
# Base scatter plot: CPI on the x axis, HDI on the y axis, one hollow circle
# (shape 1) per country, coloured by region.
g1 <- ggplot(data, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 3, shape = 1, stroke = 1.25)
g1
Adding the trend line
# Overlay a single red trend line fitted with y ~ x + log(x), set the region
# palette, and add a legend entry for the line.
# Mapping linetype to the constant "r2" is what creates that extra legend
# key; its label is the R^2 value computed earlier.
g2 <- g1 +
  geom_smooth(
    aes(linetype = "r2"),
    method = "lm",
    formula = y ~ x + log(x),
    se = FALSE,
    color = "Red"
  ) +
  scale_color_manual(
    values = c(
      "#34677a", "#27b1e0", "#87d8f7",
      "#2c948a", "#f2492d", "#97503f"
    )
  ) +
  scale_linetype(
    name = "",
    breaks = "r2",
    labels = list(bquote(R^2 == .(r_square))),
    # Draw the legend key as a solid red line.
    guide = guide_legend(
      override.aes = list(linetype = 1, size = 1.25, color = "red")
    )
  )
g2
Labelling selected points
# Countries to annotate. The two groups get different repel settings:
# point1 uses right-justified labels, point2 uses extra padding and a
# stronger repulsion force.
point1 <- c(
  "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan", "Congo",
  "Greece", "Argentina", "India", "Italy", "Botswana", "Cape Verde",
  "Bhutan", "Rwanda", "Britain", "Barbados", "Norway",
  "New Zealand", "Singapore", "United States"
)
point2 <- c(
  "Russia", "Brazil", "China", "South Africa", "Spain", "France",
  "Germany", "Japan"
)

# Each text layer only sees the rows for its own group of countries.
g3 <- g2 +
  geom_text_repel(
    data = data[data$Country %in% point1, ],
    aes(label = Country),
    color = "gray20",
    hjust = 1,
    vjust = 0.5
  ) +
  geom_text_repel(
    data = data[data$Country %in% point2, ],
    aes(label = Country),
    color = "gray20",
    box.padding = unit(1, "lines"),
    force = 3
  )
g3
Adding caption, label, and title
# Add the title, the source caption, and the axis titles, limits and breaks.
g4 <- g3 +
  labs(
    # The trailing newline in the title leaves a blank line beneath it.
    title = "Corruption and human development\n",
    caption = "Sources: Transparency International; UN Human Development Report"
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    breaks = seq(0, 10, 1),
    limits = c(1, 10)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    breaks = seq(0.2, 1.0, by = 0.1),
    limits = c(0.2, 1.0)
  )
g4
Editing theme
# Final styling pass: strip the default panel decoration, shrink the axis
# text, and move the title, caption and a one-row horizontal legend to the
# top-left of the figure.
g5 <- g4 +
  theme(
    # Panel: no border or background; major grid lines are gray70 and are
    # blanked for the x direction, minor grid lines are removed.
    panel.border = element_blank(),
    panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray70", size = 0.7),
    panel.grid.major.x = element_blank(),
    line = element_blank(),

    # Axes: small tick labels, near-zero tick length, italic titles.
    axis.text.y.left = element_text(size = 8),
    axis.text.x.bottom = element_text(size = 8),
    axis.ticks.length = unit(0.001, "cm"),
    axis.ticks.y = element_blank(),
    axis.title.x = element_text(color = "black", size = 7, face = "italic"),
    axis.title.y = element_text(color = "black", size = 7, face = "italic"),

    # Title and caption: negative hjust shifts them left of the panel edge.
    plot.title = element_text(
      colour = "black", size = 12, face = "bold",
      hjust = -0.15, vjust = 3
    ),
    plot.caption = element_text(size = 7, hjust = -0.17),

    # Legend: untitled, horizontal, positioned by relative coordinates.
    legend.position = c(-0.09, 1.05),
    legend.title = element_blank(),
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.box.just = "left",
    legend.text = element_text(size = 9)
  ) +
  # Keep all colour keys on a single row.
  guides(col = guide_legend(nrow = 1))
g5