Here, I want to re-create The Economist's plot of the correlation between corruption and human development across many countries, using a dataset that you can get from this link.
Graph source : (http://www.economist.com/node/21541178)
#install.packages("pdftools")
#install.packages("tidyverse")
#install.packages("ggthemes")
#install.packages("ggrepel")
#install.packages("tm")
# Packages used by this analysis (PDF reading, data wrangling, plotting,
# label repulsion, text mining and grid units).
pcg <- c(
  "pdftools",
  "tidyverse",
  "ggplot2",
  "ggthemes",
  "ggrepel",
  "tm",
  "grid",
  "dplyr"
)
# Attach every package by name; the printed value lists, per package, the
# packages attached after that call (shown in the output below).
sapply(X = pcg, FUN = library, character.only = TRUE)
## $pdftools
## [1] "pdftools" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## $tidyverse
## [1] "forcats" "stringr" "dplyr" "purrr" "readr"
## [6] "tidyr" "tibble" "ggplot2" "tidyverse" "pdftools"
## [11] "stats" "graphics" "grDevices" "utils" "datasets"
## [16] "methods" "base"
##
## $ggplot2
## [1] "forcats" "stringr" "dplyr" "purrr" "readr"
## [6] "tidyr" "tibble" "ggplot2" "tidyverse" "pdftools"
## [11] "stats" "graphics" "grDevices" "utils" "datasets"
## [16] "methods" "base"
##
## $ggthemes
## [1] "ggthemes" "forcats" "stringr" "dplyr" "purrr"
## [6] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [11] "pdftools" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## $ggrepel
## [1] "ggrepel" "ggthemes" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "pdftools" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## $tm
## [1] "tm" "NLP" "ggrepel" "ggthemes" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "pdftools" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## $grid
## [1] "grid" "tm" "NLP" "ggrepel" "ggthemes"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "pdftools"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## $dplyr
## [1] "grid" "tm" "NLP" "ggrepel" "ggthemes"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "pdftools"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
This dataset contains the Human Development Index (HDI) and the Corruption Perception Index (CPI) for many countries around the world. We want to look at the relationship between HDI and CPI across these countries.
## Country HDI.Rank HDI CPI Region
## 1 Afghanistan 172 0.398 1.5 Asia Pacific
## 2 Albania 70 0.739 3.1 East EU Cemt Asia
## 3 Algeria 96 0.698 2.9 MENA
## 4 Angola 148 0.486 2.0 SSA
## 5 Argentina 45 0.797 3.0 Americas
## 6 Armenia 86 0.716 2.6 East EU Cemt Asia
## Country HDI.Rank HDI CPI Region
## 0 0 0 0 0
Before making the Economist-style plot, we start by manipulating the data.
We want to visualise the data using ggplot2.
# Base canvas: CPI on the x-axis and HDI on the y-axis, both taken from `eco`.
g1 <- ggplot(data = eco, mapping = aes(x = CPI, y = HDI))
# First look at the data: one circle per observation (shape 21 has a separate
# outline and fill), with the outline coloured by Region.
g1 +
  geom_point(aes(color = Region), shape = 21, size = 3, stroke = 1.25)
After plotting our data points by the HDI and CPI variables, we want to add a fitted trend line based on HDI and CPI.
# Add a fitted trend line, then redraw the points on top of it.
# The model is fitted with lm() using y ~ x + log(x), so the red line is
# curved in CPI rather than a straight line.
# The fill aesthetic is mapped to a constant string; the "R² = 52%" text is
# hard-coded here, not computed from the fit (presumably it is meant to show
# up as a legend entry).
g2 <- g1 +
  geom_smooth(
    mapping = aes(fill = "R² = 52%"),
    method = "lm",
    formula = y ~ x + log(x),
    color = "red",
    se = FALSE
  ) +
  # White-filled circles so the trend line does not show through the points.
  geom_point(
    aes(color = Region),
    shape = 21,
    size = 3.25,
    stroke = 1.5,
    fill = "white"
  )
g2
After adding the trend line, we want to see where each country sits among the plotted points.
That does not look good: the plot is crowded and hard to read. So let’s choose the countries whose labels we will display in the plot.
# Countries to annotate; all other points stay unlabelled.
p1 <- c(
  "Afghanistan", "Congo", "Rwanda", "Norway", "Singapore", "Argentina",
  "Iraq", "India", "France", "Morocco", "Indonesia", "Brazil",
  "Korea (South)", "Myanmar"
)
# Rows of `eco` for the selected countries; used as the label layer's data.
eco_labelled <- eco[eco$Country %in% p1, ]
# First attempt: label the selected countries with default repel settings.
g2 + geom_text_repel(data = eco_labelled, aes(label = Country))
# Same labels with a stronger repulsion force.
# The former `label.padding` argument was dropped: it is a geom_label_repel()
# parameter, so geom_text_repel() ignored it (with a warning) and it had no
# effect on the plot.
g3 <- g2 +
  geom_text_repel(data = eco_labelled, aes(label = Country), force = 16)
g3
# Final polish: axis scales, region palette, titles, legend layout and a
# minimal theme.
g4 <- g3 +
  scale_x_continuous(
    limits = c(1, 10.2),
    breaks = seq(1, 10, 1),
    name = "Corruption Perception Index (10=Least corrupt)"
  ) +
  scale_y_continuous(
    limits = c(.2, 1),
    breaks = seq(.2, 1, .1),
    name = "Human Development Index, 2011 (1=best)"
  ) +
  # One colour per Region level (six values supplied).
  scale_color_manual(
    values = c(
      "#24576D", "#099DD7", "#28AADC", "#248E84", "#F2583F", "#96503F"
    )
  ) +
  labs(
    title = "Corruption and Human Development",
    caption = "Source:Transparency International; UN Human Development report"
  ) +
  guides(colour = guide_legend(nrow = 1)) + # forces legend to be in a single line
  theme(
    text = element_text(colour = "black"),
    # colour/size/face set here are each overridden by axis.title.x and
    # axis.title.y just below.
    axis.title = element_text(colour = "black", size = 12, face = "bold"),
    axis.title.x = element_text(colour = "black", size = 8, face = "italic"),
    axis.title.y = element_text(colour = "black", size = 8, face = "italic"),
    # Horizontal legend above the panel.
    legend.position = "top",
    legend.direction = "horizontal",
    legend.justification = c(.1, .1),
    legend.text = element_text(size = 8),
    legend.title = element_blank(),
    # Blank background; only horizontal major grid lines are restyled.
    panel.background = element_blank(),
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept so older ggplot2 versions still work.
    panel.grid.major.y = element_line(colour = "grey", size = 0.4),
    panel.grid.minor.x = element_blank(),
    # Negative hjust moves the title to the left of the panel edge.
    plot.title = element_text(
      hjust = -.1, vjust = 2.1,
      colour = "black", size = 15, face = "bold"
    ),
    plot.caption = element_text(hjust = 0, colour = "black", size = 5)
  )
g4
## png
## 2