En este proyecto vamos a intentar replicar la gráfica siguiente del periódico The Economist.
library(ggplot2)
library(imager)
library(ggthemes)
library(data.table)
library(plotly)
# Load the reference chart image from disk and display it.
image <- load.image(file = "~/R/Imagenes/Economist1.png")
plot(x = image)
Abrimos el archivo CSV con los datos, descartando la primera columna, y revisamos las primeras filas:
# Read the assignment data set, dropping the first column of the CSV.
# The path is passed via `file =` rather than positionally: it contains
# spaces, and fread() treats a positional `input` with spaces that is not an
# existing file as a shell command. With `file =` a missing or mistyped path
# fails with a plain file error instead.
df <- fread(
  file = "~/R/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Capstone and Data Viz Projects/Data Visualization Project/Economist_Assignment_Data.csv",
  drop = 1
)

# Preview the first rows to confirm the columns were read as expected.
head(df)
## Country HDI.Rank HDI CPI Region
## 1: Afghanistan 172 0.398 1.5 Asia Pacific
## 2: Albania 70 0.739 3.1 East EU Cemt Asia
## 3: Algeria 96 0.698 2.9 MENA
## 4: Angola 148 0.486 2.0 SSA
## 5: Argentina 45 0.797 3.0 Americas
## 6: Armenia 86 0.716 2.6 East EU Cemt Asia
# Inspect the structure of df: column names, types and leading values.
str(df)
## Classes 'data.table' and 'data.frame': 173 obs. of 5 variables:
## $ Country : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ HDI.Rank: int 172 70 96 148 45 86 2 19 91 53 ...
## $ HDI : num 0.398 0.739 0.698 0.486 0.797 0.716 0.929 0.885 0.7 0.771 ...
## $ CPI : num 1.5 3.1 2.9 2 3 2.6 8.8 7.8 2.4 7.3 ...
## $ Region : chr "Asia Pacific" "East EU Cemt Asia" "MENA" "SSA" ...
## - attr(*, ".internal.selfref")=<externalptr>
# Base plot: CPI on the x axis, HDI on the y axis.
pl <- ggplot(data = df, mapping = aes(x = CPI, y = HDI))

# Quick preview using the default point style, coloured by region.
pl + geom_point(mapping = aes(colour = Region))

# Point layer kept for the final chart: large open circles (shape 1)
# coloured by region.
pl <- pl + geom_point(mapping = aes(colour = Region), shape = 1, size = 5)
pl

# Add one red trend line fitted with lm() as y ~ log(x), without the
# confidence band.
pl2 <- pl + geom_smooth(
  method = "lm",
  formula = y ~ log(x),
  se = FALSE,
  colour = "red"
)
pl2
# Countries to annotate on the chart. Names are matched against df$Country
# with %in%, so a misspelled entry is silently left unlabelled.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
  "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
  "India", "Italy", "China", "South Africa", "Spain",
  "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)

# Label only the selected countries. check_overlap is an argument of
# geom_text(), not of subset(): it must sit outside the subset() call so that
# labels overlapping an earlier label in the same layer are not drawn.
pl3 <- pl2 +
  geom_text(
    mapping = aes(label = Country),
    data = subset(df, Country %in% pointsToLabel),
    colour = "gray20",
    check_overlap = TRUE
  )
pl3
# Set axis titles, ranges and tick positions, then apply the black-and-white
# theme.
pl4 <- pl3 +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    breaks = 1:10,
    limits = c(1, 10)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    breaks = seq(0.2, 1, by = 0.2),
    limits = c(0.2, 1)
  ) +
  theme_bw()
pl4

# Add the chart title.
pl5 <- pl4 + ggtitle(label = "Corruption and Human development")
pl5

# Convert the finished ggplot into an interactive plotly widget.
ggplotly(p = pl5)