En este proyecto vamos a intentar replicar la gráfica siguiente del periódico The Economist.

library(ggplot2)
library(imager)
library(ggthemes)
library(data.table)
library(plotly)
# Load the reference chart image from disk and display it, so the plot built
# later in this script can be compared against it.
image <- load.image("~/R/Imagenes/Economist1.png")
plot(image)

Abrimos el archivo CSV con los datos y revisamos su contenido.

# Read the assignment data set into a data.table. `drop = 1` discards the
# first column of the CSV (presumably a row-index column -- TODO confirm).
df <- fread(
  "~/R/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Capstone and Data Viz Projects/Data Visualization Project/Economist_Assignment_Data.csv",
  drop = 1
)

# Preview the first rows to check that the file was parsed as expected.
head(df)
##        Country HDI.Rank   HDI CPI            Region
## 1: Afghanistan      172 0.398 1.5      Asia Pacific
## 2:     Albania       70 0.739 3.1 East EU Cemt Asia
## 3:     Algeria       96 0.698 2.9              MENA
## 4:      Angola      148 0.486 2.0               SSA
## 5:   Argentina       45 0.797 3.0          Americas
## 6:     Armenia       86 0.716 2.6 East EU Cemt Asia
# Inspect the structure of the loaded table: column names, column types and
# number of observations.
str(df)
## Classes 'data.table' and 'data.frame':   173 obs. of  5 variables:
##  $ Country : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ HDI.Rank: int  172 70 96 148 45 86 2 19 91 53 ...
##  $ HDI     : num  0.398 0.739 0.698 0.486 0.797 0.716 0.929 0.885 0.7 0.771 ...
##  $ CPI     : num  1.5 3.1 2.9 2 3 2.6 8.8 7.8 2.4 7.3 ...
##  $ Region  : chr  "Asia Pacific" "East EU Cemt Asia" "MENA" "SSA" ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Base plot: CPI on the x axis against HDI on the y axis.
pl <- ggplot(data = df, mapping = aes(x = CPI, y = HDI))

# Quick preview with default points coloured by region (printed, not stored).
pl + geom_point(mapping = aes(colour = Region))

# Keep the version with large hollow circles (shape 1), coloured by region.
pl <- pl + geom_point(mapping = aes(colour = Region), shape = 1, size = 5)
pl

# Add a single red trend line: linear model of y on log(x), drawn without
# the confidence band.
pl2 <- pl +
  geom_smooth(formula = y ~ log(x), method = "lm", se = FALSE, colour = "red")
pl2

# Countries to annotate on the chart. Each name must match a value of
# df$Country exactly; a name that does not match is silently left unlabelled.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
  "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
  "India", "Italy", "China", "South Africa", "Spain",
  "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)

# Label only the selected countries. `check_overlap` is an argument of
# geom_text(), not of subset(): passing it to geom_text() makes ggplot2 skip
# labels that would overlap a previously drawn label in the same layer.
pl3 <- pl2 +
  geom_text(
    aes(label = Country),
    data = subset(df, Country %in% pointsToLabel),
    col = "gray20",
    check_overlap = TRUE
  )
pl3

# Set axis titles, limits and tick positions, and switch to the
# black-and-white theme.
pl4 <- pl3 +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(1, 10),
    breaks = 1:10
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1),
    breaks = seq(0.2, 1, by = 0.2)
  ) +
  theme_bw()
pl4

# Add the chart title.
pl5 <- pl4 + ggtitle(label = "Corruption and Human development")
pl5

# Convert the finished ggplot into an interactive plotly widget.
ggplotly(pl5)