In this project we will be recreating this plot from The Economist:

Data

We will import the ggplot2 and data.table libraries and use fread to load the CSV file ‘Economist_Assignment_Data.csv’.

library(ggplot2)
library(data.table)
# Read the assignment data; drop = 1 discards the first column of the CSV.
df <- fread(
  "./data/Economist_Assignment_Data.csv",
  drop = 1
)

# Preview the first few rows.
head(df)
##        Country HDI.Rank   HDI CPI            Region
## 1: Afghanistan      172 0.398 1.5      Asia Pacific
## 2:     Albania       70 0.739 3.1 East EU Cemt Asia
## 3:     Algeria       96 0.698 2.9              MENA
## 4:      Angola      148 0.486 2.0               SSA
## 5:   Argentina       45 0.797 3.0          Americas
## 6:     Armenia       86 0.716 2.6 East EU Cemt Asia

Now let’s use ggplot() + geom_point() to create a scatter plot object called p1.

# Base scatter plot: CPI on the x-axis, HDI on the y-axis, coloured by Region.
p1 <- ggplot(
  data = df,
  mapping = aes(x = CPI, y = HDI, color = Region)
) +
  geom_point()
p1

Let’s restyle the points: make them larger (size = 4) and draw them as hollow circles (shape = 1).

# Rebuild p1 with larger points (size 4) drawn as open circles (shape 1).
p1 <- ggplot(
  data = df,
  mapping = aes(x = CPI, y = HDI, color = Region)
) +
  geom_point(size = 4, shape = 1)
p1

Now let’s add a trend line.

# Overlay a smoother with default settings. group = 1 overrides the
# per-Region grouping implied by the colour aesthetic, so one line is
# fitted through all points.
p1 +
  geom_smooth(mapping = aes(group = 1))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

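Let’s edit the trend line with some of the geom_smooth arguments: fit a linear model on log(x) instead of the default smoother, hide the confidence band, and color the line red.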

# Single trend line across all regions (group = 1): a linear model of
# y on log(x), drawn in red with the confidence band switched off.
p12 <- p1 +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  )
p12

Now let’s add a whole bunch of labels!

# Label every point with its country name.
p12 +
  geom_text(mapping = aes(label = Country))

That’s a bit of overkill, so let’s try something else!

# Countries that should carry a text label on the plot.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan",
  "Congo", "Greece", "Argentina", "Brazil", "India", "Italy", "China",
  "South Africa", "Spain", "Botswana", "Cape Verde", "Bhutan", "Rwanda",
  "France", "United States", "Germany", "Britain", "Barbados", "Norway",
  "Japan", "New Zealand", "Singapore"
)

# Label only the selected countries; check_overlap = TRUE skips labels
# that would overlap one already drawn in this layer.
p13 <- p12 +
  geom_text(
    mapping = aes(label = Country),
    data = subset(df, Country %in% pointsToLabel),
    color = "gray20",
    check_overlap = TRUE
  )

p13

Finally, let’s add a theme to this beauty.

# Switch to ggplot2's black-and-white theme.
p14 <- p13 +
  theme_bw()

p14

Next, let’s tweak the axes: give them descriptive titles and set their limits and breaks.

# X-axis: descriptive title, limits of 0.9 to 10.5, and a break at
# every integer from 1 to 10.
p15 <- p14 +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(0.9, 10.5),
    breaks = 1:10
  )

p15

# Y-axis: descriptive title and limits of 0.2 to 1.0.
p16 <- p15 +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1.0)
  )

p16

# ggthemes provides theme_economist_white(); load it before it is used.
library(ggthemes)

# Store the titled plot in its own object. Previously the title was added
# to an unsaved expression, so the themed plot below was drawn without it.
p17 <- p16 + ggtitle("Corruption and Human Development")
p17

# Same titled plot, restyled with the white variant of the Economist theme.
p17 + theme_economist_white()