This data set is from my Data Science and Machine Learning with R course. The original assignment was to make a scatter plot similar to the image above. I’ve taken it a few steps further and built an interactive version that makes the underlying data easy to inspect.
# Load the assignment data from the working directory and preview the first
# six rows to check the column layout.
my.data <- read.csv(file = "Economist_Assignment_Data.csv")
head(my.data, n = 6L)
## X Country HDI.Rank HDI CPI Region
## 1 1 Afghanistan 172 0.398 1.5 Asia Pacific
## 2 2 Albania 70 0.739 3.1 East EU Cemt Asia
## 3 3 Algeria 96 0.698 2.9 MENA
## 4 4 Angola 148 0.486 2.0 SSA
## 5 5 Argentina 45 0.797 3.0 Americas
## 6 6 Armenia 86 0.716 2.6 East EU Cemt Asia
# Drop the row-index column that shows up as "X" in the preview; it only
# numbers the rows. Removing it by name (rather than by position) keeps this
# step safe to re-run: a second run is a no-op instead of dropping Country.
my.data$X <- NULL
# Fix the "Cemt" typo in the Region labels. Region may be a factor (read.csv()
# defaulted to stringsAsFactors = TRUE before R 4.0.0); assigning a string that
# is not an existing level into a factor yields NA with a warning, so rename
# the level in that case and replace the values directly otherwise.
if (is.factor(my.data$Region)) {
  levels(my.data$Region)[levels(my.data$Region) == "East EU Cemt Asia"] <-
    "East EU Cent Asia"
} else {
  my.data$Region[my.data$Region %in% "East EU Cemt Asia"] <- "East EU Cent Asia"
}
library(ggplot2)
library(plotly)
# Base plot: CPI on the x axis, HDI on the y axis. Country is mapped to
# `label` so it travels with each point and can be shown in a tooltip later.
sp <- ggplot(data = my.data, mapping = aes(x = CPI, y = HDI, label = Country))

# Full figure: hollow circles coloured by Region, a single red trend line
# fitted as HDI ~ log(CPI) without a confidence band, fixed axis ranges with
# descriptive axis titles, a plot title, and the black-and-white theme.
sp2 <- sp +
  geom_point(mapping = aes(color = Region), size = 4, shape = 21) +
  geom_smooth(formula = y ~ log(x), method = "lm", color = "red", se = FALSE) +
  theme_bw() +
  labs(title = "Corruption and Human Development") +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1 = best)",
    limits = c(0.2, 1)
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011, (10 = least corrupt)",
    breaks = 1:10,
    limits = c(1, 10)
  )
Hover over any point to see its country, HDI, and CPI values!
# Turn the static ggplot into an interactive plotly widget; the tooltip is
# restricted to each point's HDI, CPI, and label (Country) values.
ggplotly(p = sp2, tooltip = c("HDI", "CPI", "label"))