Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
The visualisation chosen had the following three main issues:
Reference
The following code was used to fix the issues identified in the original.
library(ggplot2)
library(readr)
library(readxl)
library(dplyr)
# Import the GDP table. The first three lines of the file are skipped and
# the literal string "NA" is read as a missing value.
GDP <- read_csv("GDP.csv", na = "NA", skip = 3)

# Keep the two country identifier columns plus the yearly columns 2007-2017.
# The year labels are built explicitly as strings rather than relying on
# '2007':'2017' being coerced to numbers and then back to text.
GDP2007to2017 <- GDP[c(
  "Country Name", "Country Code",
  as.character(2007:2017)
)]

# Rename columns: "Country" is the key used for the later joins and the year
# columns become GDP2007 ... GDP2017.
names(GDP2007to2017) <- c(
  "Country", "Country Code",
  paste0("GDP", 2007:2017)
)

# Rename "Russian Federation" to "Russia" to match the CPI data.
# The row is located by its name instead of the hard-coded position 201, so
# the rename still hits the right country if the row order of the file
# changes (and cannot overwrite some other country's name by accident).
# %in% is used so that a missing country name never produces an NA index.
GDP2007to2017$Country[
  GDP2007to2017$Country %in% "Russian Federation"
] <- "Russia"
# Load the 2012-2017 CPI history sheet; the first two rows of the sheet are
# skipped before the header is read.
CPI <- read_excel(
  "CPI2017_FullDataSet.xlsx",
  sheet = "CPI historical data 2012-2017",
  skip = 2
)

# Keep the country name and the six yearly score columns. The inconsistent
# capitalisation ("Score" / "score") is how the columns are requested here.
CPI_needed <- select(
  CPI,
  "Country",
  "CPI Score 2012", "CPI Score 2013",
  "CPI score 2014", "CPI score 2015",
  "CPI score 2016", "CPI score 2017"
)

# Overwrite the country name in row 18 with "United States" so that it
# matches the spelling used in the 2007-2011 data set.
# NOTE(review): row 18 is addressed by position; presumably this is the USA
# row of this particular workbook - confirm if the file is ever updated.
CPI_needed$Country[18] <- "United States"
# Load the 2007-2011 CPI data set.
CPI2 <- read_csv("~/CPI2011_Fulldataset.csv")

# Multiply columns 3 to 7 by 10.
# NOTE(review): presumably these are the five yearly scores on a 0-10 scale,
# rescaled to line up with the 0-100 scores of the newer data - confirm.
CPI2[3:7] <- lapply(CPI2[3:7], function(score) score * 10)

# Build the main data frame: start from the 2007-2011 CPI rows and attach the
# 2012-2017 CPI scores, then the GDP figures, matching on the country name.
# Left joins keep every row of CPI2 even when no match is found.
data <- left_join(CPI2, CPI_needed, by = "Country")
data <- left_join(data, GDP2007to2017, by = "Country")
# Year-over-year GDP growth: for every year 2008-2017 add a "<year column>_pct"
# column holding the percentage change relative to the previous year's GDP.
# (The average of these columns is computed in a later step.)
GDPcolName <- paste0("GDP", 2008:2017)

# Column holding the previous year's GDP; starts at the 2007 baseline.
LastCol <- "GDP2007"
for (thiscol in GDPcolName) {
  NewCol <- paste0(thiscol, "_pct")
  # Trace which columns are combined: previous year, current year, new column.
  print(c(LastCol, thiscol, NewCol))
  # Columns are pulled out by name with [[ ]]. Passing a character variable
  # straight to select() is deprecated in tidyselect and is ambiguous if the
  # data frame ever has a column called "thiscol" or "LastCol".
  data[[NewCol]] <- (data[[thiscol]] - data[[LastCol]]) / data[[LastCol]] * 100
  # The current year becomes the baseline for the next iteration.
  LastCol <- thiscol
}
## [1] "GDP2007" "GDP2008" "GDP2008_pct"
## [1] "GDP2008" "GDP2009" "GDP2009_pct"
## [1] "GDP2009" "GDP2010" "GDP2010_pct"
## [1] "GDP2010" "GDP2011" "GDP2011_pct"
## [1] "GDP2011" "GDP2012" "GDP2012_pct"
## [1] "GDP2012" "GDP2013" "GDP2013_pct"
## [1] "GDP2013" "GDP2014" "GDP2014_pct"
## [1] "GDP2014" "GDP2015" "GDP2015_pct"
## [1] "GDP2015" "GDP2016" "GDP2016_pct"
## [1] "GDP2016" "GDP2017" "GDP2017_pct"
# Average yearly GDP growth: row-wise mean of the ten "<year column>_pct"
# columns. The columns are picked by name (derived from GDPcolName) instead of
# by the fixed positions 26:35, so the average cannot silently pick up the
# wrong columns if one of the input files gains or loses a column.
# No na.rm is used: a country with any missing year gets NA here.
data <- data %>%
  mutate(AvgGDPGrowth = rowMeans(data[paste0(GDPcolName, "_pct")]))

# Average CPI: row-wise mean of columns 3 to 13.
# NOTE(review): presumably these are the eleven yearly CPI scores 2007-2017
# (five from the 2007-2011 file, six from the 2012-2017 sheet); the selection
# is positional and depends on the column order of the joins - confirm.
data <- data %>%
  mutate(AverageCPI = rowMeans(data[, 3:13]))
# Load the country metadata file that ships with the World Bank GDP data
# pack; it supplies the Region column used below.
Regions <- read_csv(
  "Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_988718.csv"
)

# In one pipeline: attach the metadata by country code, keep only the columns
# needed for the visualisation, and drop every row that still has a missing
# value in any of the kept columns.
data <- data %>%
  left_join(Regions, by = "Country Code") %>%
  select(
    Country, `Country Code`,
    CPI2007:`CPI score 2017`,
    GDP2007:GDP2017,
    AvgGDPGrowth, AverageCPI, Region
  ) %>%
  na.omit()
# Scatter plot of average CPI score (x) against average GDP growth (y), one
# point per country, coloured by region. The plot is assembled in stages;
# layers are added in the same order as they are drawn.

# Base plot. group = 1 puts all rows in a single group, so the trend line
# added below is fitted once over all countries rather than once per region.
p1 <- ggplot(
  data = data,
  aes(group = 1, x = AverageCPI, y = AvgGDPGrowth, col = Region)
)

# Points, plus black text labels for countries with extreme growth
# (above 15 % or below -3 %) and a hand-picked set of countries.
p1 <- p1 +
  geom_point(alpha = 0.7) +
  geom_text(
    data = subset(
      data,
      AvgGDPGrowth > 15 |
        AvgGDPGrowth < -3 |
        Country %in% c("China", "Iraq", "India", "Germany", "Mexico", "Italy")
    ),
    aes(AverageCPI, AvgGDPGrowth, label = Country),
    col = "black", nudge_y = -0.8, nudge_x = 0.05,
    alpha = 0.7, size = 3.5
  )

# Linear trend line, region colours, axis limits, title and axis labels.
p1 <- p1 +
  stat_smooth(
    geom = "line", method = "lm",
    alpha = 0.7, col = "red", se = FALSE
  ) +
  scale_color_manual(
    values = c(
      "#9b2bcf", "#2359c4", "#999999",
      "#2e4057", "#d1495b", "#CC79A7", "#D55E00"
    )
  ) +
  # make sure the x axis covers 0 to 100
  expand_limits(x = c(0, 100)) +
  labs(
    title = "World Corruption Perception Index and Economic Growth",
    y = "Average Annual GDP Growth [%]",
    x = "Average CPI Score",
    subtitle = "10 Year Average from 2007 to 2017"
  )

# Manually placed country labels; all but the United Kingdom get an arrow
# pointing from the label towards the country's point. Coordinates are fixed
# values in data units.
p1 <- p1 +
  # New Zealand
  annotate(
    "text",
    x = 90, y = 8.5, label = "New Zealand", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 91.88, xend = 91.88, y = 7, yend = 5,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  ) +
  # United States
  annotate(
    "text",
    x = 73.4, y = 11, label = "United States", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 73.4, xend = 73.4, y = 10, yend = 3.9,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  ) +
  # Saudi Arabia
  annotate(
    "text",
    x = 44.44, y = 17, label = "Saudi Arabia", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 44.44, xend = 44.44, y = 16, yend = 6.8,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  ) +
  # United Kingdom (label only, no arrow)
  annotate(
    "text",
    x = 78.6, y = -2, label = "United Kingdom", size = 3.5, alpha = 0.7
  ) +
  # Russia
  annotate(
    "text",
    x = 25.58, y = -2.7, label = "Russia", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 25.58, xend = 25.58, y = -2.5, yend = 4.4,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  )

# Minimal theme with bold title, axis titles and legend title.
p1 <- p1 +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    legend.title = element_text(size = 9, face = "bold")
  )
Data Reference
The following plot fixes the main issues in the original.