Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.

Original


Source: HowMuch.net - Visualizing the Relationship Between Corruption and Economic Growth Around the World (2017).


Objective

  • The objective of the original data visualisation: Visualise the relationship between country corruption and economic growth
  • The targeted audience: English-speaking general audience, people without specialist knowledge

The visualisation chosen had the following three main issues:

  • The use of 2017 GDP rather than GDP growth over time is not informative. The average growth percentage will allow the audience to see whether corruption has any correlation with changes in economic growth.
  • The use of unnecessary graphics and colours distracts the audience from the core message the visualisation is presenting.
  • The visualisation is difficult to read as it requires the reader to scroll up and down the page to compare the numbers between countries with varying levels of Corruption Perception Index Score.

Reference

  • Visualizing the Relationship Between Corruption and Economic Growth Around the World. (2018). The Current State of the World’s Corruption: Corruption Perception Index & Gross Domestic Product 2017. Retrieved May 02, 2020, from HowMuch.net website: https://howmuch.net/articles/the-state-of-worlds-corruption-2017

Code

The following code was used to fix the issues identified in the original.

library(ggplot2)
library(readr)
library(readxl)
library(dplyr)

# import GDP data (the first three lines of the file are skipped)
GDP <- read_csv("GDP.csv",
    na = "NA", skip = 3)

# subset the years 2007-2017; the year columns are named after the year, so
# the names are built explicitly as character strings instead of relying on
# the implicit string-to-number coercion of '2007':'2017'
GDP2007to2017 <- GDP[c("Country Name", "Country Code",
                       as.character(2007:2017))]

# rename columns: "Country", "Country Code", then GDP2007 ... GDP2017
names(GDP2007to2017) <- c("Country", "Country Code",
                          paste0("GDP", 2007:2017))

# rename Russian Federation to Russia to match CPI; the row is located by
# name rather than by a fixed row number so the rename cannot hit the wrong
# country if the row order of the source file changes
GDP2007to2017$Country[
  GDP2007to2017$Country %in% "Russian Federation"
] <- "Russia"

# import CPI data (2012-2017) from the historical sheet of the workbook,
# skipping the two lines above the header row
CPI <- read_excel(
  path = "CPI2017_FullDataSet.xlsx",
  sheet = "CPI historical data 2012-2017",
  skip = 2
)

# keep the country name and the six yearly score columns
# (note the mixed "Score"/"score" capitalisation in the sheet's headers)
CPI_needed <- CPI[, c(
  "Country",
  "CPI Score 2012",
  "CPI Score 2013",
  "CPI score 2014",
  "CPI score 2015",
  "CPI score 2016",
  "CPI score 2017"
)]

# rename "USA" to "United States" to match 2007-2011 dataset
# NOTE(review): this relies on the USA being row 18 of the sheet - confirm
# whenever the workbook is updated, or switch to a name-based match
CPI_needed$Country[18] <- "United States"

# import CPI data (2007-2011)
CPI2 <- read_csv(file = "~/CPI2011_Fulldataset.csv")

# multiply the score columns (columns 3 to 7) by ten
# NOTE(review): presumably this converts the older 0-10 scores to the 0-100
# scale of the 2012-2017 data - confirm against the source file
CPI2[, 3:7] <- 10 * CPI2[, 3:7]

# combine everything into one data frame keyed on the country name:
# start from the 2007-2011 CPI scores, then attach the 2012-2017 CPI scores
data <- left_join(CPI2, CPI_needed, by = "Country")
# ... and finally the GDP figures
data <- left_join(data, GDP2007to2017, by = "Country")

# create one year-on-year GDP growth column (in percent) per year 2008-2017;
# these are the inputs for the average GDP growth 2007-2017
GDPcolName <- paste0("GDP", 2008:2017)

# LastCol always holds the name of the previous year's GDP column
LastCol <- "GDP2007"
for (thiscol in GDPcolName) {
  NewCol <- paste0(thiscol, "_pct")
  # progress output: previous year, current year, name of the new column
  print(c(LastCol, thiscol, NewCol))
  # columns are extracted with [[ ]] by name; passing a character variable to
  # select() is deprecated and ambiguous if a column shares the variable's name
  data[[NewCol]] <-
    (data[[thiscol]] - data[[LastCol]]) / data[[LastCol]] * 100
  # reset for next loop
  LastCol <- thiscol
}
## [1] "GDP2007"     "GDP2008"     "GDP2008_pct"
## [1] "GDP2008"     "GDP2009"     "GDP2009_pct"
## [1] "GDP2009"     "GDP2010"     "GDP2010_pct"
## [1] "GDP2010"     "GDP2011"     "GDP2011_pct"
## [1] "GDP2011"     "GDP2012"     "GDP2012_pct"
## [1] "GDP2012"     "GDP2013"     "GDP2013_pct"
## [1] "GDP2013"     "GDP2014"     "GDP2014_pct"
## [1] "GDP2014"     "GDP2015"     "GDP2015_pct"
## [1] "GDP2015"     "GDP2016"     "GDP2016_pct"
## [1] "GDP2016"     "GDP2017"     "GDP2017_pct"
# average the yearly growth columns created by the loop; they are selected by
# name (<GDP column>_pct) rather than by position so that an extra or missing
# column in any input file cannot silently shift the selection
data <- data %>%
  mutate(AvgGDPGrowth = rowMeans(data[paste0(GDPcolName, "_pct")]))

# create column with average CPI 2007-2017, again selecting by name: the
# rescaled score columns of CPI2 (its columns 3 to 7) plus every score
# column taken from the 2012-2017 sheet
data <- data %>%
  mutate(AverageCPI = rowMeans(
    data[c(names(CPI2)[3:7], setdiff(names(CPI_needed), "Country"))]
  ))

# load the country metadata file that ships with the World Bank GDP datapack;
# it provides the region of each country
Regions <- read_csv(
  file = "Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_988718.csv"
)

# attach the region data to the main data frame, matching on the country code
data <- left_join(data, Regions, by = "Country Code")

# keep only the columns the visualisation needs ...
data <- select(
  data,
  Country, `Country Code`,
  CPI2007:`CPI score 2017`,
  GDP2007:GDP2017,
  AvgGDPGrowth, AverageCPI, Region
)
# ... and drop every row that still contains a missing value
data <- na.omit(data)

# data visualisation code - scatter plot of the average CPI score (x) against
# the average annual GDP growth (y), one point per row, coloured by region.
# group = 1 puts all rows into a single group, so the trend line below is
# fitted once over all rows instead of once per region.
p1 <- ggplot(
  data,
  aes(x = AverageCPI, y = AvgGDPGrowth, colour = Region, group = 1)
) +
  geom_point(alpha = 0.7)

# text labels for rows with extreme growth values and a few named countries
p1 <- p1 +
  geom_text(
    data = subset(
      data,
      AvgGDPGrowth > 15 |
        AvgGDPGrowth < -3 |
        Country == "China" |
        Country == "Iraq" |
        Country == "India" |
        Country == "Germany" |
        Country == "Mexico" |
        Country == "Italy"
    ),
    aes(x = AverageCPI, y = AvgGDPGrowth, label = Country),
    colour = "black",
    nudge_x = 0.05,
    nudge_y = -0.8,
    alpha = 0.7,
    size = 3.5
  )

# add trend line (linear model, drawn as a red line without confidence band)
p1 <- p1 +
  stat_smooth(
    geom = "line", method = "lm", se = FALSE,
    colour = "red", alpha = 0.7
  )

# manual palette of seven colours for the regions
p1 <- p1 +
  scale_colour_manual(
    values = c(
      "#9b2bcf", "#2359c4", "#999999",
      "#2e4057", "#d1495b", "#CC79A7", "#D55E00"
    )
  )

# set axis limits: make sure the x axis covers at least 0 to 100
p1 <- p1 + expand_limits(x = c(0, 100))

# add title and labels
p1 <- p1 +
  labs(
    title = "World Corruption Perception Index and Economic Growth",
    subtitle = "10 Year Average from 2007 to 2017",
    x = "Average CPI Score",
    y = "Average Annual GDP Growth [%]"
  )

# manual annotation for New Zealand: label plus arrow from y = 7 to y = 5
p1 <- p1 +
  annotate(
    "text",
    x = 90, y = 8.5, label = "New Zealand", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 91.88, xend = 91.88, y = 7, yend = 5,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  )

# manual annotation for United States: label plus arrow from y = 10 to y = 3.9
p1 <- p1 +
  annotate(
    "text",
    x = 73.4, y = 11, label = "United States", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 73.4, xend = 73.4, y = 10, yend = 3.9,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  )

# manual annotation for Saudi Arabia: label plus arrow from y = 16 to y = 6.8
p1 <- p1 +
  annotate(
    "text",
    x = 44.44, y = 17, label = "Saudi Arabia", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 44.44, xend = 44.44, y = 16, yend = 6.8,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  )

# manual annotation for United Kingdom: label only, no arrow
p1 <- p1 +
  annotate(
    "text",
    x = 78.6, y = -2, label = "United Kingdom", size = 3.5, alpha = 0.7
  )

# manual annotation for Russia: label plus arrow from y = -2.5 up to y = 4.4
p1 <- p1 +
  annotate(
    "text",
    x = 25.58, y = -2.7, label = "Russia", size = 3.5, alpha = 0.7
  ) +
  annotate(
    "segment",
    x = 25.58, xend = 25.58, y = -2.5, yend = 4.4,
    colour = "black", size = 0.5, alpha = 0.4,
    arrow = arrow(length = unit(1, "mm"))
  )

# themes: minimal base theme with bold, slightly smaller titles
p1 <- p1 +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    legend.title = element_text(size = 9, face = "bold")
  )

Data Reference

  • GDP per capita. (2018). GDP per capita (current US$). Retrieved May 02, 2020, from The World Bank website: https://data.worldbank.org/
  • Corruption Perception Index 2017. (2017). Corruption Perception Index 2017. Retrieved May 02, 2020, from Transparency International website: https://www.transparency.org/

Reconstruction

The following plot fixes the main issues in the original.