Original


Source: Reddit (Data is Beautiful) (2019).


Objective

The objective of the original data visualisation is to determine the relationship between the GDP per capita and Index of Economic Freedom per country.

The target audience of this visualisation is not specified by the creator; it can therefore be interpreted by any individual who is aware of, or interested in, GDP per capita and the Index of Economic Freedom (IEF). Whilst GDP per capita is a mathematical ratio (GDP divided by the number of people in a country), the IEF is an average of 12 different index scores grouped into the 4 subcategories of Rule of Law, Government Size, Regulatory Efficiency and Open Markets. The creator does not specify these sub-scores; however, the creator does provide a link to the website/source of these values and their respective calculations.

The visualisation chosen had the following three main issues:

Reference

Code

library(ggplot2)
library(knitr)
library(readr)
library(dplyr)
library(gridExtra)
# Import the 2019 Index of Economic Freedom data and inspect the parsed
# column types so that any column needing conversion can be identified.
IEF <- read_csv("index2019_data.csv")
str(IEF)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 187 obs. of  34 variables:
##  $ CountryID                 : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Country Name              : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ WEBNAME                   : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ Region                    : chr  "Asia-Pacific" "Europe" "Middle East and North Africa" "Sub-Saharan Africa" ...
##  $ World Rank                : chr  "152" "52" "171" "156" ...
##  $ Region Rank               : chr  "39" "27" "14" "33" ...
##  $ 2019 Score                : chr  "51.5" "66.5" "46.2" "50.6" ...
##  $ Property Rights           : chr  "19.6" "54.8" "31.6" "35.9" ...
##  $ Judical Effectiveness     : chr  "29.6" "30.6" "36.2" "26.6" ...
##  $ Government Integrity      : chr  "25.2" "40.4" "28.9" "20.5" ...
##  $ Tax Burden                : chr  "91.7" "86.3" "76.4" "83.9" ...
##  $ Gov't Spending            : chr  "80.3" "73.9" "48.7" "80.7" ...
##  $ Fiscal Health             : chr  "99.3" "80.6" "18.7" "58.2" ...
##  $ Business Freedom          : chr  "49.2" "69.3" "61.6" "55.7" ...
##  $ Labor Freedom             : chr  "60.4" "52.7" "49.9" "58.8" ...
##  $ Monetary Freedom          : chr  "76.7" "81.5" "74.9" "55.4" ...
##  $ Trade Freedom             : chr  "66.0" "87.8" "67.4" "61.2" ...
##  $ Investment Freedom        : chr  "10" "70" "30" "30" ...
##  $ Financial Freedom         : chr  "10" "70" "30" "40" ...
##  $ Tariff Rate (%)           : chr  "7.0" "1.1" "8.8" "9.4" ...
##  $ Income Tax Rate (%)       : chr  "20.0" "23.0" "35.0" "17.0" ...
##  $ Corporate Tax Rate (%)    : chr  "20.0" "15.0" "23.0" "30.0" ...
##  $ Tax Burden % of GDP       : chr  "5.0" "24.9" "24.5" "20.6" ...
##  $ Gov't Expenditure % of GDP: chr  "25.6" "29.5" "41.4" "25.3" ...
##  $ Country                   : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ Population (Millions)     : chr  "35.5" "2.9" "41.5" "28.2" ...
##  $ GDP (Billions, PPP)       : chr  "$69.6" "$36.0" "$632.9" "$190.3" ...
##  $ GDP Growth Rate (%)       : chr  "2.5" "3.9" "2.0" "0.7" ...
##  $ 5 Year GDP Growth Rate (%): chr  "2.9" "2.5" "3.1" "2.9" ...
##  $ GDP per Capita (PPP)      : chr  "1957.58" "12506.65" "15237.20" "6752.58" ...
##  $ Unemployment (%)          : chr  "8.8" "13.9" "10.0" "8.2" ...
##  $ Inflation (%)             : chr  "5.0" "2.0" "5.6" "31.7" ...
##  $ FDI Inflow (Millions)     : chr  "53.9" "1,119.1" "1,203.0" "-2,254.5" ...
##  $ Public Debt (% of GDP)    : chr  "7.3" "71.2" "25.8" "65.3" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   CountryID = col_double(),
##   ..   `Country Name` = col_character(),
##   ..   WEBNAME = col_character(),
##   ..   Region = col_character(),
##   ..   `World Rank` = col_character(),
##   ..   `Region Rank` = col_character(),
##   ..   `2019 Score` = col_character(),
##   ..   `Property Rights` = col_character(),
##   ..   `Judical Effectiveness` = col_character(),
##   ..   `Government Integrity` = col_character(),
##   ..   `Tax Burden` = col_character(),
##   ..   `Gov't Spending` = col_character(),
##   ..   `Fiscal Health` = col_character(),
##   ..   `Business Freedom` = col_character(),
##   ..   `Labor Freedom` = col_character(),
##   ..   `Monetary Freedom` = col_character(),
##   ..   `Trade Freedom` = col_character(),
##   ..   `Investment Freedom` = col_character(),
##   ..   `Financial Freedom` = col_character(),
##   ..   `Tariff Rate (%)` = col_character(),
##   ..   `Income Tax Rate (%)` = col_character(),
##   ..   `Corporate Tax Rate (%)` = col_character(),
##   ..   `Tax Burden % of GDP` = col_character(),
##   ..   `Gov't Expenditure % of GDP` = col_character(),
##   ..   Country = col_character(),
##   ..   `Population (Millions)` = col_character(),
##   ..   `GDP (Billions, PPP)` = col_character(),
##   ..   `GDP Growth Rate (%)` = col_character(),
##   ..   `5 Year GDP Growth Rate (%)` = col_character(),
##   ..   `GDP per Capita (PPP)` = col_character(),
##   ..   `Unemployment (%)` = col_character(),
##   ..   `Inflation (%)` = col_character(),
##   ..   `FDI Inflow (Millions)` = col_character(),
##   ..   `Public Debt (% of GDP)` = col_character()
##   .. )
# Preview the first rows of the imported data.
head(IEF)
## # A tibble: 6 x 34
##   CountryID `Country Name` WEBNAME Region `World Rank` `Region Rank`
##       <dbl> <chr>          <chr>   <chr>  <chr>        <chr>        
## 1         1 Afghanistan    Afghan~ Asia-~ 152          39           
## 2         2 Albania        Albania Europe 52           27           
## 3         3 Algeria        Algeria Middl~ 171          14           
## 4         4 Angola         Angola  Sub-S~ 156          33           
## 5         5 Argentina      Argent~ Ameri~ 148          26           
## 6         6 Armenia        Armenia Europe 47           24           
## # ... with 28 more variables: `2019 Score` <chr>, `Property Rights` <chr>,
## #   `Judical Effectiveness` <chr>, `Government Integrity` <chr>, `Tax
## #   Burden` <chr>, `Gov't Spending` <chr>, `Fiscal Health` <chr>,
## #   `Business Freedom` <chr>, `Labor Freedom` <chr>, `Monetary
## #   Freedom` <chr>, `Trade Freedom` <chr>, `Investment Freedom` <chr>,
## #   `Financial Freedom` <chr>, `Tariff Rate (%)` <chr>, `Income Tax Rate
## #   (%)` <chr>, `Corporate Tax Rate (%)` <chr>, `Tax Burden % of
## #   GDP` <chr>, `Gov't Expenditure % of GDP` <chr>, Country <chr>,
## #   `Population (Millions)` <chr>, `GDP (Billions, PPP)` <chr>, `GDP
## #   Growth Rate (%)` <chr>, `5 Year GDP Growth Rate (%)` <chr>, `GDP per
## #   Capita (PPP)` <chr>, `Unemployment (%)` <chr>, `Inflation (%)` <chr>,
## #   `FDI Inflow (Millions)` <chr>, `Public Debt (% of GDP)` <chr>
# Keep only the columns needed for the plots. Columns are selected by name
# rather than by position so the script does not silently pick the wrong
# fields if the column order of the source file ever changes.
IEF_simplified <- IEF %>%
  select(
    `Country Name`,
    Region,
    `World Rank`,
    `2019 Score`,
    `GDP per Capita (PPP)`
  )
head(IEF_simplified)
## # A tibble: 6 x 5
##   `Country Name` Region         `World Rank` `2019 Score` `GDP per Capita ~
##   <chr>          <chr>          <chr>        <chr>        <chr>            
## 1 Afghanistan    Asia-Pacific   152          51.5         1957.58          
## 2 Albania        Europe         52           66.5         12506.65         
## 3 Algeria        Middle East a~ 171          46.2         15237.20         
## 4 Angola         Sub-Saharan A~ 156          50.6         6752.58          
## 5 Argentina      Americas       148          52.2         20875.76         
## 6 Armenia        Europe         47           67.7         9455.94
# The score, GDP and rank columns were read as text; convert them to doubles
# so they can be used as continuous values in the plots.
IEF_simplified <- IEF_simplified %>%
  mutate(
    `2019 Score` = as.double(`2019 Score`),
    `GDP per Capita (PPP)` = as.double(`GDP per Capita (PPP)`),
    `World Rank` = as.double(`World Rank`)
  )
head(IEF_simplified)
## # A tibble: 6 x 5
##   `Country Name` Region         `World Rank` `2019 Score` `GDP per Capita ~
##   <chr>          <chr>                 <dbl>        <dbl>             <dbl>
## 1 Afghanistan    Asia-Pacific            152         51.5             1958.
## 2 Albania        Europe                   52         66.5            12507.
## 3 Algeria        Middle East a~          171         46.2            15237.
## 4 Angola         Sub-Saharan A~          156         50.6             6753.
## 5 Argentina      Americas                148         52.2            20876.
## 6 Armenia        Europe                   47         67.7             9456.
# Load the classification of each country as an oil and gas major, a tax
# haven, or neither.
# Note: no country in the data set is both a tax haven and an oil and gas
# major, so that combination can be ignored.
Conditions <- read_csv("OilandGas&Tax Haven.csv")
head(Conditions)
## # A tibble: 6 x 2
##   `Country Name` `Oil and Gas Major/Tax Haven`
##   <chr>          <chr>                        
## 1 Afghanistan    Neither                      
## 2 Albania        Neither                      
## 3 Algeria        Neither                      
## 4 Angola         Neither                      
## 5 Argentina      Neither                      
## 6 Armenia        Neither
# Attach each country's classification to the index data, matching rows on
# the shared country name and keeping every row of the index data.
Data <- left_join(IEF_simplified, Conditions, by = "Country Name")
head(Data)
## # A tibble: 6 x 6
##   `Country Name` Region `World Rank` `2019 Score` `GDP per Capita~
##   <chr>          <chr>         <dbl>        <dbl>            <dbl>
## 1 Afghanistan    Asia-~          152         51.5            1958.
## 2 Albania        Europe           52         66.5           12507.
## 3 Algeria        Middl~          171         46.2           15237.
## 4 Angola         Sub-S~          156         50.6            6753.
## 5 Argentina      Ameri~          148         52.2           20876.
## 6 Armenia        Europe           47         67.7            9456.
## # ... with 1 more variable: `Oil and Gas Major/Tax Haven` <chr>
# List the rows that did not receive a classification from the join.
Data_NA <- Data %>%
  filter(is.na(`Oil and Gas Major/Tax Haven`))
Data_NA
## # A tibble: 1 x 6
##   `Country Name` Region `World Rank` `2019 Score` `GDP per Capita~
##   <chr>          <chr>         <dbl>        <dbl>            <dbl>
## 1 <NA>           <NA>             NA           NA               NA
## # ... with 1 more variable: `Oil and Gas Major/Tax Haven` <chr>
# Only one row has no classification (see Data_NA), so it can be removed.
# Note that na.omit() drops every row with an NA in any column, so rows with
# a missing score, rank or GDP value are removed here as well.
Data_fixed <- na.omit(Data)
# Retained so that any later code referring to Type keeps working.
Type <- Data_fixed$`Oil and Gas Major/Tax Haven`
# Make sure all graph headings are centred.
theme_update(plot.title = element_text(hjust = 0.5))
# Refer to columns by name inside aes() so ggplot2 resolves them in the plot
# data; using Data_fixed$... there is discouraged by ggplot2.
# labs(colour = "Type") keeps the legend title the same as before.
p <- ggplot(
  data = Data_fixed,
  aes(
    x = `2019 Score`,
    y = `GDP per Capita (PPP)`,
    colour = `Oil and Gas Major/Tax Haven`
  )
) +
  labs(colour = "Type")
p +
  ggtitle("Economic Freedom vs. GDP Per Capita") +
  xlab("Index of Economic Freedom") +
  ylab("GDP per Capita (PPP)") +
  geom_point() +
  geom_smooth(method = "lm")

# The combined plot is still somewhat visually messy, so draw one panel per
# category. The facet variable is given as a column name so that it is
# resolved in the plot data rather than in an outside data frame.
p +
  ggtitle("Economic Freedom vs. GDP Per Capita") +
  xlab("Index of Economic Freedom") +
  ylab("GDP per Capita (PPP)") +
  geom_point() +
  facet_grid(~ `Oil and Gas Major/Tax Haven`) +
  geom_smooth(method = "lm")

# Trends are easier to identify once faceted, but the y scale is now hard to
# read. Plotting the natural log of GDP per capita addresses this.
# Columns are referenced by name inside aes() and facet_grid() (instead of
# Data_fixed$...), and the stray trailing comma in aes() has been removed.
# labs(colour = "Type") keeps the legend title the same as before.
p2 <- ggplot(
  data = Data_fixed,
  aes(
    x = `2019 Score`,
    y = log(`GDP per Capita (PPP)`),
    colour = `Oil and Gas Major/Tax Haven`
  )
) +
  labs(colour = "Type")
p2a <- p2 +
  ggtitle("Overall Summary") +
  xlab("") +
  ylab("Log of GDP per Capita (PPP)") +
  geom_point() +
  facet_grid(~ `Oil and Gas Major/Tax Haven`) +
  geom_smooth(method = "lm")
p2a

# Switching to logs makes the scale more manageable and the relationship
# easier to observe.
# However, unlike the original plot, no specific data points are identified,
# and transforming GDP per capita into a log goes against the usual way this
# economic measure is displayed. Additional plots are therefore needed.
# The oil and gas majors and the tax havens are plotted separately in
# individual scatterplots, with country labels superimposed on the smaller
# number of observations.
Oil_and_Gas_Majors <- Data_fixed %>%
  filter(`Oil and Gas Major/Tax Haven` == "Oil and Gas Major") %>%
  as.data.frame()
Oil_and_Gas_Majors
##            Country Name                       Region World Rank 2019 Score
## 1                Canada                     Americas          8       77.7
## 2                 China                 Asia-Pacific        100       58.4
## 3                 India                 Asia-Pacific        129       55.2
## 4             Indonesia                 Asia-Pacific         56       65.8
## 5                  Iran Middle East and North Africa        155       51.1
## 6                Kuwait Middle East and North Africa         90       60.8
## 7                Norway                       Europe         26       73.0
## 8                 Qatar Middle East and North Africa         28       72.6
## 9                Russia                       Europe         98       58.9
## 10         Saudi Arabia Middle East and North Africa         91       60.7
## 11         Turkmenistan                 Asia-Pacific        164       48.4
## 12 United Arab Emirates Middle East and North Africa          9       77.6
## 13        United States                     Americas         12       76.8
##    GDP per Capita (PPP) Oil and Gas Major/Tax Haven
## 1              48265.25           Oil and Gas Major
## 2              16660.27           Oil and Gas Major
## 3               7182.80           Oil and Gas Major
## 4              12377.49           Oil and Gas Major
## 5              20199.59           Oil and Gas Major
## 6              66162.89           Oil and Gas Major
## 7              71830.88           Oil and Gas Major
## 8             124529.05           Oil and Gas Major
## 9              27834.09           Oil and Gas Major
## 10             54777.38           Oil and Gas Major
## 11             18125.68           Oil and Gas Major
## 12             67740.86           Oil and Gas Major
## 13             59501.11           Oil and Gas Major
# Scatterplot of the oil and gas majors with each point labelled by country.
# Columns are referenced by name inside aes() so ggplot2 resolves them in the
# plot data; using Oil_and_Gas_Majors$... there is discouraged by ggplot2.
p3 <- ggplot(
  data = Oil_and_Gas_Majors,
  aes(
    x = `2019 Score`,
    y = `GDP per Capita (PPP)`
  )
)
p3a <- p3 +
  ggtitle("Oil and Gas Majors Summary") +
  xlab("") +
  ylab("") +
  geom_point() +
  geom_text(aes(label = `Country Name`), size = 3) +
  geom_smooth(method = "lm")
p3a

# Extract the countries classified as tax havens into a plain data frame.
Tax_Haven <- Data_fixed %>%
  filter(`Oil and Gas Major/Tax Haven` == "Tax Haven") %>%
  as.data.frame()
Tax_Haven
##   Country Name       Region World Rank 2019 Score GDP per Capita (PPP)
## 1    Hong Kong Asia-Pacific          1       90.2             61393.32
## 2      Ireland       Europe          6       80.5             75538.36
## 3   Luxembourg       Europe         17       75.9            106373.78
## 4  Netherlands       Europe         13       76.8             53634.56
## 5    Singapore Asia-Pacific          2       89.4             93905.50
## 6  Switzerland       Europe          4       81.9             61421.82
##   Oil and Gas Major/Tax Haven
## 1                   Tax Haven
## 2                   Tax Haven
## 3                   Tax Haven
## 4                   Tax Haven
## 5                   Tax Haven
## 6                   Tax Haven
# Scatterplot of the tax havens with each point labelled by country.
# Columns are referenced by name inside aes() so ggplot2 resolves them in the
# plot data; using Tax_Haven$... there is discouraged by ggplot2.
p4 <- ggplot(
  data = Tax_Haven,
  aes(
    x = `2019 Score`,
    y = `GDP per Capita (PPP)`
  )
)
p4a <- p4 +
  ggtitle("Tax Haven Summary") +
  geom_point() +
  xlab("") +
  ylab("") +
  geom_text(aes(label = `Country Name`), size = 3) +
  geom_smooth(method = "lm") +
  expand_limits(y = 0) # make sure the y axis includes zero
p4a

# The oil and gas majors and the tax havens, and their respective trends, can
# now be identified.
# The remaining countries get their own scatterplot as well.
Neither <- Data_fixed %>%
  filter(`Oil and Gas Major/Tax Haven` == "Neither") %>%
  as.data.frame()
head(Neither)
##   Country Name                       Region World Rank 2019 Score
## 1  Afghanistan                 Asia-Pacific        152       51.5
## 2      Albania                       Europe         52       66.5
## 3      Algeria Middle East and North Africa        171       46.2
## 4       Angola           Sub-Saharan Africa        156       50.6
## 5    Argentina                     Americas        148       52.2
## 6      Armenia                       Europe         47       67.7
##   GDP per Capita (PPP) Oil and Gas Major/Tax Haven
## 1              1957.58                     Neither
## 2             12506.65                     Neither
## 3             15237.20                     Neither
## 4              6752.58                     Neither
## 5             20875.76                     Neither
## 6              9455.94                     Neither
# Scatterplot of the countries that are neither oil and gas majors nor tax
# havens. Columns are referenced by name inside aes() so ggplot2 resolves
# them in the plot data; using Neither$... there is discouraged by ggplot2.
p5 <- ggplot(
  data = Neither,
  aes(
    x = `2019 Score`,
    y = `GDP per Capita (PPP)`
  )
)
p5a <- p5 +
  ggtitle("Neither Oil/Gas Major or Tax Haven Summary") +
  xlab("") +
  ylab("") +
  geom_point() +
  geom_smooth(method = "lm")
p5a

Data Reference

Note: the above link was mentioned in a Wikipedia article: https://en.wikipedia.org/wiki/Tax_haven#Top_10_tax_havens

Reconstruction