The Economist Version

Image:

Import Libraries

#install.packages("pdftools")
pkg <- c("tidyverse", "pdftools", "ggthemes", "ggpubr", "tm", "grid", "ggrepel", "scales")
sapply(pkg, library, character.only = TRUE)

Data Pre-Processing

# One-off download of the report (left disabled; run once to fetch the PDF):
# download.file("http://www.starsfoundation.org.uk/sites/default/files/downloads/Social%20Progress%20Index%202014%20Report.pdf", "Social Progress Index 2014 Report.pdf")

# Extract the text of the PDF; each page is addressed by its page number below.
datatext <- pdf_text("Social Progress Index 2014 Report.pdf")

# Break page 88 into its individual lines and peek at the first few.
datatext88 <- read_lines(datatext[[88]])
head(datatext88)
## [1] "    APPENDIX 1 / SOCIAL PROGRESS INDEX 2014 FULL RESULTS"                                                                                  
## [2] "                                                                                                       Nutrition and"                      
## [3] "                             GDP per capita- Social Progress    Basic Human Foundations of                           Water and"            
## [4] "   Rank Country                                                                            Opportunity Basic Medical            Shelter Pe"
## [5] "                             2005 constant $     Index             Needs      Wellbeing                              Sanitation"           
## [6] "                                                                                                           Care"
# Lines 7 through 50 of the page hold the table body; the six lines before
# them are the column headers printed above. Preview the first few rows.
data1 <- datatext88[seq(7, 50)]
head(data1)
## [1] "     1  New Zealand                $25,857       88.24             91.74       84.97         88.01        97.57       100.00    83.23"
## [2] "     2  Switzerland                $39,293       88.19            94.87        89.78         79.92        98.33        99.92    88.48"
## [3] "     3  Iceland                    $33,880       88.07            94.32         88.19         81.71       98.78       100.00    85.05"
## [4] "     4  Netherlands                $36,438       87.37             93.91       87.56         80.63        98.16       100.00    88.92"
## [5] "     5  Norway                     $47,547       87.12            93.59        86.94         80.82        98.71       100.00    82.93"
## [6] "     6  Sweden                     $34,945       87.08            94.59         84.71        81.95        98.26       100.00    86.74"
# Tidy the 44 table rows for inspection: collapse runs of whitespace into
# single spaces, then strip the commas (thousands separators in the GDP column).
all_data1_lines <- str_replace_all(str_squish(data1[1:44]), ",", "")
head(all_data1_lines)
## [1] "1 New Zealand $25857 88.24 91.74 84.97 88.01 97.57 100.00 83.23"
## [2] "2 Switzerland $39293 88.19 94.87 89.78 79.92 98.33 99.92 88.48" 
## [3] "3 Iceland $33880 88.07 94.32 88.19 81.71 98.78 100.00 85.05"    
## [4] "4 Netherlands $36438 87.37 93.91 87.56 80.63 98.16 100.00 88.92"
## [5] "5 Norway $47547 87.12 93.59 86.94 80.82 98.71 100.00 82.93"     
## [6] "6 Sweden $34945 87.08 94.59 84.71 81.95 98.26 100.00 86.74"
# Tokenise the rows whose country name is a single word. Rows 1, 13, 16, 23,
# 25, 28 and 37 are left out here because they are processed separately as
# multi-word country names further down.
all_data1_lines_sub <- local({
  rows <- data1[-c(1, 13, 16, 23, 25, 28, 37)]
  cleaned <- str_replace_all(str_squish(rows), ",", "")
  strsplit(cleaned, split = " ")
})
head(all_data1_lines_sub)
## [[1]]
##  [1] "2"           "Switzerland" "$39293"      "88.19"       "94.87"      
##  [6] "89.78"       "79.92"       "98.33"       "99.92"       "88.48"      
## 
## [[2]]
##  [1] "3"       "Iceland" "$33880"  "88.07"   "94.32"   "88.19"   "81.71"  
##  [8] "98.78"   "100.00"  "85.05"  
## 
## [[3]]
##  [1] "4"           "Netherlands" "$36438"      "87.37"       "93.91"      
##  [6] "87.56"       "80.63"       "98.16"       "100.00"      "88.92"      
## 
## [[4]]
##  [1] "5"      "Norway" "$47547" "87.12"  "93.59"  "86.94"  "80.82" 
##  [8] "98.71"  "100.00" "82.93" 
## 
## [[5]]
##  [1] "6"      "Sweden" "$34945" "87.08"  "94.59"  "84.71"  "81.95" 
##  [8] "98.26"  "100.00" "86.74" 
## 
## [[6]]
##  [1] "7"      "Canada" "$35936" "86.95"  "93.52"  "80.31"  "87.02" 
##  [8] "98.10"  "95.76"  "88.64"
# One-word country names: token 2 is the country, token 3 the GDP figure and
# token 4 the overall Social Progress Index score.
datadf88_sub <- all_data1_lines_sub %>%
  plyr::ldply() %>%
  select(Country = V2, GDP = V3, SPI = V4)

# Two-word country names (e.g. "New Zealand" in row 1): tokens 2-3 form the
# name, so GDP and SPI sit one position further right.
datadf88_sub_2 <- strsplit(
  str_replace_all(str_squish(data1[c(1, 13, 16, 23, 25)]), ",", ""),
  split = " "
) %>%
  plyr::ldply() %>%
  mutate(Country = paste(V2, V3)) %>%
  select(Country, GDP = V4, SPI = V5)

# Three-word country names: tokens 2-4 form the name, so GDP and SPI sit two
# positions further right.
datadf88_sub_3 <- strsplit(
  str_replace_all(str_squish(data1[c(28, 37)]), ",", ""),
  split = " "
) %>%
  plyr::ldply() %>%
  mutate(Country = paste(V2, V3, V4)) %>%
  select(Country, GDP = V5, SPI = V6)

# Stack the three pieces, drop the "$" prefix from GDP and convert both
# measures from text to numbers.
df88 <- rbind(datadf88_sub, datadf88_sub_2, datadf88_sub_3) %>%
  mutate(
    GDP = as.numeric(str_remove(GDP, "[$]")),
    SPI = as.numeric(SPI)
  )

# Next page: page 90, handled with the same steps as page 88.
datatext90 <- read_lines(datatext[[90]])
data2 <- datatext90[seq(7, 50)]

# Cleaned copy of all 44 rows (whitespace collapsed, commas removed).
all_data2_lines <- str_replace_all(str_squish(data2[1:44]), ",", "")

# One-word country names: every row except the multi-word ones handled below.
all_data2_lines_sub <- strsplit(
  str_replace_all(
    str_squish(data2[-c(3, 17, 19, 21, 24, 25, 41)]),
    ",", ""
  ),
  split = " "
)
datadf90_sub <- all_data2_lines_sub %>%
  plyr::ldply() %>%
  select(Country = V2, GDP = V3, SPI = V4)

# Two-word country names: tokens 2-3 form the name.
datadf90_sub_2 <- strsplit(
  str_replace_all(str_squish(data2[c(19, 21, 24, 25, 41)]), ",", ""),
  split = " "
) %>%
  plyr::ldply() %>%
  mutate(Country = paste(V2, V3)) %>%
  select(Country, GDP = V4, SPI = V5)

# Three-word country names: tokens 2-4 form the name.
datadf90_sub_3 <- strsplit(
  str_replace_all(str_squish(data2[c(3, 17)]), ",", ""),
  split = " "
) %>%
  plyr::ldply() %>%
  mutate(Country = paste(V2, V3, V4)) %>%
  select(Country, GDP = V5, SPI = V6)

# Stack the pieces, strip the "$" prefix and convert both measures to numbers.
df90 <- rbind(datadf90_sub, datadf90_sub_2, datadf90_sub_3) %>%
  mutate(
    GDP = as.numeric(str_remove(GDP, "[$]")),
    SPI = as.numeric(SPI)
  )

# The last page: page 92, handled with the same steps as the previous pages.
datatext92 <- read_lines(datatext[[92]])
data3 <- datatext92[seq(7, 50)]

# Cleaned copy of all 44 rows (whitespace collapsed, commas removed).
all_data3_lines <- str_replace_all(str_squish(data3[1:44]), ",", "")

# One-word country names: every row except the multi-word ones handled below.
all_data3_lines_sub <- strsplit(
  str_replace_all(str_squish(data3[-c(22, 24, 43)]), ",", ""),
  split = " "
)
datadf92_sub <- all_data3_lines_sub %>%
  plyr::ldply() %>%
  select(Country = V2, GDP = V3, SPI = V4)

# The single two-word country name on this page: tokens 2-3 form the name.
datadf92_sub_2 <- strsplit(
  str_replace_all(str_squish(data3[24]), ",", ""),
  split = " "
) %>%
  plyr::ldply() %>%
  mutate(Country = paste(V2, V3)) %>%
  select(Country, GDP = V4, SPI = V5)

# Three-word country names: tokens 2-4 form the name.
datadf92_sub_3 <- strsplit(
  str_replace_all(str_squish(data3[c(22, 43)]), ",", ""),
  split = " "
) %>%
  plyr::ldply() %>%
  mutate(Country = paste(V2, V3, V4)) %>%
  select(Country, GDP = V5, SPI = V6)

# Stack the pieces, strip the "$" prefix and convert both measures to numbers.
df92 <- rbind(datadf92_sub, datadf92_sub_2, datadf92_sub_3) %>%
  mutate(
    GDP = as.numeric(str_remove(GDP, "[$]")),
    SPI = as.numeric(SPI)
  )

# Combine all data: stack the three pages, sort by SPI (highest first),
# normalise a few country names and flag the countries to emphasise.

# Countries drawn in the highlight colour; the chart labels this same set.
highlight_countries <- c(
  "Costa Rica", "Brazil", "China", "Chad", "Angola",
  "India", "Iran", "Philippines", "Jamaica", "Russia",
  "Greece", "Italy", "Israel", "Saudi Arabia", "France",
  "Korea", "Japan", "Britain", "Germany", "Canada",
  "United Arab Emirates", "Kuwait", "Switzerland", "Norway",
  "United States"
)

data <- rbind(df88, df90, df92) %>%
  arrange(desc(SPI)) %>%
  mutate(
    Country = case_when(
      Country == "United Kingdom" ~ "Britain",
      # NOTE(review): three-token names are pasted together after commas are
      # stripped, so a row printed as "Korea, Republic of" would arrive here
      # as "Korea Republic of" and never match "Korea" below. Presumably that
      # is how the report lists it - confirm against the PDF. This branch is
      # a no-op if that spelling does not occur.
      Country == "Korea Republic of" ~ "Korea",
      TRUE ~ Country
    ),
    # %in% tests membership in one step and never yields NA, unlike a chain
    # of `==` comparisons joined with `|`.
    # Factor levels sort alphabetically ("blue", then "navy"), which is the
    # order the manual fill scale's colour values are matched against.
    Col_Country = as.factor(
      ifelse(Country %in% highlight_countries, "navy", "blue")
    )
  )

Data Visualization

# Empty canvas: GDP per person on the x axis, SPI score on the y axis.
p <- ggplot(data = data, mapping = aes(x = GDP, y = SPI))
p

# Add the points on top of the Highcharts-style theme. Shape 21 takes a
# separate outline colour and fill, so the fill can follow Col_Country while
# the outline stays black. `labels = NULL` suppresses the legend key labels.
p2 <- p +
  theme_hc() +
  geom_point(
    mapping = aes(fill = Col_Country),
    shape = 21,
    size = 2.7,
    colour = "black"
  ) +
  scale_fill_manual(labels = NULL, values = c("#86d7f2", "#00485d"))
p2

# Overlay a red loess trend line of SPI against GDP, without the
# confidence band (se = FALSE).
p3 <- p2 +
  geom_smooth(
    formula = y ~ x,
    method = "loess",
    span = 1.7,
    se = FALSE,
    colour = "#bf1912",
    size = 1.5
  )
p3

# Titles, axis ranges and text styling. The legend is hidden because the
# highlighted countries are identified by direct labels instead.
p4 <- p3 +
  labs(
    title = "Measuring development",
    subtitle = "Social progress index and GDP per person",
    x = "GDP per person, 2012, $ at PPP*",
    y = "Social progress index, 2014",
    caption = "*Purchasing-Power Parity, 2005 prices"
  ) +
  scale_y_continuous(
    limits = c(20, 90),
    breaks = seq(20, 90, by = 10)
  ) +
  scale_x_continuous(
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = comma
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 11, face = "bold", hjust = 0.0, color = "black"
    ),
    plot.subtitle = element_text(size = 9, hjust = 0.0, color = "black"),
    plot.caption = element_text(size = 8, color = "black"),
    axis.title = element_text(size = 8, face = "italic")
  )
p4

# Countries that receive a text label on the chart.
labelpoin <- c(
  "Costa Rica", "Brazil", "China", "Chad", "Angola",
  "India", "Iran", "Philippines", "Jamaica", "Russia",
  "Greece", "Italy", "Israel", "Saudi Arabia", "France",
  "Korea", "Japan", "Britain", "Germany", "Canada",
  "United Arab Emirates", "Kuwait", "Switzerland", "Norway",
  "United States"
)

# Seed the RNG before building the label layer - presumably so the repelled
# label positions come out the same on every run.
set.seed(2012)

# Label only the selected countries; the repel layer pushes labels apart.
p5 <- p4 +
  geom_text_repel(
    data = data[data$Country %in% labelpoin, ],
    mapping = aes(label = Country),
    colour = "black",
    force = 30
  )
p5

Add multiple captions

# Add a second, left-aligned caption next to the one set through labs().
# The plot is converted to its grob table so the caption grob can be edited.
p6 <- ggplotGrob(p5)
# Position of the layout entry named "caption" within the grob table.
k <- which(p6$layout$name=="caption")
# Copy the first child of the caption grob to use as a template
# (presumably the text grob that renders the existing caption - TODO confirm).
grbTxt <- p6$grobs[[k]]$children[[1]]

# Turn the copy into the source line: new text, a new name so it does not
# share the template's name, anchored at the left edge (x = 0 npc, hjust = 0)
# and drawn in black.
grbTxt$label <- "Source: Social Progress Imperative"
grbTxt$name <- "GRID.text.left"
grbTxt$x <- unit(0,"npc")
grbTxt$hjust <- 0
grbTxt$gp$col <- "black"

# Attach the modified copy as an additional child of the caption grob, then
# draw the edited grob table.
p6$grobs[[k]] <- addGrob(p6$grobs[[k]],grbTxt)
grid.draw(p6)