The Economist Version
Image:
Import Libraries
# One-time setup, if pdftools is not installed yet:
# install.packages("pdftools")

# Packages this analysis relies on, attached in the order listed.
pkg <- c("tidyverse", "pdftools", "ggthemes", "ggpubr", "tm", "grid", "ggrepel", "scales")
# library() is called here only for its side effect of attaching each package.
# lapply() always returns a list (sapply() would try to simplify the result),
# and invisible() keeps that return value out of the printed output.
invisible(lapply(pkg, library, character.only = TRUE))
Data Pre-Processing
# To fetch the report before the first run:
# download.file("http://www.starsfoundation.org.uk/sites/default/files/downloads/Social%20Progress%20Index%202014%20Report.pdf", "Social Progress Index 2014 Report.pdf")

# Extract the text of the report from the local PDF file.
datatext <- pdf_text(pdf = "Social Progress Index 2014 Report.pdf")
# Take element 88 of the extracted text and break it into individual lines.
datatext88 <- read_lines(datatext[[88]])
# Preview the first lines to see where the table header ends.
head(datatext88)
## [1] " APPENDIX 1 / SOCIAL PROGRESS INDEX 2014 FULL RESULTS"
## [2] " Nutrition and"
## [3] " GDP per capita- Social Progress Basic Human Foundations of Water and"
## [4] " Rank Country Opportunity Basic Medical Shelter Pe"
## [5] " 2005 constant $ Index Needs Wellbeing Sanitation"
## [6] " Care"
# The first six lines of the page are the title and column headers, so the
# table rows are taken from line 7 through line 50 (44 rows).
data1 <- datatext88[seq(7, 50)]
head(data1, n = 6)
## [1] " 1 New Zealand $25,857 88.24 91.74 84.97 88.01 97.57 100.00 83.23"
## [2] " 2 Switzerland $39,293 88.19 94.87 89.78 79.92 98.33 99.92 88.48"
## [3] " 3 Iceland $33,880 88.07 94.32 88.19 81.71 98.78 100.00 85.05"
## [4] " 4 Netherlands $36,438 87.37 93.91 87.56 80.63 98.16 100.00 88.92"
## [5] " 5 Norway $47,547 87.12 93.59 86.94 80.82 98.71 100.00 82.93"
## [6] " 6 Sweden $34,945 87.08 94.59 84.71 81.95 98.26 100.00 86.74"
# Normalise the 44 table rows: collapse runs of whitespace first, then strip
# commas so that "$25,857" becomes "$25857".
all_data1_lines <- str_replace_all(str_squish(data1[1:44]), ",", "")
head(all_data1_lines)
## [1] "1 New Zealand $25857 88.24 91.74 84.97 88.01 97.57 100.00 83.23"
## [2] "2 Switzerland $39293 88.19 94.87 89.78 79.92 98.33 99.92 88.48"
## [3] "3 Iceland $33880 88.07 94.32 88.19 81.71 98.78 100.00 85.05"
## [4] "4 Netherlands $36438 87.37 93.91 87.56 80.63 98.16 100.00 88.92"
## [5] "5 Norway $47547 87.12 93.59 86.94 80.82 98.71 100.00 82.93"
## [6] "6 Sweden $34945 87.08 94.59 84.71 81.95 98.26 100.00 86.74"
# Keep every row except 1, 13, 16, 23, 25, 28 and 37 (rows whose country name
# spans more than one word, e.g. row 1 "New Zealand"), clean them as above,
# and split each remaining row into its space-separated fields.
all_data1_lines_sub <- strsplit(
  str_replace_all(
    str_squish(data1[setdiff(seq_along(data1), c(1, 13, 16, 23, 25, 28, 37))]),
    ",",
    ""
  ),
  split = " "
)
head(all_data1_lines_sub)
## [[1]]
## [1] "2" "Switzerland" "$39293" "88.19" "94.87"
## [6] "89.78" "79.92" "98.33" "99.92" "88.48"
##
## [[2]]
## [1] "3" "Iceland" "$33880" "88.07" "94.32" "88.19" "81.71"
## [8] "98.78" "100.00" "85.05"
##
## [[3]]
## [1] "4" "Netherlands" "$36438" "87.37" "93.91"
## [6] "87.56" "80.63" "98.16" "100.00" "88.92"
##
## [[4]]
## [1] "5" "Norway" "$47547" "87.12" "93.59" "86.94" "80.82"
## [8] "98.71" "100.00" "82.93"
##
## [[5]]
## [1] "6" "Sweden" "$34945" "87.08" "94.59" "84.71" "81.95"
## [8] "98.26" "100.00" "86.74"
##
## [[6]]
## [1] "7" "Canada" "$35936" "86.95" "93.52" "80.31" "87.02"
## [8] "98.10" "95.76" "88.64"
# Turn raw table rows of the form "<rank> <country> $<gdp> <spi> ..." into a
# data frame with one row per country.
#
# rows: character vector of table lines (e.g. a slice of read_lines() output).
#
# Returns a data.frame with columns Country (character), GDP (numeric) and
# SPI (numeric). Rows that do not match the expected layout (including NA
# entries) are dropped, and a warning reports how many were skipped.
parse_spi_rows <- function(rows) {
  # Same clean-up order as the rest of the script: squish whitespace, then
  # remove the thousands separators.
  cleaned <- rows %>%
    str_squish() %>%
    str_replace_all(",", "")

  # Skip the rank, then take everything up to the "$"-prefixed GDP field as
  # the country name. Anchoring on "$" handles names of any word count, so no
  # per-page list of multi-word rows has to be maintained by hand.
  fields <- str_match(cleaned, "^\\S+ (.+?) \\$(\\S+) (\\S+)")

  matched <- !is.na(fields[, 1])
  if (!all(matched)) {
    warning(
      sum(!matched), " table row(s) could not be parsed and were dropped",
      call. = FALSE
    )
  }

  data.frame(
    Country = fields[matched, 2],
    GDP = as.numeric(fields[matched, 3]),
    SPI = as.numeric(fields[matched, 4]),
    stringsAsFactors = FALSE
  )
}

# Page 88: rows already extracted into data1.
df88 <- parse_spi_rows(data1)

# Next page: same layout, so the table rows are again lines 7 to 50.
datatext90 <- datatext[[90]] %>%
  read_lines()
data2 <- datatext90[7:50]
df90 <- parse_spi_rows(data2)

# The last page.
datatext92 <- datatext[[92]] %>%
  read_lines()
data3 <- datatext92[7:50]
df92 <- parse_spi_rows(data3)
# Combine all data: stack the three pages into a single table.
data <- rbind(df88, df90, df92)

# Countries that get the "navy" colour flag below.
highlight_countries <- c(
  "Costa Rica", "Brazil", "China", "Chad", "Angola",
  "India", "Iran", "Philippines", "Jamaica", "Russia",
  "Greece", "Italy", "Israel", "Saudi Arabia", "France",
  "Korea", "Japan", "Britain", "Germany", "Canada",
  "United Arab Emirates", "Kuwait", "Switzerland", "Norway", "United States"
)

data <- data %>%
  # Highest SPI first.
  arrange(desc(SPI)) %>%
  # The United Kingdom is referred to as "Britain" from here on.
  mutate(Country = ifelse(Country == "United Kingdom", "Britain", Country)) %>%
  # Flag highlighted countries as "navy" and all others as "blue". %in%
  # replaces a long chain of == comparisons and never yields NA.
  mutate(Col_Country = ifelse(Country %in% highlight_countries, "navy", "blue")) %>%
  mutate(Col_Country = as.factor(Col_Country))

# A highlighted name that does not appear verbatim in the parsed table would
# silently lose its highlight, so report any such names instead of hiding them.
missing_highlights <- setdiff(highlight_countries, data$Country)
if (length(missing_highlights) > 0) {
  warning(
    "Highlighted countries not found in the data: ",
    paste(missing_highlights, collapse = ", "),
    call. = FALSE
  )
}
Data Visualization
# Base plot: GDP on the x axis against the SPI score on the y axis.
p <- ggplot(data = data, mapping = aes(x = GDP, y = SPI))
p

# Add the points as filled circles (shape 21) with a black outline. The fill
# follows the Col_Country factor and uses two manually chosen blues, with the
# legend labels suppressed.
p2 <- p +
  theme_hc() +
  geom_point(
    mapping = aes(fill = Col_Country),
    shape = 21,
    size = 2.7,
    colour = "black"
  ) +
  scale_fill_manual(labels = NULL, values = c("#86d7f2", "#00485d"))
p2

# Overlay a red loess trend line (wide span, no confidence band).
p3 <- p2 +
  geom_smooth(
    formula = y ~ x,
    method = "loess",
    span = 1.7,
    se = FALSE,
    colour = "#bf1912",
    size = 1.5
  )
p3

# Titles, axis labels and caption, fixed axis ranges with regular breaks,
# and text styling; the legend is hidden.
p4 <- p3 +
  labs(
    x = "GDP per person, 2012, $ at PPP*",
    y = "Social progress index, 2014",
    title = "Measuring development",
    subtitle = "Social progress index and GDP per person",
    caption = "*Purchasing-Power Parity, 2005 prices"
  ) +
  scale_x_continuous(
    limits = c(0, 50000),
    breaks = seq(0, 50000, by = 10000),
    labels = comma
  ) +
  scale_y_continuous(
    limits = c(20, 90),
    breaks = seq(20, 90, by = 10)
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 11, hjust = 0, colour = "black"),
    plot.subtitle = element_text(size = 9, hjust = 0, colour = "black"),
    plot.caption = element_text(size = 8, colour = "black"),
    axis.title = element_text(face = "italic", size = 8)
  )
p4

# Countries whose points receive a text label.
labelpoin <- c(
  "Costa Rica", "Brazil", "China", "Chad", "Angola",
  "India", "Iran", "Philippines", "Jamaica", "Russia",
  "Greece", "Italy", "Israel", "Saudi Arabia", "France",
  "Korea", "Japan", "Britain", "Germany", "Canada", "United Arab Emirates",
  "Kuwait", "Switzerland", "Norway", "United States"
)
# Fix the random seed before adding the repelled labels.
set.seed(2012)
# Label only the selected countries, letting ggrepel push labels apart.
p5 <- p4 +
  geom_text_repel(
    data = data[data$Country %in% labelpoin, ],
    mapping = aes(label = Country),
    color = "black",
    force = 30
  )
p5

Add Multiple Captions
# Add a second caption by editing the plot's grobs directly.
# Convert the finished plot into a grob table so its parts can be modified with grid.
p6 <- ggplotGrob(p5)
# Position of the layout entry named "caption".
k <- which(p6$layout$name=="caption")
# Copy the first child of the caption grob to use as a template for the second caption.
grbTxt <- p6$grobs[[k]]$children[[1]]
# Give the copy its own text.
grbTxt$label <- "Source: Social Progress Imperative"
# Rename the copy so it no longer shares the template's name
# (presumably so addGrob() adds it rather than replacing the original child; confirm against the grid docs).
grbTxt$name <- "GRID.text.left"
# Anchor the copy at the left edge: x = 0 npc with hjust = 0.
grbTxt$x <- unit(0,"npc")
grbTxt$hjust <- 0
grbTxt$gp$col <- "black"
# Attach the copy to the caption grob, then draw the modified plot.
p6$grobs[[k]] <- addGrob(p6$grobs[[k]],grbTxt)
grid.draw(p6)
