# Display plots at 6 x 6 via the repr.plot.* options
options(
  repr.plot.width = 6,
  repr.plot.height = 6
)

# Loading packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Read the life expectancy table from UNdata.csv (path is relative to the
# working directory)
life_expectancy <- read.csv(file = "UNdata.csv")

# Preview the first six rows
head(life_expectancy, n = 6)
##   Country.or.Area Subgroup      Year
## 1     Afghanistan   Female 2000-2005
## 2     Afghanistan   Female 1995-2000
## 3     Afghanistan   Female 1990-1995
## 4     Afghanistan   Female 1985-1990
## 5     Afghanistan     Male 2000-2005
## 6     Afghanistan     Male 1995-2000
##                                                          Source  Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
## 4 UNPD_World Population Prospects_2006 (International estimate) Years
## 5 UNPD_World Population Prospects_2006 (International estimate) Years
## 6 UNPD_World Population Prospects_2006 (International estimate) Years
##   Value Value.Footnotes
## 1    42              NA
## 2    42              NA
## 3    42              NA
## 4    41              NA
## 5    42              NA
## 6    42              NA
# Keep only the 2000-2005 period, then reshape so that each Subgroup value
# (Female, Male) becomes its own column holding Value, one row per country
subdata <- life_expectancy %>%
  filter(Year == "2000-2005") %>%
  select(Country.or.Area, Subgroup, Value) %>%
  spread(key = Subgroup, value = Value)
# Preview the first six rows
head(subdata, n = 6)
##   Country.or.Area Female Male
## 1     Afghanistan     42   42
## 2         Albania     79   73
## 3         Algeria     72   70
## 4          Angola     43   39
## 5       Argentina     78   71
## 6         Armenia     75   68
# Scatter plot of male (x axis) against female (y axis) life expectancy
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point()

# Same scatter plot, now with a y = x reference line (linetype 2) and both
# axes restricted to the range 35-85
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(35, 85)) +
  scale_y_continuous(limits = c(35, 85))

# Styled version of the previous plot: filled semi-transparent points plus
# title, subtitle, caption and axis labels
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point(
    shape = 21,
    size = 5,
    alpha = 0.55,
    colour = "white",
    fill = "chartreuse3"
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(35, 85)) +
  scale_y_continuous(limits = c(35, 85)) +
  labs(
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Period: 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division",
    x = "Males",
    y = "Females"
  )

# Subsetting data to obtain countries of interest.
# top_male:   the three rows with the smallest Male - Female value
# top_female: the three rows with the smallest Female - Male value
top_male <- subdata %>%
  arrange(Male - Female) %>%
  head(n = 3)
top_female <- subdata %>%
  arrange(Female - Male) %>%
  head(n = 3)

# Previous plot with country names drawn for the rows in top_male and
# top_female; the label aesthetic is set globally and only the geom_text
# layers (each with its own data subset) use it
ggplot(
  data = subdata,
  mapping = aes(x = Male, y = Female, label = Country.or.Area)
) +
  geom_point(
    shape = 21,
    size = 5,
    alpha = 0.55,
    colour = "white",
    fill = "chartreuse3"
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(35, 85)) +
  scale_y_continuous(limits = c(35, 85)) +
  labs(
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Period: 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division",
    x = "Males",
    y = "Females"
  ) +
  geom_text(data = top_male, size = 3) +
  geom_text(data = top_female, size = 3) +
  theme_bw()

# Show top_male: the three rows of subdata with the smallest Male - Female
top_male
##      Country.or.Area Female Male
## 1 Russian Federation     72   58
## 2            Belarus     75   63
## 3            Estonia     77   65
# Show top_female: the three rows of subdata with the smallest Female - Male
top_female
##   Country.or.Area Female Male
## 1           Niger     54   55
## 2     Afghanistan     42   42
## 3        Maldives     66   66
# Build one row per country with the change in life expectancy between the
# 1985-1990 and 2000-2005 periods, separately for each subgroup.
# Sub_Year combines Subgroup and Year (e.g. "Female_2000_2005") so that
# spread() can turn each subgroup/period pair into its own column.
# NOTE(review): every column other than Subgroup and Year is kept, so Source,
# Unit and Value.Footnotes remain in the reshaped result alongside the country.
subdata2 <- life_expectancy %>%
  filter(Year %in% c("1985-1990", "2000-2005")) %>%
  mutate(Sub_Year = gsub("-", "_", paste(Subgroup, Year, sep = "_"))) %>%
  select(-Subgroup, -Year) %>%
  spread(key = Sub_Year, value = Value) %>%
  mutate(
    diff_Female = Female_2000_2005 - Female_1985_1990,
    diff_Male = Male_2000_2005 - Male_1985_1990
  )

# Preview the first six rows
head(subdata2, n = 6)
##   Country.or.Area
## 1     Afghanistan
## 2         Albania
## 3         Algeria
## 4          Angola
## 5       Argentina
## 6         Armenia
##                                                          Source  Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
## 4 UNPD_World Population Prospects_2006 (International estimate) Years
## 5 UNPD_World Population Prospects_2006 (International estimate) Years
## 6 UNPD_World Population Prospects_2006 (International estimate) Years
##   Value.Footnotes Female_1985_1990 Female_2000_2005 Male_1985_1990
## 1              NA               41               42             41
## 2              NA               75               79             69
## 3              NA               67               72             65
## 4              NA               42               43             38
## 5              NA               75               78             68
## 6              NA               71               75             66
##   Male_2000_2005 diff_Female diff_Male
## 1             42           1         1
## 2             73           4         4
## 3             70           5         5
## 4             39           1         1
## 5             71           3         3
## 6             68           4         2
# First version of the change plot: diff_Male (x) against diff_Female (y),
# i.e. the 2000-2005 value minus the 1985-1990 value for each subgroup, with a
# y = x reference line, axes limited to -25..25, and labels
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  geom_point(
    shape = 21,
    size = 5,
    alpha = 0.55,
    colour = "white",
    fill = "chartreuse3"
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  labs(
    title = "Life Expectancy at Birth by Country in Years",
    subtitle = "Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division",
    x = "Males",
    y = "Females"
  ) +
  theme_bw()

# Change plot with additional horizontal and vertical reference lines through
# zero, separating increases from decreases on each axis
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  geom_point(
    shape = 21,
    size = 5,
    alpha = 0.55,
    colour = "white",
    fill = "chartreuse3"
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division",
    x = "Males",
    y = "Females"
  ) +
  theme_bw()

# Subsetting data to obtain countries of interest.
# top:    the three rows with the smallest diff_Male + diff_Female
# bottom: the three rows with the largest diff_Male + diff_Female
top <- subdata2 %>%
  arrange(diff_Male + diff_Female) %>%
  head(n = 3)
bottom <- subdata2 %>%
  arrange(-(diff_Male + diff_Female)) %>%
  head(n = 3)

# Change plot with country names drawn for the rows in `top` and `bottom`.
# The label aesthetic is set globally; only the geom_text layers (each given
# its own data subset) draw it.
# The stray `guide` argument previously passed to ggplot() was dropped:
# ggplot() has no such parameter, so it had no effect on the plot.
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  geom_point(
    colour = "white", fill = "chartreuse3",
    shape = 21, alpha = 0.55, size = 5
  ) +
  geom_abline(intercept = 0, slope = 1, linetype = 2) +
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division",
    x = "Males",
    y = "Females"
  ) +
  geom_text(data = top, size = 3) +
  geom_text(data = bottom, size = 3) +
  theme_bw()

# Show top: the three rows of subdata2 with the smallest diff_Male + diff_Female
top
##   Country.or.Area
## 1        Zimbabwe
## 2        Botswana
## 3       Swaziland
##                                                          Source  Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
##   Value.Footnotes Female_1985_1990 Female_2000_2005 Male_1985_1990
## 1              NA               64               40             60
## 2              NA               66               47             61
## 3              NA               60               45             56
##   Male_2000_2005 diff_Female diff_Male
## 1             40         -24       -20
## 2             46         -19       -15
## 3             43         -15       -13
# Show bottom: the three rows of subdata2 with the largest diff_Male + diff_Female
bottom
##   Country.or.Area
## 1     Timor Leste
## 2          Bhutan
## 3           Egypt
##                                                          Source  Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
##   Value.Footnotes Female_1985_1990 Female_2000_2005 Male_1985_1990
## 1              NA               44               59             43
## 2              NA               52               65             49
## 3              NA               61               72             59
##   Male_2000_2005 diff_Female diff_Male
## 1             58          15        15
## 2             62          13        13
## 3             68          11         9