# Fix the rendered plot size at 6 x 6 through the repr.plot.* options
options(
  repr.plot.width = 6,
  repr.plot.height = 6
)
# Loading packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Read the UN life-expectancy table (path is relative to the working directory)
life_expectancy <- read.csv(file = "UNdata.csv")
# Preview the first six rows to check the column layout
head(life_expectancy, n = 6L)
## Country.or.Area Subgroup Year
## 1 Afghanistan Female 2000-2005
## 2 Afghanistan Female 1995-2000
## 3 Afghanistan Female 1990-1995
## 4 Afghanistan Female 1985-1990
## 5 Afghanistan Male 2000-2005
## 6 Afghanistan Male 1995-2000
## Source Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
## 4 UNPD_World Population Prospects_2006 (International estimate) Years
## 5 UNPD_World Population Prospects_2006 (International estimate) Years
## 6 UNPD_World Population Prospects_2006 (International estimate) Years
## Value Value.Footnotes
## 1 42 NA
## 2 42 NA
## 3 42 NA
## 4 41 NA
## 5 42 NA
## 6 42 NA
# Keep only the 2000-2005 period, drop everything except country, subgroup and
# value, then spread the Subgroup rows (Female / Male) into one column each so
# that every country ends up on a single row
subdata <- life_expectancy %>%
  filter(Year == "2000-2005") %>%
  select(Country.or.Area, Subgroup, Value) %>%
  spread(key = Subgroup, value = Value)
# Preview the reshaped table
head(subdata, n = 6L)
## Country.or.Area Female Male
## 1 Afghanistan 42 42
## 2 Albania 79 73
## 3 Algeria 72 70
## 4 Angola 43 39
## 5 Argentina 78 71
## 6 Armenia 75 68
# Basic scatter plot: male life expectancy on x, female life expectancy on y
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point()

# Same scatter, now with a y = x reference line (linetype 2) and both axes
# restricted to the 35-85 range so the two scales are directly comparable
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(35, 85)) +
  scale_y_continuous(limits = c(35, 85))

# Styled version of the scatter: filled semi-transparent markers, the y = x
# reference line, matching 35-85 axes, and title / subtitle / caption / axis
# labels
ggplot(data = subdata, mapping = aes(x = Male, y = Female)) +
  geom_point(
    colour = "white",
    fill = "chartreuse3",
    shape = 21,
    alpha = 0.55,
    size = 5
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(35, 85)) +
  scale_y_continuous(limits = c(35, 85)) +
  labs(
    x = "Males",
    y = "Females",
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Period: 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division"
  )

# Select the countries to label on the plot.
# NOTE: arrange() sorts ascending, so `top_male` holds the three rows with the
# smallest Male - Female value (the widest female lead), while `top_female`
# holds the three rows with the smallest Female - Male value (the narrowest
# female lead, or a male lead).
top_male <- subdata %>%
  arrange(Male - Female) %>%
  head(n = 3L)
top_female <- subdata %>%
  arrange(Female - Male) %>%
  head(n = 3L)
# Redraw the styled scatter and print the country name next to each selected
# point; the `label` aesthetic is only consumed by the geom_text() layers
ggplot(
  data = subdata,
  mapping = aes(x = Male, y = Female, label = Country.or.Area)
) +
  geom_point(
    colour = "white",
    fill = "chartreuse3",
    shape = 21,
    alpha = 0.55,
    size = 5
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(35, 85)) +
  scale_y_continuous(limits = c(35, 85)) +
  labs(
    x = "Males",
    y = "Females",
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Period: 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division"
  ) +
  geom_text(data = top_male, size = 3) +
  geom_text(data = top_female, size = 3) +
  theme_bw()

# Show the three rows with the smallest Male - Female difference
top_male
## Country.or.Area Female Male
## 1 Russian Federation 72 58
## 2 Belarus 75 63
## 3 Estonia 77 65
# Show the three rows with the smallest Female - Male difference
top_female
## Country.or.Area Female Male
## 1 Niger 54 55
## 2 Afghanistan 42 42
## 3 Maldives 66 66
# Build a one-row-per-country table comparing 1985-1990 with 2000-2005:
#   1. keep the two periods of interest;
#   2. combine subgroup and period into a single key such as
#      "Female_2000_2005" (hyphens replaced so the key is a syntactic name);
#   3. drop the columns that were merged into the key and spread the key into
#      one value column per subgroup/period pair;
#   4. compute the change in life expectancy between the periods for each sex.
subdata2 <- life_expectancy %>%
  filter(Year %in% c("1985-1990", "2000-2005")) %>%
  mutate(Sub_Year = gsub("-", "_", paste(Subgroup, Year, sep = "_"))) %>%
  select(-Subgroup, -Year) %>%
  spread(key = Sub_Year, value = Value) %>%
  mutate(
    diff_Female = Female_2000_2005 - Female_1985_1990,
    diff_Male = Male_2000_2005 - Male_1985_1990
  )
# Preview the result
head(subdata2, n = 6L)
## Country.or.Area
## 1 Afghanistan
## 2 Albania
## 3 Algeria
## 4 Angola
## 5 Argentina
## 6 Armenia
## Source Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
## 4 UNPD_World Population Prospects_2006 (International estimate) Years
## 5 UNPD_World Population Prospects_2006 (International estimate) Years
## 6 UNPD_World Population Prospects_2006 (International estimate) Years
## Value.Footnotes Female_1985_1990 Female_2000_2005 Male_1985_1990
## 1 NA 41 42 41
## 2 NA 75 79 69
## 3 NA 67 72 65
## 4 NA 42 43 38
## 5 NA 75 78 68
## 6 NA 71 75 66
## Male_2000_2005 diff_Female diff_Male
## 1 42 1 1
## 2 73 4 4
## 3 70 5 5
## 4 39 1 1
## 5 71 3 3
## 6 68 4 2
# First version of the change plot: x is the male change and y the female
# change in life expectancy (2000-2005 minus 1985-1990), with a y = x
# reference line and symmetric -25..25 axes
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  geom_point(
    colour = "white",
    fill = "chartreuse3",
    shape = 21,
    alpha = 0.55,
    size = 5
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  labs(
    x = "Males",
    y = "Females",
    title = "Life Expectancy at Birth by Country in Years",
    subtitle = "Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division"
  ) +
  theme_bw()

# Change plot with horizontal and vertical lines through zero added, which
# separate gains from losses for each sex
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  geom_point(
    colour = "white",
    fill = "chartreuse3",
    shape = 21,
    alpha = 0.55,
    size = 5
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = 2) +
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(
    x = "Males",
    y = "Females",
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division"
  ) +
  theme_bw()

# Subsetting data to obtain the countries of interest.
# NOTE: arrange() sorts ascending, so `top` holds the three rows with the
# smallest combined change (diff_Male + diff_Female), and `bottom` holds the
# three rows with the largest combined change.
top <- subdata2 %>%
  arrange(diff_Male + diff_Female) %>%
  head(n = 3L)
bottom <- subdata2 %>%
  arrange(desc(diff_Male + diff_Female)) %>%
  head(n = 3L)
# Redraw the change plot and print the country name next to each selected
# point. The `label` aesthetic is only consumed by the geom_text() layers.
# The former `guide = FALSE` argument to ggplot() was dropped: ggplot() has no
# such parameter, so it had no effect on the plot.
ggplot(
  data = subdata2,
  mapping = aes(x = diff_Male, y = diff_Female, label = Country.or.Area)
) +
  geom_point(
    colour = "white",
    fill = "chartreuse3",
    shape = 21,
    alpha = 0.55,
    size = 5
  ) +
  geom_abline(intercept = 0, slope = 1, linetype = 2) +
  scale_x_continuous(limits = c(-25, 25)) +
  scale_y_continuous(limits = c(-25, 25)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(
    title = "Life Expectancy at Birth by Country",
    subtitle = "Years. Difference between 1985-1990 and 2000-2005. Average.",
    caption = "Source: United Nations Statistics Division",
    x = "Males",
    y = "Females"
  ) +
  geom_text(data = top, size = 3) +
  geom_text(data = bottom, size = 3) +
  theme_bw()

# Show the three rows with the smallest diff_Male + diff_Female
top
## Country.or.Area
## 1 Zimbabwe
## 2 Botswana
## 3 Swaziland
## Source Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
## Value.Footnotes Female_1985_1990 Female_2000_2005 Male_1985_1990
## 1 NA 64 40 60
## 2 NA 66 47 61
## 3 NA 60 45 56
## Male_2000_2005 diff_Female diff_Male
## 1 40 -24 -20
## 2 46 -19 -15
## 3 43 -15 -13
# Show the three rows with the largest diff_Male + diff_Female
bottom
## Country.or.Area
## 1 Timor Leste
## 2 Bhutan
## 3 Egypt
## Source Unit
## 1 UNPD_World Population Prospects_2006 (International estimate) Years
## 2 UNPD_World Population Prospects_2006 (International estimate) Years
## 3 UNPD_World Population Prospects_2006 (International estimate) Years
## Value.Footnotes Female_1985_1990 Female_2000_2005 Male_1985_1990
## 1 NA 44 59 43
## 2 NA 52 65 49
## 3 NA 61 72 59
## Male_2000_2005 diff_Female diff_Male
## 1 58 15 15
## 2 62 13 13
## 3 68 11 9