This dataset contains data for 184 countries from 1960 to 2016, but
this visualization plots only life expectancy against population for the
year 2010, with continent and fertility shown as point color and size.
# Import necessary packages
library(dslabs) # gapminder data set
library(dplyr) # need dplyr for filter and mutate
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Build the data frame used for plotting in a single pipeline:
#   1. keep only the rows for the year 2010
#   2. add pop_mil, the population expressed in millions
#   3. add log_pop_mil, the natural log of pop_mil
gapminder_newpop <- gapminder %>%
  filter(year == 2010) %>%
  mutate(
    pop_mil = population / 1e6,
    log_pop_mil = log(pop_mil)
  )
# Locate the row indices of the two unusual points that get a text label in
# the plot. Each point is identified by its continent plus value thresholds.
# Haiti: the Americas row with life expectancy below 35
ind_1 <- with(
  gapminder_newpop,
  which(continent == "Americas" & life_expectancy < 35)
)
# India: the Asia row with log population above 7 and life expectancy below 70
ind_2 <- with(
  gapminder_newpop,
  which(continent == "Asia" & log_pop_mil > 7 & life_expectancy < 70)
)
# Scatter plot of life expectancy against the natural log of population
# (in millions) for the rows of gapminder_newpop. Points are colored by
# continent and sized by fertility. The top axis repeats the x breaks
# back-transformed to population in millions; the right axis repeats the
# y tick marks without labels.

# Tick positions shared by each primary axis and its secondary axis
y_breaks <- seq(30, 90, by = 10)
x_breaks <- seq(-3, 7.5, by = 1.5)

# Rows of the two points that receive a text label
haiti_row <- gapminder_newpop[ind_1, ]
india_row <- gapminder_newpop[ind_2, ]

ggplot(
  gapminder_newpop,
  aes(
    x = log_pop_mil,
    y = life_expectancy,
    color = continent,
    size = fertility
  )
) +
  geom_point() +
  scale_y_continuous(
    limits = c(30, 90),
    breaks = y_breaks,
    sec.axis = sec_axis(
      transform = "identity",
      breaks = y_breaks,
      labels = NULL
    )
  ) +
  scale_x_continuous(
    limits = c(-3, 7.5),
    breaks = x_breaks,
    sec.axis = sec_axis(
      transform = "identity",
      name = "Population (in millions)",
      breaks = x_breaks,
      # Two significant digits rather than one decimal place, so that the
      # smallest break, exp(-3) (about 0.05), is not displayed as 0
      labels = signif(exp(x_breaks), 2)
    )
  ) +
  labs(
    title = "Life Expectancy vs. Natural Log of Population (millions)",
    x = "Natural log of population (in millions)",
    y = "Life Expectancy",
    subtitle = "Fertility is average number of children per woman"
  ) +
  theme(
    plot.title = element_text(
      color = "blue", face = "bold", vjust = 2, size = 22
    ),
    axis.title.x = element_text(vjust = -0.6, size = 15, face = "bold"),
    axis.text.x = element_text(size = 15, vjust = -0.6, face = "bold"),
    axis.text.x.top = element_text(size = 15, vjust = 0.8, face = "bold"),
    axis.title.x.top = element_text(vjust = 2, size = 15, face = "bold"),
    axis.title.y = element_text(vjust = 1.3, size = 15, face = "bold"),
    axis.text.y = element_text(size = 15, face = "bold"),
    axis.ticks.length = unit(0.2, "cm"),
    plot.subtitle = element_text(
      color = "blue", face = "italic", vjust = 4, size = 18
    ),
    plot.background = element_rect(fill = "beige"),
    panel.background = element_rect(fill = "gray96", color = "black"),
    panel.grid = element_line(
      color = "gray80", linetype = 2, linewidth = 0.5
    ),
    panel.border = element_rect(
      fill = "transparent", color = "black", linewidth = 2
    )
  ) +
  # Label the two "unusual" points; the offsets move the text off the marker
  # Haiti label: to the right of and slightly above the point
  annotate(
    "text",
    x = haiti_row$log_pop_mil + 0.6,
    y = haiti_row$life_expectancy + 0.5,
    label = as.character(haiti_row$country),
    color = "goldenrod4",
    size = 6
  ) +
  # India label: below the point
  annotate(
    "text",
    x = india_row$log_pop_mil + 0.1,
    y = india_row$life_expectancy - 3,
    label = as.character(india_row$country),
    color = "darkgreen",
    size = 6
  )
