This code plots life expectancy against the natural log of population (in millions) for the year 2010, using the gapminder dataset from the dslabs package. A secondary x-axis along the top of the plot shows the corresponding untransformed population in millions.

This dataset contains data for 184 countries from 1960 to 2016, but this visualization only uses the year 2010. Life expectancy and population are plotted on the axes, while point color shows the continent and point size shows fertility.

# Import necessary packages
library(dslabs)  # gapminder data set
library(dplyr)  # need dplyr for filter and mutate
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Build the plotting data in a single pipeline:
#   1. keep only the rows for the year 2010
#   2. add pop_mil, the population expressed in millions
#   3. add log_pop_mil, the natural log of pop_mil
gapminder_newpop <- gapminder %>%
  filter(year == 2010) %>%
  mutate(
    pop_mil = population / 1e6,
    log_pop_mil = log(pop_mil)
  )

# Row indices of the two points considered unusual; both get a text label in
# the plot. which() drops rows where the condition evaluates to NA.

# Haiti: Americas, life expectancy below 35
ind_1 <- with(
  gapminder_newpop,
  which(continent == "Americas" & life_expectancy < 35)
)

# India: Asia, log population (millions) above 7, life expectancy below 70
ind_2 <- with(
  gapminder_newpop,
  which(continent == "Asia" & log_pop_mil > 7 & life_expectancy < 70)
)

# Scatter plot of life expectancy against the natural log of population
# (in millions) for 2010. Point color encodes continent and point size encodes
# fertility. The top x-axis repeats the bottom axis breaks but labels them with
# the untransformed population in millions; the right y-axis repeats the tick
# marks without labels.

# Axis break positions, defined once so that the primary and secondary axes
# cannot drift apart.
life_exp_breaks <- seq(30, 90, by = 10)
log_pop_breaks <- seq(-3, 7.5, by = 1.5)

# Top-axis labels: back-transform the log breaks to millions. Two significant
# figures are used instead of rounding to one decimal place, because
# round(exp(-3), 1) is 0 and would label the left edge as a population of zero.
pop_mil_labels <- signif(exp(log_pop_breaks), 2)

ggplot(
  gapminder_newpop,
  aes(
    x = log_pop_mil,
    y = life_expectancy,
    color = continent,
    size = fertility
  )
) +
  geom_point() +
  scale_y_continuous(
    limits = c(30, 90),
    breaks = life_exp_breaks,
    # Unlabeled mirror of the left axis ticks on the right-hand side
    sec.axis = sec_axis(
      transform = ~ .,
      breaks = life_exp_breaks,
      labels = NULL
    )
  ) +
  scale_x_continuous(
    limits = c(-3, 7.5),
    breaks = log_pop_breaks,
    # Same positions as the bottom axis, relabeled in millions
    sec.axis = sec_axis(
      transform = ~ .,
      name = "Population (in millions)",
      breaks = log_pop_breaks,
      labels = pop_mil_labels
    )
  ) +
  labs(
    title = "Life Expectancy vs. Natural Log of Population (millions)",
    x = "Natural log of population (in millions)",
    y = "Life Expectancy",
    subtitle = "Fertility is average number of children per woman"
  ) +
  theme(
    plot.title = element_text(
      color = "blue", face = "bold", vjust = 2, size = 22
    ),
    axis.title.x = element_text(vjust = -0.6, size = 15, face = "bold"),
    axis.text.x = element_text(size = 15, vjust = -0.6, face = "bold"),
    axis.text.x.top = element_text(size = 15, vjust = 0.8, face = "bold"),
    axis.title.x.top = element_text(vjust = 2, size = 15, face = "bold"),
    axis.title.y = element_text(vjust = 1.3, size = 15, face = "bold"),
    axis.text.y = element_text(size = 15, face = "bold"),
    axis.ticks.length = unit(0.2, "cm"),
    plot.subtitle = element_text(
      color = "blue", face = "italic", vjust = 4, size = 18
    ),
    plot.background = element_rect(fill = "beige"),
    panel.background = element_rect(fill = "gray96", color = "black"),
    panel.grid = element_line(
      color = "gray80", linetype = 2, linewidth = 0.5
    ),
    panel.border = element_rect(
      fill = "transparent", color = "black", linewidth = 2
    )
  ) +
  # Label the two "unusual" points; offsets keep the text off the markers.
  # Haiti label
  annotate(
    "text",
    x = gapminder_newpop$log_pop_mil[ind_1] + 0.6,
    y = gapminder_newpop$life_expectancy[ind_1] + 0.5,
    label = as.character(gapminder_newpop$country[ind_1]),
    color = "goldenrod4",
    size = 6
  ) +
  # India label
  annotate(
    "text",
    x = gapminder_newpop$log_pop_mil[ind_2] + 0.1,
    y = gapminder_newpop$life_expectancy[ind_2] - 3,
    label = as.character(gapminder_newpop$country[ind_2]),
    color = "darkgreen",
    size = 6
  )