R Markdown

Next, please replicate Hans Rosling’s visualization as closely as possible using R and ggplot. Note that his visualization is a video which covers many years of data. You only need to visualize a few years (pick at least three years that you would like to replicate). Obviously, it is difficult to replicate every single detail in the visualization. You should try to replicate as much as you can.

The data plotted come from the gapminder data set in the gapminder R package. The years I chose to plot are 2007, 1972, and 2002. Each plot has income (GDP per capita) on the x-axis and life expectancy on the y-axis. In addition, each point represents one country: its fill color indicates the country's continent, and its size scales with the country's population.

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidyr)
library(ggplot2)
library(ggthemes)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(AER)
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(Hmisc)
## Loading required package: lattice
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(zoo)
library(hexbin)
library(grid)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(gapminder)



# Load the gapminder data set provided by the gapminder package.
data("gapminder")

# Keep one subset per year that will be plotted. dplyr::filter is spelled out
# so the call does not depend on which attached package owns `filter`.
Year_2007 <- gapminder %>% dplyr::filter(year == 2007)
Year_1972 <- gapminder %>% dplyr::filter(year == 1972)
Year_2002 <- gapminder %>% dplyr::filter(year == 2002)

# Draw a Rosling-style bubble chart for a single year of gapminder data.
#
# Arguments:
#   year_data  - data frame with the columns gdpPercap, lifeExp, continent
#                and pop.
#   year_label - string drawn as the large, faded watermark behind the points.
#
# Returns the ggplot object; calling this function at top level prints it,
# which renders the chart.
plot_gapminder_year <- function(year_data, year_label) {
  # Legend order for the continents and the fill color paired with each one
  # (the two vectors are matched position by position).
  continent_breaks <- c("Asia", "Europe", "Oceania", "Americas", "Africa")
  continent_colors <- c("pink", "yellow", "red", "green", "blue")

  # Income ticks double at each step, so they are evenly spaced on a log axis.
  income_breaks <- c(500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000)
  income_labels <- c(
    "500", "1000", "2000", "4000", "8000", "16k", "32k", "64k", "128k"
  )

  ggplot(data = year_data) +
    # Added first so the watermark sits underneath the country bubbles.
    annotate(
      geom = "text", x = 8000, y = 50, label = year_label,
      color = "black", size = 40, alpha = 0.2
    ) +
    # Shape 21 is a circle with a separate outline and fill, so `fill`
    # carries the continent color while the border stays black.
    geom_point(
      aes(x = gdpPercap, y = lifeExp, fill = continent, size = pop),
      shape = 21, alpha = 0.5
    ) +
    scale_fill_manual(breaks = continent_breaks, values = continent_colors) +
    scale_x_log10(breaks = income_breaks, labels = income_labels) +
    scale_y_continuous(breaks = seq(10, 90, by = 10)) +
    theme(
      panel.background = element_blank(),
      axis.line = element_line(color = "black"),
      panel.grid.major = element_line(color = "grey")
    ) +
    labs(
      x = "Income - per person (GDP/capita, PPP$ inflation-adjusted)",
      y = "Life Expectancy (years)"
    )
}

# One chart per selected year, in chronological order.
plot_gapminder_year(Year_1972, "1972")

plot_gapminder_year(Year_2002, "2002")

plot_gapminder_year(Year_2007, "2007")