Using Gapminder data

First, we load the required libraries: gapminder, dplyr, and ggplot2.

library(gapminder)
library(dplyr)
library(ggplot2)

Part A

Summarize and plot the median life expectancy (MLE) in 1952 for each of the 5 countries with the highest MLEs and for each of the 5 countries with the lowest MLEs. You should have one summary table and one plot.

# Median life expectancy per country, restricted to the year 1952.
mle1952 <- gapminder %>%
  # Compare `year` against a number rather than a string so the filter does
  # not depend on implicit type coercion.
  filter(year == 1952) %>%
  group_by(country) %>%
  summarize(median = median(lifeExp))

# Selecting the 5 countries with the highest median life exp.
mle1952_top5 <- mle1952 %>% top_n(5, median)
# Selecting the 5 countries with the lowest median life exp. (negative n).
mle1952_bot5 <- mle1952 %>% top_n(-5, median)

# Stacking the top and bottom 5 into one summary table (top 5 rows first).
mle_table1952 <- bind_rows(mle1952_top5, mle1952_bot5)

# Creating a bar chart of median life exp. by country.
mle1952_plot <- ggplot(mle_table1952, aes(country, median)) +
  geom_col(fill = "brown") +
  # Adding appropriate titles and labels.
  labs(title = "Top 5 and Bottom 5 Median Life Expectancies",
       subtitle = "In 1952",
       x = "Country",
       y = "Median Life\nExpectancy") +
  theme_gray() +
  # Centering the titles and spacing the axis titles away from the axes.
  theme(plot.title = element_text(hjust = 0.5, size = 15),
        plot.subtitle = element_text(hjust = 0.5, size = 13),
        # angle = 0 keeps the y axis title horizontal; vjust centers it.
        axis.title.y = element_text(margin = margin(r = 15),
                                    angle = 0,
                                    vjust = 0.5),
        axis.title.x = element_text(margin = margin(t = 10))) +
  # Staggering x axis labels over two rows so they do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

# Printing table.
mle_table1952
## # A tibble: 10 x 2
##    country      median
##    <fct>         <dbl>
##  1 Denmark        70.8
##  2 Iceland        72.5
##  3 Netherlands    72.1
##  4 Norway         72.7
##  5 Sweden         71.9
##  6 Afghanistan    28.8
##  7 Angola         30.0
##  8 Gambia         30  
##  9 Mozambique     31.3
## 10 Sierra Leone   30.3
# Printing plot.
mle1952_plot

Part B

Summarize and plot the median life expectancy (MLE) in 2007 for each of the 5 countries with the highest MLEs and for each of the 5 countries with the lowest MLEs. You should have one summary table and one plot.

# Median life expectancy per country, restricted to the year 2007.
mle07 <- gapminder %>%
  # Compare `year` against a number rather than a string so the filter does
  # not depend on implicit type coercion.
  filter(year == 2007) %>%
  group_by(country) %>%
  summarize(median = median(lifeExp))

# Selecting the 5 countries with the highest median life exp.
mle07_top5 <- mle07 %>% top_n(5, median)
# Selecting the 5 countries with the lowest median life exp. (negative n).
mle07_bot5 <- mle07 %>% top_n(-5, median)

# Stacking the top and bottom 5 into one summary table (top 5 rows first).
mle_table07 <- bind_rows(mle07_top5, mle07_bot5)

# Creating a bar chart of median life exp. by country.
mle07_plot <- ggplot(mle_table07, aes(country, median)) +
  geom_col(fill = "brown") +
  # Adding appropriate titles and labels.
  labs(title = "Top 5 and Bottom 5 Median Life Expectancies",
       subtitle = "In 2007",
       x = "Country",
       y = "Median Life\nExpectancy") +
  theme_gray() +
  # Centering the titles and spacing the axis titles away from the axes.
  theme(plot.title = element_text(hjust = 0.5, size = 15),
        plot.subtitle = element_text(hjust = 0.5, size = 13),
        # angle = 0 keeps the y axis title horizontal; vjust centers it.
        axis.title.y = element_text(margin = margin(r = 15),
                                    angle = 0,
                                    vjust = 0.5),
        axis.title.x = element_text(margin = margin(t = 10))) +
  # Staggering x axis labels over two rows so they do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

# Printing table.
mle_table07
## # A tibble: 10 x 2
##    country          median
##    <fct>             <dbl>
##  1 Australia          81.2
##  2 Hong Kong, China   82.2
##  3 Iceland            81.8
##  4 Japan              82.6
##  5 Switzerland        81.7
##  6 Lesotho            42.6
##  7 Mozambique         42.1
##  8 Sierra Leone       42.6
##  9 Swaziland          39.6
## 10 Zambia             42.4
# Printing plot.
mle07_plot

Part C

Summarize and plot the median life expectancy in each year for the largest 5 countries in terms of 2007 population. You should have one summary table and one plot.

# Finding the 5 countries with the largest population in 2007.
top5_pop_07 <- gapminder %>%
  filter(year == 2007) %>%
  top_n(5, pop)

# Storing those countries as a separate vector.
countries <- top5_pop_07$country

# Median life expectancy for each of those countries in each year.
mle_year <- gapminder %>%
  filter(country %in% countries) %>%
  group_by(year, country) %>%
  summarise(mle = median(lifeExp)) %>%
  # summarise() only removes the last grouping level (country), leaving the
  # result grouped by year; ungroup so the summary table is a plain tibble.
  ungroup()

# Creating a line plot of median life expectancy by year, one line per country.
mle_year_plot <- ggplot(mle_year, aes(year, mle, color = country)) +
  geom_line() +
  # Adding appropriate titles and labels.
  labs(title = "Median Life Expectancies from 1952 to 2007",
       subtitle = "Of the countries with the highest population in 2007",
       x = "Year",
       y = "Median Life\nExpectancy",
       color = "Country") +
  # Centering the titles and spacing the axis titles away from the axes.
  theme(plot.title = element_text(hjust = 0.5, size = 15),
        plot.subtitle = element_text(hjust = 0.5, size = 12),
        # angle = 0 keeps the y axis title horizontal; vjust centers it.
        axis.title.y = element_text(margin = margin(r = 15),
                                    angle = 0,
                                    vjust = 0.5),
        axis.title.x = element_text(margin = margin(t = 10)))

# Printing table.
mle_year
## # A tibble: 60 x 3
##     year country         mle
##    <int> <fct>         <dbl>
##  1  1952 Brazil         50.9
##  2  1952 China          44  
##  3  1952 India          37.4
##  4  1952 Indonesia      37.5
##  5  1952 United States  68.4
##  6  1957 Brazil         53.3
##  7  1957 China          50.5
##  8  1957 India          40.2
##  9  1957 Indonesia      39.9
## 10  1957 United States  69.5
## # ... with 50 more rows
# Printing plot.
mle_year_plot

Part D

Summarize and plot the median life expectancy in each year for each continent. You should have one summary table and one plot.

# Median life expectancy for each continent in each year.
mle_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarise(mle = median(lifeExp)) %>%
  # summarise() only removes the last grouping level (continent), leaving the
  # result grouped by year; ungroup so the summary table is a plain tibble.
  ungroup()

# Creating a line plot of median life expectancy by year, one line per
# continent.
mle_continent_plot <- ggplot(mle_continent,
                             aes(year, mle, color = continent)) +
  geom_line() +
  theme_gray() +
  # Adding appropriate titles and labels.
  labs(title = "Median Life Expectancies from 1952 to 2007",
       subtitle = "across the continents",
       x = "Year",
       y = "Median Life\nExpectancy",
       color = "Continents") +
  # Centering the titles and spacing the axis titles away from the axes.
  theme(plot.title = element_text(hjust = 0.5, size = 15),
        plot.subtitle = element_text(hjust = 0.5, size = 12),
        # angle = 0 keeps the y axis title horizontal; vjust centers it.
        axis.title.y = element_text(margin = margin(r = 15),
                                    angle = 0,
                                    vjust = 0.5),
        axis.title.x = element_text(margin = margin(t = 10)))

# Printing table.
mle_continent
## # A tibble: 60 x 3
##     year continent   mle
##    <int> <fct>     <dbl>
##  1  1952 Africa     38.8
##  2  1952 Americas   54.7
##  3  1952 Asia       44.9
##  4  1952 Europe     65.9
##  5  1952 Oceania    69.3
##  6  1957 Africa     40.6
##  7  1957 Americas   56.1
##  8  1957 Asia       48.3
##  9  1957 Europe     67.6
## 10  1957 Oceania    70.3
## # ... with 50 more rows
# Printing plot.
mle_continent_plot