First, we load the relevant libraries: gapminder, dplyr, and ggplot2.
library(gapminder)
library(dplyr)
library(ggplot2)
Summarize and plot the median life expectancy (MLE) in 1952 for each of the 5 countries with the highest MLEs and for each of the 5 countries with the lowest MLEs. You should have one summary table and one plot.
# Median life expectancy per country, restricted to the year 1952.
# `year` is compared against a number (not a string) so no implicit
# type coercion is involved.
# NOTE(review): gapminder appears to hold a single row per country and year,
# in which case this median is simply that row's lifeExp -- confirm.
mle1952 <- gapminder %>%
  filter(year == 1952) %>%
  group_by(country) %>%
  summarize(median = median(lifeExp))
# The 5 countries with the highest median life expectancy.
# top_n() keeps ties and preserves the input row order.
mle1952_top5 <- mle1952 %>% top_n(5, median)
# The 5 countries with the lowest median life expectancy
# (a negative n makes top_n() select from the bottom).
mle1952_bot5 <- mle1952 %>% top_n(-5, median)
# Stacking both selections into one summary table: top rows first,
# bottom rows second.
mle_table1952 <- bind_rows(mle1952_top5, mle1952_bot5)
# Bar chart of median life expectancy, one bar per selected country.
mle1952_plot <- ggplot(mle_table1952, aes(country, median)) +
  geom_col(fill = "brown") +
  # Adding appropriate titles and labels.
  labs(
    title = "Top 5 and Bottom 5 Median Life Expectancies",
    subtitle = "In 1952",
    x = "Country",
    y = "Median Life\nExpectancy"
  ) +
  theme_gray() +
  # Centering the title and subtitle; angle = 0 keeps the y-axis title
  # horizontal, and the margins add space between axis titles and the panel.
  theme(
    plot.title = element_text(hjust = 0.5, size = 15),
    plot.subtitle = element_text(hjust = 0.5, size = 13),
    axis.title.y = element_text(
      margin = margin(t = 0, r = 15, b = 0, l = 0),
      angle = 0,
      vjust = 0.5
    ),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  ) +
  # Staggering the x-axis labels over two rows so they do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))
# Printing table.
mle_table1952
## # A tibble: 10 x 2
## country median
## <fct> <dbl>
## 1 Denmark 70.8
## 2 Iceland 72.5
## 3 Netherlands 72.1
## 4 Norway 72.7
## 5 Sweden 71.9
## 6 Afghanistan 28.8
## 7 Angola 30.0
## 8 Gambia 30
## 9 Mozambique 31.3
## 10 Sierra Leone 30.3
# Printing plot.
mle1952_plot
Summarize and plot the median life expectancy (MLE) in 2007 for each of the 5 countries with the highest MLEs and for each of the 5 countries with the lowest MLEs. You should have one summary table and one plot.
# Median life expectancy per country, restricted to the year 2007.
# `year` is compared against a number (not a string) so no implicit
# type coercion is involved.
mle07 <- gapminder %>%
  filter(year == 2007) %>%
  group_by(country) %>%
  summarize(median = median(lifeExp))
# The 5 countries with the highest median life expectancy.
# top_n() keeps ties and preserves the input row order.
mle07_top5 <- mle07 %>% top_n(5, median)
# The 5 countries with the lowest median life expectancy
# (a negative n makes top_n() select from the bottom).
mle07_bot5 <- mle07 %>% top_n(-5, median)
# Stacking both selections into one summary table: top rows first,
# bottom rows second.
mle_table07 <- bind_rows(mle07_top5, mle07_bot5)
# Bar chart of median life expectancy, one bar per selected country.
mle07_plot <- ggplot(mle_table07, aes(country, median)) +
  geom_col(fill = "brown") +
  # Adding appropriate titles and labels.
  labs(
    title = "Top 5 and Bottom 5 Median Life Expectancies",
    subtitle = "In 2007",
    x = "Country",
    y = "Median Life\nExpectancy"
  ) +
  theme_gray() +
  # Centering the title and subtitle; angle = 0 keeps the y-axis title
  # horizontal, and the margins add space between axis titles and the panel.
  theme(
    plot.title = element_text(hjust = 0.5, size = 15),
    plot.subtitle = element_text(hjust = 0.5, size = 13),
    axis.title.y = element_text(
      margin = margin(t = 0, r = 15, b = 0, l = 0),
      angle = 0,
      vjust = 0.5
    ),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  ) +
  # Staggering the x-axis labels over two rows so they do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2))
# Printing table.
mle_table07
## # A tibble: 10 x 2
## country median
## <fct> <dbl>
## 1 Australia 81.2
## 2 Hong Kong, China 82.2
## 3 Iceland 81.8
## 4 Japan 82.6
## 5 Switzerland 81.7
## 6 Lesotho 42.6
## 7 Mozambique 42.1
## 8 Sierra Leone 42.6
## 9 Swaziland 39.6
## 10 Zambia 42.4
# Printing plot.
mle07_plot
Summarize and plot the median life expectancy in each year for the largest 5 countries in terms of 2007 population. You should have one summary table and one plot.
# Rows for the 5 most populous countries in 2007 (top_n() keeps ties).
top5_pop_07 <- gapminder %>%
  filter(year == 2007) %>%
  top_n(5, pop)
# Storing those countries as a separate vector.
countries <- top5_pop_07$country
# Median life expectancy per year and country, for the selected countries only.
# summarise() drops only the last grouping level, so the result would stay
# grouped by year; ungroup() returns a plain tibble for downstream use.
mle_year <- gapminder %>%
  filter(country %in% countries) %>%
  group_by(year, country) %>%
  summarise(mle = median(lifeExp)) %>%
  ungroup()
# Line chart of median life expectancy over time, one coloured line per country.
mle_year_plot <- ggplot(mle_year, aes(year, mle, color = country)) +
  geom_line() +
  # Adding appropriate titles and labels.
  labs(
    title = "Median Life Expectancies from 1952 to 2007",
    subtitle = "Of the countries with the highest population in 2007",
    x = "Year",
    y = "Median Life\nExpectancy",
    col = "Country"
  ) +
  # Centering the title and subtitle; angle = 0 keeps the y-axis title
  # horizontal, and the margins add space between axis titles and the panel.
  theme(
    plot.title = element_text(hjust = 0.5, size = 15),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    axis.title.y = element_text(
      margin = margin(t = 0, r = 15, b = 0, l = 0),
      angle = 0,
      vjust = 0.5
    ),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  )
# Printing table.
mle_year
## # A tibble: 60 x 3
## year country mle
## <int> <fct> <dbl>
## 1 1952 Brazil 50.9
## 2 1952 China 44
## 3 1952 India 37.4
## 4 1952 Indonesia 37.5
## 5 1952 United States 68.4
## 6 1957 Brazil 53.3
## 7 1957 China 50.5
## 8 1957 India 40.2
## 9 1957 Indonesia 39.9
## 10 1957 United States 69.5
## # ... with 50 more rows
# Printing plot.
mle_year_plot
Summarize and plot the median life expectancy in each year for each continent. You should have one summary table and one plot.
# Median life expectancy per year and continent across the whole dataset.
# summarise() drops only the last grouping level, so the result would stay
# grouped by year; ungroup() returns a plain tibble for downstream use.
mle_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarise(mle = median(lifeExp)) %>%
  ungroup()
# Line chart of median life expectancy over time, one coloured line per
# continent.
mle_continent_plot <- ggplot(mle_continent, aes(year, mle, color = continent)) +
  geom_line() +
  theme_gray() +
  # Adding appropriate titles and labels.
  labs(
    title = "Median Life Expectancies from 1952 to 2007",
    subtitle = "across the continents",
    x = "Year",
    y = "Median Life\nExpectancy",
    col = "Continents"
  ) +
  # Centering the title and subtitle; angle = 0 keeps the y-axis title
  # horizontal, and the margins add space between axis titles and the panel.
  theme(
    plot.title = element_text(hjust = 0.5, size = 15),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    axis.title.y = element_text(
      margin = margin(t = 0, r = 15, b = 0, l = 0),
      angle = 0,
      vjust = 0.5
    ),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
  )
# Printing table.
mle_continent
## # A tibble: 60 x 3
## year continent mle
## <int> <fct> <dbl>
## 1 1952 Africa 38.8
## 2 1952 Americas 54.7
## 3 1952 Asia 44.9
## 4 1952 Europe 65.9
## 5 1952 Oceania 69.3
## 6 1957 Africa 40.6
## 7 1957 Americas 56.1
## 8 1957 Asia 48.3
## 9 1957 Europe 67.6
## 10 1957 Oceania 70.3
## # ... with 50 more rows
# Printing plot.
mle_continent_plot