library(gapminder)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
Question a. Summarize and plot the median life expectancy (MLE) in 1952 for each of the 5 countries with the top MLEs and for each of the 5 countries with the bottom MLEs. You should have one summary table and one plot. Hint: Use the data in 1952 to find the top 5 countries and bottom 5 countries in MLE. Then, plot the MLE for each of the 10 countries. You should have 10 bars in one graph.
# Life expectancy in 1952: take the five countries with the highest values and
# the five with the lowest, then gather their 1952 rows into one summary table.
# Sorting is ascending, so tail() yields the top five and head() the bottom five.
top5.in.lifeExp <- gapminder %>%
  filter(year == 1952) %>%
  arrange(lifeExp) %>%
  tail(n = 5) %>%
  pull(country) %>%
  as.character()
top5.in.lifeExp
## [1] "Denmark" "Sweden" "Netherlands" "Iceland" "Norway"
bot5.in.lifeExp <- gapminder %>%
  filter(year == 1952) %>%
  arrange(lifeExp) %>%
  head(n = 5) %>%
  pull(country) %>%
  as.character()
bot5.in.lifeExp
## [1] "Afghanistan" "Gambia" "Angola" "Sierra Leone" "Mozambique"
# DF and DR hold the 1952 rows of the top and bottom groups separately;
# DB is the combined list of names and DBB the combined summary table.
DF <- gapminder %>% filter(year == 1952, country %in% top5.in.lifeExp)
DR <- gapminder %>% filter(year == 1952, country %in% bot5.in.lifeExp)
DB <- c(top5.in.lifeExp, bot5.in.lifeExp)
DBB <- gapminder %>% filter(year == 1952, country %in% DB)
DBB
## # A tibble: 10 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Angola Africa 1952 30.0 4232095 3521.
## 3 Denmark Europe 1952 70.8 4334000 9692.
## 4 Gambia Africa 1952 30 284320 485.
## 5 Iceland Europe 1952 72.5 147962 7268.
## 6 Mozambique Africa 1952 31.3 6446316 469.
## 7 Netherlands Europe 1952 72.1 10381988 8942.
## 8 Norway Europe 1952 72.7 3327728 10095.
## 9 Sierra Leone Africa 1952 30.3 2143249 880.
## 10 Sweden Europe 1952 71.9 7124673 8528.
# Bar chart of the summary table DBB: one bar per country, bar height is the
# life expectancy, and the country labels on the x axis are rotated 90 degrees.
ggplot(data = DBB, mapping = aes(x = country, y = lifeExp)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))
Question b. Summarize and plot the median life expectancy (MLE) in 2007 for each of the 5 countries with the top MLEs and for each of the 5 countries with the bottom MLEs. You should have one summary table and one plot.
# Life expectancy in 2007: take the five countries with the highest values and
# the five with the lowest, then gather their 2007 rows into one summary table.
# Sorting is ascending, so tail() yields the top five and head() the bottom five.
top5.in.lifeExp <- gapminder %>%
  filter(year == 2007) %>%
  arrange(lifeExp) %>%
  tail(n = 5) %>%
  pull(country) %>%
  as.character()
top5.in.lifeExp
## [1] "Australia" "Switzerland" "Iceland" "Hong Kong, China"
## [5] "Japan"
bot5.in.lifeExp <- gapminder %>%
  filter(year == 2007) %>%
  arrange(lifeExp) %>%
  head(n = 5) %>%
  pull(country) %>%
  as.character()
bot5.in.lifeExp
## [1] "Swaziland" "Mozambique" "Zambia" "Sierra Leone" "Lesotho"
# DF and DR hold the 2007 rows of the top and bottom groups separately;
# DB is the combined list of names and DBB the combined summary table.
DF <- gapminder %>% filter(year == 2007, country %in% top5.in.lifeExp)
DR <- gapminder %>% filter(year == 2007, country %in% bot5.in.lifeExp)
DB <- c(top5.in.lifeExp, bot5.in.lifeExp)
DBB <- gapminder %>% filter(year == 2007, country %in% DB)
DBB
## # A tibble: 10 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Australia Oceania 2007 81.2 20434176 34435.
## 2 Hong Kong, China Asia 2007 82.2 6980412 39725.
## 3 Iceland Europe 2007 81.8 301931 36181.
## 4 Japan Asia 2007 82.6 127467972 31656.
## 5 Lesotho Africa 2007 42.6 2012649 1569.
## 6 Mozambique Africa 2007 42.1 19951656 824.
## 7 Sierra Leone Africa 2007 42.6 6144562 863.
## 8 Swaziland Africa 2007 39.6 1133066 4513.
## 9 Switzerland Europe 2007 81.7 7554661 37506.
## 10 Zambia Africa 2007 42.4 11746035 1271.
# Bar chart of the summary table DBB: one bar per country, bar height is the
# life expectancy, and the country labels on the x axis are rotated 90 degrees.
ggplot(data = DBB, mapping = aes(x = country, y = lifeExp)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))
Question c. Summarize and plot the median life expectancy in each year for the largest 5 countries in terms of 2007 population. You should have one summary table and one plot. Hint: Use the data in 2007 to find the top 5 countries in population. Then, plot MLE vs year for each of the 5 countries. You should have 5 curves and you should overlay them in one graph.
# Life expectancy in every year for the five countries with the largest 2007
# population. Sorting by pop is ascending, so tail() keeps the five largest.
big.country <- gapminder %>%
  filter(year == 2007) %>%
  arrange(pop) %>%
  tail(n = 5) %>%
  pull(country) %>%
  as.character()
# Summary table: all rows (every year) for those five countries.
DF <- gapminder %>%
  filter(country %in% big.country)
DF
## # A tibble: 60 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Brazil Americas 1952 50.9 56602560 2109.
## 2 Brazil Americas 1957 53.3 65551171 2487.
## 3 Brazil Americas 1962 55.7 76039390 3337.
## 4 Brazil Americas 1967 57.6 88049823 3430.
## 5 Brazil Americas 1972 59.5 100840058 4986.
## 6 Brazil Americas 1977 61.5 114313951 6660.
## 7 Brazil Americas 1982 63.3 128962939 7031.
## 8 Brazil Americas 1987 65.2 142938076 7807.
## 9 Brazil Americas 1992 67.1 155975974 6950.
## 10 Brazil Americas 1997 69.4 168546719 7958.
## # ... with 50 more rows
# Life expectancy against year, one coloured line per country, all overlaid
# in a single panel.
p <- ggplot(
  data = DF,
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_line()
p
Question d. Summarize and plot the median life expectancy in each year for each continent. You should have one summary table and one plot.
# Median life expectancy across the countries of each continent, for each year.
# The earlier version computed a population-weighted mean
# (sum(pop * lifeExp) / sum(pop)) through mutate() + subset() + unique(), which
# is not the median the question asks for. summarise() with median() returns
# one row per continent-year, and ungroup() drops the leftover grouping so later
# verbs on DF are not silently applied per group.
DF <- gapminder %>%
  group_by(continent, year) %>%
  summarise(lifeExp = median(lifeExp)) %>%
  ungroup()
# One curve per continent, overlaid in a single panel.
ggplot(data = DF, mapping = aes(x = year, y = lifeExp, color = continent)) +
  geom_line()
# NOTE(review): the recorded output that follows this chunk was produced by the
# old weighted-mean code; re-run the document to refresh it with the medians.
DF
## # A tibble: 60 x 3
## # Groups: continent, year [60]
## continent year lifeExp
## <fct> <int> <dbl>
## 1 Asia 1952 42.9
## 2 Asia 1957 47.3
## 3 Asia 1962 46.6
## 4 Asia 1967 53.9
## 5 Asia 1972 57.5
## 6 Asia 1977 59.6
## 7 Asia 1982 61.6
## 8 Asia 1987 63.5
## 9 Asia 1992 65.1
## 10 Asia 1997 66.8
## # ... with 50 more rows