library(gapminder)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Question a. Summarize and plot the median life expectancy (MLE) in 1952 for each of the 5 countries with the top MLEs and for each of the 5 countries with the bottom MLEs. You should have one summary table and one plot. Hint: Use the data in 1952 to find the top 5 countries and bottom 5 countries in MLE. Then, plot the MLE for each of the 10 countries. You should have 10 bars in one graph.

# Question a: 1952 life expectancy for the five highest-ranked and the five
# lowest-ranked countries. The summary table below has one 1952 row per
# selected country, so each country's lifeExp value is plotted as-is.

# Sort ascending and keep the last five rows: the five highest values,
# listed from lowest to highest.
top5.in.lifeExp <- gapminder %>%
  filter(year == 1952) %>%
  arrange(lifeExp) %>%
  tail(5) %>%
  pull(country) %>%
  as.character()
top5.in.lifeExp
## [1] "Denmark"     "Sweden"      "Netherlands" "Iceland"     "Norway"
# The first five rows of the same ordering are the five lowest values.
bot5.in.lifeExp <- gapminder %>%
  filter(year == 1952) %>%
  arrange(lifeExp) %>%
  head(5) %>%
  pull(country) %>%
  as.character()
bot5.in.lifeExp
## [1] "Afghanistan"  "Gambia"       "Angola"       "Sierra Leone" "Mozambique"
# Summary table: the 1952 rows of all ten selected countries. Separate
# per-group tables are not built because nothing below reads them.
DB <- c(top5.in.lifeExp, bot5.in.lifeExp)
DBB <- gapminder %>%
  filter(year == 1952, country %in% DB)
DBB
## # A tibble: 10 x 6
##    country      continent  year lifeExp      pop gdpPercap
##    <fct>        <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan  Asia       1952    28.8  8425333      779.
##  2 Angola       Africa     1952    30.0  4232095     3521.
##  3 Denmark      Europe     1952    70.8  4334000     9692.
##  4 Gambia       Africa     1952    30     284320      485.
##  5 Iceland      Europe     1952    72.5   147962     7268.
##  6 Mozambique   Africa     1952    31.3  6446316      469.
##  7 Netherlands  Europe     1952    72.1 10381988     8942.
##  8 Norway       Europe     1952    72.7  3327728    10095.
##  9 Sierra Leone Africa     1952    30.3  2143249      880.
## 10 Sweden       Europe     1952    71.9  7124673     8528.
# One bar per country; the x-axis labels are rotated by 90 degrees.
ggplot(DBB, aes(x = country, y = lifeExp)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

Question b. Summarize and plot the median life expectancy (MLE) in 2007 for each of the 5 countries with the top MLEs and for each of the 5 countries with the bottom MLEs. You should have one summary table and one plot.

# Question b: 2007 life expectancy for the five highest-ranked and the five
# lowest-ranked countries, following the same steps as for 1952.

# Highest five: ascending sort, last five rows (lowest to highest).
top5.in.lifeExp <- gapminder %>%
  filter(year == 2007) %>%
  arrange(lifeExp) %>%
  tail(5) %>%
  pull(country) %>%
  as.character()
top5.in.lifeExp
## [1] "Australia"        "Switzerland"      "Iceland"          "Hong Kong, China"
## [5] "Japan"
# Lowest five: first five rows of the same ordering.
bot5.in.lifeExp <- gapminder %>%
  filter(year == 2007) %>%
  arrange(lifeExp) %>%
  head(5) %>%
  pull(country) %>%
  as.character()
bot5.in.lifeExp
## [1] "Swaziland"    "Mozambique"   "Zambia"       "Sierra Leone" "Lesotho"
# Per-group 2007 rows (highest five, lowest five).
DF <- gapminder %>%
  filter(year == 2007, country %in% top5.in.lifeExp)
DR <- gapminder %>%
  filter(year == 2007, country %in% bot5.in.lifeExp)
# Summary table: the 2007 rows of all ten selected countries.
DB <- c(top5.in.lifeExp, bot5.in.lifeExp)
DBB <- gapminder %>%
  filter(year == 2007, country %in% DB)
DBB
## # A tibble: 10 x 6
##    country          continent  year lifeExp       pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Australia        Oceania    2007    81.2  20434176    34435.
##  2 Hong Kong, China Asia       2007    82.2   6980412    39725.
##  3 Iceland          Europe     2007    81.8    301931    36181.
##  4 Japan            Asia       2007    82.6 127467972    31656.
##  5 Lesotho          Africa     2007    42.6   2012649     1569.
##  6 Mozambique       Africa     2007    42.1  19951656      824.
##  7 Sierra Leone     Africa     2007    42.6   6144562      863.
##  8 Swaziland        Africa     2007    39.6   1133066     4513.
##  9 Switzerland      Europe     2007    81.7   7554661    37506.
## 10 Zambia           Africa     2007    42.4  11746035     1271.
# One bar per country; the x-axis labels are rotated by 90 degrees.
ggplot(DBB, aes(x = country, y = lifeExp)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

Question c. Summarize and plot the median life expectancy in each year for the largest 5 countries in terms of 2007 population. You should have one summary table and one plot. Hint: Use the data in 2007 to find the top 5 countries in population. Then, plot MLE vs year for each of the 5 countries. You should have 5 curves and you should overlay them in one graph.

# Question c: life expectancy over time for the five countries with the
# largest populations in 2007.

# Ascending sort on 2007 population; the last five rows are the largest.
big.country <- gapminder %>%
  filter(year == 2007) %>%
  arrange(pop) %>%
  tail(5) %>%
  pull(country) %>%
  as.character()
# Summary table: every year on record for those five countries.
DF <- gapminder %>%
  filter(country %in% big.country)
DF
## # A tibble: 60 x 6
##    country continent  year lifeExp       pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Brazil  Americas   1952    50.9  56602560     2109.
##  2 Brazil  Americas   1957    53.3  65551171     2487.
##  3 Brazil  Americas   1962    55.7  76039390     3337.
##  4 Brazil  Americas   1967    57.6  88049823     3430.
##  5 Brazil  Americas   1972    59.5 100840058     4986.
##  6 Brazil  Americas   1977    61.5 114313951     6660.
##  7 Brazil  Americas   1982    63.3 128962939     7031.
##  8 Brazil  Americas   1987    65.2 142938076     7807.
##  9 Brazil  Americas   1992    67.1 155975974     6950.
## 10 Brazil  Americas   1997    69.4 168546719     7958.
## # ... with 50 more rows
# One line per country, all drawn on the same axes.
p <- ggplot(DF, aes(x = year, y = lifeExp, colour = country)) +
  geom_line()
p

Question d. Summarize and plot the median life expectancy in each year for each continent. You should have one summary table and one plot.

# Question d: life expectancy per continent and year.
# NOTE(review): the statistic computed here is the population-weighted mean of
# lifeExp within each continent-year group, not a median. If the median across
# countries is what is wanted, median(lifeExp) would be the statistic -- confirm.
#
# pop is an integer column, and sum() over integers returns NA (with a
# warning) once the total exceeds the integer range. weighted.mean() converts
# its weights to double first, so the group totals cannot overflow.
# mutate() + distinct() keeps the grouping and the first-appearance row order,
# which is what the printed table below shows.
DF <- gapminder %>%
  group_by(continent, year) %>%
  mutate(lifeExp = weighted.mean(lifeExp, pop)) %>%
  select(continent, year, lifeExp) %>%
  distinct()
# One line per continent, all drawn on the same axes.
ggplot(DF, aes(x = year, y = lifeExp, color = continent)) +
  geom_line()

DF
## # A tibble: 60 x 3
## # Groups:   continent, year [60]
##    continent  year lifeExp
##    <fct>     <int>   <dbl>
##  1 Asia       1952    42.9
##  2 Asia       1957    47.3
##  3 Asia       1962    46.6
##  4 Asia       1967    53.9
##  5 Asia       1972    57.5
##  6 Asia       1977    59.6
##  7 Asia       1982    61.6
##  8 Asia       1987    63.5
##  9 Asia       1992    65.1
## 10 Asia       1997    66.8
## # ... with 50 more rows