# title: "project 4 Stat 360 by Bernard M."

library(gapminder)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Part a

# In 1952, the median life expectancy (MLE) of the top 5 countries was about 2.4 times that of the bottom 5 countries (roughly 72 vs. 30 years).

# Part a: compare the five highest and five lowest life expectancies in 1952.

# Rank the 1952 rows by life expectancy (ascending) once, so the bottom five
# countries are the first rows and the top five are the last rows.
ranked_1952 <- gapminder %>%
  filter(year == 1952) %>%
  arrange(lifeExp)

# Names of the five countries with the highest life expectancy in 1952,
# converted from a factor to a plain character vector.
top5.in.MLE <- ranked_1952 %>%
  tail(5) %>%
  pull(country) %>%
  as.character()

top5.in.MLE
## [1] "Denmark"     "Sweden"      "Netherlands" "Iceland"     "Norway"

# Names of the five countries with the lowest life expectancy in 1952.
bottom5.in.MLE <- ranked_1952 %>%
  head(5) %>%
  pull(country) %>%
  as.character()

bottom5.in.MLE
## [1] "Afghanistan"  "Gambia"       "Angola"       "Sierra Leone" "Mozambique"

# Keep only the 1952 rows of the ten selected countries.
DS <- c(top5.in.MLE, bottom5.in.MLE)
DSummary <- gapminder %>%
  filter(year == 1952, country %in% DS)

DSummary
## # A tibble: 10 x 6
##    country      continent  year lifeExp      pop gdpPercap
##    <fct>        <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan  Asia       1952    28.8  8425333      779.
##  2 Angola       Africa     1952    30.0  4232095     3521.
##  3 Denmark      Europe     1952    70.8  4334000     9692.
##  4 Gambia       Africa     1952    30     284320      485.
##  5 Iceland      Europe     1952    72.5   147962     7268.
##  6 Mozambique   Africa     1952    31.3  6446316      469.
##  7 Netherlands  Europe     1952    72.1 10381988     8942.
##  8 Norway       Europe     1952    72.7  3327728    10095.
##  9 Sierra Leone Africa     1952    30.3  2143249      880.
## 10 Sweden       Europe     1952    71.9  7124673     8528.

# Bar chart of life expectancy per country; x labels are rotated so the
# country names do not overlap. The plot is stored under a descriptive name
# instead of `T`, because assigning to `T` masks the shorthand for TRUE.
mle_plot_1952 <- ggplot(DSummary, aes(x = country, y = lifeExp)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Graph of MLE of top 5 and bottom 5 countries in 1952")

mle_plot_1952

# Part b

# In 2007, the median life expectancy (MLE) had increased overall compared with 1952. The MLE of the top 5 countries is about twice that of the bottom 5 countries.

# Part b: compare the five highest and five lowest life expectancies in 2007.

# Rank the 2007 rows by life expectancy (ascending) once, so the bottom five
# countries are the first rows and the top five are the last rows.
ranked_2007 <- gapminder %>%
  filter(year == 2007) %>%
  arrange(lifeExp)

# Names of the five countries with the highest life expectancy in 2007,
# converted from a factor to a plain character vector.
top5.in.MLE <- ranked_2007 %>%
  tail(5) %>%
  pull(country) %>%
  as.character()

top5.in.MLE
## [1] "Australia"        "Switzerland"      "Iceland"          "Hong Kong, China"
## [5] "Japan"

# Names of the five countries with the lowest life expectancy in 2007.
bottom5.in.MLE <- ranked_2007 %>%
  head(5) %>%
  pull(country) %>%
  as.character()

bottom5.in.MLE
## [1] "Swaziland"    "Mozambique"   "Zambia"       "Sierra Leone" "Lesotho"

# Keep only the 2007 rows of the ten selected countries.
DS <- c(top5.in.MLE, bottom5.in.MLE)
DSummary <- gapminder %>%
  filter(year == 2007, country %in% DS)

DSummary
## # A tibble: 10 x 6
##    country          continent  year lifeExp       pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Australia        Oceania    2007    81.2  20434176    34435.
##  2 Hong Kong, China Asia       2007    82.2   6980412    39725.
##  3 Iceland          Europe     2007    81.8    301931    36181.
##  4 Japan            Asia       2007    82.6 127467972    31656.
##  5 Lesotho          Africa     2007    42.6   2012649     1569.
##  6 Mozambique       Africa     2007    42.1  19951656      824.
##  7 Sierra Leone     Africa     2007    42.6   6144562      863.
##  8 Swaziland        Africa     2007    39.6   1133066     4513.
##  9 Switzerland      Europe     2007    81.7   7554661    37506.
## 10 Zambia           Africa     2007    42.4  11746035     1271.

# Bar chart of life expectancy per country; x labels are rotated so the
# country names do not overlap. The plot is stored under a descriptive name
# instead of `T`, because assigning to `T` masks the shorthand for TRUE.
mle_plot_2007 <- ggplot(DSummary, aes(x = country, y = lifeExp)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Graph of MLE of top 5 and bottom 5 countries in 2007")

mle_plot_2007

# Part c

# Among the five countries that were most populous in 2007, the MLE increased gradually over the years in each country. The United States had a higher MLE than the other four countries.

# Part c: life expectancy over time for the five countries that had the
# largest populations in 2007.

# Sort the 2007 rows by population (ascending) and take the last five, i.e.
# the five most populous countries, as a plain character vector.
top5.in.pop <- gapminder %>%
  filter(year == 2007) %>%
  arrange(pop) %>%
  tail(5) %>%
  pull(country) %>%
  as.character()

top5.in.pop
## [1] "Brazil"        "Indonesia"     "United States" "India"        
## [5] "China"

# Keep every year (not just 2007) for those five countries so that the trend
# over time can be plotted.
DSummary <- gapminder %>%
  filter(country %in% top5.in.pop)

DSummary
## # A tibble: 60 x 6
##    country continent  year lifeExp       pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Brazil  Americas   1952    50.9  56602560     2109.
##  2 Brazil  Americas   1957    53.3  65551171     2487.
##  3 Brazil  Americas   1962    55.7  76039390     3337.
##  4 Brazil  Americas   1967    57.6  88049823     3430.
##  5 Brazil  Americas   1972    59.5 100840058     4986.
##  6 Brazil  Americas   1977    61.5 114313951     6660.
##  7 Brazil  Americas   1982    63.3 128962939     7031.
##  8 Brazil  Americas   1987    65.2 142938076     7807.
##  9 Brazil  Americas   1992    67.1 155975974     6950.
## 10 Brazil  Americas   1997    69.4 168546719     7958.
## # … with 50 more rows

# One line per country. The plot is stored under a descriptive name instead
# of `T`, because assigning to `T` masks the shorthand for TRUE.
top_pop_mle_plot <- ggplot(
  DSummary,
  aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  labs(title = "Graph of MLE for top 5 Most Populated countries in 2007")

top_pop_mle_plot

# Part d

# The MLE increased from 1952 to 2007 in every continent, but the MLE in Africa did not improve between about 1990 and 2002.

# Part d: population-weighted life expectancy per continent and year.

# Within each continent/year group, replace every country's lifeExp by the
# group's population-weighted mean, then keep one row per group.
# mutate() + distinct() (rather than summarise()) is used on purpose: it
# keeps the rows in order of first appearance in gapminder and leaves the
# result grouped, which matches the printed output recorded below.
DT <- gapminder %>%
  group_by(continent, year) %>%
  mutate(lifeExp = weighted.mean(lifeExp, pop)) %>%
  select(continent, year, lifeExp) %>%
  distinct()

DT
## # A tibble: 60 x 3
## # Groups:   continent, year [60]
##    continent  year lifeExp
##    <fct>     <int>   <dbl>
##  1 Asia       1952    42.9
##  2 Asia       1957    47.3
##  3 Asia       1962    46.6
##  4 Asia       1967    53.9
##  5 Asia       1972    57.5
##  6 Asia       1977    59.6
##  7 Asia       1982    61.6
##  8 Asia       1987    63.5
##  9 Asia       1992    65.1
## 10 Asia       1997    66.8
## # … with 50 more rows

# One line per continent. The plot is stored under a descriptive name instead
# of `T`, because assigning to `T` masks the shorthand for TRUE.
continent_mle_plot <- ggplot(
  DT,
  aes(x = year, y = lifeExp, color = continent)
) +
  geom_line() +
  labs(title = "Graph of MLE In each year for each Continent")

continent_mle_plot