library(tidyverse)
library(gapminder)
library(knitr)
# Load the gapminder data set into the workspace, then print a compact
# column-by-column overview (column name, type and first few values).
data(gapminder)
glimpse(gapminder)
## Observations: 1,704
## Variables: 6
## $ country   <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, ...
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia...
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992...
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.8...
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 1488...
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 78...
# summary(gapminder$country)

Q 1a): For the US, for which years is data available?

# Years with a United States observation, one row per year together with the
# number of rows found for that year, in ascending year order.
gapminder %>%
  filter(country == "United States") %>%
  count(year)

Q 1b): For all of the countries in the Americas, which years are available?

# Years with observations for the Americas, one row per year together with
# the number of rows found for that year, in ascending year order.
gapminder %>%
  filter(continent == "Americas") %>%
  count(year)

Q 2a): For the United States, in which years did life expectancy exceed 70?

# United States observations with life expectancy above 70, reduced to the
# continent, country, year and lifeExp columns.
gapminder %>%
  filter(
    continent == "Americas",
    country == "United States",
    lifeExp > 70
  ) %>%
  select(continent, country, year, lifeExp)

Q 2b): For Honduras, in which years did life expectancy exceed 70?

# Honduras observations with life expectancy above 70, reduced to the
# continent, country, year and lifeExp columns.
gapminder %>%
  filter(
    continent == "Americas",
    country == "Honduras",
    lifeExp > 70
  ) %>%
  select(continent, country, year, lifeExp)

Q 2c): For each country in the Americas, compute the average life expectancy over the measurements taken since the country's life expectancy exceeded 70.

# Mean life expectancy per Americas country, taken only over the
# observations where lifeExp is above 70.
gapminder %>%
  filter(continent == "Americas" & lifeExp > 70) %>%
  group_by(country) %>%
  summarize(Ave_Age = mean(lifeExp))

Q 3a): In 2007, which countries had a population over 1 billion?

# Countries whose 2007 population exceeded one billion.
# No grouping is needed: the filter already leaves one row per country, and a
# trailing group_by() would only hand back a grouped tibble.
gapminder %>%
  filter(year == 2007, pop > 1e9)

Q 3b): Of the countries with over 1 billion people, which one reached 1 billion first, and in what year?

# For every country that ever exceeded one billion people, report the first
# recorded year in which it did so; the earliest country is listed first.
# Summarising to min(year) per country answers the question directly instead
# of listing every (country, year) pair with a constant count of 1.
gapminder %>%
  filter(pop > 1e9) %>%
  group_by(country) %>%
  summarise(first_year = min(year)) %>%
  arrange(first_year)

Q 4a): In 2007, which 3 countries had the lowest GDP per capita?

# The three 2007 observations with the lowest GDP per capita.
# Rows are sorted ascending by gdpPercap and only the first three are kept,
# so the output is exactly the answer rather than the full sorted table.
gapminder %>%
  filter(year == 2007) %>%
  arrange(gdpPercap) %>%
  head(3)

Q 4b): In 1962, which 3 countries had the lowest GDP per capita?

# The three 1962 observations with the lowest GDP per capita.
# Rows are sorted ascending by gdpPercap and only the first three are kept,
# so the output is exactly the answer rather than the full sorted table.
gapminder %>%
  filter(year == 1962) %>%
  arrange(gdpPercap) %>%
  head(3)

Q 4c): Which continent are the majority of the countries from in each of the two years, 1962 and 2007?

# 1962 observations ordered from lowest to highest GDP per capita; the
# continent column shows where the poorest countries are located.
# Grouping by (country, continent) is dropped: each group held a single row
# and arrange() ignores groups by default, so it only made the result grouped.
gapminder %>%
  filter(year == 1962) %>%
  arrange(gdpPercap)

# Same listing for 2007.
gapminder %>%
  filter(year == 2007) %>%
  arrange(gdpPercap)

Q 5a): Scatterplot of x = gdpPercap and y = lifeExp

# Scatterplot of life expectancy against GDP per capita, one point per row.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

Q 5b): Scatterplot of x = log(gdpPercap) and y = log(lifeExp)

# Same scatterplot with the natural log applied to both variables inside the
# aesthetic mapping.
ggplot(
  data = gapminder,
  mapping = aes(x = log(gdpPercap), y = log(lifeExp))
) +
  geom_point()

Q 5c): Add color to the scatterplot to see each continent

# Raw-scale scatterplot with points coloured by continent.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point()

# Log-log version of the same plot, also coloured by continent.
ggplot(
  data = gapminder,
  mapping = aes(x = log(gdpPercap), y = log(lifeExp), color = continent)
) +
  geom_point()

Q 5d)

Based on the graph:

* Lower GDP per capita but higher life expectancy: Asia
* Higher GDP per capita and higher life expectancy: Europe

# Overall benchmarks: row count plus the mean GDP per capita and the mean
# life expectancy taken over every row of the data set.
overall_means <- gapminder %>%
  summarise(n = n(), mean_GDP = mean(gdpPercap), mean_Life = mean(lifeExp))
overall_means

# The thresholds below are read from overall_means rather than typed in as
# rounded constants, so they stay exact and follow the data if it changes.

# Higher GDP, Higher LifeExp
gapminder %>%
  filter(
    gdpPercap > overall_means$mean_GDP,
    lifeExp > overall_means$mean_Life
  ) %>%
  count(continent) %>%
  arrange(n)

# Lower GDP, Higher LifeExp
gapminder %>%
  filter(
    gdpPercap < overall_means$mean_GDP,
    lifeExp > overall_means$mean_Life
  ) %>%
  count(continent) %>%
  arrange(n)

  • If we go by the mean GDP per capita of each continent:
  • Lower GDP per capita but higher life expectancy: Americas
  • Higher GDP per capita and higher life expectancy: Oceania
# Per-continent row count, mean GDP per capita and mean life expectancy,
# ordered by mean GDP per capita (ties broken by mean life expectancy).
gapminder %>%
  group_by(continent) %>%
  summarise(
    n = n(),
    mean_GDP = mean(gdpPercap),
    mean_Life = mean(lifeExp)
  ) %>%
  arrange(mean_GDP, mean_Life)