library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(haven)
library(ggplot2)

# Load the gapminder data set from CSV into a tibble.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this volume is mounted -- consider a project-relative path.
gapminder <- readr::read_csv(
  file = "/Volumes/NetStorage/Yunis File/Class/Fall' 21/DATA 333/midterm/gapminder.csv"
)
## Rows: 1704 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): country, continent
## dbl (4): year, lifeExp, pop, gdpPercap
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to inspect the loaded data.
gapminder
## # A tibble: 1,704 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # … with 1,694 more rows
# Preview the first six rows of the data.
gapminder %>%
  head(n = 6)
## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <chr>       <chr>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
# Per-column summary statistics.
gapminder %>%
  summary()
##    country           continent              year         lifeExp     
##  Length:1704        Length:1704        Min.   :1952   Min.   :23.60  
##  Class :character   Class :character   1st Qu.:1966   1st Qu.:48.20  
##  Mode  :character   Mode  :character   Median :1980   Median :60.71  
##                                        Mean   :1980   Mean   :59.47  
##                                        3rd Qu.:1993   3rd Qu.:70.85  
##                                        Max.   :2007   Max.   :82.60  
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1
# List the column names.
colnames(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
# Question: what was the total GDP of each country in 2002?
# Total GDP is the product of total population and GDP per capita.

filter(gapminder, year == 2002) %>%
  mutate(gdp_2002 = gdpPercap * pop)
## # A tibble: 142 × 7
##    country     continent  year lifeExp       pop gdpPercap      gdp_2002
##    <chr>       <chr>     <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
##  1 Afghanistan Asia       2002    42.1  25268405      727.  18363410424.
##  2 Albania     Europe     2002    75.7   3508512     4604.  16153932130.
##  3 Algeria     Africa     2002    71.0  31287142     5288. 165447670333.
##  4 Angola      Africa     2002    41.0  10866106     2773.  30134833901.
##  5 Argentina   Americas   2002    74.3  38331121     8798. 337223430800.
##  6 Australia   Oceania    2002    80.4  19546792    30688. 599847158654.
##  7 Austria     Europe     2002    79.0   8148312    32418. 264148781752.
##  8 Bahrain     Asia       2002    74.8    656397    23404.  15362026094.
##  9 Bangladesh  Asia       2002    62.0 135656790     1136. 154159077921.
## 10 Belgium     Europe     2002    78.3  10311970    30486. 314369518653.
## # … with 132 more rows
# Question: which country had the largest total GDP in 2002?
#
# Answer: the United States, with a total GDP of 1.124728e+13.
# Sorting in descending order of total GDP puts the largest economy first.
filter(gapminder, year == 2002) %>%
  mutate(gdp_2002 = gdpPercap * pop) %>%
  arrange(desc(gdp_2002))
## # A tibble: 142 × 7
##    country        continent  year lifeExp        pop gdpPercap gdp_2002
##    <chr>          <chr>     <dbl>   <dbl>      <dbl>     <dbl>    <dbl>
##  1 United States  Americas   2002    77.3  287675526    39097.  1.12e13
##  2 China          Asia       2002    72.0 1280400000     3119.  3.99e12
##  3 Japan          Asia       2002    82    127065841    28605.  3.63e12
##  4 Germany        Europe     2002    78.7   82350671    30036.  2.47e12
##  5 India          Asia       2002    62.9 1034172547     1747.  1.81e12
##  6 United Kingdom Europe     2002    78.5   59912431    29479.  1.77e12
##  7 France         Europe     2002    79.6   59925035    28926.  1.73e12
##  8 Italy          Europe     2002    80.2   57926999    27968.  1.62e12
##  9 Brazil         Americas   2002    71.0  179914212     8131.  1.46e12
## 10 Mexico         Americas   2002    74.9  102479927    10742.  1.10e12
## # … with 132 more rows
# Questions: what was the total world population in 2002, and what was the
# average life expectancy worldwide in 2002?

# Answer: the total world population in 2002 was 5886977579.
filter(gapminder, year == 2002) %>%
  summarize(world_pop_total = sum(pop))
## # A tibble: 1 × 1
##   world_pop_total
##             <dbl>
## 1      5886977579
# Answer: the average life expectancy worldwide in 2002 was 65.69492.
# This is the plain mean over the 2002 rows, not weighted by population.
filter(gapminder, year == 2002) %>%
  summarize(world_lifeExp_mean = mean(lifeExp))
## # A tibble: 1 × 1
##   world_lifeExp_mean
##                <dbl>
## 1               65.7
# Questions: what was the total world population in each year, and what was
# the average life expectancy worldwide in each year?

group_by(gapminder, year) %>%
  summarise(
    life_mean = mean(lifeExp),
    sum_pop = sum(pop)
  )
## # A tibble: 12 × 3
##     year life_mean    sum_pop
##    <dbl>     <dbl>      <dbl>
##  1  1952      49.1 2406957150
##  2  1957      51.5 2664404580
##  3  1962      53.6 2899782974
##  4  1967      55.7 3217478384
##  5  1972      57.6 3576977158
##  6  1977      59.6 3930045807
##  7  1982      61.5 4289436840
##  8  1987      63.2 4691477418
##  9  1992      64.2 5110710260
## 10  1997      65.0 5515204472
## 11  2002      65.7 5886977579
## 12  2007      67.0 6251013179
# Total world population in each year, sorted from the smallest total to the
# largest. `na.rm = TRUE` keeps missing population values out of the sum.
# The pipeline starts from the data frame directly: the former leading
# `mutate()` call had no arguments and therefore changed nothing.
gapminder %>%
  group_by(year) %>%
  summarize(world_pop_total = sum(pop, na.rm = TRUE)) %>%
  arrange(world_pop_total)
## # A tibble: 12 × 2
##     year world_pop_total
##    <dbl>           <dbl>
##  1  1952      2406957150
##  2  1957      2664404580
##  3  1962      2899782974
##  4  1967      3217478384
##  5  1972      3576977158
##  6  1977      3930045807
##  7  1982      4289436840
##  8  1987      4691477418
##  9  1992      5110710260
## 10  1997      5515204472
## 11  2002      5886977579
## 12  2007      6251013179
# Average life expectancy worldwide in each year, sorted from the lowest
# average to the highest. `na.rm = TRUE` keeps missing values out of the mean.
# The pipeline starts from the data frame directly: the former leading
# `mutate()` call had no arguments and therefore changed nothing.
gapminder %>%
  group_by(year) %>%
  summarize(world_lifeExp_mean = mean(lifeExp, na.rm = TRUE)) %>%
  arrange(world_lifeExp_mean)
## # A tibble: 12 × 2
##     year world_lifeExp_mean
##    <dbl>              <dbl>
##  1  1952               49.1
##  2  1957               51.5
##  3  1962               53.6
##  4  1967               55.7
##  5  1972               57.6
##  6  1977               59.6
##  7  1982               61.5
##  8  1987               63.2
##  9  1992               64.2
## 10  1997               65.0
## 11  2002               65.7
## 12  2007               67.0
# Produce a table showing the total population and the average life
# expectancy for each continent in every year.

group_by(gapminder, continent, year) %>%
  summarise(
    life_mean = mean(lifeExp),
    sum_pop = sum(pop)
  )
## `summarise()` has grouped output by 'continent'. You can override using the `.groups` argument.
## # A tibble: 60 × 4
## # Groups:   continent [5]
##    continent  year life_mean   sum_pop
##    <chr>     <dbl>     <dbl>     <dbl>
##  1 Africa     1952      39.1 237640501
##  2 Africa     1957      41.3 264837738
##  3 Africa     1962      43.3 296516865
##  4 Africa     1967      45.3 335289489
##  5 Africa     1972      47.5 379879541
##  6 Africa     1977      49.6 433061021
##  7 Africa     1982      51.6 499348587
##  8 Africa     1987      53.3 574834110
##  9 Africa     1992      53.6 659081517
## 10 Africa     1997      53.6 743832984
## # … with 50 more rows
# Continent-by-year table of average life expectancy and total population,
# ordered by continent and then by year.
# The sort keys are spelled out because `arrange()` with no arguments leaves
# the rows as they are; the leading no-op `mutate()` has been dropped as well.
# The result stays grouped by continent, since summarize() peels off only the
# last grouping level.
gapminder %>%
  group_by(continent, year) %>%
  summarize(
    life_mean = mean(lifeExp),
    sum_pop = sum(pop)
  ) %>%
  arrange(continent, year)
## `summarise()` has grouped output by 'continent'. You can override using the `.groups` argument.
## # A tibble: 60 × 4
## # Groups:   continent [5]
##    continent  year life_mean   sum_pop
##    <chr>     <dbl>     <dbl>     <dbl>
##  1 Africa     1952      39.1 237640501
##  2 Africa     1957      41.3 264837738
##  3 Africa     1962      43.3 296516865
##  4 Africa     1967      45.3 335289489
##  5 Africa     1972      47.5 379879541
##  6 Africa     1977      49.6 433061021
##  7 Africa     1982      51.6 499348587
##  8 Africa     1987      53.3 574834110
##  9 Africa     1992      53.6 659081517
## 10 Africa     1997      53.6 743832984
## # … with 50 more rows
# Question: what was the longest life expectancy in each year?
group_by(gapminder, year) %>%
  summarize(max_life = max(lifeExp))
## # A tibble: 12 × 2
##     year max_life
##    <dbl>    <dbl>
##  1  1952     72.7
##  2  1957     73.5
##  3  1962     73.7
##  4  1967     74.2
##  5  1972     74.7
##  6  1977     76.1
##  7  1982     77.1
##  8  1987     78.7
##  9  1992     79.4
## 10  1997     80.7
## 11  2002     82  
## 12  2007     82.6
# Question: what was the longest life expectancy in each continent?
group_by(gapminder, continent) %>%
  summarize(max_life = max(lifeExp))
## # A tibble: 5 × 2
##   continent max_life
##   <chr>        <dbl>
## 1 Africa        76.4
## 2 Americas      80.7
## 3 Asia          82.6
## 4 Europe        81.8
## 5 Oceania       81.2
# Question: which country had the smallest GDP in the most recent year?
# First find the span of years covered; the maximum is the most recent year.
range(gapminder[["year"]])
## [1] 1952 2007
# Answer: Sao Tome and Principe had the smallest total GDP in 2007, at
# 3.190141e+08. Ascending order of total GDP puts the smallest economy first.
filter(gapminder, year == 2007) %>%
  mutate(gdp_2007 = gdpPercap * pop) %>%
  arrange(gdp_2007)
## # A tibble: 142 × 7
##    country                  continent  year lifeExp     pop gdpPercap   gdp_2007
##    <chr>                    <chr>     <dbl>   <dbl>   <dbl>     <dbl>      <dbl>
##  1 Sao Tome and Principe    Africa     2007    65.5  199579     1598.     3.19e8
##  2 Comoros                  Africa     2007    65.2  710960      986.     7.01e8
##  3 Guinea-Bissau            Africa     2007    46.4 1472041      579.     8.53e8
##  4 Djibouti                 Africa     2007    54.8  496374     2082.     1.03e9
##  5 Gambia                   Africa     2007    59.4 1688359      753.     1.27e9
##  6 Liberia                  Africa     2007    45.7 3193942      415.     1.32e9
##  7 Central African Republic Africa     2007    44.7 4369038      706.     3.08e9
##  8 Eritrea                  Africa     2007    58.0 4906585      641.     3.15e9
##  9 Lesotho                  Africa     2007    42.6 2012649     1569.     3.16e9
## 10 Burundi                  Africa     2007    49.6 8390505      430.     3.61e9
## # … with 132 more rows