library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(haven)
library(ggplot2)
# Load the gapminder CSV into `gapminder`. The path is absolute, so it only
# resolves on a machine where that exact location exists.
gapminder <- read_csv(
  file = "/Volumes/NetStorage/Yunis File/Class/Fall' 21/DATA 333/midterm/gapminder.csv"
)
## Rows: 1704 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): country, continent
## dbl (4): year, lifeExp, pop, gdpPercap
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the loaded data to preview its leading rows and column types.
gapminder
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # … with 1,694 more rows
# Show the first six rows (the default row count for head()).
gapminder %>%
  head(n = 6)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
# Per-column summary of the data set.
gapminder %>%
  summary()
## country continent year lifeExp
## Length:1704 Length:1704 Min. :1952 Min. :23.60
## Class :character Class :character 1st Qu.:1966 1st Qu.:48.20
## Mode :character Mode :character Median :1980 Median :60.71
## Mean :1980 Mean :59.47
## 3rd Qu.:1993 3rd Qu.:70.85
## Max. :2007 Max. :82.60
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
# List the column names of the data set.
colnames(gapminder)
## [1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
# What is the total GDP of each country in 2002? #
# GDP is equal to the product of total population and GDP per capita #
# Restrict to the 2002 observations and add each country's total GDP
# (population multiplied by GDP per capita) as the column `gdp_2002`.
gapminder %>%
  dplyr::filter(.data$year == 2002) %>%
  dplyr::mutate(gdp_2002 = .data$pop * .data$gdpPercap)
## # A tibble: 142 × 7
## country continent year lifeExp pop gdpPercap gdp_2002
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 2002 42.1 25268405 727. 18363410424.
## 2 Albania Europe 2002 75.7 3508512 4604. 16153932130.
## 3 Algeria Africa 2002 71.0 31287142 5288. 165447670333.
## 4 Angola Africa 2002 41.0 10866106 2773. 30134833901.
## 5 Argentina Americas 2002 74.3 38331121 8798. 337223430800.
## 6 Australia Oceania 2002 80.4 19546792 30688. 599847158654.
## 7 Austria Europe 2002 79.0 8148312 32418. 264148781752.
## 8 Bahrain Asia 2002 74.8 656397 23404. 15362026094.
## 9 Bangladesh Asia 2002 62.0 135656790 1136. 154159077921.
## 10 Belgium Europe 2002 78.3 10311970 30486. 314369518653.
## # … with 132 more rows
# Which country had the largest total GDP in 2002? #
# In 2002, the United States had the largest total GDP at 1.124728e+13 #
# Compute total GDP for the 2002 rows and order them from largest to smallest,
# so the first row is the country with the largest total GDP.
gapminder %>%
  dplyr::filter(.data$year == 2002) %>%
  dplyr::mutate(gdp_2002 = .data$pop * .data$gdpPercap) %>%
  dplyr::arrange(dplyr::desc(.data$gdp_2002))
## # A tibble: 142 × 7
## country continent year lifeExp pop gdpPercap gdp_2002
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States Americas 2002 77.3 287675526 39097. 1.12e13
## 2 China Asia 2002 72.0 1280400000 3119. 3.99e12
## 3 Japan Asia 2002 82 127065841 28605. 3.63e12
## 4 Germany Europe 2002 78.7 82350671 30036. 2.47e12
## 5 India Asia 2002 62.9 1034172547 1747. 1.81e12
## 6 United Kingdom Europe 2002 78.5 59912431 29479. 1.77e12
## 7 France Europe 2002 79.6 59925035 28926. 1.73e12
## 8 Italy Europe 2002 80.2 57926999 27968. 1.62e12
## 9 Brazil Americas 2002 71.0 179914212 8131. 1.46e12
## 10 Mexico Americas 2002 74.9 102479927 10742. 1.10e12
## # … with 132 more rows
# What is the total world population in 2002?
# What is the average lifeExp worldwide in 2002? #
# The total world population in 2002 is 5886977579 #
# Add up `pop` over every 2002 row to get the world total for that year.
gapminder %>%
  dplyr::filter(.data$year == 2002) %>%
  dplyr::summarize(world_pop_total = sum(.data$pop))
## # A tibble: 1 × 1
## world_pop_total
## <dbl>
## 1 5886977579
# The average life expectancy worldwide in 2002 is 65.69492 #
# Average `lifeExp` over the 2002 rows. This is the plain mean across rows
# (one value per country-year), not weighted by population.
gapminder %>%
  dplyr::filter(.data$year == 2002) %>%
  dplyr::summarize(world_lifeExp_mean = mean(.data$lifeExp))
## # A tibble: 1 × 1
## world_lifeExp_mean
## <dbl>
## 1 65.7
# What is the total world population in each year? #
# What is the average life expectancy worldwide in each year? #
# One row per year: the mean of `lifeExp` and the sum of `pop` across all
# rows recorded for that year.
gapminder %>%
  dplyr::group_by(.data$year) %>%
  dplyr::summarise(
    life_mean = mean(.data$lifeExp),
    sum_pop = sum(.data$pop)
  )
## # A tibble: 12 × 3
## year life_mean sum_pop
## <dbl> <dbl> <dbl>
## 1 1952 49.1 2406957150
## 2 1957 51.5 2664404580
## 3 1962 53.6 2899782974
## 4 1967 55.7 3217478384
## 5 1972 57.6 3576977158
## 6 1977 59.6 3930045807
## 7 1982 61.5 4289436840
## 8 1987 63.2 4691477418
## 9 1992 64.2 5110710260
## 10 1997 65.0 5515204472
## 11 2002 65.7 5886977579
## 12 2007 67.0 6251013179
# total world population in each year #
# Total world population per year: sum `pop` within each year (missing values
# are ignored), then order the rows from the smallest to the largest total.
# The pipeline starts from the data frame itself; a mutate() call that defines
# no columns returns its input unchanged, so it is not needed here.
gapminder %>%
  group_by(year) %>%
  summarize(world_pop_total = sum(pop, na.rm = TRUE)) %>%
  arrange(world_pop_total)
## # A tibble: 12 × 2
## year world_pop_total
## <dbl> <dbl>
## 1 1952 2406957150
## 2 1957 2664404580
## 3 1962 2899782974
## 4 1967 3217478384
## 5 1972 3576977158
## 6 1977 3930045807
## 7 1982 4289436840
## 8 1987 4691477418
## 9 1992 5110710260
## 10 1997 5515204472
## 11 2002 5886977579
## 12 2007 6251013179
# average life expectancy worldwide in each year #
# Mean life expectancy per year: average `lifeExp` within each year (missing
# values are ignored), then order the rows from the lowest to the highest mean.
# The pipeline starts from the data frame itself; a mutate() call that defines
# no columns returns its input unchanged, so it is not needed here.
gapminder %>%
  group_by(year) %>%
  summarize(world_lifeExp_mean = mean(lifeExp, na.rm = TRUE)) %>%
  arrange(world_lifeExp_mean)
## # A tibble: 12 × 2
## year world_lifeExp_mean
## <dbl> <dbl>
## 1 1952 49.1
## 2 1957 51.5
## 3 1962 53.6
## 4 1967 55.7
## 5 1972 57.6
## 6 1977 59.6
## 7 1982 61.5
## 8 1987 63.2
## 9 1992 64.2
## 10 1997 65.0
## 11 2002 65.7
## 12 2007 67.0
# Produce a table that shows the total population and the average life expectancy in each continent in every year. #
# One row per continent/year pair: the mean of `lifeExp` and the sum of `pop`.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs are not
# silently applied per continent, and it silences the summarise() message
# about grouped output.
gapminder %>%
  group_by(continent, year) %>%
  summarize(
    life_mean = mean(lifeExp),
    sum_pop = sum(pop),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'continent'. You can override using the `.groups` argument.
## # A tibble: 60 × 4
## # Groups: continent [5]
## continent year life_mean sum_pop
## <chr> <dbl> <dbl> <dbl>
## 1 Africa 1952 39.1 237640501
## 2 Africa 1957 41.3 264837738
## 3 Africa 1962 43.3 296516865
## 4 Africa 1967 45.3 335289489
## 5 Africa 1972 47.5 379879541
## 6 Africa 1977 49.6 433061021
## 7 Africa 1982 51.6 499348587
## 8 Africa 1987 53.3 574834110
## 9 Africa 1992 53.6 659081517
## 10 Africa 1997 53.6 743832984
## # … with 50 more rows
# One row per continent/year pair: the mean of `lifeExp` and the sum of `pop`,
# sorted by continent and then year.
# - The pipeline starts from the data frame itself; mutate() with no column
#   definitions returns its input unchanged.
# - `.groups = "drop"` returns an ungrouped tibble and silences the
#   summarise() message about grouped output.
# - arrange() is given explicit sort keys; called with none it does not
#   reorder anything.
gapminder %>%
  group_by(continent, year) %>%
  summarize(
    life_mean = mean(lifeExp),
    sum_pop = sum(pop),
    .groups = "drop"
  ) %>%
  arrange(continent, year)
## `summarise()` has grouped output by 'continent'. You can override using the `.groups` argument.
## # A tibble: 60 × 4
## # Groups: continent [5]
## continent year life_mean sum_pop
## <chr> <dbl> <dbl> <dbl>
## 1 Africa 1952 39.1 237640501
## 2 Africa 1957 41.3 264837738
## 3 Africa 1962 43.3 296516865
## 4 Africa 1967 45.3 335289489
## 5 Africa 1972 47.5 379879541
## 6 Africa 1977 49.6 433061021
## 7 Africa 1982 51.6 499348587
## 8 Africa 1987 53.3 574834110
## 9 Africa 1992 53.6 659081517
## 10 Africa 1997 53.6 743832984
## # … with 50 more rows
# What is the longest life expectancy in each year?
# For every year, report the highest `lifeExp` value found among its rows.
gapminder %>%
  dplyr::group_by(.data$year) %>%
  dplyr::summarize(max_life = max(.data$lifeExp))
## # A tibble: 12 × 2
## year max_life
## <dbl> <dbl>
## 1 1952 72.7
## 2 1957 73.5
## 3 1962 73.7
## 4 1967 74.2
## 5 1972 74.7
## 6 1977 76.1
## 7 1982 77.1
## 8 1987 78.7
## 9 1992 79.4
## 10 1997 80.7
## 11 2002 82
## 12 2007 82.6
# What is the longest life expectancy in each continent?
# For every continent, report the highest `lifeExp` value found among its rows
# (taken over all years).
gapminder %>%
  dplyr::group_by(.data$continent) %>%
  dplyr::summarize(max_life = max(.data$lifeExp))
## # A tibble: 5 × 2
## continent max_life
## <chr> <dbl>
## 1 Africa 76.4
## 2 Americas 80.7
## 3 Asia 82.6
## 4 Europe 81.8
## 5 Oceania 81.2
# Which country had the smallest GDP in the most recent year?
# Smallest and largest year in the data; the second value is the most recent.
range(gapminder[["year"]])
## [1] 1952 2007
# Sao Tome and Principe had the smallest GDP in 2007 at 3.190141e+08 #
# Compute total GDP (population times GDP per capita) for the 2007 rows and
# order them ascending, so the first row is the country with the smallest GDP.
gapminder %>%
  dplyr::filter(.data$year == 2007) %>%
  dplyr::mutate(gdp_2007 = .data$pop * .data$gdpPercap) %>%
  dplyr::arrange(.data$gdp_2007)
## # A tibble: 142 × 7
## country continent year lifeExp pop gdpPercap gdp_2007
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Sao Tome and Principe Africa 2007 65.5 199579 1598. 3.19e8
## 2 Comoros Africa 2007 65.2 710960 986. 7.01e8
## 3 Guinea-Bissau Africa 2007 46.4 1472041 579. 8.53e8
## 4 Djibouti Africa 2007 54.8 496374 2082. 1.03e9
## 5 Gambia Africa 2007 59.4 1688359 753. 1.27e9
## 6 Liberia Africa 2007 45.7 3193942 415. 1.32e9
## 7 Central African Republic Africa 2007 44.7 4369038 706. 3.08e9
## 8 Eritrea Africa 2007 58.0 4906585 641. 3.15e9
## 9 Lesotho Africa 2007 42.6 2012649 1569. 3.16e9
## 10 Burundi Africa 2007 49.6 8390505 430. 3.61e9
## # … with 132 more rows