# Per-column summary (min, quartiles, median, mean, max) of the `cars` data.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Setup: attach the packages used below.
# The former `remove(list=ls())` call was dropped on purpose: it deletes the
# caller's workspace objects when the script is sourced, yet does not give a
# clean session (attached packages and options persist). Restart R instead.
library(tidyverse)
library(gapminder)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Number of rows recorded for each survey year.
table(gapminder[["year"]])
##
## 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
## 142 142 142 142 142 142 142 142 142 142 142 142
# Number of rows recorded for each country.
table(gapminder[["country"]])
##
## Afghanistan Albania Algeria
## 12 12 12
## Angola Argentina Australia
## 12 12 12
## Austria Bahrain Bangladesh
## 12 12 12
## Belgium Benin Bolivia
## 12 12 12
## Bosnia and Herzegovina Botswana Brazil
## 12 12 12
## Bulgaria Burkina Faso Burundi
## 12 12 12
## Cambodia Cameroon Canada
## 12 12 12
## Central African Republic Chad Chile
## 12 12 12
## China Colombia Comoros
## 12 12 12
## Congo, Dem. Rep. Congo, Rep. Costa Rica
## 12 12 12
## Cote d'Ivoire Croatia Cuba
## 12 12 12
## Czech Republic Denmark Djibouti
## 12 12 12
## Dominican Republic Ecuador Egypt
## 12 12 12
## El Salvador Equatorial Guinea Eritrea
## 12 12 12
## Ethiopia Finland France
## 12 12 12
## Gabon Gambia Germany
## 12 12 12
## Ghana Greece Guatemala
## 12 12 12
## Guinea Guinea-Bissau Haiti
## 12 12 12
## Honduras Hong Kong, China Hungary
## 12 12 12
## Iceland India Indonesia
## 12 12 12
## Iran Iraq Ireland
## 12 12 12
## Israel Italy Jamaica
## 12 12 12
## Japan Jordan Kenya
## 12 12 12
## Korea, Dem. Rep. Korea, Rep. Kuwait
## 12 12 12
## Lebanon Lesotho Liberia
## 12 12 12
## Libya Madagascar Malawi
## 12 12 12
## Malaysia Mali Mauritania
## 12 12 12
## Mauritius Mexico Mongolia
## 12 12 12
## Montenegro Morocco Mozambique
## 12 12 12
## Myanmar Namibia Nepal
## 12 12 12
## Netherlands New Zealand Nicaragua
## 12 12 12
## Niger Nigeria Norway
## 12 12 12
## Oman Pakistan Panama
## 12 12 12
## Paraguay Peru Philippines
## 12 12 12
## Poland Portugal Puerto Rico
## 12 12 12
## Reunion Romania Rwanda
## 12 12 12
## Sao Tome and Principe Saudi Arabia Senegal
## 12 12 12
## Serbia Sierra Leone Singapore
## 12 12 12
## Slovak Republic Slovenia Somalia
## 12 12 12
## South Africa Spain Sri Lanka
## 12 12 12
## Sudan Swaziland Sweden
## 12 12 12
## Switzerland Syria Taiwan
## 12 12 12
## Tanzania Thailand Togo
## 12 12 12
## Trinidad and Tobago Tunisia Turkey
## 12 12 12
## Uganda United Kingdom United States
## 12 12 12
## Uruguay Venezuela Vietnam
## 12 12 12
## West Bank and Gaza Yemen, Rep. Zambia
## 12 12 12
## Zimbabwe
## 12
# Number of rows recorded for each continent.
table(gapminder[["continent"]])
##
## Africa Americas Asia Europe Oceania
## 624 300 396 360 24
1 Find the data for 2002
# Keep only the rows observed in 2002.
data_2002 <- filter(gapminder, year == 2002)
2 Find the data for Brazil in 2002. What is the life expectancy?
# Rows for Brazil in 2002; comma-separated filter() conditions are ANDed.
brazil_2002 <- filter(gapminder, country == "Brazil", year == 2002)
# Print the life expectancy of the selected row(s).
brazil_2002[["lifeExp"]]
## [1] 71.006
3 Find which country has the lowest lifeExp.
# Sort all rows by ascending life expectancy and keep the first one.
lowest_lifeExp <- gapminder %>%
  arrange(lifeExp) %>%
  slice_head(n = 1)
# Print the country of that row.
lowest_lifeExp[["country"]]
## [1] Rwanda
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
4 Find which country has the lowest lifeExp in 2002.
# Same lookup as above, restricted to the rows in `data_2002`.
lowest_lifeExp_2002 <- data_2002 %>%
  arrange(lifeExp) %>%
  slice_head(n = 1)
# Print the country of that row.
lowest_lifeExp_2002[["country"]]
## [1] Zambia
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
5 Find the lifeExp in Japan in 2002.
# Rows for Japan in 2002; comma-separated filter() conditions are ANDed.
japan_2002 <- filter(gapminder, country == "Japan", year == 2002)
# Print the life expectancy of the selected row(s).
japan_2002[["lifeExp"]]
## [1] 82
6 Find the countries whose lifeExp is higher than 80 in 2002. How many are there?
# Rows of `data_2002` whose life expectancy is strictly above 80.
countries_above_80_2002 <- filter(data_2002, lifeExp > 80)
# Number of matching rows.
nrow(countries_above_80_2002)
## [1] 7
7 Find the lifeExp in Europe across the years. Which year has the highest lifeExp in Europe?
# Mean life expectancy over the European rows, one value per year.
europe_lifeExp <- gapminder %>%
  filter(continent == "Europe") %>%
  group_by(year) %>%
  summarise(mean_lifeExp = mean(lifeExp))
# Row with the largest yearly mean (first after a descending sort).
max_lifeExp_year <- europe_lifeExp %>%
  arrange(desc(mean_lifeExp)) %>%
  slice_head(n = 1)
# Print the year of that row.
max_lifeExp_year[["year"]]
## [1] 2007
8 Define gdp as gdpPercap * pop / 10000. Find the gdp of Europe in 2002.
# European rows for 2002 with an added per-row `gdp` column,
# defined as gdpPercap * pop / 10000.
europe_2002 <- gapminder %>%
  filter(continent == "Europe", year == 2002) %>%
  mutate(gdp = gdpPercap * pop / 10000)
9 Which country has the highest gdp in Europe in 2002?
# Sort `europe_2002` by descending gdp and keep the first row.
highest_gdp_europe_2002 <- europe_2002 %>%
  arrange(desc(gdp)) %>%
  slice_head(n = 1)
# Print the country of that row.
highest_gdp_europe_2002[["country"]]
## [1] Germany
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
10 Save the data in 2002. Call it data_2002.
# (Re)create `data_2002`: the rows observed in 2002.
data_2002 <- filter(gapminder, year == 2002)
11 Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp.
# Scatter plot: gdpPercap on x, lifeExp on y, using `data_2002`.
data_2002 %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
12 Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp by continent (color)
# Same scatter plot, with points coloured by continent.
data_2002 %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point()
13 Use data_2002. Use ggplot. Plot gdpPercap vs lifeExp by continent and pop (color and size)
# Same scatter plot, coloured by continent and sized by population.
data_2002 %>%
  ggplot(
    aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
  ) +
  geom_point()
14 Get data for Europe in 2002. Call it data_Europe
# European rows for 2002; comma-separated filter() conditions are ANDed.
data_Europe <- filter(gapminder, continent == "Europe", year == 2002)
15 Use data_Europe. Use ggplot. Plot pop vs gdpPercap.
# Scatter plot: pop on x, gdpPercap on y, using `data_Europe`.
data_Europe %>%
  ggplot(aes(x = pop, y = gdpPercap)) +
  geom_point()
16 Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10
# Same scatter plot with the population (x) axis on a log10 scale.
data_Europe %>%
  ggplot(aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10()
17 Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10. Color the data by country.
# Log10-population scatter plot with points coloured by country.
data_Europe %>%
  ggplot(aes(x = pop, y = gdpPercap, color = country)) +
  geom_point() +
  scale_x_log10()
18 Use data_Europe. Use ggplot. Plot pop vs gdpPercap. Scale population by log10. Color the data by country and size it by lifeExp.
# Log10-population scatter plot, coloured by country and sized by lifeExp.
data_Europe %>%
  ggplot(
    aes(x = pop, y = gdpPercap, color = country, size = lifeExp)
  ) +
  geom_point() +
  scale_x_log10()
19 Save the data for Americas. Call it data_Americas.
# All rows (every year) whose continent is "Americas".
data_Americas <- filter(gapminder, continent == "Americas")
20 Use data_Americas. Plot year vs gdpPercap. Scale gdpPercap by log10. Color the data by country.
# Line plot of gdpPercap (log10 y axis) against year, coloured by country.
data_Americas %>%
  ggplot(aes(x = year, y = gdpPercap, color = country)) +
  geom_line() +
  scale_y_log10()