library(tidyverse)
library(gapminder)
HDS 5.4.1
Begin by loading the tidyverse and gapminder packages in the code chunk above and adding your name as the author.
The gapminder data frame contained in the gapminder package gives data on life expectancy, GDP per capita, and population by country. We would like to subset the data frame by columns using the select() function and reorder its columns using the relocate() function.
Selecting Variables from the gapminder Data
Let’s start by creating a data frame that keeps the variables country, lifeExp, and pop. Modify this code by filling in the ______ to do so:
# Keep only the country, lifeExp, and pop columns.
gapminder |>
  select(country, lifeExp, pop)
# A tibble: 1,704 × 3
country lifeExp pop
<fct> <dbl> <int>
1 Afghanistan 28.8 8425333
2 Afghanistan 30.3 9240934
3 Afghanistan 32.0 10267083
4 Afghanistan 34.0 11537966
5 Afghanistan 36.1 13079460
6 Afghanistan 38.4 14880372
7 Afghanistan 39.9 12881816
8 Afghanistan 40.8 13867957
9 Afghanistan 41.7 16317921
10 Afghanistan 41.8 22227415
# ℹ 1,694 more rows
Keep all of the variables from year to gdpPercap:
# `a:b` selects the contiguous run of columns from year through gdpPercap.
gapminder |>
  select(year:gdpPercap)
# A tibble: 1,704 × 4
year lifeExp pop gdpPercap
<int> <dbl> <int> <dbl>
1 1952 28.8 8425333 779.
2 1957 30.3 9240934 821.
3 1962 32.0 10267083 853.
4 1967 34.0 11537966 836.
5 1972 36.1 13079460 740.
6 1977 38.4 14880372 786.
7 1982 39.9 12881816 978.
8 1987 40.8 13867957 852.
9 1992 41.7 16317921 649.
10 1997 41.8 22227415 635.
# ℹ 1,694 more rows
Keep all of the variables except year:
# A leading minus drops the named column and keeps every other one.
gapminder |>
  select(-year)
# A tibble: 1,704 × 5
country continent lifeExp pop gdpPercap
<fct> <fct> <dbl> <int> <dbl>
1 Afghanistan Asia 28.8 8425333 779.
2 Afghanistan Asia 30.3 9240934 821.
3 Afghanistan Asia 32.0 10267083 853.
4 Afghanistan Asia 34.0 11537966 836.
5 Afghanistan Asia 36.1 13079460 740.
6 Afghanistan Asia 38.4 14880372 786.
7 Afghanistan Asia 39.9 12881816 978.
8 Afghanistan Asia 40.8 13867957 852.
9 Afghanistan Asia 41.7 16317921 649.
10 Afghanistan Asia 41.8 22227415 635.
# ℹ 1,694 more rows
Keep all of the variables except continent, year, and pop:
# `!c(...)` takes the complement: every column except the three listed.
gapminder |>
  select(!c(continent, year, pop))
# A tibble: 1,704 × 3
country lifeExp gdpPercap
<fct> <dbl> <dbl>
1 Afghanistan 28.8 779.
2 Afghanistan 30.3 821.
3 Afghanistan 32.0 853.
4 Afghanistan 34.0 836.
5 Afghanistan 36.1 740.
6 Afghanistan 38.4 786.
7 Afghanistan 39.9 978.
8 Afghanistan 40.8 852.
9 Afghanistan 41.7 649.
10 Afghanistan 41.8 635.
# ℹ 1,694 more rows
Keep all of the variables that start with the letters “co”:
# Keep the columns whose names begin with "co".
gapminder |>
  select(starts_with("co"))
# A tibble: 1,704 × 2
country continent
<fct> <fct>
1 Afghanistan Asia
2 Afghanistan Asia
3 Afghanistan Asia
4 Afghanistan Asia
5 Afghanistan Asia
6 Afghanistan Asia
7 Afghanistan Asia
8 Afghanistan Asia
9 Afghanistan Asia
10 Afghanistan Asia
# ℹ 1,694 more rows
Keep all of the variables that end with the letter “p”:
# Keep the columns whose names end in "p".
gapminder |>
  select(ends_with("p"))
# A tibble: 1,704 × 3
lifeExp pop gdpPercap
<dbl> <int> <dbl>
1 28.8 8425333 779.
2 30.3 9240934 821.
3 32.0 10267083 853.
4 34.0 11537966 836.
5 36.1 13079460 740.
6 38.4 14880372 786.
7 39.9 12881816 978.
8 40.8 13867957 852.
9 41.7 16317921 649.
10 41.8 22227415 635.
# ℹ 1,694 more rows
Keep all of the variables that contain the letter “e”:
# Keep the columns whose names contain an "e" anywhere.
gapminder |>
  select(contains("e"))
# A tibble: 1,704 × 4
continent year lifeExp gdpPercap
<fct> <int> <dbl> <dbl>
1 Asia 1952 28.8 779.
2 Asia 1957 30.3 821.
3 Asia 1962 32.0 853.
4 Asia 1967 34.0 836.
5 Asia 1972 36.1 740.
6 Asia 1977 38.4 786.
7 Asia 1982 39.9 978.
8 Asia 1987 40.8 852.
9 Asia 1992 41.7 649.
10 Asia 1997 41.8 635.
# ℹ 1,694 more rows
Create a data frame with only data from 1952 and only including the country, lifeExp, pop, and gdpPercap variables:
# Restrict the rows to 1952 first (year is dropped afterwards), then keep the
# four requested columns.
gapminder |>
  filter(year == 1952) |>
  select(country, lifeExp, pop, gdpPercap)
# A tibble: 142 × 4
country lifeExp pop gdpPercap
<fct> <dbl> <int> <dbl>
1 Afghanistan 28.8 8425333 779.
2 Albania 55.2 1282697 1601.
3 Algeria 43.1 9279525 2449.
4 Angola 30.0 4232095 3521.
5 Argentina 62.5 17876956 5911.
6 Australia 69.1 8691212 10040.
7 Austria 66.8 6927772 6137.
8 Bahrain 50.9 120447 9867.
9 Bangladesh 37.5 46886859 684.
10 Belgium 68 8730405 8343.
# ℹ 132 more rows
Relocating Variables in the gapminder Data
Move the year variable to the front of the data frame:
# With no .before/.after argument, relocate() moves the column to the front.
gapminder |>
  relocate(year)
# A tibble: 1,704 × 6
year country continent lifeExp pop gdpPercap
<int> <fct> <fct> <dbl> <int> <dbl>
1 1952 Afghanistan Asia 28.8 8425333 779.
2 1957 Afghanistan Asia 30.3 9240934 821.
3 1962 Afghanistan Asia 32.0 10267083 853.
4 1967 Afghanistan Asia 34.0 11537966 836.
5 1972 Afghanistan Asia 36.1 13079460 740.
6 1977 Afghanistan Asia 38.4 14880372 786.
7 1982 Afghanistan Asia 39.9 12881816 978.
8 1987 Afghanistan Asia 40.8 13867957 852.
9 1992 Afghanistan Asia 41.7 16317921 649.
10 1997 Afghanistan Asia 41.8 22227415 635.
# ℹ 1,694 more rows
Move the lifeExp variable after gdpPercap:
# Move lifeExp so that it sits immediately after gdpPercap.
gapminder |>
  relocate(lifeExp, .after = gdpPercap)
# A tibble: 1,704 × 6
country continent year pop gdpPercap lifeExp
<fct> <fct> <int> <int> <dbl> <dbl>
1 Afghanistan Asia 1952 8425333 779. 28.8
2 Afghanistan Asia 1957 9240934 821. 30.3
3 Afghanistan Asia 1962 10267083 853. 32.0
4 Afghanistan Asia 1967 11537966 836. 34.0
5 Afghanistan Asia 1972 13079460 740. 36.1
6 Afghanistan Asia 1977 14880372 786. 38.4
7 Afghanistan Asia 1982 12881816 978. 39.9
8 Afghanistan Asia 1987 13867957 852. 40.8
9 Afghanistan Asia 1992 16317921 649. 41.7
10 Afghanistan Asia 1997 22227415 635. 41.8
# ℹ 1,694 more rows
Create a data frame with only data from Europe, include only the country, year, and lifeExp variables, and move the lifeExp variable to before year:
# European rows only; keep three columns, then place lifeExp ahead of year.
# The filter must run before select() because continent is not kept.
gapminder |>
  filter(continent == "Europe") |>
  select(country, year, lifeExp) |>
  relocate(lifeExp, .before = year)
# A tibble: 360 × 3
country lifeExp year
<fct> <dbl> <int>
1 Albania 55.2 1952
2 Albania 59.3 1957
3 Albania 64.8 1962
4 Albania 66.2 1967
5 Albania 67.7 1972
6 Albania 68.9 1977
7 Albania 70.4 1982
8 Albania 72 1987
9 Albania 71.6 1992
10 Albania 73.0 1997
# ℹ 350 more rows