# Saayed Alam
# December 12, 2018
library(tidyverse)
#loading a dataset
library(gapminder)
# Print the gapminder tibble: 1,704 rows and 6 variables
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,694 more rows
# filter(): keep only the rows that satisfy a condition
# (here the observations recorded in 1987), then show the first few rows
gapminder %>%
  filter(year == 1987) %>%
  head()
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1987 40.8 13867957 852.
## 2 Albania Europe 1987 72 3075321 3739.
## 3 Algeria Africa 1987 65.8 23254956 5681.
## 4 Angola Africa 1987 39.9 7874230 2430.
## 5 Argentina Americas 1987 70.8 31620918 9140.
## 6 Australia Oceania 1987 76.3 16257249 21889.
# summarise(): collapse a column into a single numerical summary
# (maximum life expectancy among the Bangladesh rows for 1987)
gapminder %>%
  filter(year == 1987, country == "Bangladesh") %>%
  summarise(Max_Life_Expectancy = max(lifeExp))
## # A tibble: 1 x 1
## Max_Life_Expectancy
## <dbl>
## 1 52.8
# group_by() + summarise(): compute the same summary once per level of a
# categorical column (maximum 1987 life expectancy within each continent)
gapminder %>%
  filter(year == 1987) %>%
  group_by(continent) %>%
  summarise(Max_Life_Expectancy = max(lifeExp))
## # A tibble: 5 x 2
## continent Max_Life_Expectancy
## <fct> <dbl>
## 1 Africa 71.9
## 2 Americas 76.9
## 3 Asia 78.7
## 4 Europe 77.4
## 5 Oceania 76.3
# mutate(): add a new column derived from existing ones
# (gdp is the product of the pop and gdpPercap columns)
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  head()
## # A tibble: 6 x 7
## country continent year lifeExp pop gdpPercap gdp
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779. 6567086330.
## 2 Afghanistan Asia 1957 30.3 9240934 821. 7585448670.
## 3 Afghanistan Asia 1962 32.0 10267083 853. 8758855797.
## 4 Afghanistan Asia 1967 34.0 11537966 836. 9648014150.
## 5 Afghanistan Asia 1972 36.1 13079460 740. 9678553274.
## 6 Afghanistan Asia 1977 38.4 14880372 786. 11697659231.
# arrange(): reorder rows by the values of one or more columns
# (rows before 2000, sorted by life expectancy from highest to lowest)
gapminder %>%
  filter(year < 2000) %>%
  arrange(desc(lifeExp)) %>%
  head()
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Japan Asia 1997 80.7 125956499 28817.
## 2 Hong Kong, China Asia 1997 80 6495918 28378.
## 3 Sweden Europe 1997 79.4 8897619 25267.
## 4 Switzerland Europe 1997 79.4 7193761 32135.
## 5 Japan Asia 1992 79.4 124329269 26825.
## 6 Iceland Europe 1997 79.0 271192 28061.