Load packages

library(tidyverse)

Dataset

Import the gapminder dataset, in one of two ways:

  1. Use library(gapminder) to load the gapminder package, OR
  2. Use read_csv("filename.csv") to read the csv file
# IMPORTANT: the path is relative, so the CSV file must sit in the working
# directory.
# NOTE(review): the output printed below shows <fct>/<int> columns, whereas
# read_csv() by default yields <chr>/<dbl> -- confirm which import route
# produced that output.
df1 <- read_csv(file = "gapminder-data.csv")

Inspect the data

# Show the first six rows (the default for head(), spelled out here).
head(df1, n = 6)
## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
# Compact column-wise view: row/column counts, then one line per column with
# its type and leading values.
glimpse(df1)
## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …

Descriptive statistics

# Per-column summary: level counts for the factor columns and
# min / quartiles / mean / max for the numeric ones (see the printed output).
summary(df1)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 

Data manipulation

# Preview of the 2007 cross-section (printed, not stored):
#   - keep only the rows for 2007,
#   - add log_y, the natural log of GDP per capita,
#   - expose gdpPercap under the name income_per_capita.
df1 %>%
  filter(year == 2007) %>%
  mutate(log_y = log(gdpPercap)) %>%
  rename(income_per_capita = gdpPercap)
## # A tibble: 142 × 7
##    country     continent  year lifeExp       pop income_per_capita log_y
##    <fct>       <fct>     <int>   <dbl>     <int>             <dbl> <dbl>
##  1 Afghanistan Asia       2007    43.8  31889923              975.  6.88
##  2 Albania     Europe     2007    76.4   3600523             5937.  8.69
##  3 Algeria     Africa     2007    72.3  33333216             6223.  8.74
##  4 Angola      Africa     2007    42.7  12420476             4797.  8.48
##  5 Argentina   Americas   2007    75.3  40301927            12779.  9.46
##  6 Australia   Oceania    2007    81.2  20434176            34435. 10.4 
##  7 Austria     Europe     2007    79.8   8199783            36126. 10.5 
##  8 Bahrain     Asia       2007    75.6    708573            29796. 10.3 
##  9 Bangladesh  Asia       2007    64.1 150448339             1391.  7.24
## 10 Belgium     Europe     2007    79.4  10392226            33693. 10.4 
## # … with 132 more rows
# Build the 2007 cross-section and keep it as df2: rows for 2007 only,
# gdpPercap renamed to income_per_capita, plus its natural log in log_y.
df2 <- df1 %>%
  filter(year == 2007) %>%
  rename(income_per_capita = gdpPercap) %>%
  mutate(log_y = log(income_per_capita))
df2
## # A tibble: 142 × 7
##    country     continent  year lifeExp       pop income_per_capita log_y
##    <fct>       <fct>     <int>   <dbl>     <int>             <dbl> <dbl>
##  1 Afghanistan Asia       2007    43.8  31889923              975.  6.88
##  2 Albania     Europe     2007    76.4   3600523             5937.  8.69
##  3 Algeria     Africa     2007    72.3  33333216             6223.  8.74
##  4 Angola      Africa     2007    42.7  12420476             4797.  8.48
##  5 Argentina   Americas   2007    75.3  40301927            12779.  9.46
##  6 Australia   Oceania    2007    81.2  20434176            34435. 10.4 
##  7 Austria     Europe     2007    79.8   8199783            36126. 10.5 
##  8 Bahrain     Asia       2007    75.6    708573            29796. 10.3 
##  9 Bangladesh  Asia       2007    64.1 150448339             1391.  7.24
## 10 Belgium     Europe     2007    79.4  10392226            33693. 10.4 
## # … with 132 more rows

Scatterplots

# Fig. 1: scatterplot of life expectancy against income per capita, using the
# 2007 rows in df2. Axis titles are set explicitly so the figure shows
# readable labels instead of raw column names (as the bar plots already do).
ggplot(df2, aes(x = income_per_capita, y = lifeExp)) +
  geom_point() +
  labs(x = "Income per capita ($)", y = "Life expectancy (years)",
       title = "Fig. 1. Life expectancy vs Income per capita, 2007")

# Fig. 2: same scatterplot with a straight-line (lm) fit and no confidence
# band. The formula is given explicitly so geom_smooth() does not emit its
# "using formula" message; y ~ x is the formula it would pick for lm anyway.
# Axis titles are set explicitly instead of showing raw column names.
ggplot(df2, aes(x = income_per_capita, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(x = "Income per capita ($)", y = "Life expectancy (years)",
       title = "Fig. 2. Life expectancy vs Income per capita, 2007")

# Fig. 3: life expectancy against the natural log of income per capita
# (log_y), with a straight-line (lm) fit and no confidence band. The formula
# is given explicitly so geom_smooth() does not emit its "using formula"
# message. Axis titles are set explicitly instead of raw column names.
ggplot(df2, aes(x = log_y, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(x = "log(Income per capita)", y = "Life expectancy (years)",
       title = "Fig. 3. Life expectancy vs log(Income per capita), 2007")

Get the 8 countries with the highest per-capita incomes

  1. Use arrange() or arrange(desc()) to arrange countries based on income per capita
  2. Use head() or tail() to get the top rows or bottom rows
# Preview: all 2007 rows sorted from highest to lowest income per capita.
arrange(df2, desc(income_per_capita))
## # A tibble: 142 × 7
##    country          continent  year lifeExp       pop income_per_capita log_y
##    <fct>            <fct>     <int>   <dbl>     <int>             <dbl> <dbl>
##  1 Norway           Europe     2007    80.2   4627926            49357.  10.8
##  2 Kuwait           Asia       2007    77.6   2505559            47307.  10.8
##  3 Singapore        Asia       2007    80.0   4553009            47143.  10.8
##  4 United States    Americas   2007    78.2 301139947            42952.  10.7
##  5 Ireland          Europe     2007    78.9   4109086            40676.  10.6
##  6 Hong Kong, China Asia       2007    82.2   6980412            39725.  10.6
##  7 Switzerland      Europe     2007    81.7   7554661            37506.  10.5
##  8 Netherlands      Europe     2007    79.8  16570613            36798.  10.5
##  9 Canada           Americas   2007    80.7  33390141            36319.  10.5
## 10 Iceland          Europe     2007    81.8    301931            36181.  10.5
## # … with 132 more rows
# Keep the eight richest countries: sort by income per capita, highest first,
# then take the first eight rows. Stored as df2_top8 and printed.
df2_top8 <- df2 %>%
  arrange(desc(income_per_capita)) %>%
  slice_head(n = 8)
df2_top8
## # A tibble: 8 × 7
##   country          continent  year lifeExp       pop income_per_capita log_y
##   <fct>            <fct>     <int>   <dbl>     <int>             <dbl> <dbl>
## 1 Norway           Europe     2007    80.2   4627926            49357.  10.8
## 2 Kuwait           Asia       2007    77.6   2505559            47307.  10.8
## 3 Singapore        Asia       2007    80.0   4553009            47143.  10.8
## 4 United States    Americas   2007    78.2 301139947            42952.  10.7
## 5 Ireland          Europe     2007    78.9   4109086            40676.  10.6
## 6 Hong Kong, China Asia       2007    82.2   6980412            39725.  10.6
## 7 Switzerland      Europe     2007    81.7   7554661            37506.  10.5
## 8 Netherlands      Europe     2007    79.8  16570613            36798.  10.5

Barplots

# Fig. 4: one bar per country, with countries in their default order on the
# x axis.
df2_top8 %>%
  ggplot(aes(x = country, y = income_per_capita)) +
  geom_col() +
  labs(
    title = "Fig. 4. Barplot; alphabetically ordered",
    subtitle = "Top 8: Highest per-capita GDP in 2007",
    x = "",
    y = "Income per capita ($)"
  )

# Fig. 5: the same bars drawn horizontally. coord_flip() swaps the axes, so
# the x/y labels in labs() still refer to the original aesthetics.
ggplot(df2_top8) +
  geom_col(aes(x = country, y = income_per_capita)) +
  coord_flip() +
  labs(
    title = "Fig. 5. Barplot with axes flipped",
    subtitle = "Top 8: Highest per-capita GDP in 2007",
    x = "",
    y = "Income per capita ($)"
  )

# Fig. 6: horizontal bars sorted by income. reorder() ranks the countries by
# increasing income; after coord_flip() the first level is drawn at the
# bottom, so the chart reads from highest (top) to lowest (bottom).
# The title now carries the same "Fig. N." punctuation as Figs. 1-5.
ggplot(df2_top8,
       aes(x = reorder(country, income_per_capita), y = income_per_capita)) +
  geom_col() +
  coord_flip() +
  labs(x = "", y = "Income per capita ($)",
       title = "Fig. 6. Descending order",
       subtitle = "Top 8: Highest per-capita GDP in 2007")

# Fig. 7: horizontal bars in the opposite order. Negating the income inside
# reorder() ranks the countries by decreasing income; after coord_flip() the
# chart therefore reads from lowest (top) to highest (bottom).
# The title now carries the same "Fig. N." punctuation as Figs. 1-5.
ggplot(df2_top8,
       aes(x = reorder(country, -income_per_capita), y = income_per_capita)) +
  geom_col() +
  coord_flip() +
  labs(x = "", y = "Income per capita ($)",
       title = "Fig. 7. Ascending order",
       subtitle = "Top 8: Highest per-capita GDP in 2007")

The end