Load packages

library(tidyverse)
library(gapminder)

Make a copy of gapminder

# Work on a copy named df1 so the packaged gapminder data set stays untouched
df1 <- gapminder::gapminder

Inspect the data: The first 6 rows

# Print the first six rows (six is also the default for head())
head(df1, n = 6)
## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.

Inspect the data: The last 6 rows

# Print the last six rows (six is also the default for tail())
tail(df1, n = 6)
## # A tibble: 6 × 6
##   country  continent  year lifeExp      pop gdpPercap
##   <fct>    <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Zimbabwe Africa     1982    60.4  7636524      789.
## 2 Zimbabwe Africa     1987    62.4  9216418      706.
## 3 Zimbabwe Africa     1992    60.4 10704340      693.
## 4 Zimbabwe Africa     1997    46.8 11404948      792.
## 5 Zimbabwe Africa     2002    40.0 11926563      672.
## 6 Zimbabwe Africa     2007    43.5 12311143      470.

Dimensions and variables

# Compact overview: row/column counts plus each column's type and first values
df1 %>%
  glimpse()
## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Number of rows (country-year observations) per continent
count(df1, continent)
## # A tibble: 5 × 2
##   continent     n
##   <fct>     <int>
## 1 Africa      624
## 2 Americas    300
## 3 Asia        396
## 4 Europe      360
## 5 Oceania      24

Scatterplot: A basic plot

# Basic scatterplot: GDP per capita on the x-axis, life expectancy on the y-axis
df1 %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

Scatterplot: Points in red!

# Same scatterplot, with every point drawn in one fixed color.
# The color is set outside aes(), so it is a constant, not a data mapping.
ggplot(data = df1, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(colour = "red") +
  labs(title = "Seeing red?")

Scatterplot: Mapping continent to color!

# Map continent to color inside aes(): each continent gets its own color
df1 %>%
  ggplot(aes(gdpPercap, lifeExp, color = continent)) +
  geom_point() +
  labs(title = "One color for each continent!")

FILTER: Select one country

# Show only the rows whose country is China
filter(df1, country == "China")
## # A tibble: 12 × 6
##    country continent  year lifeExp        pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>      <int>     <dbl>
##  1 China   Asia       1952    44    556263527      400.
##  2 China   Asia       1957    50.5  637408000      576.
##  3 China   Asia       1962    44.5  665770000      488.
##  4 China   Asia       1967    58.4  754550000      613.
##  5 China   Asia       1972    63.1  862030000      677.
##  6 China   Asia       1977    64.0  943455000      741.
##  7 China   Asia       1982    65.5 1000281000      962.
##  8 China   Asia       1987    67.3 1084035000     1379.
##  9 China   Asia       1992    68.7 1164970000     1656.
## 10 China   Asia       1997    70.4 1230075000     2289.
## 11 China   Asia       2002    72.0 1280400000     3119.
## 12 China   Asia       2007    73.0 1318683096     4959.
# Keep only China's rows and store them in a new data frame, df_China
df_China <- filter(df1, country == "China")

# China's life expectancy by year, drawn as points
ggplot(data = df_China, mapping = aes(x = year, y = lifeExp)) +
  geom_point() +
  labs(title = "Points only!")

# China's life expectancy by year, drawn as a connected line
df_China %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_line() +
  labs(title = "Line only!")

# Layer points and a line in the same plot; layers are added in this order
df_China %>%
  ggplot(aes(year, lifeExp)) +
  geom_point() +
  geom_line() +
  labs(title = "Both points and line!")

FILTER: Select two countries

# Show the rows for two countries; %in% tests membership in the given set
filter(df1, country %in% c("China", "Brazil"))
## # A tibble: 24 × 6
##    country continent  year lifeExp       pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Brazil  Americas   1952    50.9  56602560     2109.
##  2 Brazil  Americas   1957    53.3  65551171     2487.
##  3 Brazil  Americas   1962    55.7  76039390     3337.
##  4 Brazil  Americas   1967    57.6  88049823     3430.
##  5 Brazil  Americas   1972    59.5 100840058     4986.
##  6 Brazil  Americas   1977    61.5 114313951     6660.
##  7 Brazil  Americas   1982    63.3 128962939     7031.
##  8 Brazil  Americas   1987    65.2 142938076     7807.
##  9 Brazil  Americas   1992    67.1 155975974     6950.
## 10 Brazil  Americas   1997    69.4 168546719     7958.
## # … with 14 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Keep the rows for China and Brazil and store them in a new data frame, df_two
df_two <- filter(df1, country %in% c("China", "Brazil"))

# Life expectancy by year for both countries. No color mapping is given,
# so the two countries' points are drawn in the same color.
ggplot(df_two, aes(x = year, y = lifeExp)) +
  geom_point() +
  labs(title = "Scatterplot, in one color!")

# Map country to color so the two countries' points can be told apart
df_two %>%
  ggplot(aes(year, lifeExp, color = country)) +
  geom_point() +
  labs(title = "Scatterplot, but different colors")

# One line per country: the color mapping also splits the data into groups
ggplot(data = df_two, mapping = aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  labs(title = "Line charts; one color for each country")

Set axis labels, titles, subtitles and captions

# Line chart of life expectancy by year, one colored line per country,
# with the axis titles, title, subtitle and caption set through labs().
ggplot(df_two, aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  labs(
    # NULL removes the x-axis title entirely; an empty string ("") would
    # still draw an empty title and reserve space for it under the axis.
    x = NULL,
    y = "Life expectancy (years)",
    title = "Life expectancy in China and Brazil, 1952-2007",
    subtitle = "Wow, look at the increase over the period!",
    # NOTE(review): confirm this source credit against the gapminder
    # package documentation.
    caption = "Source: World Bank via gapminder"
  )


– The end –