This post demonstrates how to use the tidyverse packages to explore the gapminder data set, with ggplot2 used to visualise the results.

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## -- Conflicts --------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(gapminder) # this package contains the data
## Warning: package 'gapminder' was built under R version 3.6.3
# Bind the gapminder data to the short name `df` used by the pipelines below.
df <- gapminder
  1. Smell-testing the dataset. There are many functions that can be used, such as str(), summary(), head(), tail()…
# Compact overview of the structure: dimensions, column names and column types.
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1704 obs. of  6 variables:
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...

There are six variables:

* country
* continent
* year
* lifeExp: life expectancy at birth
* pop: total population
* gdpPercap: per-capita GDP

# Peek at the first six rows.
head(df)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
# Per-column summary statistics; uses `df` like the other smell tests so all
# of them inspect the same object.
summary(df)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
  2. Counting. For categorical variables such as country, continent, and year, we can count the unique values. Example: How many countries, continents, and reported years are there in this data?
# Number of distinct countries, continents and reporting years in the data.
summarise(
  df,
  nb_country   = n_distinct(country),
  nb_continent = n_distinct(continent),
  nb_year      = n_distinct(year)
)
## # A tibble: 1 x 3
##   nb_country nb_continent nb_year
##        <int>        <int>   <int>
## 1        142            5      12

Although the variable year in the raw data is recorded as an integer, we will treat it as categorical: there are only 12 unique years. The earliest year is 1952, and the data appear to be recorded every five years.

  3. Select the top N values by group

3A. By variable

Finding the highest/lowest values. Example: Top 10 countries with the highest life expectancy (for a specific year)

# Ten countries with the highest life expectancy in 2007.
df %>%
  filter(year == 2007) %>%
  arrange(desc(lifeExp)) %>%
  select(continent, country, lifeExp) %>%
  head(n = 10)
## # A tibble: 10 x 3
##    continent country          lifeExp
##    <fct>     <fct>              <dbl>
##  1 Asia      Japan               82.6
##  2 Asia      Hong Kong, China    82.2
##  3 Europe    Iceland             81.8
##  4 Europe    Switzerland         81.7
##  5 Oceania   Australia           81.2
##  6 Europe    Spain               80.9
##  7 Europe    Sweden              80.9
##  8 Asia      Israel              80.7
##  9 Europe    France              80.7
## 10 Americas  Canada              80.7

Similarly, we can find the 10 countries with the lowest life expectancy.

# Ten countries with the lowest life expectancy in 2007 (ascending sort).
df %>%
  filter(year == 2007) %>%
  arrange(lifeExp) %>%
  select(continent, country, lifeExp) %>%
  head(n = 10)
## # A tibble: 10 x 3
##    continent country                  lifeExp
##    <fct>     <fct>                      <dbl>
##  1 Africa    Swaziland                   39.6
##  2 Africa    Mozambique                  42.1
##  3 Africa    Zambia                      42.4
##  4 Africa    Sierra Leone                42.6
##  5 Africa    Lesotho                     42.6
##  6 Africa    Angola                      42.7
##  7 Africa    Zimbabwe                    43.5
##  8 Asia      Afghanistan                 43.8
##  9 Africa    Central African Republic    44.7
## 10 Africa    Liberia                     45.7

Top 10 GDP per capita by country

# Ten countries with the highest GDP per capita in 2007.
df %>%
  filter(year == 2007) %>%
  arrange(desc(gdpPercap)) %>%
  select(continent, country, gdpPercap) %>%
  head(n = 10)
## # A tibble: 10 x 3
##    continent country          gdpPercap
##    <fct>     <fct>                <dbl>
##  1 Europe    Norway              49357.
##  2 Asia      Kuwait              47307.
##  3 Asia      Singapore           47143.
##  4 Americas  United States       42952.
##  5 Europe    Ireland             40676.
##  6 Asia      Hong Kong, China    39725.
##  7 Europe    Switzerland         37506.
##  8 Europe    Netherlands         36798.
##  9 Americas  Canada              36319.
## 10 Europe    Iceland             36181.

For each continent (excluding Oceania), which 3 countries had the highest GDP per capita in 1997?

# Top 3 countries by GDP per capita within each continent for 1997.
# Oceania is excluded. Rows are sorted first; grouping afterwards does not
# reorder them, so top_n() returns each continent's rows in descending order.
df %>%
  filter(year == 1997, continent != "Oceania") %>%
  select(continent, country, gdpPercap, pop, lifeExp) %>%
  arrange(continent, desc(gdpPercap)) %>%
  group_by(continent) %>%
  top_n(n = 3, wt = gdpPercap)
## # A tibble: 12 x 5
## # Groups:   continent [4]
##    continent country       gdpPercap       pop lifeExp
##    <fct>     <fct>             <dbl>     <int>   <dbl>
##  1 Africa    Gabon            14723.   1126189    60.5
##  2 Africa    Libya             9467.   4759670    71.6
##  3 Africa    Botswana          8647.   1536536    52.6
##  4 Americas  United States    35767. 272911760    76.8
##  5 Americas  Canada           28955.  30305843    78.6
##  6 Americas  Puerto Rico      16999.   3759430    74.9
##  7 Asia      Kuwait           40301.   1765345    76.2
##  8 Asia      Singapore        33519.   3802309    77.2
##  9 Asia      Japan            28817. 125956499    80.7
## 10 Europe    Norway           41283.   4405672    78.3
## 11 Europe    Switzerland      32135.   7193761    79.4
## 12 Europe    Netherlands      30246.  15604464    78.0
# Bubble plot of the top 3 countries by GDP per capita in each continent
# (1997, Oceania excluded): GDP per capita vs. life expectancy, bubble area
# proportional to population.
df %>%
  filter(year == 1997 & continent != "Oceania") %>%
  select(continent, country, gdpPercap, pop, lifeExp) %>%
  group_by(continent) %>%
  arrange(continent, desc(gdpPercap)) %>%
  top_n(3, gdpPercap) %>%
  ungroup() %>%
  # Express population in millions so the legend values match its
  # "Population (M)" title; relative bubble sizes are unchanged.
  mutate(pop = pop / 1e6) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop,
             color = country, shape = continent)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(5, 20), name = "Population (M)")

# Bubble plot of all countries in 2007 (Oceania excluded): GDP per capita vs.
# life expectancy, coloured by continent, bubble area proportional to
# population.
df %>%
  filter(year == 2007 & continent != "Oceania") %>%
  # Express population in millions so the legend values match its
  # "Population (M)" title; relative bubble sizes are unchanged.
  mutate(pop = pop / 1e6) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = continent)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(.5, 24), name = "Population (M)")

# Life expectancy over time for the countries in Oceania, one line per
# country. The data are not filtered by year, so the title states the full
# 1952-2007 range of the data set.
df %>%
  filter(continent == "Oceania") %>%
  ggplot(aes(x = year, y = lifeExp, color = country)) +
  geom_line(size = 1.0) +
  ggtitle("Life expectancy in Oceania from 1952 to 2007")

4. Basic statistics: mean, median, max, min

# Median, mean, minimum and maximum life expectancy per continent for 2007
# (Oceania excluded).
df %>%
  filter(year == 2007, continent != "Oceania") %>%
  group_by(continent) %>%
  summarise(
    med = median(lifeExp),
    avg = mean(lifeExp),
    min = min(lifeExp),
    max = max(lifeExp)
  )
## # A tibble: 4 x 5
##   continent   med   avg   min   max
##   <fct>     <dbl> <dbl> <dbl> <dbl>
## 1 Africa     52.9  54.8  39.6  76.4
## 2 Americas   72.9  73.6  60.9  80.7
## 3 Asia       72.4  70.7  43.8  82.6
## 4 Europe     78.6  77.6  71.8  81.8

It may be better to visualize this with a box plot.

# Box plot of 2007 life expectancy per continent (Oceania excluded), with the
# individual countries overlaid as horizontally jittered points.
# No group_by() is needed here: ggplot2 forms the groups from the discrete
# x aesthetic and ignores dplyr grouping.
df %>%
  filter(year == 2007 & continent != "Oceania") %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot(outlier.colour = "red") +
  geom_jitter(position = position_jitter(width = 0.1, height = 0),
              alpha = 0.75)

# Life expectancy against log GDP per capita in 2007 (Oceania excluded),
# coloured by continent, with one panel and one linear fit per continent.
# No GDP threshold is applied; the x axis is log-transformed instead.
ggplot(
  data = filter(df, year == 2007, continent != "Oceania"),
  mapping = aes(x = log(gdpPercap), y = lifeExp, colour = continent)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  facet_wrap(~continent)
## `geom_smooth()` using formula 'y ~ x'

# Life expectancy against log GDP per capita in 2007 (Oceania excluded),
# faceted by continent with a linear fit per panel; point colour encodes
# life expectancy. No GDP threshold is applied.
ggplot(
  data = filter(df, year == 2007, continent != "Oceania"),
  mapping = aes(x = log(gdpPercap), y = lifeExp, colour = lifeExp)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  facet_wrap(~continent)
## `geom_smooth()` using formula 'y ~ x'

Compare the average life expectancy 40 years apart (1957 vs. 1997)

# Dodged bar chart comparing the average life expectancy per continent in
# 1957 and 1997 (Oceania excluded).
df %>%
  # `year` is an integer column, so match it against numbers rather than
  # relying on implicit coercion from character strings.
  filter(year %in% c(1957, 1997) & continent != "Oceania") %>%
  group_by(year, continent) %>%
  summarise(Avg_life_expectancy = mean(lifeExp)) %>%
  ungroup() %>%
  ggplot(aes(x = continent, y = Avg_life_expectancy,
             fill = as.factor(year))) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "", y = "Average Life Expectancy", fill = "Year") +
  scale_fill_manual(values = c("lightblue", "darkblue"))

Which countries' GDP per capita grew the most over the last 10 years of data (1997 to 2007)?

# Five countries with the largest absolute increase in GDP per capita
# between 1997 and 2007. The two years are spread into the columns `1997`
# and `2007` so the difference can be computed row-wise.
# top_n() keeps the five largest rows but does not sort them.
top5_countries <- df %>%
  select(continent, year, country, gdpPercap) %>%
  # `year` is an integer column, so match it against numbers rather than
  # relying on implicit coercion from character strings.
  filter(year %in% c(1997, 2007)) %>%
  pivot_wider(names_from = year, values_from = gdpPercap) %>%
  mutate(gdp_difference = `2007` - `1997`) %>%
  top_n(5, gdp_difference)

top5_countries
## # A tibble: 5 x 5
##   continent country          `1997` `2007` gdp_difference
##   <fct>     <fct>             <dbl>  <dbl>          <dbl>
## 1 Asia      Bahrain          20292. 29796.          9504.
## 2 Europe    Finland          23724. 33207.          9483.
## 3 Asia      Hong Kong, China 28378. 39725.         11347.
## 4 Europe    Ireland          24522. 40676.         16154.
## 5 Asia      Singapore        33519. 47143.         13624.
# Countries selected in `top5_countries`, then their GDP per capita over all
# years, one line per country.
top_countries <- pull(top5_countries, country)

ggplot(
  data = filter(df, country %in% top_countries),
  mapping = aes(x = year, y = gdpPercap, colour = country)
) +
  geom_line(size = 1)

Generate separate histograms of life expectancy for each continent

# One life-expectancy histogram per continent (Oceania excluded), using the
# default binning of geom_histogram().
ggplot(
  data = filter(df, continent != "Oceania"),
  mapping = aes(x = lifeExp)
) +
  geom_histogram() +
  facet_wrap(~ continent)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Generate a scatterplot of the relationship between per capita GDP and life expectancy

# Plain scatterplot of life expectancy against GDP per capita (all rows).
df %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Same scatterplot coloured by continent, with a default smoother added.
df %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Use faceting to identify differences

# GDP per capita vs. life expectancy with one panel per continent (Oceania
# excluded); points and the default smoother are coloured by continent.
ggplot(
  data = filter(df, continent != "Oceania"),
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~ continent)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

library(viridis)
## Warning: package 'viridis' was built under R version 3.6.3
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
# Bubble plot of GDP per capita vs. life expectancy for all rows, coloured by
# continent with the viridis palette; bubble area shows population in
# millions.
df %>%
  mutate(pop = pop / 1000000) %>%
  # Draw the largest bubbles first so smaller ones stay visible on top.
  arrange(desc(pop)) %>%
  ggplot(aes(x = gdpPercap,
             y = lifeExp,
             size = pop,
             color = continent)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1.4, 19), name = "Population (M)") +
  # guide = "none" hides the colour legend; guide = FALSE is deprecated.
  scale_color_viridis(discrete = TRUE, guide = "none") +
  theme(legend.position = "bottom")

library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.6.3
# Load the packaged gapminder data set into the workspace under its own name.
data(gapminder)

Change over time

# Life expectancy over time for Canada and Cambodia.
# The integer year is converted to a Date (1 January of that year) so that
# scale_x_date() can be used. The format belongs to as.Date(), not paste():
# passing it to paste() appended the format text to every date string.
df <- gapminder %>%
  filter(country %in% c("Canada", "Cambodia")) %>%
  mutate(year = as.Date(paste0(year, "-01-01"), format = "%Y-%m-%d"))

ggplot(df, aes(x = year, y = lifeExp, colour = country)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  scale_colour_manual(values = c("Canada" = "blue", "Cambodia" = "red")) +
  # One axis break per observed year, labelled with the year only.
  scale_x_date(breaks = df$year, date_labels = "%Y") +
  # Start the y axis at zero; the upper limit is taken from the data.
  scale_y_continuous(limits = c(0, NA), labels = scales::comma) +
  labs(title = "",
       subtitle = "Life expectancy in Canada and Cambodia, 1952-2007",
       caption = "Source: Gapminder.org  |  @traffordDataLab",
       x = "",
       y = "Age (years)",
       colour = NULL) +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "bottom")

Slope chart

# Slope chart of GDP per capita in 1952 vs. 2007 for Canada and Cambodia.
df <- filter(gapminder,
             country %in% c("Canada", "Cambodia") & year %in% c(1952, 2007))

# Shared aesthetics are declared once and inherited by every layer.
ggplot(df, aes(x = as.factor(year), y = gdpPercap,
               group = country, colour = country)) +
  geom_line(size = 2, alpha = 0.8) +
  geom_point(size = 5, alpha = 0.8) +
  # Left-hand labels: country name and 1952 value.
  # size and hjust are fixed settings, so they go outside aes(); inside aes()
  # they were mapped as data and the text size went through the size scale.
  geom_text(data = subset(df, year == 1952),
            aes(label = paste(country,
                              scales::dollar(round(gdpPercap, 0)),
                              sep = ", ")),
            size = 4, hjust = 1.2) +
  # Right-hand labels: 2007 value only.
  geom_text(data = subset(df, year == 2007),
            aes(label = scales::dollar(round(gdpPercap, 0))),
            size = 4, hjust = -0.3) +
  scale_colour_brewer(palette = "Set2") +
  labs(title = "Change in GDP per capita from 1952 to 2007",
       x = NULL,
       y = NULL,
       colour = NULL) +
  theme(panel.grid.major = element_blank(),
        axis.text.y = element_blank(),
        legend.position = "none")

# Subset of four European countries with the integer year converted to a
# Date (1 January of that year). The format is passed to as.Date(), not to
# paste(), which would append the format text to every date string.
df2 <- gapminder %>%
  filter(country %in% c("France", "Germany", "Ireland", "Italy")) %>%
  mutate(year = as.Date(paste0(year, "-01-01"), format = "%Y-%m-%d"))
df2
## # A tibble: 48 x 6
##    country continent year       lifeExp      pop gdpPercap
##    <fct>   <fct>     <date>       <dbl>    <int>     <dbl>
##  1 France  Europe    1952-01-01    67.4 42459667     7030.
##  2 France  Europe    1957-01-01    68.9 44310863     8663.
##  3 France  Europe    1962-01-01    70.5 47124000    10560.
##  4 France  Europe    1967-01-01    71.6 49569000    13000.
##  5 France  Europe    1972-01-01    72.4 51732000    16107.
##  6 France  Europe    1977-01-01    73.8 53165019    18293.
##  7 France  Europe    1982-01-01    74.9 54433565    20294.
##  8 France  Europe    1987-01-01    76.3 55630100    22066.
##  9 France  Europe    1992-01-01    77.5 57374179    24704.
## 10 France  Europe    1997-01-01    78.6 58623428    25890.
## # ... with 38 more rows

Correlation: Scatterplot

# Life expectancy against GDP per capita (log10 x axis) for 2007.
# Bubble area shows population and fill colour shows the continent.
ggplot(filter(gapminder, year == 2007), aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10(labels = scales::dollar) +
  # shape 21 is a filled circle with a separate outline colour.
  geom_point(aes(size = pop, fill = continent),
             shape = 21, colour = "white", alpha = 0.9) +
  scale_fill_brewer(palette = "Set2") +
  scale_size_continuous(range = c(1, 20)) +
  labs(title = "Relationship between life expectancy and income, 2007",
       x = "GDP per capita ($)",
       y = "Life expectancy (years)") +
  # Hide the size legend; "none" replaces the deprecated FALSE.
  guides(size = "none") +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "right",
        legend.title = element_blank())

In Europe, which countries have GDP per capita above the median (in 2007)?

# European countries in 2007 with their distance from the European median
# GDP per capita. Rows are sorted by that distance and `country` is re-levelled
# in row order so plots follow the sorted order.
df <- gapminder %>%
  filter(year == 2007, continent == "Europe") %>%
  mutate(median = median(gdpPercap)) %>%
  mutate(diff = gdpPercap - median) %>%
  mutate(type = ifelse(gdpPercap < median, "Below", "Above")) %>%
  arrange(diff) %>%
  mutate(country = factor(country, levels = country))

df
## # A tibble: 30 x 9
##    country         continent  year lifeExp    pop gdpPercap median    diff type 
##    <fct>           <fct>     <int>   <dbl>  <int>     <dbl>  <dbl>   <dbl> <chr>
##  1 Albania         Europe     2007    76.4 3.60e6     5937. 28054. -22117. Below
##  2 Bosnia and Her~ Europe     2007    74.9 4.55e6     7446. 28054. -20608. Below
##  3 Turkey          Europe     2007    71.8 7.12e7     8458. 28054. -19596. Below
##  4 Montenegro      Europe     2007    74.5 6.85e5     9254. 28054. -18800. Below
##  5 Serbia          Europe     2007    74.0 1.02e7     9787. 28054. -18268. Below
##  6 Bulgaria        Europe     2007    73.0 7.32e6    10681. 28054. -17373. Below
##  7 Romania         Europe     2007    72.5 2.23e7    10808. 28054. -17246. Below
##  8 Croatia         Europe     2007    75.7 4.49e6    14619. 28054. -13435. Below
##  9 Poland          Europe     2007    75.6 3.85e7    15390. 28054. -12664. Below
## 10 Hungary         Europe     2007    73.3 9.96e6    18009. 28054. -10045. Below
## # ... with 20 more rows
# Horizontal diverging bars: each country's GDP per capita minus the median,
# filled by whether it is above or below the median.
ggplot(data = df, mapping = aes(x = country, y = diff, label = country)) +
  geom_col(mapping = aes(fill = type), width = 0.5, alpha = 0.8) +
  scale_fill_manual(values = c("Above" = "purple", "Below" = "blue"),
                    labels = c("Above median", "Below median")) +
  scale_y_continuous(labels = scales::dollar,
                     expand = c(0, 0)) +
  coord_flip() +
  labs(title = "GDP per capita, 2007",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme(panel.grid.major.y = element_blank())

Distribution: Density plot

# Overlaid life-expectancy densities per continent for 2007 (Oceania
# excluded).
gapminder %>%
  filter(year == 2007, continent != "Oceania") %>%
  ggplot(aes(x = lifeExp)) +
  geom_density(aes(fill = continent), size = 0.1, alpha = 0.5) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Life expectancy distribution in 2007",
       x = "Age (years)",
       y = "",
       fill = NULL) +
  theme(panel.grid.major.x = element_blank())

Boxplot

# Box plot of 2007 life expectancy for every continent, filled by continent;
# the legend is hidden because the x axis already names the continents.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot(colour = "#757575", alpha = 0.8) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Life expectancy distributions, 2007",
       x = "",
       y = "Age (years)") +
  theme(legend.position = "none",
        panel.grid.major.x = element_blank())

Ridgeline plot

library(ggridges)
# Ridgeline plot of 2007 life expectancy, one ridge per continent (Oceania
# excluded).
df <- gapminder %>% filter(year == 2007 & continent != "Oceania")

# fct_rev() reverses the continent levels on the y axis.
ggplot(df, aes(x = lifeExp, y = fct_rev(continent), fill = continent)) +
  geom_density_ridges(colour = "#bdbdbd", size = 0.5, alpha = 0.5) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Life expectancy distribution, 2007",
       x = "Life expectancy (years)",
       y = "") +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "none")
## Picking joint bandwidth of 2.48

Magnitude

Bar chart (vertical)

# Median GDP per capita per continent for 2007, drawn as vertical bars.
df <- summarise(
  group_by(filter(gapminder, year == 2007), continent),
  median = median(gdpPercap)
)

ggplot(data = df, mapping = aes(x = continent, y = median, fill = continent)) +
  geom_col(alpha = 0.8) +
  scale_y_continuous(expand = c(0, 0), labels = scales::dollar) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "",
       subtitle = "Median GDP per capita by continent, 2007",
       caption = "Source: Gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = "GDP per capita",
       fill = NULL) +
  theme(legend.position = "none",
        panel.grid.major.x = element_blank())

Grouped bar chart

# Total population per continent for each reporting year after 1990
# (1992, 1997, 2002 and 2007), drawn as dodged bars.
df <- gapminder %>%
  filter(year > 1990) %>%
  group_by(year, continent) %>%
  # `pop` is an integer column; summing as double avoids integer overflow.
  summarise(totalpop = sum(as.double(pop))) %>%
  ungroup()

ggplot(df, aes(x = year, y = totalpop, group = continent, fill = continent)) +
  geom_col(position = "dodge", colour = "#757575", size = 0.2, alpha = 0.8) +
  scale_x_continuous(breaks = seq(1992, 2007, 5), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::comma, expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  # The first year kept by the filter is 1992, so the title says 1992-2007.
  labs(title = "Total population by continent, 1992-2007",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "bottom")

Stacked bar chart

Part-to-whole

100% stacked bar chart

# Share of total population per continent for each reporting year after 1990
# (1992, 1997, 2002 and 2007), drawn as 100% stacked bars.
df <- gapminder %>%
  filter(year > 1990) %>%
  group_by(year, continent) %>%
  # `pop` is an integer column; summing as double avoids integer overflow.
  summarise(totalpop = sum(as.double(pop))) %>%
  ungroup()

ggplot(df, aes(x = year, y = totalpop, fill = continent)) +
  # position = "fill" rescales every bar to 100%.
  geom_col(position = "fill", colour = "#757575", size = 0.2, alpha = 0.8) +
  scale_x_continuous(breaks = seq(1992, 2007, 5), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  guides(fill = guide_legend(reverse = TRUE)) +
  # The first year kept by the filter is 1992, so the title says 1992-2007.
  labs(title = "Proportion of total population by continent, 1992-2007",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "right")

Treemap

library(treemapify)
# Treemap of total GDP (population x GDP per capita) per country for 2007,
# with countries nested inside their continent (Oceania excluded).
df <- mutate(
  filter(gapminder, year == 2007, continent != "Oceania"),
  gdp = pop * gdpPercap
)

ggplot(
  data = df,
  mapping = aes(area = gdp, fill = continent,
                subgroup = continent, label = country)
) +
  geom_treemap() +
  geom_treemap_subgroup_border(colour = "black") +
  geom_treemap_subgroup_text(place = "bottomleft", fontface = "bold",
                             colour = "#f0f0f0", alpha = 0.7) +
  geom_treemap_text(place = "centre", colour = "white", reflow = TRUE) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Country GDP by continent, 2007",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme(legend.position = "none")

Ranking: Ordered bar chart (horizontal)

# Median GDP per capita per continent for 2007 as horizontal bars, with the
# continents reordered by the negated median.
df <- summarise(
  group_by(filter(gapminder, year == 2007), continent),
  median = median(gdpPercap)
)

ggplot(data = df,
       mapping = aes(x = reorder(continent, -median, sum), y = median)) +
  geom_col(fill = "#fc6721", alpha = 0.8) +
  scale_y_continuous(labels = scales::dollar, expand = c(0, 0)) +
  coord_flip() +
  labs(title = "Median GDP per capita by continent, 2007",
       x = NULL,
       y = "GDP per capita",
       fill = NULL) +
  theme(panel.grid.major.y = element_blank())

Lollipop chart

# Lollipop chart of 2007 GDP per capita for countries in the Americas.
# Rows are sorted by GDP per capita and `country` is re-levelled in row order
# so the y axis follows the sorted order.
df <- gapminder %>%
  filter(year == 2007, continent == "Americas") %>%
  arrange(gdpPercap) %>%
  mutate(country = factor(country, levels = country))

ggplot(data = df, mapping = aes(x = gdpPercap, y = country)) +
  # Stick: a segment from zero to the country's value.
  geom_segment(mapping = aes(x = 0, xend = gdpPercap,
                             y = country, yend = country),
               colour = "purple") +
  # Head: a point at the value.
  geom_point(colour = "blue", size = 5, alpha = 0.8) +
  scale_x_continuous(labels = scales::dollar,
                     limits = c(0, max(df$gdpPercap) * 1.1),
                     expand = c(0, 0)) +
  labs(title = "",
       subtitle = "GDP per capita in American countries, 2007",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme(axis.text.y = element_text(hjust = 0),
        panel.grid.major = element_blank())