Setup

# Show each chunk's code in the rendered output and centre every figure
knitr::opts_chunk$set(fig.align = "center", echo = TRUE)

# Attach the tidyverse through pacman
pacman::p_load(tidyverse)

# Make theme_test() the default theme, with plot titles centred at size 14
theme_set(
  theme_test() +
    theme(plot.title = element_text(size = 14, hjust = 0.5))
)

We’ll be using the penguins data set, which is in the palmerpenguins package:

# Attach palmerpenguins, the package that provides the penguins data set
pacman::p_load(palmerpenguins)

# data() copies the data set into the global environment so it is listed
# there. The package is named explicitly because R >= 4.5 also ships a
# `penguins` data set (with different column names) in the datasets package,
# and this tutorial is written against the palmerpenguins version.
data("penguins", package = "palmerpenguins")

Bar Charts with raw data using geom_bar()

If we are working with a “raw” data set (unsummarized, each row represents 1 case), we can use geom_bar() to form the bar chart.

If you want the bars to be vertical, map the categorical variable to x. If the bars are to be horizontal, map the categorical variable to y

# Vertical bars: the categorical variable goes on x
ggplot(penguins, aes(x = species)) +

  # geom_bar() works on raw rows and draws one bar per species
  geom_bar(
    width = 0.5,            # bar width
    colour = "black",       # bar outline
    fill = "aquamarine"     # bar interior
  ) +

  # relabel the x-axis
  xlab("Penguin Species") +

  # no gap underneath the bars, 5% of headroom above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)))

By default geom_bar() will display the counts on the non-assigned axis. But what do we do if we want proportions?

It’s easiest to use summarized data and calculate the proportions ourselves, but there is a way using geom_bar()

Just use the following aesthetics inside aes() inside geom_bar() (won’t explain what they do, but it gets the job done!)

  • x = after_stat(prop) or y = after_stat(prop) (whichever one hasn’t been assigned); older code writes this as ..prop.., a notation deprecated since ggplot2 3.4.0
  • group = 1

Let’s have the bars be horizontal:

# Horizontal bars: the categorical variable goes on y.
# fct_infreq() reorders the species levels by how frequently each occurs.
ggplot(penguins, aes(y = fct_infreq(species))) +

  geom_bar(
    aes(
      group = 1,             # one shared group for the proportion calculation
      x = after_stat(prop)   # proportions, not counts, on the x-axis
    ),
    width = 0.5,
    colour = "black",
    fill = "violet"
  ) +

  xlab("Proportion") +
  ylab("Penguin Species") +

  # Format the x-axis values as percentages
  scale_x_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  )

Bar charts with summarized data using geom_col()

If we have a data set with the data already summarised, we will need to map the counts for species to y. However, geom_bar() needs either x or y to be unassigned. So what would it look like if we tried to use geom_bar() with a summarised data set?

The quickest way to make a summarized data set is to use the count() function from the dplyr package.

# Summary table: one row per species with its count and its proportion
species_summarised <- penguins |>
  # name = "count" names the tally column directly instead of the default n
  count(species, name = "count") |>
  # proportion = this species' count divided by the total count
  mutate(prop = count / sum(count))

# Print the summary
species_summarised

Now to create a bar chart using the summarized data frame. We can specify the groups on one axis and the counts on the other. But if we do, we can’t use geom_bar() anymore. Instead, we use geom_col(), which stands for “geometry column”

# Bar chart of counts drawn from the summarised data
ggplot(
  data = species_summarised,
  # fct_reorder() orders the species levels by a second column (count)
  mapping = aes(x = fct_reorder(species, count))
) +

  # geom_col() takes the bar heights from the column mapped to y
  geom_col(
    mapping = aes(y = count),
    fill = "steelblue1",
    color = "black",
    width = 0.5
  ) +

  # Name both axes explicitly: without this the x-axis title is the literal
  # expression "fct_reorder(species, count)"
  labs(
    title = "Bar Chart with Geom_col",
    x = "Penguin Species",
    y = "Count"
  ) +

  # no gap underneath the bars, 5% of headroom above them
  scale_y_continuous(expand = c(0, 0, 0.05, 0))

# Bar chart of proportions drawn from the summarised data
ggplot(
  species_summarised,
  # ordering by -prop places the tallest bar first and the shortest last
  aes(x = fct_reorder(species, -prop), y = prop)
) +

  geom_col(width = 0.5, colour = "black", fill = "orange2") +

  ggtitle("Bar Chart for Proportions") +
  xlab("Penguin Species") +
  ylab("Proportion") +

  # percent labels on the y-axis; bars sit on the axis with 5% headroom
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  )

Benefits of geom_col() over geom_bar()

There are some additional benefits for working with the summarized data and geom_col() vs the raw data and geom_bar()

Adding the counts above the bars

We can add the counts (or proportions) above the bars using geom_text() with the label = argument inside aes()

# Proportions on the y-axis, with each species' count printed above its bar
ggplot(
  species_summarised,
  aes(x = fct_reorder(species, count), y = prop)
) +

  geom_col(width = 0.5, colour = "black", fill = "steelblue1") +

  # label = count supplies the text; vjust (vertical justification) of -0.25
  # shifts the text upward so it sits above the bar
  geom_text(aes(label = count), vjust = -0.25) +

  labs(
    y = "Proportion",
    x = "Penguin Species",
    title = "Bar Chart with Counts above Bars"
  ) +

  scale_y_continuous(expand = expansion(mult = c(0, 0.05)))

We can swap it as well, but it is a good idea to round (or show percentages instead of proportions!)

An easier way is to use geom_bar_text() from ggfittext

# Percentages printed inside the bars. The label text (e.g. "44.2%") is built
# inline from prop, so no extra column is added to the data.
ggplot(
  data = species_summarised,
  mapping = aes(
    x = fct_reorder(species, count),
    y = prop
  )
) +

  geom_col(
    fill = "orange2",
    color = "black",
    width = 0.5
  ) +

  ggfittext::geom_bar_text(
    # prop is converted to a percentage rounded to 1 decimal, then "%" is added
    mapping = aes(label = paste0(round(prop * 100, digits = 1), "%")),
    # TRUE is spelled out: T is an ordinary variable that can be reassigned
    contrast = TRUE
  ) +

  labs(
    title = "Bar Chart with Percentage in Bars",
    x = "Penguin Species",
    y = "Proportion"
  ) +

  # percent labels on the y-axis; bars sit on the axis with 5% headroom
  scale_y_continuous(
    expand = c(0, 0, 0.05, 0),
    labels = scales::label_percent()
  )

Pie Charts

Don’t use them

Waffle Charts

Waffle charts are somewhat new and an alternative to pie charts. You can use geom_waffle() inside the waffle package to create them.

To use geom_waffle(), you need to work with the summarized data and specify 2 arguments:

  1. fill = the column with the group names
  2. values = the column with the counts
# If waffle is not installed yet:
# install.packages("waffle", repos = "https://cinc.rud.is")
library(waffle)

ggplot(
  data = species_summarised,
  mapping = aes(
    fill = species,     # column with the group names
    values = count      # column with the group counts
  )
) +

  # Aim for about as many rows as columns: rows = sqrt(total squares).
  # The total comes from the counts being plotted, so the grid follows the
  # summarised data rather than a separate raw data frame.
  geom_waffle(
    n_rows = round(sqrt(sum(species_summarised$count)))
  ) +

  labs(fill = "Penguin Species") +

  # square cells
  coord_equal() +

  # no axes or background, legend above the waffle
  theme_void() +

  theme(legend.position = "top")