# Chunk defaults: echo the code and centre every figure
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")

# Attach the tidyverse packages used throughout
pacman::p_load(tidyverse)

# Make theme_test() the default theme, with plot titles centred at size 14
theme_set(
  theme_test() +
    theme(plot.title = element_text(hjust = 0.5, size = 14))
)
We’ll be using the penguins data set, which is in the palmerpenguins package:
# Attach palmerpenguins, which provides the penguins data set
pacman::p_load(palmerpenguins)
# Using data(data_set) will have it appear in the global environment.
# Name the package explicitly: R 4.5.0 and later also ship a `penguins` data
# set in the datasets package (with different column names), so this pins the
# palmerpenguins version.
data(penguins, package = "palmerpenguins")
If we are working with a “raw” data set (unsummarized, each row
represents 1 case), we can use geom_bar() to form the bar
chart.
If you want the bars to be vertical, map the categorical variable to
x. If the bars are to be horizontal, map the categorical
variable to y.
# Vertical bars: pipe the raw data into ggplot() and put species on x
penguins |>
  ggplot(aes(x = species)) +
  # geom_bar() creates a bar chart for raw data
  geom_bar(
    width = 0.5,             # width of the bars
    colour = "black",        # bar outline
    fill = "aquamarine"      # bar colour
  ) +
  # no padding below the bars; pad the upper end of the y-axis by 5%
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  # change the x-axis label
  labs(x = "Penguin Species")
By default geom_bar() will display the counts on the
non-assigned axis. But what do we do if we want proportions?
It’s easiest to use summarized data and calculate the proportions
ourselves, but there is a way using geom_bar()
Just use the following aesthetics inside aes() inside
geom_bar() (won’t explain what they do, but it gets the job
done!):
- x = after_stat(prop) or y = after_stat(prop) (whichever
one hasn’t been assigned); older code writes this as ..prop.., which
ggplot2 has deprecated
- group = 1

Let’s have the bars be horizontal:
# Horizontal bars showing proportions instead of counts
ggplot(
  penguins,
  aes(
    # fct_infreq() will reorder the groups IN FREQuency order
    y = fct_infreq(species),
    # put the computed proportions, not the counts, on the x-axis
    x = after_stat(prop),
    # a single group tells geom_bar() how to calculate the proportions
    group = 1
  )
) +
  geom_bar(width = 0.5, colour = "black", fill = "violet") +
  # show the x-axis as a percent; pad only its upper end by 5%
  scale_x_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(x = "Proportion", y = "Penguin Species")
geom_col()

If we have a data set with the data already summarised, we will need
to map the counts for species to y. However, geom_bar()
needs either x or y to be unassigned. So what would it look like if we
tried to use geom_bar() with a summarised data set?
Quickest way to make a summarized data set is to use the
count() function in the dplyr package
# Summarise the penguins: one row per species, with the tally stored in a
# column called count
species_summarised <- count(penguins, species, name = "count")

# Add the proportion column, computed as count / sum(count)
species_summarised <- mutate(species_summarised, prop = count / sum(count))

# Print the summary table
species_summarised
Now to create a bar chart using the summarized data frame. We can
specify the groups on one axis and the counts on the other. But if we
do, we can’t use geom_bar() anymore. Instead, we use
geom_col(), which stands for “geometry column”
# Bar chart displaying counts, drawn from the summarised data with geom_col()
ggplot(
  data = species_summarised,
  # fct_reorder() will order the species by a second column (here, count)
  mapping = aes(x = fct_reorder(species, count))
) +

  geom_col(
    mapping = aes(y = count),
    fill = "steelblue1",
    color = "black",
    width = 0.5
  ) +

  labs(
    title = "Bar Chart with Geom_col",
    # Without an explicit label the x-axis title would be the raw expression
    # "fct_reorder(species, count)"
    x = "Penguin Species",
    y = "Count"
  ) +

  # no padding below the bars; pad the upper end of the y-axis by 5%
  scale_y_continuous(expand = c(0, 0, 0.05, 0))
# Bar chart displaying proportions
species_summarised |>
  ggplot(
    aes(
      # Using -prop will order the bars from tallest to shortest
      x = fct_reorder(species, -prop),
      y = prop
    )
  ) +
  geom_col(width = 0.5, colour = "black", fill = "orange2") +
  # show the y-axis as a percent; pad only its upper end by 5%
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    y = "Proportion",
    x = "Penguin Species",
    title = "Bar Chart for Proportions"
  )
geom_col() over geom_bar()

There are some additional benefits for working with the summarized
data and geom_col() vs the raw data and
geom_bar():
We can add the counts (or proportions) above the bars using
geom_text() and the label = argument inside
aes().
# Proportions on the y-axis, with each species' count printed above its bar
species_summarised |>
  ggplot(aes(x = fct_reorder(species, count), y = prop)) +
  geom_col(width = 0.5, colour = "black", fill = "steelblue1") +
  # vjust = vertical justification; a negative value lifts the label
  geom_text(aes(label = count), vjust = -0.25) +
  # no padding below the bars; pad the upper end of the y-axis by 5%
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    y = "Proportion",
    x = "Penguin Species",
    title = "Bar Chart with Counts above Bars"
  )
We can swap it as well, but it is a good idea to round (or show percentages instead of proportions!).
An easier way is to use geom_bar_text() from
the ggfittext package.
# Bar chart of proportions with percentage labels added by
# ggfittext::geom_bar_text()
ggplot(
  data = species_summarised,
  mapping = aes(
    x = fct_reorder(species, count),
    y = prop
  )
) +

  geom_col(
    fill = "orange2",
    color = "black",
    width = 0.5
  ) +

  ggfittext::geom_bar_text(
    # Build the label inline: the proportion as a percentage rounded to
    # 1 decimal place, with a "%" sign appended
    mapping = aes(label = paste0(round(prop * 100, digits = 1), "%")),
    # Spell out TRUE: T is an ordinary variable that can be reassigned
    contrast = TRUE
  ) +

  labs(
    title = "Bar Chart with Percentage in Bars",
    x = "Penguin Species",
    y = "Proportion"
  ) +

  # show the y-axis as a percent; pad only its upper end by 5%
  scale_y_continuous(
    expand = c(0, 0, 0.05, 0),
    labels = scales::label_percent()
  )
Pie charts: don’t use them.
Waffle charts are somewhat new and an alternative to pie charts. You
can use geom_waffle() inside the waffle package to
create them.
To use geom_waffle(), you need to work with the
summarized data and specify 2 aesthetics inside aes():
- fill = the column with the group names
- values = the column with the counts
#install.packages("waffle", repos = "https://cinc.rud.is")
library(waffle)
## Warning: package 'waffle' was built under R version 4.4.3
# Waffle chart of the species counts from the summarised data
species_summarised |>
  ggplot(aes(fill = species, values = count)) +
  # make the waffle have about the same number of rows and columns
  geom_waffle(n_rows = round(sqrt(nrow(penguins)))) +
  coord_equal() +
  labs(fill = "Penguin Species") +
  # switch to the void theme, then move the legend to the top
  theme_void() +
  theme(legend.position = "top")