Setup

# Global chunk options: show the code of every chunk and center all figures.
# Spell out TRUE; the shorthand T is an ordinary variable that can be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center"
)

# Load the tidyverse packages:
library(tidyverse)

The diamonds data

# We'll use the diamonds data frame, stored in ggplot2.
# Load it into the workspace, then print it as a tibble to take a look at it.
# as_tibble() is the coercion function for an existing data frame;
# tibble() is meant for building a new one from columns.
data(diamonds)
as_tibble(diamonds)
## # A tibble: 53,940 × 10
##    carat cut       color clarity depth table price     x     y     z
##    <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
##  2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
##  3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
##  4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
##  5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
##  6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
##  7  0.24 Very Good I     VVS1     62.3    57   336  3.95  3.98  2.47
##  8  0.26 Very Good H     SI1      61.9    55   337  4.07  4.11  2.53
##  9  0.22 Fair      E     VS2      65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good H     VS1      59.4    61   338  4     4.05  2.39
## # ℹ 53,930 more rows
# For more info, do help(diamonds) on the console

1) Bar Charts for 1 variable

1.1) Create a bar chart of counts for cut

# Bar chart of counts for cut:
# pipe diamonds into ggplot() and map cut to the x-axis with aes()
diamonds |>
  ggplot(aes(x = cut)) +
  # geom_bar() counts the rows per cut; draw white bars with a black outline
  geom_bar(fill = "white", color = "black") +
  # No padding below the bars (they sit on the x-axis), 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  theme_classic()

1.2) Create a graph of proportions for diamond color

We need to calculate the proportions ourselves if we want to create a bar chart of proportions.

# Bar chart of proportions for diamond color.
# Start by forming a table of counts (column n) using count()
diamonds |>
  count(color) |>
  # Use mutate() to turn the counts into proportions of all diamonds
  mutate(prop = n / sum(n)) |>
  # Don't need to specify data = in ggplot() since we are piping it in.
  # Map color to the x-axis and proportion to the y-axis
  ggplot(mapping = aes(x = color, y = prop)) +
  # Since we specified the y aesthetic, use geom_col() instead of geom_bar()
  geom_col(color = "black", fill = "purple") +
  # Label the y-axis and title the plot to match what is drawn: proportions
  labs(
    y = "Proportions",
    title = "Proportion of Different Colored Diamonds"
  ) +
  # And changing the theme
  theme_classic() +
  # Making the bars sit on the x-axis: no padding below, 5% padding above
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)))

1.3) Create a bar chart of percentages for color

## Two ways to create a bar chart with percentages
# Way 1) Follow the same steps as 1.2, but turn the proportion into a
# percentage by multiplying it by 100 inside of aes()
diamonds |>
  # Table of counts per color, then the proportion of each color
  count(color) |>
  mutate(prop = n / sum(n)) |>
  # The data is piped in, so ggplot() only needs the mapping:
  # color on the x-axis and the percentage on the y-axis
  ggplot(aes(x = color, y = prop * 100)) +
  geom_col(fill = "lightblue", color = "black") +
  labs(
    title = "Percentage of Different Colored Diamonds",
    y = "Percentage"
  ) +
  # No padding below the bars, 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  theme_classic()

# Way 2) Same as 1.2, but add
# scale_y_continuous(labels = scales::label_percent()) to the ggplot object
# somewhere
diamonds |>
  count(color) |>
  # Use mutate() to calculate the proportions
  mutate(prop = n / sum(n)) |>
  # Don't need to specify data = in ggplot() since we are piping it in.
  # Map color to the x-axis and proportion to the y-axis
  ggplot(mapping = aes(x = color, y = prop)) +
  geom_col(color = "black", fill = "lightblue") +
  # The percent labels already say what the axis shows, so drop the y-axis title
  labs(
    y = NULL,
    title = "Percentage of Different Colored Diamonds"
  ) +
  theme_classic() +
  # y still holds proportions; label_percent() only changes how the axis
  # labels are printed. Same labeller as the side-by-side chart in 2.3.
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  )

2) Bar Charts for 2 variables: Cut and Clarity

2.1) Stacked bar chart of counts

Create a stacked bar chart with clarity on the x-axis and cut mapped to the fill aesthetic. Have the y-axis display the counts.

# Stacked bar chart of counts:
# clarity goes on the x-axis and cut is mapped to the fill aesthetic
diamonds |>
  ggplot(aes(x = clarity, fill = cut)) +
  # geom_bar() counts the rows; its default position stacks the cut groups
  geom_bar() +
  # No padding below the bars, 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  # Changing the theme
  theme_classic()

2.2) Stacked bar chart of proportions

Create a similar plot as the previous one, but have the plot be a stacked bar chart with proportions of cut per clarity group on the y-axis.

The main change is including position = "fill" inside geom_bar()

# Stacked bar chart of conditional proportions: same mapping as the stacked
# count chart, but position = "fill" rescales every bar to a total height of 1
diamonds |>
  ggplot(aes(x = clarity, fill = cut)) +
  geom_bar(position = "fill", color = "black") +
  # Relabel the y-axis and capitalize the legend title
  labs(fill = "Cut", y = "Conditional Proportion") +
  # No padding below the bars, 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  theme_classic()

2.3) Side-by-side bar chart of proportions

Create a similar plot as 2.2), but make the bars side-by-side instead of stacked

Steps:

  1. Need to create a data frame with the conditional proportions using count() and mutate()

  2. Create the bar chart using ggplot() and geom_col()

  3. Specify side-by-side bars with the argument position = "dodge2" in geom_col()

# Side-by-side bar chart of the conditional proportions of cut within clarity
diamonds |>
  # Step 1: count every clarity/cut combination, then divide each count by
  # its clarity total so the proportions are computed within each clarity
  count(clarity, cut) |>
  mutate(prop = n / sum(n), .by = clarity) |>
  # Step 2: the heights are precomputed, so map prop to y and use geom_col()
  ggplot(aes(x = clarity, y = prop, fill = cut)) +
  # Step 3: position = "dodge2" puts the bars next to each other
  geom_col(position = "dodge2", color = "black") +
  labs(fill = "Cut", y = "Conditional Proportion") +
  # Print the y-axis labels as percentages; no padding below the bars
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_classic() +
  # Changing the legend position to be on the top of the plot
  # (must come after theme_classic(), which would otherwise reset it)
  theme(legend.position = "top")