# Global knitr chunk options: echo the source of every chunk and center figures.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center"
)
# Load the tidyverse packages:
library(tidyverse)

# We'll use the diamonds data frame, stored in ggplot2.
data(diamonds)

# Take a look at it as a tibble. as_tibble() is the coercion function;
# tibble() is the constructor for building a tibble column by column.
as_tibble(diamonds)
## # A tibble: 53,940 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39
## # ℹ 53,930 more rows
# For more info, do help(diamonds) on the console
# Bar chart of counts: cut goes on the x-axis and geom_bar() does the counting,
# so no y aesthetic is mapped.
ggplot(diamonds, aes(x = cut)) +
  # White bars with a black outline
  geom_bar(fill = "white", color = "black") +
  theme_classic() +
  # No padding below the bars (they sit on the x-axis), 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)))
We need to calculate the proportions ourselves if we want to create a bar chart of proportions.
# Bar chart of proportions, computed by hand before plotting.
diamonds |>
  # Start by forming a table of counts: one row per color, counts in column n
  count(color) |>
  # Turn the counts into proportions of all diamonds
  mutate(prop = n / sum(n)) |>
  # The data is piped in, so ggplot() does not need data =
  # Map color to the x-axis and proportion to the y-axis
  ggplot(mapping = aes(x = color, y = prop)) +
  # Since the y aesthetic is specified, use geom_col() instead of geom_bar()
  geom_col(color = "black", fill = "purple") +
  # The y-axis shows proportions, so the title says "Proportion" as well
  labs(
    y = "Proportions",
    title = "Proportion of Different Colored Diamonds"
  ) +
  # And changing the theme
  theme_classic() +
  # Making the bars sit on the x-axis: no padding below, 5% above
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)))
## Two ways to create a bar chart with percentages
# Way 1) Same steps as 1.2, but multiply the proportion by 100 inside aes()
diamonds |>
  count(color) |>
  # Proportion of all diamonds that fall in each color
  mutate(prop = n / sum(n)) |>
  # Data arrives through the pipe; y is the proportion rescaled to a percentage
  ggplot(mapping = aes(x = color, y = prop * 100)) +
  geom_col(fill = "lightblue", color = "black") +
  theme_classic() +
  labs(
    title = "Percentage of Different Colored Diamonds",
    y = "Percentage"
  ) +
  # No padding below the bars, 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)))
# Way 2) Same as 1.2, but let the y scale format the proportions as percentages
diamonds |>
  count(color) |>
  # Use mutate() to calculate the proportions
  mutate(prop = n / sum(n)) |>
  # The data is piped in, so ggplot() does not need data =
  # Map color to the x-axis and proportion to the y-axis
  ggplot(mapping = aes(x = color, y = prop)) +
  geom_col(color = "black", fill = "lightblue") +
  labs(
    # y = NULL removes the y-axis title
    y = NULL,
    title = "Percentage of Different Colored Diamonds"
  ) +
  theme_classic() +
  scale_y_continuous(
    # label_percent() is the current replacement for the superseded
    # scales::percent; it formats 0.25 as "25%"
    labels = scales::label_percent(),
    # No padding below the bars, 5% padding above them
    expand = expansion(mult = c(0, 0.05))
  )
Create a stacked bar chart with `clarity` on the x-axis and `cut` mapped to the fill aesthetic.
Have the y-axis display the counts.
# Stacked bar chart of counts: clarity on the x-axis, each bar split by cut
# through the fill aesthetic.
ggplot(diamonds, aes(x = clarity, fill = cut)) +
  # geom_bar() stacks the fill groups on top of each other by default
  geom_bar() +
  # No padding below the bars, 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  # Changing the theme
  theme_classic()
Create a plot similar to the previous one, but make it a stacked bar chart with the proportions of cut within each clarity group on the y-axis.
The main change is including `position = "fill"` inside `geom_bar()`.
# Stacked bar chart of conditional proportions: cut within each clarity level.
ggplot(diamonds, aes(x = clarity, fill = cut)) +
  # position = "fill" rescales every stack to a total height of 1
  geom_bar(position = "fill", color = "black") +
  # No padding below the bars, 5% padding above them
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(fill = "Cut", y = "Conditional Proportion") +
  theme_classic()
Create a plot similar to the one in 2.2, but with side-by-side bars instead of stacked bars.
Steps:
1. Create a data frame with the conditional proportions using `count()` and `mutate()`.
2. Create the bar chart using `ggplot()` and `geom_col()`.
3. Specify side-by-side bars with the argument `position = "dodge2"` in `geom_col()`.
# Side-by-side bars of the conditional proportion of each cut within clarity.
diamonds |>
  # One row per clarity/cut combination, with its count in column n
  count(clarity, cut) |>
  # Divide by the total within each clarity level, so proportions sum to 1
  # per clarity
  mutate(prop = n / sum(n), .by = clarity) |>
  ggplot(mapping = aes(x = clarity, y = prop, fill = cut)) +
  # "dodge2" places the bars for each cut next to each other
  geom_col(position = "dodge2", color = "black") +
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(fill = "Cut", y = "Conditional Proportion") +
  theme_classic() +
  # Changing the legend position to be on the top of the plot; this comes
  # after theme_classic(), a complete theme that would otherwise override it
  theme(legend.position = "top")