# Set the default knitr chunk option echo = TRUE for the document
knitr::opts_chunk$set(echo = TRUE)
# Start by loading the tidyverse, skimr, and ggfittext packages
# (ggfittext supplies geom_bar_text(), used below to put text on the bars)
pacman::p_load(tidyverse, skimr, ggfittext)
# Next, read in the Titanic data set from the course GitHub repository
titanic <- read.csv("https://raw.githubusercontent.com/Shammalamala/DS-1870-Data/main/titanic.csv")
# Store class as a factor with an explicit level (group) order:
# First, Second, Third, Crew
titanic[["class"]] <- factor(
  titanic[["class"]],
  levels = c("First", "Second", "Third", "Crew")
)
# Flip the order of the survival groups
titanic[["survival"]] <- fct_rev(titanic[["survival"]])
If you want to create a bar chart with text on or above the bars, you’ll need to start by summarizing the data. The summarized data set will need 3 columns: the groups (class), the number of rows in each group (counts), and the proportion of rows in each group (prop).
So let’s start by making a data frame with the class, counts, and prop columns and call it class_sum
# One row per passenger class: the number of rows in that class (counts)
# and that class's share of all rows (prop)
class_sum <- mutate(
  count(titanic, class, name = "counts"),
  prop = counts / sum(counts)
)
# Print the summary table
class_sum
## class counts prop
## 1 First 325 0.1477273
## 2 Second 285 0.1295455
## 3 Third 706 0.3209091
## 4 Crew 884 0.4018182
Next, we’ll create a bar chart using the summarized data and `geom_col()`:
# Bar chart of the proportion of passengers in each class,
# built from the pre-summarized class_sum table
gg_class_bar <-
  class_sum |>
  ggplot(aes(x = class, y = prop)) +
  # One bar per class, with a black outline
  geom_col(color = "black", fill = "steelblue") +
  # Title and x-axis label; y = NULL drops the y-axis label
  labs(title = "Titanic Passengers", x = "Passenger Class", y = NULL) +
  # Black-and-white theme, then center the title and the (future) subtitle
  theme_bw() +
  theme(
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  ) +
  # Percent labels on the y-axis; no padding below the bars so they sit on
  # the x-axis, and 5% multiplicative padding above them
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  )
# Display the chart
gg_class_bar
Proportions or percentages are usually more useful to display than the counts, but we lose some information when displaying only the proportions: namely, how many passengers there are in each group and the overall size of the data.
One option is to include both the counts and percentages by displaying one of them on the y-axis and adding the text of the other.
How can we add the counts to the bars?
ggfittext
and geom_bar_text()
We can use the `geom_bar_text()` function in the `ggfittext` package (loaded in the setup code chunk) to add the counts to the graph. The geom needs three (3) aesthetics: `x`, `y`, and `label` (the text to add to the graph).
We don’t need to specify `x` and `y` because we mapped them in `ggplot()`, so `geom_bar_text()` will “inherit” them. But we do need to map the `label` aesthetic to the `counts` column inside of the geom, just like we would map any other aesthetic.
# Add the per-class counts as text on the bars of the class bar chart
gg_class_bar +
  # Adding the counts to the top of the bars; x and y are inherited from
  # the ggplot() call, so only the label aesthetic is mapped here
  geom_bar_text(
    mapping = aes(
      label = counts
    ),
    fontface = "bold", # making the counts bolded
    # Color of the text changes depending on the color of the bar.
    # Spelled TRUE rather than T, because T is an ordinary variable that
    # can be reassigned.
    contrast = TRUE
  ) +
  # Adding a subtitle to indicate what the text represents
  labs(
    subtitle = "Number of Passengers are at the top of the bar"
  )
Let’s look at survival rate by passenger class again:
# One row per class/survival combination: the number of rows (counts) and
# that combination's share within its class (prop)
survival_by_class <- mutate(
  count(titanic, class, survival, name = "counts"),
  # Survival and death rate per class: proportions are computed within class
  prop = counts / sum(counts),
  .by = class
)
# Print the summary table
survival_by_class
## class survival counts prop
## 1 First Dead 122 0.3753846
## 2 First Alive 203 0.6246154
## 3 Second Dead 167 0.5859649
## 4 Second Alive 118 0.4140351
## 5 Third Dead 528 0.7478754
## 6 Third Alive 178 0.2521246
## 7 Crew Dead 673 0.7613122
## 8 Crew Alive 211 0.2386878
Next, the stacked bar chart:
# Base plot for survival by class: aesthetics, labels, theme, and scales
# only. The bar and text layers are added to it later.
gg_survival_bar <-
  survival_by_class |>
  ggplot(aes(x = class, y = prop, fill = survival)) +
  # Drop the y-axis label and the legend title
  labs(y = NULL, fill = NULL) +
  # Black-and-white theme
  theme_bw() +
  # Percent labels on the y-axis; no padding below the bars so they sit on
  # the x-axis, and 5% multiplicative padding above them
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  # Fixed fill color for each survival group
  scale_fill_manual(values = c("Dead" = "tomato", "Alive" = "steelblue"))
# Display the (still layer-less) plot
gg_survival_bar
We can add the counts to each bar the exact same way we did with the first bar chart; we just need to add the `place = "middle"` and `position = "stack"` arguments!
# Stacked bar chart of survival by class with the counts written in the
# middle of each bar segment
gg_survival_bar +
  # Adding the stacked bars
  geom_col(
    color = "black"
  ) +
  # Adding the text
  # need position = "stack" because that's the default position for geom_col()
  geom_bar_text(
    mapping = aes(
      label = counts
    ),
    place = "middle",
    position = "stack",
    # Spelled TRUE rather than T, because T is an ordinary variable that
    # can be reassigned
    contrast = TRUE,
    fontface = "bold"
  )
If you want the bars to be side-by-side, we can add `position = "dodge2"` to both `geom_col()` and `geom_bar_text()`:
# Side-by-side bar chart of survival by class with the counts written at
# the top of each bar
gg_survival_bar +
  # Adding the side by side bars
  geom_col(
    color = "black",
    position = "dodge2"
  ) +
  # Adding the text; it uses the same position as the bars
  geom_bar_text(
    mapping = aes(
      label = counts
    ),
    position = "dodge2",
    place = "top",
    # Spelled TRUE rather than T, because T is an ordinary variable that
    # can be reassigned
    contrast = TRUE,
    fontface = "bold"
  )