# Load required packages:tidyverse, here, patchwork, medicaldata
pacman :: p_load(tidyverse, here, patchwork, medicaldata,knitr,DT,usethis,ggtext)Plotting labels with ggplot2
Packages
Load packages using the pacman::p_load() function.
Packages: tidyverse, here, patchwork, medicaldata.
Introduction to text geoms in ggplot2
Focus on geom_text() for simple labeling and geom_label() for emphasized labels.
Apply these geoms on bar plots, stacked bars, dodged bars, normalized stacked bars and circular plots.
Start with a simple bar plot using fake data for practice.
# Toy data for practice: three categories (A, B, C) and a count for each.
data <- data.frame(
  category = LETTERS[1:3],
  count = c(10, 20, 15)
)
data category count
1 A 10
2 B 20
3 C 15
# Generate a bar plot using ggplot2, mapping category to x and count to y,
# Use geom_col() and fill with "steelblue"
# and geom_text () to add labelsggplot(data, mapping = aes(
x = category,
y = count
)) +
geom_col(fill = "steelblue")+
geom_text(aes(label = count))- As you can see however, the placement of our text is odd. Can fix this with
nudge_y. Also increase text size.
ggplot(data, mapping = aes(
x = category,
y = count
)) +
geom_col(fill = "steelblue")+
geom_text(aes(label = count), nudge_y = 2, size = 5 )- Explore negative values of
nudge_yto move text down.
ggplot(data, mapping = aes(
x = category,
y = count
)) +
geom_col(fill = "steelblue")+
geom_text(aes(label = count), nudge_y = -2, size = 5 )- For horizontal bar plots, use
nudge_xto adjust horizontally.
ggplot(data, mapping = aes(
x = count,
y = category
)) +
geom_col(fill = "steelblue")+
geom_text(aes(label = count), nudge_x = 1, size = 5 )- We can also use the
geom_label ()function for text labels with background rectangles.
# Replace geom_text with geom_label for labels with background
ggplot(data, mapping = aes(
x = count,
y = category
)) +
geom_col(fill = "steelblue")+
geom_label(aes(label = count), nudge_x = 1, size = 5 )ggplot(data, mapping = aes(
x = category,
y = count
)) +
geom_col(fill = "steelblue")+
geom_label(aes(label = count), nudge_y = -1, size = 5 )- We can fill and color aesthetics in geom_label () for background and text color.
ggplot(data, mapping = aes(
x = category,
y = count
)) +
geom_col(fill = "steelblue")+
geom_label(aes(label = count),
nudge_y = -1,
size = 5,
color = "darkblue",
fill = "lightblue")# Adjust fill and color in geom_label for background and text color changePRO-TIP: Setting a custom theme to streamline your plotting process
Combine an existing theme with your own customization: create a custom theme combining theme_dark with large bold axis labels:
# Define a custom theme with theme_dark and large bold axis titles
theme_dark_custom <-
theme_dark() +
theme(axis.title = element_text(size = 16, face = "bold"))Can use this on a single plot as follows
ggplot(data, mapping = aes(
x = category,
y = count
)) +
geom_col(fill = "steelblue")+
geom_label(aes(label = count),
nudge_y = -1,
size = 5,
color = "darkblue",
fill = "lightblue")+
theme_dark_custom- Set the custom theme as the default for all plots
# set the custom theme as the default for all plots
theme_set(theme_dark_custom)- Observe the effect: theme_dark_custom automatically applies to every new plot.
# Redraw an earlier plot using the new global theme
ggplot(data, aes(
x = category,
y = count
))+
geom_col(fill = "steelblue")+
geom_label(aes(label = count),
nudge_y = -3,
fill = "royalblue4",
color = "white")To set the default theme back to the original, use theme_set(theme_gray()).
theme_set(theme_gray())ggplot(data, aes(
x = category,
y = count
))+
geom_col(fill = "steelblue")+
geom_label(aes(label = count),
nudge_y = -3,
fill = "royalblue4",
color = "white")The vjust and hjust arguments
Explore vjust and hjust in ggplot2 for adjusting text position.
These arguments control vertical and horizontal justification of text.
Understanding hjust (horizontal justification)
hjust modifies the horizontal position of text relative to its anchor point.
hjust values range from 0 to 1:
‘hjust = 0’ : Left edge aligns with anchor point.
‘hjust = 0.5’ : Centered on anchor point.
‘hjust = 1’ : Right edge aligns with anchor point.
# Example to show hjust in action:
# label a single anchor point with geom_text() using different hjust values.
df <- data.frame(x = 1, y = 1)

# Base plot: one point on an empty canvas
base_p <- ggplot(df, aes(x, y)) +
  geom_point() +
  theme_void()

# Default justification, for comparison
base_p + geom_text(aes(label = "text"))

# Helper: the base plot plus a label that names the hjust value used to draw it
label_with_hjust <- function(h) {
  lab <- paste0("hjust =", h)
  base_p + geom_text(aes(label = lab), hjust = h)
}

p_hjust_0 <- label_with_hjust(0)
p_hjust_0.25 <- label_with_hjust(0.25)
p_hjust_0.5 <- label_with_hjust(0.5)
p_hjust_0.75 <- label_with_hjust(0.75)
p_hjust_1 <- label_with_hjust(1)

# Combine plots with patchwork (`/` stacks them vertically)
p_hjust_0 / p_hjust_0.25 / p_hjust_0.5 / p_hjust_0.75 / p_hjust_1
# Import data from csv
tb_outcomes <-
read.csv("C:/Users/Perminus Njiru/OneDrive - LVCT Health/Desktop/Freecodecamp/R/Care_and_Treatment_Analysis/Input/benin_tb.csv")
head(tb_outcomes) period period_date hospital outcome cases diagnosis_type
1 2015Q4 2015-10-01 St Jean De Dieu failed 0 bacteriological
2 2015Q4 2015-10-01 St Jean De Dieu unevaluated 0 bacteriological
3 2015Q4 2015-10-01 St Jean De Dieu died 0 bacteriological
4 2015Q4 2015-10-01 St Jean De Dieu lost 0 bacteriological
5 2015Q4 2015-10-01 St Jean De Dieu completed 0 bacteriological
6 2015Q4 2015-10-01 St Jean De Dieu cured 11 bacteriological
hospital_sums <-
tb_outcomes %>%
group_by(hospital) %>%
summarise(cases = sum(cases,na.rm = TRUE),.groups = "drop_last")
hospital_sums# A tibble: 6 × 2
hospital cases
<chr> <int>
1 CHPP Akron 875
2 CS Abomey-Calavi 791
3 Hopital Bethesda 256
4 Hopital Savalou 80
5 Hopital St Luc 168
6 St Jean De Dieu 171
ggplot(hospital_sums,aes(x = hospital, y = cases)) +
geom_col(fill = "steelblue") +
geom_text(aes(label = cases),vjust = -0.2)ggplot(hospital_sums,aes(x = hospital, y = cases)) +
geom_col(fill = "steelblue") +
geom_text(aes(label = paste(cases, "\ncases")),
size = 5,
angle = 0,
alpha = 0.5,
color = "black",
family = "mono",
fontface = "bold",
hjust = 0.5,
vjust = -0.2,
nudge_y = -10,
lineheight = 0.8) +
theme(axis.text.x = element_text(angle = 90))Labeling Stacked Bar Plots
We explore labeling in stacked bar plots with two categorical variables.
We use the tb_outcomes dataset, summarizing by period_date and diagnosis_type.
tb_outcomes %>% DT::datatable()tb_sum <- tb_outcomes %>%
group_by(period_date,diagnosis_type) %>%
summarise(cases = sum(cases, na.rm = TRUE),.groups = "drop")
head(tb_sum,5)# A tibble: 5 × 3
period_date diagnosis_type cases
<chr> <chr> <int>
1 2015-01-01 bacteriological 143
2 2015-01-01 clinical 47
3 2015-04-01 bacteriological 163
4 2015-04-01 clinical 35
5 2015-07-01 bacteriological 146
# Summarize the data by period and diagnosis type
# Group by period_date and diagnosis_type
# Summarise cases = sum(cases)
- Create a simple stacked bar plot using ggplot2.
quarter_dx_bar <-
ggplot(tb_sum, aes(x = period_date,
y = cases,
fill = diagnosis_type)) +
geom_col() +
labs(title = "New and relapse TB cases per quarter",
subtitle = "Data from six health facilities in Benin, 2015-2017")
quarter_dx_bar- We aim to add text labels to each bar segment using cases.
quarter_dx_bar +
geom_text(aes(label = cases))Notice the misalignment of labels - they don’t match bar heights.
To correct this, set position = “stack” in
geom_text().
quarter_dx_bar +
geom_text(aes(label = cases),
position = "stack")- For vertical alignment inside bars, we can use
vjustingeom_text()
quarter_dx_bar +
geom_text(aes(label = cases),
position = "stack",
vjust = 1.5)- But what if we want to center the labels in each bar segment? Can we use
position_stack(vjust = 0.5)
quarter_dx_bar +
geom_text(aes(label = cases),
position = position_stack(vjust = 0.5))- This approach is effective for horizontal bar plots - flip axes with
coord_flip().
quarter_dx_bar +
geom_text(aes(label = cases),
position = position_stack(vjust = 0.5)) +
coord_flip()Final product looks like this
quarter_dx_bar +
geom_text(aes(label = cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold") +
coord_flip()aus_tb_notifs <-
aus_tb <- read.csv("C:/Users/Perminus Njiru/OneDrive - LVCT Health/Desktop/Freecodecamp/R/Care_and_Treatment_Analysis/labels_using_ggplot/data/aus_tb_notifs_modified.csv")
aus_tb_notifs %>%
DT::datatable()Q: Practice with labeling stacked plots
Create a stacked bar plot showing the distribution per year of TB cases in rural and urban areas using the aus_tb_notifs dataset. Use geom_text() and adjust the position of the labels for clarity.
Hint: Pivot the data so that area_type is a column, then summarize the data by year and area_type, calculating the sum of cases (cases) for each group. The pivoting is done for you in the code below.
# Pivot the data
aus_tb_notifs %>%
pivot_longer(cols = c(rural,urban),
names_to = "area_type",
values_to = "cases") %>%
DT::datatable()# Pivot the data
# Stacked bar plot of TB cases per year, split by area type, with each
# segment labelled by its total.
aus_tb_notifs %>%
  # Stack the rural and urban columns into area_type / cases
  pivot_longer(
    cols = c(rural, urban),
    names_to = "area_type",
    values_to = "cases"
  ) %>%
  # Summarize the data by year and area type
  group_by(year, area_type) %>%
  summarise(total_case = sum(cases, na.rm = TRUE), .groups = "drop") %>%
  # Create the stacked bar plot
  ggplot(aes(
    x = year,
    y = total_case,
    fill = area_type
  )) +
  geom_col() +
  # Centre each label inside its bar segment
  geom_text(
    aes(label = total_case),
    position = position_stack(vjust = 0.5),
    color = "white",
    fontface = "bold",
    size = 3
  ) +
  labs(
    title = "Distribution of TB Cases by Area Type per Year",
    # Must be lowercase `x`: a capital `X` does not match the x aesthetic and
    # is dropped with an "Ignoring unknown labels" message.
    x = "Year",
    y = "Total Cases",
    fill = "Area Type"
  ) +
  theme_minimal()
Labeling Dodged Bar Plots
Exploring dodged bar charts: multiple categories displayed side by side.
Start by grouping the tb_outcomes dataset by hospital and diagnosis_type.
Calculate the sum of cases (cases) for each group.
hospital_dx_cases <-
tb_outcomes %>%
group_by(hospital,diagnosis_type) %>%
summarise(cases = sum(cases,na.rm = TRUE),.groups = "drop_last")
hospital_dx_cases# A tibble: 12 × 3
# Groups: hospital [6]
hospital diagnosis_type cases
<chr> <chr> <int>
1 CHPP Akron bacteriological 695
2 CHPP Akron clinical 180
3 CS Abomey-Calavi bacteriological 671
4 CS Abomey-Calavi clinical 120
5 Hopital Bethesda bacteriological 139
6 Hopital Bethesda clinical 117
7 Hopital Savalou bacteriological 70
8 Hopital Savalou clinical 10
9 Hopital St Luc bacteriological 149
10 Hopital St Luc clinical 19
11 St Jean De Dieu bacteriological 100
12 St Jean De Dieu clinical 71
- Create a dodged bar chart, setting position = "dodge" in geom_col().
hospital_dx_bar <-
hospital_dx_cases %>%
ggplot(aes(x = hospital,
y = cases,
fill = diagnosis_type)) +
geom_col(position = "dodge")- Annotate chart with
geom_text()to display the labels
hospital_dx_bar +
geom_text(aes(label = cases))- Not quite right! Need to adjust geom_text() with position = position_dodge() for proper alignment with bars.
hospital_dx_bar +
geom_text(aes(label = cases),
position = position_dodge(width = 0.9))- Finally, shift labels up slightly with
vjustfor better visualization.
hospital_dx_bar +
geom_text(aes(label = cases),
position = position_dodge(width = 0.9),
vjust = -0.2)Labeling Percent - stacked Bar Plots
- For a percent-stacked bar plot, the labels need to be formatted as percentages to match the segments on the chart.
# Percent-stacked bar chart of diagnosis type within each hospital, with
# every segment labelled by its share of that hospital's cases.
tb_outcomes %>%
  group_by(hospital, diagnosis_type) %>%
  # na.rm = TRUE, as in the other case summaries in this document, so a
  # missing count does not turn a whole total into NA
  summarise(total_cases = sum(cases, na.rm = TRUE), .groups = "drop") %>%
  # Share of each diagnosis type within its hospital; the grouping is stated
  # explicitly and dropped again once prop is computed
  group_by(hospital) %>%
  mutate(prop = total_cases / sum(total_cases)) %>%
  ungroup() %>%
  ggplot(aes(
    x = hospital,
    y = total_cases,
    fill = diagnosis_type
  )) +
  # position = "fill" rescales each bar to a total height of 1
  geom_col(position = "fill") +
  # Labels use the same fill position, centred in each segment
  geom_text(
    aes(label = scales::percent(prop, accuracy = 1)),
    position = position_fill(vjust = 0.5),
    color = "white",
    fontface = "bold",
    size = 4.5
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "",
    y = "Proportion",
    fill = "Diagnosis Method",
    title = "New and Relapse Tuberculosis Cases Diagnosis",
    subtitle = "Data from six health facilities in Benin, 2015 - 2017"
  )
# - Let’s start with a stacked bar plot showing raw counts.
hosp_dx_sum <-
tb_outcomes %>%
group_by(hospital,diagnosis_type) %>%
summarise(total_cases = sum(cases, na.rm = TRUE),.groups = "drop_last")
hosp_dx_sum# A tibble: 12 × 3
# Groups: hospital [6]
hospital diagnosis_type total_cases
<chr> <chr> <int>
1 CHPP Akron bacteriological 695
2 CHPP Akron clinical 180
3 CS Abomey-Calavi bacteriological 671
4 CS Abomey-Calavi clinical 120
5 Hopital Bethesda bacteriological 139
6 Hopital Bethesda clinical 117
7 Hopital Savalou bacteriological 70
8 Hopital Savalou clinical 10
9 Hopital St Luc bacteriological 149
10 Hopital St Luc clinical 19
11 St Jean De Dieu bacteriological 100
12 St Jean De Dieu clinical 71
hosp_dx_sum %>%
ggplot(aes(x = hospital,
y = total_cases,
fill = diagnosis_type)) +
geom_col() +
geom_text(aes(label = total_cases),
position = "stack")- Now we can normalize this by using the “fill” position for
geom_col()andgeom_text(). or rather,position_fill().
hosp_dx_sum %>%
ggplot(aes(x = hospital,
y = total_cases,
fill = diagnosis_type)) +
geom_col(position = position_fill()) +
geom_text(aes(label = total_cases),
position = position_fill())- Next, replace raw values with percentages.
hosp_dx_sum <-
tb_outcomes %>%
group_by(hospital,diagnosis_type) %>%
summarise(total_cases = sum(cases, na.rm = TRUE),.groups = "drop_last") %>%
group_by(hospital) %>%
mutate(prop = total_cases/sum(total_cases))
hosp_dx_sum# A tibble: 12 × 4
# Groups: hospital [6]
hospital diagnosis_type total_cases prop
<chr> <chr> <int> <dbl>
1 CHPP Akron bacteriological 695 0.794
2 CHPP Akron clinical 180 0.206
3 CS Abomey-Calavi bacteriological 671 0.848
4 CS Abomey-Calavi clinical 120 0.152
5 Hopital Bethesda bacteriological 139 0.543
6 Hopital Bethesda clinical 117 0.457
7 Hopital Savalou bacteriological 70 0.875
8 Hopital Savalou clinical 10 0.125
9 Hopital St Luc bacteriological 149 0.887
10 Hopital St Luc clinical 19 0.113
11 St Jean De Dieu bacteriological 100 0.585
12 St Jean De Dieu clinical 71 0.415
- Create a bar chart with the prop column.
hosp_dx_fill <-
hosp_dx_sum %>%
ggplot(aes(x = hospital,
y = prop,
fill = diagnosis_type)) +
geom_col()- Add label using
geom_text()
hosp_dx_fill +
geom_text(aes(label = prop),
position = position_fill())- Improve label readability by formatting decimals and percentages.
hosp_dx_fill +
geom_text(aes(label = scales::percent(prop)),
position = position_fill())- Center labels using
vjustinposition_fill().
hosp_dx_fill +
geom_text(aes(label = scales::percent(prop)),
position = position_fill(vjust = 0.5))- Enhance readability with flipped coordinates and aesthetic tweaks.
hosp_dx_fill +
geom_text(aes(label = scales::percent(prop, accuracy = 1)),
position = position_fill(vjust = 0.5),
color = "white",
fontface = "bold",
size = 4.5) +
theme_light() +
coord_flip()Labeling Circular Plot
- Begin with summarizing the data by calculating total for each hospital.
# Total cases per hospital. na.rm = TRUE so a missing count does not turn a
# hospital's total into NA (same handling as the other case summaries).
total_results <- tb_outcomes %>%
  group_by(hospital) %>%
  summarise(total_cases = sum(cases, na.rm = TRUE))
total_results # A tibble: 6 × 2
hospital total_cases
<chr> <int>
1 CHPP Akron 875
2 CS Abomey-Calavi 791
3 Hopital Bethesda 256
4 Hopital Savalou 80
5 Hopital St Luc 168
6 St Jean De Dieu 171
- Create a simple bar chart to visualize the data before moving to circular plots.
ggplot(total_results,
aes(x = 4,
y = total_cases,
fill = hospital)) +
geom_col()- Transform the bar chart into a basic pie chart using
coord_polar().
# Pie chart of total TB cases per hospital.
# A single stacked bar (constant x) is drawn and then wrapped into a circle by
# coord_polar(theta = "y"), which maps total_cases to the angle.
Outcome_pie <-
  ggplot(
    total_results,
    aes(
      x = 4,
      y = total_cases,
      fill = hospital
    )
  ) +
  geom_col() +
  theme_void() +
  labs(
    # Title and subtitle describe the plotted data (cases per hospital, Benin,
    # 2015-2017) while still demonstrating markdown/HTML styling.
    title = "TB Cases by **Hospital** in *Benin*", # bold and italic
    subtitle = "Data from <span style='color:red'>six health facilities</span>, 2015-2017" # coloured text
  ) +
  theme(
    plot.title = element_markdown(), # enables markdown in title
    plot.subtitle = element_markdown() # enables markdown in subtitle
  ) +
  coord_polar(theta = "y")
Outcome_pie
# - Next, focus on adding labels to the pie chart.
Outcome_pie +
geom_text(aes(label = total_cases),
position = position_stack(vjust = 0.5))- Create a base donut chart with
xlim().
# Donut chart of total TB cases per hospital.
# Same construction as a pie (one stacked bar at x = 4 wrapped by
# coord_polar(theta = "y")), but xlim() stretches the x axis down to 0 so the
# space below the bar becomes the empty centre.
Outcome_donut <-
  ggplot(
    total_results,
    aes(
      x = 4,
      y = total_cases,
      fill = hospital
    )
  ) +
  geom_col() +
  theme_void() +
  labs(
    # Title and subtitle describe the plotted data (cases per hospital, Benin,
    # 2015-2017) while still demonstrating markdown/HTML styling.
    title = "TB Cases by **Hospital** in *Benin*", # bold and italic
    subtitle = "Data from <span style='color:red'>six health facilities</span>, 2015-2017" # coloured text
  ) +
  theme(
    plot.title = element_markdown(), # enables markdown in title
    plot.subtitle = element_markdown() # enables markdown in subtitle
  ) +
  xlim(c(0, 4.5)) +
  coord_polar(theta = "y")
Outcome_donut
# - Next, focus on adding labels to the donut chart.
Outcome_donut +
geom_text(aes(label = total_cases),
position = position_stack(vjust = 0.5))- Enhance the chart’s aesthetics with
theme_void(), color adjustments, and label styling.
Outcome_donut +
geom_text(aes(label = total_cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold") +
scale_fill_viridis_d()# scale_fill_viridis_d(option = "viridis") # default - purple to yellow
Outcome_pie +
geom_label(aes(label = total_cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold",
show.legend = FALSE) +
scale_fill_viridis_d()# scale_fill_viridis_d(option = "magma") # black to yellow
Outcome_pie +
geom_label(aes(label = total_cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold",
show.legend = FALSE) +
scale_fill_viridis_d(option = "magma")# scale_fill_viridis_d(option = "plasma") # purple to yellow-pink
Outcome_pie +
geom_label(aes(label = total_cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold",
show.legend = FALSE) +
scale_fill_viridis_d(option = "plasma")# scale_fill_viridis_d(option = "inferno") # black to yellow-white
Outcome_pie +
geom_label(aes(label = total_cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold",
show.legend = FALSE) +
scale_fill_viridis_d(option = "inferno")# scale_fill_viridis_d(option = "cividis") # blue to yellow
Outcome_pie +
geom_label(aes(label = total_cases),
position = position_stack(vjust = 0.5),
color = "white",
fontface = "bold",
show.legend = FALSE) +
scale_fill_viridis_d(option = "cividis")Ask yourself: what type is my fill variable?
Words/names? → scale_fill_viridis_d()
Numbers (smooth)? → scale_fill_viridis_c()
Numbers (grouped)? → scale_fill_viridis_b()
Consider the example below, which uses {ggtext} for the plot title, subtitle and bar labels:
# Data and plot
medicaldata::strep_tb %>%
count(gender) %>%
mutate(gender_label = paste0(
"<b><span style='font-size:16pt'>", n, "</span></b> ",
if_else(gender == "M", "men", "women"))) %>%
ggplot(aes(x = gender,
fill = gender,
y = n)) +
geom_col() +
scale_fill_manual(values = c("M" = "#ee6c4d", "F" = "#424874")) +
labs(
title = "<b><span style='color:#424874; font-size:19pt'>Female</span> vs
<span style='color:#ee6c4d; font-size:19pt'>Male</span>
Patients in Strep Study</b>",
subtitle = "<span style='color:gray60'>A demonstration of custom text labels with
</span>**{ggtext}**") +
theme_classic() +
theme(plot.title = element_textbox_simple(),
plot.subtitle = element_textbox_simple(),
legend.position = "none",
axis.text.x = element_blank()) +
geom_richtext(aes(label = gender_label, y = n/2),
label.r = grid::unit(5, "pt"), fill = "white")