# Attach every package this analysis uses in a single call.
pacman::p_load(
  tidyverse, # read_csv(), dplyr/tidyr verbs, forcats, stringr, ggplot2
  viridis,   # scale_fill_viridis_d()
  ggbreak,   # scale_y_break() (currently only referenced in a commented line)
  scales     # label_percent() (currently only referenced in commented lines)
)
Dental Data completeness
Front. Oral Health, 03 June 2025
Sec. Oral Health Promotion
Volume 6 - 2025 | https://doi.org/10.3389/froh.2025.1535164
Original Image
Figure 1. (a) Data completeness of tobacco use by provider and year. (b) Data completeness of alcohol use by year and provider type. (c) Data completeness of drug use by year and provider type.
# Read the figure-derived values from the published Google Sheet (CSV export).
df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vT4ZkIi2x2a5m1sVXvVvay3gR4mWWLA1MrdLIma9dYL85aBlkjJwiNr8cY16eIKHd2ypQlpexnMjIbz/pub?gid=2055237550&single=true&output=csv")
Rows: 36 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Provider, Item
dbl (2): Year, Percentage
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Since the original data are not available, I extracted the values from the published figure.
# Preview the first rows of the imported data.
head(df)
# A tibble: 6 × 4
Provider Year Item Percentage
<chr> <dbl> <chr> <dbl>
1 Student 2017 Tobacco (%) 48
2 <NA> 2018 <NA> 48
3 <NA> 2019 <NA> 47
4 Faculty 2017 <NA> 18
5 <NA> 2018 <NA> 21
6 <NA> 2019 <NA> 20
# Provider and Item are only filled in on the first row of each group in the
# sheet (the rest import as NA), so carry the last non-missing value downward.
df <- df |>
  fill(Provider, Item)

# Confirm that the NA cells are now populated.
head(df)
# A tibble: 6 × 4
Provider Year Item Percentage
<chr> <dbl> <chr> <dbl>
1 Student 2017 Tobacco (%) 48
2 Student 2018 Tobacco (%) 48
3 Student 2019 Tobacco (%) 47
4 Faculty 2017 Tobacco (%) 18
5 Faculty 2018 Tobacco (%) 21
6 Faculty 2019 Tobacco (%) 20
# Bar chart of Percentage per provider, faceted by year (rows) and item
# (columns), with a reference line at 50%.
df |>
  # Put the providers in a fixed display order.
  mutate(
    Provider = fct_relevel(Provider, "Student", "Resident", "Faculty", "Other")
  ) |>
  ggplot(aes(
    x = Provider,
    y = Percentage,
    fill = Provider
  )) +
  geom_col() +
  facet_grid(Year ~ Item) +
  # scale_y_continuous(breaks = c(0, 50, 100)) + # Set custom breaks for the y-axis
  # theme_minimal() +
  ylim(0, 100) +
  # Dashed purple reference line at 50%.
  geom_hline(yintercept = 50, color = "#8a226a", linetype = "dashed") +
  labs(
    title = "Comparison of Provider Percentages by Year and Item, Stratified by Provider Type",
    caption = "Source DOI:10.3389/froh.2025.1535164"
  ) +
  # "I" is not a viridis palette (valid letters are "A" to "H"); it fell back
  # to the default "D" (viridis) with a warning, so request "D" explicitly.
  scale_fill_viridis_d(option = "D")
Warning in viridisLite::viridis(n, alpha, begin, end, direction, option):
Option 'I' does not exist. Defaulting to 'viridis'.
Completeness by race / provider
# Percentages per provider type: one column per year plus a pooled
# 2017–2019 summary column.
provider_data <- tribble(
  ~Provider,  ~`2017 (%)`, ~`2018 (%)`, ~`2019 (%)`, ~`2017–2019 (%)`,
  "Student",  80.4,        80.2,        79.3,        80.0,
  "Faculty",  59.0,        59.5,        57.8,        58.8,
  "Resident", 68.0,        69.5,        68.7,        68.8,
  "Other",    86.7,        68.2,        62.6,        70.8
)
# Reshape provider_data to one row per provider-year and draw one line per
# provider across the three years.
provider_data |>
  select(-`2017–2019 (%)`) |> # Remove the summary column
  pivot_longer(
    cols = starts_with("20"),
    names_to = "Year",
    values_to = "Percentage"
  ) |>
  mutate(
    # Column names look like "2017 (%)"; keep only the four-digit year.
    Year = str_extract(Year, "\\d{4}") |> as.integer()
  ) |>
  ggplot(aes(
    x = as.factor(Year),
    y = Percentage,
    color = Provider,
    group = Provider
  )) +
  ylim(50, 100) +
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Percentage Over Time by Provider Type",
    x = "Year",
    y = "Percentage"
  ) +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Table 3. Completeness of health history variable by providers for years 2017–2019.
# Table 3 values entered inline, reshaped to one row per provider-year, and
# drawn as one line per provider.
tribble(
  ~Provider,  ~`2017 (%)`, ~`2018 (%)`, ~`2019 (%)`, ~`2017–2019 (%)`,
  "Student",  74.3,        77.5,        77.9,        76.6,
  "Faculty",  56.3,        60.6,        60.1,        59.1,
  "Resident", 38.3,        41.5,        40.7,        40.2,
  "Other",    79.5,        83.1,        59.0,        72.8
) |>
  select(-`2017–2019 (%)`) |> # Drop summary column
  pivot_longer(
    cols = starts_with("20"),
    names_to = "Year",
    values_to = "Percentage"
  ) |>
  mutate(
    # Column names look like "2017 (%)"; keep only the four-digit year.
    Year = str_extract(Year, "\\d{4}") |> as.integer()
  ) |>
  ggplot(aes(
    x = as.factor(Year),
    y = Percentage,
    group = Provider,
    color = Provider
  )) +
  ylim(0, 100) +
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  # scale_y_continuous(labels = label_percent(accuracy = 1)) +
  labs(
    title = "Table 3. Completeness of health history variable by providers (2017–2019)",
    x = "Year",
    y = "Percentage"
  ) +
  theme_minimal()
Table 4. Completeness of pain ratings by providers for years 2017–2019.
# Table 4 values entered inline, reshaped to one row per provider-year, and
# drawn as one line per provider on a 0-100 axis.
tribble(
  ~Provider,  ~`2017 (%)`, ~`2018 (%)`, ~`2019 (%)`, ~`2017–2019 (%)`,
  "Student",  1.3,         1.5,         1.8,         1.6,
  "Faculty",  3.1,         3.6,         3.8,         3.5,
  "Resident", 0.4,         0.3,         0.3,         0.3,
  "Other",    0.0,         0.0,         0.1,         0.1
) |>
  select(-`2017–2019 (%)`) |>
  pivot_longer(
    cols = starts_with("20"),
    names_to = "Year",
    values_to = "Percentage"
  ) |>
  mutate(
    # Column names look like "2017 (%)"; keep only the four-digit year.
    Year = str_extract(Year, "\\d{4}") |> as.integer()
  ) |>
  ggplot(aes(
    x = as.factor(Year),
    y = Percentage,
    group = Provider,
    color = Provider
  )) +
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  ylim(0, 100) +
  # scale_y_break(c(10, 90)) + # ← this is the key line to break the y-axis
  labs(
    title = "Table 4. Completeness of pain ratings by providers for years 2017–2019",
    x = "Year",
    y = "Percentage"
  ) +
  # scale_y_continuous(labels = label_percent(accuracy = 0.1)) + # Format y-axis as %
  theme_minimal()