Dental Data Completeness

Front. Oral Health, 03 June 2025

Sec. Oral Health Promotion

Volume 6 - 2025 | https://doi.org/10.3389/froh.2025.1535164

Original Image

Figure 1. (a) Data completeness of tobacco use by provider and year. (b) Data completeness of alcohol use by year and provider type. (c) Data completeness of drug use by year and provider type.

pacman::p_load(tidyverse, 
               viridis, 
               ggbreak, 
               scales)

# Read the CSV export of the published Google Sheet into a tibble.
df <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vT4ZkIi2x2a5m1sVXvVvay3gR4mWWLA1MrdLIma9dYL85aBlkjJwiNr8cY16eIKHd2ypQlpexnMjIbz/pub?gid=2055237550&single=true&output=csv"
)

Rows: 36 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Provider, Item
dbl (2): Year, Percentage

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Since the original data are not available, I extracted the values from the figure above (Figure 1).

# Preview the first six rows of the raw import.
df |>
  head(n = 6)

# A tibble: 6 × 4
  Provider  Year Item        Percentage
  <chr>    <dbl> <chr>            <dbl>
1 Student   2017 Tobacco (%)         48
2 <NA>      2018 <NA>                48
3 <NA>      2019 <NA>                47
4 Faculty   2017 <NA>                18
5 <NA>      2018 <NA>                21
6 <NA>      2019 <NA>                20

# Provider and Item are only present on the first row of each group in the
# sheet; carry the last non-missing value downwards to label every row.
df <- fill(df, Provider, Item, .direction = "down")

# Preview the first six rows after filling.
df |>
  head(n = 6)

# A tibble: 6 × 4
  Provider  Year Item        Percentage
  <chr>    <dbl> <chr>            <dbl>
1 Student   2017 Tobacco (%)         48
2 Student   2018 Tobacco (%)         48
3 Student   2019 Tobacco (%)         47
4 Faculty   2017 Tobacco (%)         18
5 Faculty   2018 Tobacco (%)         21
6 Faculty   2019 Tobacco (%)         20

# Faceted bar chart: one bar per provider type, with facet rows split by Year
# and facet columns split by Item.
df |>
  # Fix the bar order explicitly instead of relying on alphabetical order.
  mutate(
    Provider = fct_relevel(Provider, "Student", "Resident", "Faculty", "Other")
  ) |>
  ggplot(aes(x = Provider, y = Percentage, fill = Provider)) +
  geom_col() +
  facet_grid(Year ~ Item) +
  # Show the full 0-100 range on the y-axis.
  ylim(0, 100) +
  # Dashed reference line at 50%.
  geom_hline(yintercept = 50, color = "#8a226a", linetype = "dashed") +
  labs(
    title = "Comparison of Provider Percentages by Year and Item, Stratified by Provider Type",
    caption = "Source DOI:10.3389/froh.2025.1535164"
  ) +
  # Valid palette options are "A"-"H". The previous value "I" does not exist
  # and silently fell back to viridis with a warning; "D" is that same viridis
  # palette, so the rendered colours are unchanged.
  scale_fill_viridis_d(option = "D")

Warning in viridisLite::viridis(n, alpha, begin, end, direction, option):
Option 'I' does not exist. Defaulting to 'viridis'.

Completeness by race / provider

# Completeness percentages per provider type: one column per year plus a
# pooled 2017-2019 column. Built column-wise; rows are in the order
# Student, Faculty, Resident, Other.
provider_data <- tibble(
  Provider = c("Student", "Faculty", "Resident", "Other"),
  `2017 (%)` = c(80.4, 59.0, 68.0, 86.7),
  `2018 (%)` = c(80.2, 59.5, 69.5, 68.2),
  `2019 (%)` = c(79.3, 57.8, 68.7, 62.6),
  `2017–2019 (%)` = c(80.0, 58.8, 68.8, 70.8)
)

# Line chart of the yearly percentages in provider_data, one line per
# provider type.
provider_data |>
  select(-`2017–2019 (%)`) |>  # Drop the pooled summary column
  # Reshape to long format: one row per Provider x Year.
  pivot_longer(
    cols = starts_with("20"),
    names_to = "Year",
    values_to = "Percentage"
  ) |>
  mutate(
    # Column names look like "2017 (%)"; keep only the four-digit year.
    Year = str_extract(Year, "\\d{4}") |> as.integer()
  ) |>
  ggplot(aes(x = as.factor(Year),
             y = Percentage,
             color = Provider,
             group = Provider)) +
  ylim(50, 100) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Percentage Over Time by Provider Type",
    x = "Year",
    y = "Percentage"
  ) +
  theme_minimal()

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Table 3. Completeness of health history variable by providers for years 2017–2019.

# Table 3 values entered by hand, then plotted as one line per provider type.
tribble(
  ~Provider, ~`2017 (%)`, ~`2018 (%)`, ~`2019 (%)`, ~`2017–2019 (%)`,
  "Student", 74.3, 77.5, 77.9, 76.6,
  "Faculty", 56.3, 60.6, 60.1, 59.1,
  "Resident", 38.3, 41.5, 40.7, 40.2,
  "Other", 79.5, 83.1, 59.0, 72.8
) |>
  select(-`2017–2019 (%)`) |>  # Drop the pooled summary column
  # Reshape to long format: one row per Provider x Year.
  pivot_longer(
    cols = starts_with("20"),
    names_to = "Year",
    values_to = "Percentage"
  ) |>
  mutate(
    # Column names look like "2017 (%)"; keep only the four-digit year.
    Year = str_extract(Year, "\\d{4}") |> as.integer()
  ) |>
  ggplot(aes(x = as.factor(Year),
             y = Percentage,
             group = Provider,
             color = Provider)) +
  ylim(0, 100) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  # scale_y_continuous(labels = label_percent(accuracy = 1)) +
  labs(
    title = "Table 3. Completeness of health history variable by providers (2017–2019)",
    x = "Year",
    y = "Percentage"
  ) +
  theme_minimal()

Table 4. Completeness of pain ratings by providers for years 2017–2019.

# Table 4 values entered by hand, then plotted as one line per provider type.
tribble(
  ~Provider, ~`2017 (%)`, ~`2018 (%)`, ~`2019 (%)`, ~`2017–2019 (%)`,
  "Student", 1.3, 1.5, 1.8, 1.6,
  "Faculty", 3.1, 3.6, 3.8, 3.5,
  "Resident", 0.4, 0.3, 0.3, 0.3,
  "Other", 0.0, 0.0, 0.1, 0.1
) |>
  select(-`2017–2019 (%)`) |>  # Drop the pooled summary column
  # Reshape to long format: one row per Provider x Year.
  pivot_longer(
    cols = starts_with("20"),
    names_to = "Year",
    values_to = "Percentage"
  ) |>
  mutate(
    # Column names look like "2017 (%)"; keep only the four-digit year.
    Year = str_extract(Year, "\\d{4}") |> as.integer()
  ) |>
  ggplot(aes(x = as.factor(Year),
             y = Percentage,
             group = Provider,
             color = Provider)) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  # Keep the full 0-100 range, so the lines sit near the bottom of the panel.
  ylim(0, 100) +
  # scale_y_break(c(10, 90)) +  # ← this is the key line to break the y-axis
  labs(
    title = "Table 4. Completeness of pain ratings by providers for years 2017–2019",
    x = "Year",
    y = "Percentage"
  ) +
  # scale_y_continuous(labels = label_percent(accuracy = 0.1)) +  # Format y-axis as %
  theme_minimal()