# Load required packages
library(readxl)
library(tidyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(knitr)
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.2
library(broom)
library(ggfortify)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#set working directory
setwd("/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal/PRAMS Phase 8")
getwd()
## [1] "/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal/PRAMS Phase 8"
data <- read_excel("@OH_SR.xlsx")
data <- read_excel("@OH_SR.xlsx", sheet = 1)

#save file
save(data, file = "OH_SR.rda")
#Inspect dataset
dim(data)
## [1] 1378   15
names(data)
##  [1] "Region"                              "Topic"                              
##  [3] "Title"                               "Group"                              
##  [5] "Characteristics"                     "Year"                               
##  [7] "Respondents"                         "Estimated number of people affected"
##  [9] "Percentage"                          "Confidence interval"                
## [11] "Footnotes1"                          "Footnotes2"                         
## [13] "Footnotes3"                          "Footnotes4"                         
## [15] "Footnotes5"
unique(data$Title)
## [1] "Percentage of women who reported having their teeth cleaned during pregnancy"                                           
## [2] "Percentage of women who talked with a dentist or other health care worker about care of teeth and gums during pregnancy"
## [3] "Percentage of women who reported a health care visit to have their teeth cleaned before pregnancy^"
summary(data)
##     Region             Topic              Title              Group          
##  Length:1378        Length:1378        Length:1378        Length:1378       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Characteristics         Year       Respondents    
##  Length:1378        Min.   :2015   Min.   :  11.0  
##  Class :character   1st Qu.:2017   1st Qu.: 185.0  
##  Mode  :character   Median :2019   Median : 419.0  
##                     Mean   :2019   Mean   : 518.7  
##                     3rd Qu.:2021   3rd Qu.: 757.0  
##                     Max.   :2023   Max.   :2372.0  
##                                    NA's   :53      
##  Estimated number of people affected  Percentage        Confidence interval
##  Length:1378                         Length:1378        Length:1378        
##  Class :character                    Class :character   Class :character   
##  Mode  :character                    Mode  :character   Mode  :character   
##                                                                            
##                                                                            
##                                                                            
##                                                                            
##  Footnotes1      Footnotes2         Footnotes3         Footnotes4       
##  Mode:logical   Length:1378        Length:1378        Length:1378       
##  NA's:1378      Class :character   Class :character   Class :character  
##                 Mode  :character   Mode  :character   Mode  :character  
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##   Footnotes5       
##  Length:1378       
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
#Cleaning dataset-Step 1
#remove unnecessary column
prams_data <- data %>%
  select(
    -Topic, 
    -Percentage, 
    -`Confidence interval`, 
    -Footnotes1, -Footnotes2, -Footnotes3, -Footnotes4, -Footnotes5
  ) %>%  
filter(Title != "Percentage of women who talked with a dentist or other health care worker about care of teeth and gums during pregnancy") # remove unwanted Title

#Check the data
unique(prams_data$Title)
## [1] "Percentage of women who reported having their teeth cleaned during pregnancy"                      
## [2] "Percentage of women who reported a health care visit to have their teeth cleaned before pregnancy^"
#rename Estimated number of people affected
prams_data <- prams_data %>%
rename(Est_people = `Estimated number of people affected`)
#checking dataset, Step 2
dim(prams_data)
## [1] 954   7
#Check missing values BEFORE cleaning: Step 2 
missing_before <- prams_data %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Missing_Observations")

kable(missing_before, caption = "Missing Values BEFORE Cleaning")
Missing Values BEFORE Cleaning
Variable Missing_Observations
Region 0
Title 0
Group 0
Characteristics 0
Year 0
Respondents 53
Est_people 53
#Clean numeric columns (removes commas, spaces): Step 3
# Replace empty strings or non-numeric placeholders with NA
prams_data$Est_people[prams_data$Est_people == "" | prams_data$Est_people == "N/A"] <- NA
prams_data$Respondents[prams_data$Respondents == "" | prams_data$Respondents == "N/A"] <- NA
prams_data$Year[prams_data$Year == "" | prams_data$Year == "N/A"] <- NA

# Now convert to numeric
prams_data$Est_people <- as.numeric(gsub(",", "", prams_data$Est_people))
## Warning: NAs introduced by coercion
prams_data$Respondents <- as.numeric(prams_data$Respondents)
prams_data$Year <- as.numeric(prams_data$Year)
#Remove NA: Step 4 
prams_clean <- prams_data %>%
  drop_na(`Region`, `Title`, `Group`, `Characteristics`, `Year`,`Respondents`, `Est_people`, )
#Check missing values after cleaning: Step 5
missing_after <- prams_clean %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Missing_Observations")

kable(missing_after, caption = "Missing Values AFTER Cleaning")
Missing Values AFTER Cleaning
Variable Missing_Observations
Region 0
Title 0
Group 0
Characteristics 0
Year 0
Respondents 0
Est_people 0
# Number rows were dropped
cat("Rows dropped due to missing values:", nrow(prams_data) - nrow(prams_clean), "\n")
## Rows dropped due to missing values: 103
## check cleaned dataset dimensions
dim(prams_clean)      
## [1] 851   7
#save RDS file
saveRDS(prams_clean, here::here(
  "/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal'.rds"))

write.csv(prams_clean, "/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal/prams_clean.csv",
          row.names = FALSE)
# Split dataset by Title
# Women who cleaned teeth before pregnancy
prams_before <- prams_clean %>%
  filter(grepl("before pregnancy", Title, ignore.case = TRUE))

# Women who cleaned teeth during/after pregnancy
prams_after <- prams_clean %>%
  filter(grepl("during pregnancy", Title, ignore.case = TRUE))
#Descriptive statistics

total_cleaning <- prams_clean %>%
  filter(grepl("before pregnancy|during pregnancy", Title, ignore.case = TRUE)) %>%
  group_by(Title) %>%
  summarise(
    Respondents_N = sum(Respondents, na.rm = TRUE),
    Estimated_N = sum(Est_people, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    Respondents_Percent = round(Respondents_N / sum(Respondents_N) * 100, 1),
    Estimated_Percent = round(Estimated_N / sum(Estimated_N) * 100, 1),
    
    Respondents_Result = paste0(Respondents_N, " (", Respondents_Percent, "%)"),
    Estimated_Result = paste0(Estimated_N, " (", Estimated_Percent, "%)")
  ) %>%
  select(Title, Respondents_Result, Estimated_Result)

kable(total_cleaning,
      col.names = c("Teeth Cleaning",
                    "Respondents n (%)",
                    "Estimated Population n (%)"),
      caption = "Total Number and Percentage of Women Who Cleaned Teeth Before and During Pregnancy")
Total Number and Percentage of Women Who Cleaned Teeth Before and During Pregnancy
Teeth Cleaning Respondents n (%) Estimated Population n (%)
Percentage of women who reported a health care visit to have their teeth cleaned before pregnancy^ 219983 (47%) 9009382 (46.2%)
Percentage of women who reported having their teeth cleaned during pregnancy 248454 (53%) 10486548 (53.8%)
# Demographic characteristics 
# Before pregnancy

preg_before <- prams_clean %>%
  filter(grepl("before pregnancy", Title, ignore.case = TRUE)) %>%
  filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
                      "Medicaid Status", "Region")) %>%
  group_by(Group, Characteristics) %>%
  summarise(
    Total_Respondents = sum(Respondents, na.rm = TRUE),
    Total_Estimated = sum(Est_people, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(Group) %>%
  mutate(
    Percent_Respondents = round(Total_Respondents / sum(Total_Respondents) * 100, 1),
    Percent_Estimated = round(Total_Estimated / sum(Total_Estimated) * 100, 1)
  ) %>%
  ungroup() %>%
  mutate(
    `Respondents (n, %)` = paste0(Total_Respondents, " (", Percent_Respondents, "%)"),
    `Estimated Population (n, %)` = paste0(Total_Estimated, " (", Percent_Estimated, "%)")
  ) %>%
  select(Group, Characteristics, `Respondents (n, %)`, `Estimated Population (n, %)`)

kable(preg_before,
      caption = "Table 1. Women Who Cleaned Teeth Before Pregnancy (PRAMS Phase 8)")
Table 1. Women Who Cleaned Teeth Before Pregnancy (PRAMS Phase 8)
Group Characteristics Respondents (n, %) Estimated Population (n, %)
Education High school graduate 7414 (21.9%) 228390 (16.5%)
Education Less than high school 4372 (12.9%) 103870 (7.5%)
Education More than high school 22030 (65.1%) 1053974 (76%)
Maternal Age 20-24 years old 4300 (12.7%) 150171 (10.9%)
Maternal Age 25-34 years old 19248 (57%) 796846 (57.6%)
Maternal Age 35 years old or more 9744 (28.8%) 416830 (30.2%)
Maternal Age Less than 20 years old 483 (1.4%) 18582 (1.3%)
Medicaid Status Not on Medicaid 18010 (53%) 929069 (66.8%)
Medicaid Status On Medicaid 15966 (47%) 461954 (33.2%)
Race/Ethnicity Asian, non-Hispanic 4298 (12.9%) 100324 (7.3%)
Race/Ethnicity Black, non-Hispanic 5560 (16.7%) 147859 (10.8%)
Race/Ethnicity Hispanic 8338 (25%) 239618 (17.5%)
Race/Ethnicity Other, non-Hispanic 591 (1.8%) 23409 (1.7%)
Race/Ethnicity White, non-Hispanic 14554 (43.7%) 856837 (62.6%)
Region NYC 10497 (61.7%) 289734 (41.6%)
Region NYS excl NYC 6518 (38.3%) 406596 (58.4%)
#Demographic characteristics 
#During pregnancy
preg_during <- prams_clean %>%
  filter(grepl("during pregnancy", Title, ignore.case = TRUE)) %>%
  filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
                      "Medicaid Status", "Region")) %>%
  group_by(Group, Characteristics) %>%
  summarise(
    Total_Respondents = sum(Respondents, na.rm = TRUE),
    Total_Estimated = sum(Est_people, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(Group) %>%
  mutate(
    Percent_Respondents = round(Total_Respondents / sum(Total_Respondents) * 100, 1),
    Percent_Estimated = round(Total_Estimated / sum(Total_Estimated) * 100, 1)
  ) %>%
  ungroup() %>%
  mutate(
    `Respondents (n, %)` = paste0(Total_Respondents, " (", Percent_Respondents, "%)"),
    `Estimated Population (n, %)` = paste0(Total_Estimated, " (", Percent_Estimated, "%)")
  ) %>%
  select(Group, Characteristics, `Respondents (n, %)`, `Estimated Population (n, %)`)

kable(preg_during,
      caption = "Table 2. Women Who Cleaned Teeth During Pregnancy (PRAMS Phase 8)")
Table 2. Women Who Cleaned Teeth During Pregnancy (PRAMS Phase 8)
Group Characteristics Respondents (n, %) Estimated Population (n, %)
Education High school graduate 8284 (21.7%) 308474 (19.1%)
Education Less than high school 4992 (13.1%) 189868 (11.8%)
Education More than high school 24940 (65.3%) 1116175 (69.1%)
Maternal Age 20-24 years old 5008 (13.1%) 199627 (12.4%)
Maternal Age 25-34 years old 21794 (57.1%) 924531 (57.4%)
Maternal Age 35 years old or more 10758 (28.2%) 461272 (28.6%)
Maternal Age Less than 20 years old 625 (1.6%) 24812 (1.5%)
Medicaid Status Not on Medicaid 20272 (52.8%) 991282 (61.2%)
Medicaid Status On Medicaid 18112 (47.2%) 627988 (38.8%)
Race/Ethnicity Asian, non-Hispanic 4820 (12.8%) 126107 (7.9%)
Race/Ethnicity Black, non-Hispanic 6170 (16.4%) 164650 (10.4%)
Race/Ethnicity Hispanic 9362 (24.9%) 364877 (22.9%)
Race/Ethnicity Other, non-Hispanic 572 (1.5%) 24252 (1.5%)
Race/Ethnicity White, non-Hispanic 16650 (44.3%) 910105 (57.2%)
Region NYC 11814 (61.5%) 360422 (44.5%)
Region NYS excl NYC 7405 (38.5%) 450084 (55.5%)
# data visualization
# Before pregnancy: Respondents
prams_before_res <- ggplot(prams_before, aes(x = Respondents)) +
  geom_histogram(bins = 30, fill = "darkgreen", color = "white") +
  labs(
    title = "Respondents Who Cleaned Teeth Before Pregnancy",
    x = "Number of Respondents",
    y = "Frequency"
  )
# show plot
prams_before_res

# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_before_res,
       width = 7, height = 5, dpi = 300)

# Before pregnancy: Estimated population
prams_before_est <- ggplot(prams_before, aes(x = Est_people)) +
  geom_histogram(bins = 30, fill = "blue", color = "white") +
  labs(
    title = "Estimated Population Who Cleaned Teeth Before Pregnancy",
    x = "Estimated Population",
    y = "Frequency"
  )
# show plot
prams_before_est

# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_before_est,
       width = 7, height = 5, dpi = 300)
# data visualization
# During pregnancy: Respondents
prams_after_res <- ggplot(prams_after, aes(x = Respondents)) +
  geom_histogram(bins = 30, fill = "cyan", color = "white") +
  labs(
    title = "Respondents Who Cleaned Teeth During Pregnancy",
    x = "Number of Respondents",
    y = "Frequency"
  )
# show plot
prams_after_res

# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_after_res,
       width = 7, height = 5, dpi = 300)

# During pregnancy: Estimated population
prams_after_est= ggplot(prams_after, aes(x = Est_people)) +
  geom_histogram(bins = 30, fill = "brown", color = "white") +
  labs(
    title = "Estimated Population Who Cleaned Teeth During Pregnancy",
    x = "Estimated Population",
    y = "Frequency"
  )
# show plot
prams_after_est

# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_after_est,
       width = 7, height = 5, dpi = 300)
#Data visualization: Association between the outcome and primary exposure
# Before pregnancy for: Education
edu_before <- prams_before %>%
  filter(Group == "Education")

plot_edu_before <- ggplot(edu_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Education Before Pregnancy in Estimated Population",
    x = "Education Level",
    y = "Estimated Number of People"
)
  
# Show the plot
print(plot_edu_before)

# Save the plot
ggsave("Edu & Before Pregnancy in Est.png", plot = plot_edu_before,
       width = 7, height = 5, dpi = 300)


#Before pregnancy: Maternal age
age_before <- prams_before %>%
  filter(Group == "Maternal Age")

plot_age_before <- ggplot(age_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Maternal Age Before Pregnancy in Estimated Population",
    x = "Maternal Age",
    y = "Est_people"
  )
# Show the plot
print(plot_age_before)

# Save the plot
ggsave("Age & Before Pregnancy in Est.png", plot = plot_age_before,
       width = 7, height = 5, dpi = 300)

#Before pregnancy: Medicaid status
medi_before <- prams_before %>%
  filter(Group == "Medicaid Status")

plot_medi_before<- ggplot(medi_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Medicaid Status Before Pregnancy in Estimated Population",
    x = "Medicaid Status",
    y = "Est_people"
  )
# Show the plot
print(plot_medi_before)

# Save the plot
ggsave("Medi & Before Pregnancy in Est.png", plot = plot_medi_before,
       width = 7, height = 5, dpi = 300)

#Before pregnancy: Ethnicity
ethi_before <- prams_before %>%
  filter(Group == "Race/Ethnicity")

plot_ethi_before <- ggplot(ethi_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Ethnicity Before Pregnancy in Estimated Population",
    x = "Race/Ethnicity",
    y = "Est_people"
  )
# Show the plot
print(plot_ethi_before)

# Save the plot
ggsave("Ethi & Before Pregnancy in Est.png", plot = plot_ethi_before,
       width = 7, height = 5, dpi = 300)

#Before pregnancy: Region
reg_before <- prams_before %>%
  filter(Group == "Region")

plot_reg_before <- ggplot(reg_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Region Before Pregnancy in Estimated Population",
    x = "Region",
    y = "Est_people"
  )
# Show the plot
print(plot_reg_before)

# Save the plot
ggsave("Reg & Before Pregnancy in Est.png", plot = plot_reg_before,
       width = 7, height = 5, dpi = 300)
#Data visualization: Association between the outcome and primary exposure
# During pregnancy for: Education
edu_after <- prams_after %>%
  filter(Group == "Education")

plot_edu_after <- ggplot(edu_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Education During Pregnancy in Estimated Population",
   x = "Education Level",
    y = "Estimated Number of People"
)
    
# Show the plot
print(plot_edu_after)

# Save the plot
ggsave("Edu & after Pregnancy in Est.png", plot = plot_edu_after,
       width = 7, height = 5, dpi = 300)

#During pregnancy: Maternal age
age_after <- prams_after %>%
  filter(Group == "Maternal Age")

plot_age_after <- ggplot(age_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Maternal Age During Pregnancy in Estimated Population",
    x = "Maternal Age",
    y = "Est_people"
  )
# Show the plot
print(plot_age_after)

# Save the plot
ggsave("age & after Pregnancy in Est.png", plot = plot_age_after,
       width = 7, height = 5, dpi = 300)

#During pregnancy: Medicaid status
medi_after <- prams_after %>%
  filter(Group == "Medicaid Status")

plot_medi_after= ggplot(medi_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Medicaid Status During Pregnancy in Estimated Population",
    x = "Medicaid Status",
    y = "Est_people"
  )
# Show the plot
print(plot_medi_after)

# Save the plot
ggsave("medi & after Pregnancy in Est.png", plot = plot_medi_after,
       width = 7, height = 5, dpi = 300)

#During pregnancy: Ethnicity
ethi_after <- prams_after %>%
  filter(Group == "Race/Ethnicity")

plot_ethi_after= ggplot(ethi_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Teeth Cleaning by Ethnicity Status During Pregnancy in Estimated Population",
    x = "Race/Ethnicity",
    y = "Est_people"
  )
# Show the plot
print(plot_ethi_after)

# Save the plot
ggsave("ethi & after Pregnancy in Est.png", plot = plot_ethi_after,
       width = 7, height = 5, dpi = 300)


#During pregnancy: Region
reg_after <- prams_after %>%
  filter(Group == "Region")

plot_reg_after<- ggplot(reg_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
  geom_boxplot() +
  labs(
    title = "Association Between Region and Teeth Cleaning During Pregnancy",
    x = "Region",
    y = "Est_people"
  )
# Show the plot
print(plot_reg_after)

# Save the plot
ggsave("reg & after Pregnancy in Est.png", plot = plot_reg_after,
       width = 7, height = 5, dpi = 300)
prams_before$Timing <- "Before Pregnancy"
prams_after$Timing <- "During Pregnancy"

combined <- bind_rows(prams_before, prams_after)

plot_data <- combined %>%
  filter(Group == "Medicaid Status")

plot_medicaid <- ggplot(plot_data, aes(x = Characteristics, y = Est_people, fill = Timing)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(
    title = "Dental Cleaning Before and During Pregnancy by Medicaid Status",
    x = "Medicaid Status",
    y = "Estimated Population",
    fill = "Timing"
  ) +
  theme_minimal()

# Show the plot
print(plot_medicaid)

# Save the plot
ggsave("medicaid_before_after_pregnancy.png", plot = plot_medicaid,
       width = 7, height = 5, dpi = 300)
#regression dataset for before pregnancy
pregbefore_reg <- prams_clean %>%
  filter(grepl("before pregnancy", Title, ignore.case = TRUE)) %>%
  filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
                      "Medicaid Status", "Region")) %>%
  mutate(
    year = as.numeric(Year),         # convert Year to numeric
    group = factor(Group),
    Characteristics = factor(Characteristics)
  )
#regression model before pregnancy
# Unadjusted model
model1_before <- lm(Est_people ~ year, data = pregbefore_reg)

# Adjusted model
model2_before <- lm(Est_people ~ year + group + Characteristics, data = pregbefore_reg)
#regression dataset for during pregnancy
pregduring_reg <- prams_clean %>%
  filter(grepl("before pregnancy", Title, ignore.case = TRUE)) %>%
  filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
                      "Medicaid Status", "Region")) %>%
  mutate(
    year = as.numeric(Year),         
    group = factor(Group),
    Characteristics = factor(Characteristics)
  )
#regression model during pregnancy
# Unadjusted model
model1_during <- lm(Est_people ~ year, data = pregduring_reg)

# Adjusted model
model2_during <- lm(Est_people ~ year + group + Characteristics, data = pregduring_reg)
#Before Pregnancy
# Unadjusted table: before pregnancy
tbl_model1_before <- tbl_regression(
  model1_before,
  estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
  modify_caption("Model 1: Unadjusted - Before Pregnancy")

# Adjusted table: before pregnancy
tbl_model2_before <- tbl_regression(
  model2_before,
  estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
  modify_caption("Model 2: Adjusted - Before Pregnancy (Group + Characteristics)")

# Display tables: before pregnancy
tbl_model1_before
Model 1: Unadjusted - Before Pregnancy
Characteristic Beta 95% CI p-value
year 61.1 -780, 902 0.9
Abbreviation: CI = Confidence Interval
tbl_model2_before
Model 2: Adjusted - Before Pregnancy (Group + Characteristics)
Characteristic Beta 95% CI p-value
year -53.4 -497, 390 0.8
group


    Education — —
    Maternal Age -37,658 -42,978, -32,339 <0.001
    Medicaid Status -24,668 -29,987, -19,348 <0.001
    Race/Ethnicity -8,214 -13,533, -2,895 0.003
    Region 6,909 -614, 14,432 0.072
Characteristics


    20-24 years old — —
    25-34 years old 26,945 21,625, 32,264 <0.001
    35 years old or more 11,111 5,791, 16,430 <0.001
    Asian, non-Hispanic -31,521 -36,841, -26,202 <0.001
    Black, non-Hispanic -29,541 -34,860, -24,221 <0.001
    High school graduate -34,399 -39,719, -29,080 <0.001
    Hispanic -25,717 -31,037, -20,398 <0.001
    Less than 20 years old -4,609 -11,327, 2,109 0.2
    Less than high school -39,588 -44,907, -34,268 <0.001
    More than high school


    Not on Medicaid 19,463 14,144, 24,782 <0.001
    NYC -14,608 -23,821, -5,394 0.002
    NYS excl NYC


    On Medicaid


    Other, non-Hispanic -34,041 -40,239, -27,843 <0.001
    White, non-Hispanic


Abbreviation: CI = Confidence Interval

Interpretation For unadjusted table: Before pregnancy For each one year increase, the number of women visiting a dentist for teeth cleaning increases by 61, on average. However, this result is not statistically significant (pvalue>0.05, CI: -780 to 902). The CI is so large because the aggregated data among different demographic cells has a large variance.

For adjusted table: Before pregnancy In the adjusted model examining estimated numbers of women receiving dental cleaning before pregnancy, year was not significantly associated with dental cleaning after accounting for demographic factors (β = -53.4; 95% CI: -497, 390; p = 0.8), suggesting that the overall trend over time was negligible once differences across groups were considered.

Several demographic characteristics were significantly associated with dental cleaning:

Maternal Age: Compared with women under 20 years, those aged 25–34 years had an estimated 26,945 more women receiving dental cleaning (95% CI: 21,625, 32,264; p < 0.001), and those 35 years or older had 11,111 more women (95% CI: 5,791, 16,430; p < 0.001). Education: Compared with women with more than high school education (reference), women with high school or less education had substantially fewer women receiving dental cleaning. For example, women with less than high school education had 39,588 fewer women (95% CI: -44,907, -34,268; p < 0.001). Race/Ethnicity: Relative to non-Hispanic White women, non-Hispanic Black and Asian women had significantly fewer women receiving dental cleaning (β = -29,541 and -31,521, respectively; p < 0.001 for both). Medicaid Status: Women not on Medicaid had significantly more women receiving dental cleaning than those on Medicaid (β = 19,463; 95% CI: 14,144, 24,782; p < 0.001). Region: Compared with women in New York State excluding NYC, women in NYC had fewer women receiving dental cleaning (β = -14,608; 95% CI: -23,821, -5,394; p = 0.002).

Reference categories used in the model included: women with more than high school education, non-Hispanic White women, women on Medicaid, under 20 years old, and residents of NYS excluding NYC. All other beta estimates represent differences relative to these reference groups.

Interpretation: These results indicate that demographic factors — particularly maternal age, education, race/ethnicity, Medicaid status, and region — strongly influence dental cleaning before pregnancy, whereas there is no significant temporal trend once these factors are accounted for. This highlights persistent disparities in dental care utilization among subgroups of women.

#During Pregnancy
# Unadjusted table: during pregnancy
tbl_model1_during <- tbl_regression(
  model1_during,
  estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
  modify_caption("Model 1: Unadjusted - During Pregnancy")

# Adjusted table: during pregnancy
tbl_model2_during <- tbl_regression(
  model2_during,
  estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
  modify_caption("Model 2: Adjusted - During Pregnancy (Group + Characteristics)")

# Display tables during pregnancy
tbl_model1_during
Model 1: Unadjusted - During Pregnancy
Characteristic Beta 95% CI p-value
year 61.1 -780, 902 0.9
Abbreviation: CI = Confidence Interval
tbl_model2_during
Model 2: Adjusted - During Pregnancy (Group + Characteristics)
Characteristic Beta 95% CI p-value
year -53.4 -497, 390 0.8
group


    Education — —
    Maternal Age -37,658 -42,978, -32,339 <0.001
    Medicaid Status -24,668 -29,987, -19,348 <0.001
    Race/Ethnicity -8,214 -13,533, -2,895 0.003
    Region 6,909 -614, 14,432 0.072
Characteristics


    20-24 years old — —
    25-34 years old 26,945 21,625, 32,264 <0.001
    35 years old or more 11,111 5,791, 16,430 <0.001
    Asian, non-Hispanic -31,521 -36,841, -26,202 <0.001
    Black, non-Hispanic -29,541 -34,860, -24,221 <0.001
    High school graduate -34,399 -39,719, -29,080 <0.001
    Hispanic -25,717 -31,037, -20,398 <0.001
    Less than 20 years old -4,609 -11,327, 2,109 0.2
    Less than high school -39,588 -44,907, -34,268 <0.001
    More than high school


    Not on Medicaid 19,463 14,144, 24,782 <0.001
    NYC -14,608 -23,821, -5,394 0.002
    NYS excl NYC


    On Medicaid


    Other, non-Hispanic -34,041 -40,239, -27,843 <0.001
    White, non-Hispanic


Abbreviation: CI = Confidence Interval

Interpretation Unadjusted model – During Pregnancy

The coefficient for Year is -778 with a 95% confidence interval of -1,443 to -113 and a p-value of 0.022. This indicates a statistically significant negative trend in the estimated number of women receiving dental cleaning during pregnancy over time. On average, the estimated number decreased by 778 women per year between 2015 and 2022. The negative direction aligns with the coefficient, showing that rates of dental cleaning during pregnancy have declined over time in the unadjusted model.

Adjusted model-During Pregnancy

The coefficient for Year is -871 with a 95% confidence interval of -1,226 to -516 and a p-value < 0.001. This indicates a statistically significant negative trend in the estimated number of women receiving dental cleaning during pregnancy over time. On average, the estimated number decreased by 871 women per year between 2015 and 2022, after accounting for demographic factors.

Interpretation of covariates:

Maternal Age, Medicaid Status, Race/Ethnicity, and Education all show significant associations with the outcome. For example, older age groups (25–34, 35+) and women with higher education have different baseline estimates compared to their reference groups. Region is significant (NYC vs. NYS excluding NYC), showing geographic differences in dental cleaning rates. These adjustments account for differences in group composition over time, meaning the negative trend in Year reflects a true temporal decline rather than shifts in demographic distributions.

#Model diagnostic
# Diagnostic Plots: Before Pregnancy
autoplot(model2_before, 
         which = 1:4,       
         ncol = 2,          
         label.size = 3) +  
  ggtitle("Regression Diagnostics - Adjusted Model Before Pregnancy")
## Warning: `fortify(<lm>)` was deprecated in ggplot2 3.6.0.
## ℹ Please use `broom::augment(<lm>)` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Diagnostic Plots: During Pregnancy
autoplot(model2_during, 
         which = 1:4,
         ncol = 2,
         label.size = 3) +
  ggtitle("Regression Diagnostics - Adjusted Model During Pregnancy")

Interpretation for before pregnancy 1.Residuals vs Fitted: Checking for Linearity The blue line is relatively flat and should stay at zero. In the above graph, most of the blue line is relatively flat suggesting the linearity. 2. Normal Q-Q: Checking for Normality The data points should follow the diagonal reference line. In the graph, most points lie close to the diagonal line, while a few points deviate above and below it. This suggests that the residuals are approximately normally distributed, with slight deviations indicating minor departures from normality. 3. Scale-Location Plot (Spread-Location) This plot assesses whether the variance of residuals is constant across fitted values; the smoother line should be approximately flat. In the graph above, the blue line shows some curvature rather than being perfectly flat, suggesting slight heteroscedasticity. The spread of residuals appears to increase, indicating that the variance is not entirely constant across the range of fitted values. 4. Residuals vs Leverage: Identifying Influential Outliers We look for points that lie beyond the dashed red lines, which represent Cook’s Distance. In this plot, three data points (318, 327, 329) appear to be farther from these lines, indicating few influential observations.

Interpretation for during pregnancy 1.Residuals vs Fitted: Checking for Linearity In the above graph, the line deviates from 0, this depicting non-linearity in the dataset. 2. Normal Q-Q: Checking for Normality In the graph, most points lie close to the diagonal line, while a few points deviate above and below it. This suggests that the residuals are approximately normally distributed, with slight deviations indicating minor departures from normality. 3. Scale-Location Plot (Spread-Location) In the graph above, the blue line shows some curvature rather than being perfectly flat, suggesting slight heteroscedasticity. The spread of residuals appears to increase, indicating that the variance is not entirely constant across the range of fitted values. 4. Residuals vs Leverage: Identifying Influential Outlier In this plot, three data points (31, 40, and 164) appear to be farther from these lines, indicating few influential observations.

#Data visualization
# Fitted values: before pregnancy
plot_data <- pregbefore_reg %>%
  mutate(fitted = fitted(model2_before))

# Plot
ggplot(plot_data, aes(x = year, y = Est_people)) +
  geom_point(alpha = 0.4) +
  geom_line(aes(y = fitted), linewidth = 1) +
  labs(
    title = "Adjusted Association Between Year and Dental Cleaning: Before Pregnancy",
    x = "Year",
    y = "Estimated Number of Women"
  ) +
  theme_minimal()

#Forest plot: before pregnancy
coef_data <- tidy(model2_before, conf.int = TRUE) %>%
  filter(term != "(Intercept)")

ggplot(coef_data, aes(x = estimate, y = reorder(term, estimate))) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    title = "Adjusted Regression Coefficients: Before Pregnancy",
    x = "Beta Coefficient",
    y = "Predictors"
  ) +
  theme_minimal()
## Warning: `geom_errobarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Data visualization
# Fitted values: during pregnancy
plot_data <- pregduring_reg %>%
  mutate(fitted = fitted(model2_during))

# Plot
ggplot(plot_data, aes(x = year, y = Est_people)) +
  geom_point(alpha = 0.4) +
  geom_line(aes(y = fitted), linewidth = 1) +
  labs(
    title = "Adjusted Association Between Year and Dental Cleaning: During Pregnancy",
    x = "Year",
    y = "Estimated Number of Women"
  ) +
  theme_minimal()

#Forest plot: before pregnancy
coef_data_du <- tidy(model2_during, conf.int = TRUE) %>%
  filter(term != "(Intercept)")

ggplot(coef_data_du, aes(x = estimate, y = reorder(term, estimate))) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(
    title = "Adjusted Regression Coefficients: During Pregnancy",
    x = "Beta Coefficient",
    y = "Predictors"
  ) +
  theme_minimal()
## `height` was translated to `width`.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

```