# Load required packages
library(readxl)
library(tidyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(knitr)
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.2
library(broom)
library(ggfortify)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#set working directory
setwd("/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal/PRAMS Phase 8")
getwd()
## [1] "/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal/PRAMS Phase 8"
data <- read_excel("@OH_SR.xlsx")
data <- read_excel("@OH_SR.xlsx", sheet = 1)
#save file
save(data, file = "OH_SR.rda")
#Inspect dataset
dim(data)
## [1] 1378 15
names(data)
## [1] "Region" "Topic"
## [3] "Title" "Group"
## [5] "Characteristics" "Year"
## [7] "Respondents" "Estimated number of people affected"
## [9] "Percentage" "Confidence interval"
## [11] "Footnotes1" "Footnotes2"
## [13] "Footnotes3" "Footnotes4"
## [15] "Footnotes5"
unique(data$Title)
## [1] "Percentage of women who reported having their teeth cleaned during pregnancy"
## [2] "Percentage of women who talked with a dentist or other health care worker about care of teeth and gums during pregnancy"
## [3] "Percentage of women who reported a health care visit to have their teeth cleaned before pregnancy^"
summary(data)
## Region Topic Title Group
## Length:1378 Length:1378 Length:1378 Length:1378
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Characteristics Year Respondents
## Length:1378 Min. :2015 Min. : 11.0
## Class :character 1st Qu.:2017 1st Qu.: 185.0
## Mode :character Median :2019 Median : 419.0
## Mean :2019 Mean : 518.7
## 3rd Qu.:2021 3rd Qu.: 757.0
## Max. :2023 Max. :2372.0
## NA's :53
## Estimated number of people affected Percentage Confidence interval
## Length:1378 Length:1378 Length:1378
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Footnotes1 Footnotes2 Footnotes3 Footnotes4
## Mode:logical Length:1378 Length:1378 Length:1378
## NA's:1378 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Footnotes5
## Length:1378
## Class :character
## Mode :character
##
##
##
##
#Cleaning dataset-Step 1
#remove unnecessary column
prams_data <- data %>%
select(
-Topic,
-Percentage,
-`Confidence interval`,
-Footnotes1, -Footnotes2, -Footnotes3, -Footnotes4, -Footnotes5
) %>%
filter(Title != "Percentage of women who talked with a dentist or other health care worker about care of teeth and gums during pregnancy") # remove unwanted Title
#Check the data
unique(prams_data$Title)
## [1] "Percentage of women who reported having their teeth cleaned during pregnancy"
## [2] "Percentage of women who reported a health care visit to have their teeth cleaned before pregnancy^"
#rename Estimated number of people affected
prams_data <- prams_data %>%
rename(Est_people = `Estimated number of people affected`)
#checking dataset, Step 2
dim(prams_data)
## [1] 954 7
#Check missing values BEFORE cleaning: Step 2
missing_before <- prams_data %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Missing_Observations")
kable(missing_before, caption = "Missing Values BEFORE Cleaning")
| Variable | Missing_Observations |
|---|---|
| Region | 0 |
| Title | 0 |
| Group | 0 |
| Characteristics | 0 |
| Year | 0 |
| Respondents | 53 |
| Est_people | 53 |
#Clean numeric columns (removes commas, spaces): Step 3
# Replace empty strings or non-numeric placeholders with NA
prams_data$Est_people[prams_data$Est_people == "" | prams_data$Est_people == "N/A"] <- NA
prams_data$Respondents[prams_data$Respondents == "" | prams_data$Respondents == "N/A"] <- NA
prams_data$Year[prams_data$Year == "" | prams_data$Year == "N/A"] <- NA
# Now convert to numeric
prams_data$Est_people <- as.numeric(gsub(",", "", prams_data$Est_people))
## Warning: NAs introduced by coercion
prams_data$Respondents <- as.numeric(prams_data$Respondents)
prams_data$Year <- as.numeric(prams_data$Year)
#Remove NA: Step 4
prams_clean <- prams_data %>%
drop_na(`Region`, `Title`, `Group`, `Characteristics`, `Year`,`Respondents`, `Est_people`, )
#Check missing values after cleaning: Step 5
missing_after <- prams_clean %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Missing_Observations")
kable(missing_after, caption = "Missing Values AFTER Cleaning")
| Variable | Missing_Observations |
|---|---|
| Region | 0 |
| Title | 0 |
| Group | 0 |
| Characteristics | 0 |
| Year | 0 |
| Respondents | 0 |
| Est_people | 0 |
# Number rows were dropped
cat("Rows dropped due to missing values:", nrow(prams_data) - nrow(prams_clean), "\n")
## Rows dropped due to missing values: 103
## check cleaned dataset dimensions
dim(prams_clean)
## [1] 851 7
#save RDS file
saveRDS(prams_clean, here::here(
"/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal'.rds"))
write.csv(prams_clean, "/Users/samriddhi/Desktop/DrPH/HEPI_553_Muntasir Musum/project proposal/prams_clean.csv",
row.names = FALSE)
# Split dataset by Title
# Women who cleaned teeth before pregnancy
prams_before <- prams_clean %>%
filter(grepl("before pregnancy", Title, ignore.case = TRUE))
# Women who cleaned teeth during/after pregnancy
prams_after <- prams_clean %>%
filter(grepl("during pregnancy", Title, ignore.case = TRUE))
#Descriptive statistics
total_cleaning <- prams_clean %>%
filter(grepl("before pregnancy|during pregnancy", Title, ignore.case = TRUE)) %>%
group_by(Title) %>%
summarise(
Respondents_N = sum(Respondents, na.rm = TRUE),
Estimated_N = sum(Est_people, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
Respondents_Percent = round(Respondents_N / sum(Respondents_N) * 100, 1),
Estimated_Percent = round(Estimated_N / sum(Estimated_N) * 100, 1),
Respondents_Result = paste0(Respondents_N, " (", Respondents_Percent, "%)"),
Estimated_Result = paste0(Estimated_N, " (", Estimated_Percent, "%)")
) %>%
select(Title, Respondents_Result, Estimated_Result)
kable(total_cleaning,
col.names = c("Teeth Cleaning",
"Respondents n (%)",
"Estimated Population n (%)"),
caption = "Total Number and Percentage of Women Who Cleaned Teeth Before and During Pregnancy")
| Teeth Cleaning | Respondents n (%) | Estimated Population n (%) |
|---|---|---|
| Percentage of women who reported a health care visit to have their teeth cleaned before pregnancy^ | 219983 (47%) | 9009382 (46.2%) |
| Percentage of women who reported having their teeth cleaned during pregnancy | 248454 (53%) | 10486548 (53.8%) |
# Demographic characteristics
# Before pregnancy
preg_before <- prams_clean %>%
filter(grepl("before pregnancy", Title, ignore.case = TRUE)) %>%
filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
"Medicaid Status", "Region")) %>%
group_by(Group, Characteristics) %>%
summarise(
Total_Respondents = sum(Respondents, na.rm = TRUE),
Total_Estimated = sum(Est_people, na.rm = TRUE),
.groups = "drop"
) %>%
group_by(Group) %>%
mutate(
Percent_Respondents = round(Total_Respondents / sum(Total_Respondents) * 100, 1),
Percent_Estimated = round(Total_Estimated / sum(Total_Estimated) * 100, 1)
) %>%
ungroup() %>%
mutate(
`Respondents (n, %)` = paste0(Total_Respondents, " (", Percent_Respondents, "%)"),
`Estimated Population (n, %)` = paste0(Total_Estimated, " (", Percent_Estimated, "%)")
) %>%
select(Group, Characteristics, `Respondents (n, %)`, `Estimated Population (n, %)`)
kable(preg_before,
caption = "Table 1. Women Who Cleaned Teeth Before Pregnancy (PRAMS Phase 8)")
| Group | Characteristics | Respondents (n, %) | Estimated Population (n, %) |
|---|---|---|---|
| Education | High school graduate | 7414 (21.9%) | 228390 (16.5%) |
| Education | Less than high school | 4372 (12.9%) | 103870 (7.5%) |
| Education | More than high school | 22030 (65.1%) | 1053974 (76%) |
| Maternal Age | 20-24 years old | 4300 (12.7%) | 150171 (10.9%) |
| Maternal Age | 25-34 years old | 19248 (57%) | 796846 (57.6%) |
| Maternal Age | 35 years old or more | 9744 (28.8%) | 416830 (30.2%) |
| Maternal Age | Less than 20 years old | 483 (1.4%) | 18582 (1.3%) |
| Medicaid Status | Not on Medicaid | 18010 (53%) | 929069 (66.8%) |
| Medicaid Status | On Medicaid | 15966 (47%) | 461954 (33.2%) |
| Race/Ethnicity | Asian, non-Hispanic | 4298 (12.9%) | 100324 (7.3%) |
| Race/Ethnicity | Black, non-Hispanic | 5560 (16.7%) | 147859 (10.8%) |
| Race/Ethnicity | Hispanic | 8338 (25%) | 239618 (17.5%) |
| Race/Ethnicity | Other, non-Hispanic | 591 (1.8%) | 23409 (1.7%) |
| Race/Ethnicity | White, non-Hispanic | 14554 (43.7%) | 856837 (62.6%) |
| Region | NYC | 10497 (61.7%) | 289734 (41.6%) |
| Region | NYS excl NYC | 6518 (38.3%) | 406596 (58.4%) |
#Demographic characteristics
#During pregnancy
preg_during <- prams_clean %>%
filter(grepl("during pregnancy", Title, ignore.case = TRUE)) %>%
filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
"Medicaid Status", "Region")) %>%
group_by(Group, Characteristics) %>%
summarise(
Total_Respondents = sum(Respondents, na.rm = TRUE),
Total_Estimated = sum(Est_people, na.rm = TRUE),
.groups = "drop"
) %>%
group_by(Group) %>%
mutate(
Percent_Respondents = round(Total_Respondents / sum(Total_Respondents) * 100, 1),
Percent_Estimated = round(Total_Estimated / sum(Total_Estimated) * 100, 1)
) %>%
ungroup() %>%
mutate(
`Respondents (n, %)` = paste0(Total_Respondents, " (", Percent_Respondents, "%)"),
`Estimated Population (n, %)` = paste0(Total_Estimated, " (", Percent_Estimated, "%)")
) %>%
select(Group, Characteristics, `Respondents (n, %)`, `Estimated Population (n, %)`)
kable(preg_during,
caption = "Table 2. Women Who Cleaned Teeth During Pregnancy (PRAMS Phase 8)")
| Group | Characteristics | Respondents (n, %) | Estimated Population (n, %) |
|---|---|---|---|
| Education | High school graduate | 8284 (21.7%) | 308474 (19.1%) |
| Education | Less than high school | 4992 (13.1%) | 189868 (11.8%) |
| Education | More than high school | 24940 (65.3%) | 1116175 (69.1%) |
| Maternal Age | 20-24 years old | 5008 (13.1%) | 199627 (12.4%) |
| Maternal Age | 25-34 years old | 21794 (57.1%) | 924531 (57.4%) |
| Maternal Age | 35 years old or more | 10758 (28.2%) | 461272 (28.6%) |
| Maternal Age | Less than 20 years old | 625 (1.6%) | 24812 (1.5%) |
| Medicaid Status | Not on Medicaid | 20272 (52.8%) | 991282 (61.2%) |
| Medicaid Status | On Medicaid | 18112 (47.2%) | 627988 (38.8%) |
| Race/Ethnicity | Asian, non-Hispanic | 4820 (12.8%) | 126107 (7.9%) |
| Race/Ethnicity | Black, non-Hispanic | 6170 (16.4%) | 164650 (10.4%) |
| Race/Ethnicity | Hispanic | 9362 (24.9%) | 364877 (22.9%) |
| Race/Ethnicity | Other, non-Hispanic | 572 (1.5%) | 24252 (1.5%) |
| Race/Ethnicity | White, non-Hispanic | 16650 (44.3%) | 910105 (57.2%) |
| Region | NYC | 11814 (61.5%) | 360422 (44.5%) |
| Region | NYS excl NYC | 7405 (38.5%) | 450084 (55.5%) |
# data visualization
# Before pregnancy: Respondents
prams_before_res <- ggplot(prams_before, aes(x = Respondents)) +
geom_histogram(bins = 30, fill = "darkgreen", color = "white") +
labs(
title = "Respondents Who Cleaned Teeth Before Pregnancy",
x = "Number of Respondents",
y = "Frequency"
)
# show plot
prams_before_res
# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_before_res,
width = 7, height = 5, dpi = 300)
# Before pregnancy: Estimated population
prams_before_est <- ggplot(prams_before, aes(x = Est_people)) +
geom_histogram(bins = 30, fill = "blue", color = "white") +
labs(
title = "Estimated Population Who Cleaned Teeth Before Pregnancy",
x = "Estimated Population",
y = "Frequency"
)
# show plot
prams_before_est
# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_before_est,
width = 7, height = 5, dpi = 300)
# data visualization
# During pregnancy: Respondents
prams_after_res <- ggplot(prams_after, aes(x = Respondents)) +
geom_histogram(bins = 30, fill = "cyan", color = "white") +
labs(
title = "Respondents Who Cleaned Teeth During Pregnancy",
x = "Number of Respondents",
y = "Frequency"
)
# show plot
prams_after_res
# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_after_res,
width = 7, height = 5, dpi = 300)
# During pregnancy: Estimated population
prams_after_est= ggplot(prams_after, aes(x = Est_people)) +
geom_histogram(bins = 30, fill = "brown", color = "white") +
labs(
title = "Estimated Population Who Cleaned Teeth During Pregnancy",
x = "Estimated Population",
y = "Frequency"
)
# show plot
prams_after_est
# Save the plot
ggsave("respondents_before_pregnancy.png", plot = prams_after_est,
width = 7, height = 5, dpi = 300)
#Data visualization: Association between the outcome and primary exposure
# Before pregnancy for: Education
edu_before <- prams_before %>%
filter(Group == "Education")
plot_edu_before <- ggplot(edu_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Education Before Pregnancy in Estimated Population",
x = "Education Level",
y = "Estimated Number of People"
)
# Show the plot
print(plot_edu_before)
# Save the plot
ggsave("Edu & Before Pregnancy in Est.png", plot = plot_edu_before,
width = 7, height = 5, dpi = 300)
#Before pregnancy: Maternal age
age_before <- prams_before %>%
filter(Group == "Maternal Age")
plot_age_before <- ggplot(age_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Maternal Age Before Pregnancy in Estimated Population",
x = "Maternal Age",
y = "Est_people"
)
# Show the plot
print(plot_age_before)
# Save the plot
ggsave("Age & Before Pregnancy in Est.png", plot = plot_age_before,
width = 7, height = 5, dpi = 300)
#Before pregnancy: Medicaid status
medi_before <- prams_before %>%
filter(Group == "Medicaid Status")
plot_medi_before<- ggplot(medi_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Medicaid Status Before Pregnancy in Estimated Population",
x = "Medicaid Status",
y = "Est_people"
)
# Show the plot
print(plot_medi_before)
# Save the plot
ggsave("Medi & Before Pregnancy in Est.png", plot = plot_medi_before,
width = 7, height = 5, dpi = 300)
#Before pregnancy: Ethnicity
ethi_before <- prams_before %>%
filter(Group == "Race/Ethnicity")
plot_ethi_before <- ggplot(ethi_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Ethnicity Before Pregnancy in Estimated Population",
x = "Race/Ethnicity",
y = "Est_people"
)
# Show the plot
print(plot_ethi_before)
# Save the plot
ggsave("Ethi & Before Pregnancy in Est.png", plot = plot_ethi_before,
width = 7, height = 5, dpi = 300)
#Before pregnancy: Region
reg_before <- prams_before %>%
filter(Group == "Region")
plot_reg_before <- ggplot(reg_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Region Before Pregnancy in Estimated Population",
x = "Region",
y = "Est_people"
)
# Show the plot
print(plot_reg_before)
# Save the plot
ggsave("Reg & Before Pregnancy in Est.png", plot = plot_reg_before,
width = 7, height = 5, dpi = 300)
#Data visualization: Association between the outcome and primary exposure
# During pregnancy for: Education
edu_after <- prams_after %>%
filter(Group == "Education")
plot_edu_after <- ggplot(edu_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Education During Pregnancy in Estimated Population",
x = "Education Level",
y = "Estimated Number of People"
)
# Show the plot
print(plot_edu_after)
# Save the plot
ggsave("Edu & after Pregnancy in Est.png", plot = plot_edu_after,
width = 7, height = 5, dpi = 300)
#During pregnancy: Maternal age
age_after <- prams_after %>%
filter(Group == "Maternal Age")
plot_age_after <- ggplot(age_before, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Maternal Age During Pregnancy in Estimated Population",
x = "Maternal Age",
y = "Est_people"
)
# Show the plot
print(plot_age_after)
# Save the plot
ggsave("age & after Pregnancy in Est.png", plot = plot_age_after,
width = 7, height = 5, dpi = 300)
#During pregnancy: Medicaid status
medi_after <- prams_after %>%
filter(Group == "Medicaid Status")
plot_medi_after= ggplot(medi_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Medicaid Status During Pregnancy in Estimated Population",
x = "Medicaid Status",
y = "Est_people"
)
# Show the plot
print(plot_medi_after)
# Save the plot
ggsave("medi & after Pregnancy in Est.png", plot = plot_medi_after,
width = 7, height = 5, dpi = 300)
#During pregnancy: Ethnicity
ethi_after <- prams_after %>%
filter(Group == "Race/Ethnicity")
plot_ethi_after= ggplot(ethi_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Teeth Cleaning by Ethnicity Status During Pregnancy in Estimated Population",
x = "Race/Ethnicity",
y = "Est_people"
)
# Show the plot
print(plot_ethi_after)
# Save the plot
ggsave("ethi & after Pregnancy in Est.png", plot = plot_ethi_after,
width = 7, height = 5, dpi = 300)
#During pregnancy: Region
reg_after <- prams_after %>%
filter(Group == "Region")
plot_reg_after<- ggplot(reg_after, aes(x = Characteristics, y = Est_people, fill = Characteristics)) +
geom_boxplot() +
labs(
title = "Association Between Region and Teeth Cleaning During Pregnancy",
x = "Region",
y = "Est_people"
)
# Show the plot
print(plot_reg_after)
# Save the plot
ggsave("reg & after Pregnancy in Est.png", plot = plot_reg_after,
width = 7, height = 5, dpi = 300)
prams_before$Timing <- "Before Pregnancy"
prams_after$Timing <- "During Pregnancy"
combined <- bind_rows(prams_before, prams_after)
plot_data <- combined %>%
filter(Group == "Medicaid Status")
plot_medicaid <- ggplot(plot_data, aes(x = Characteristics, y = Est_people, fill = Timing)) +
geom_bar(stat = "identity", position = "stack") +
labs(
title = "Dental Cleaning Before and During Pregnancy by Medicaid Status",
x = "Medicaid Status",
y = "Estimated Population",
fill = "Timing"
) +
theme_minimal()
# Show the plot
print(plot_medicaid)
# Save the plot
ggsave("medicaid_before_after_pregnancy.png", plot = plot_medicaid,
width = 7, height = 5, dpi = 300)
#regression dataset for before pregnancy
pregbefore_reg <- prams_clean %>%
filter(grepl("before pregnancy", Title, ignore.case = TRUE)) %>%
filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
"Medicaid Status", "Region")) %>%
mutate(
year = as.numeric(Year), # convert Year to numeric
group = factor(Group),
Characteristics = factor(Characteristics)
)
#regression model before pregnancy
# Unadjusted model
model1_before <- lm(Est_people ~ year, data = pregbefore_reg)
# Adjusted model
model2_before <- lm(Est_people ~ year + group + Characteristics, data = pregbefore_reg)
#regression dataset for during pregnancy
pregduring_reg <- prams_clean %>%
filter(grepl("before pregnancy", Title, ignore.case = TRUE)) %>%
filter(Group %in% c("Maternal Age", "Education", "Race/Ethnicity",
"Medicaid Status", "Region")) %>%
mutate(
year = as.numeric(Year),
group = factor(Group),
Characteristics = factor(Characteristics)
)
#regression model during pregnancy
# Unadjusted model
model1_during <- lm(Est_people ~ year, data = pregduring_reg)
# Adjusted model
model2_during <- lm(Est_people ~ year + group + Characteristics, data = pregduring_reg)
#Before Pregnancy
# Unadjusted table: before pregnancy
tbl_model1_before <- tbl_regression(
model1_before,
estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
modify_caption("Model 1: Unadjusted - Before Pregnancy")
# Adjusted table: before pregnancy
tbl_model2_before <- tbl_regression(
model2_before,
estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
modify_caption("Model 2: Adjusted - Before Pregnancy (Group + Characteristics)")
# Display tables: before pregnancy
tbl_model1_before
| Characteristic | Beta | 95% CI | p-value |
|---|---|---|---|
| year | 61.1 | -780, 902 | 0.9 |
| Abbreviation: CI = Confidence Interval | |||
tbl_model2_before
| Characteristic | Beta | 95% CI | p-value |
|---|---|---|---|
| year | -53.4 | -497, 390 | 0.8 |
| group | |||
|     Education | — | — | |
| Â Â Â Â Maternal Age | -37,658 | -42,978, -32,339 | <0.001 |
| Â Â Â Â Medicaid Status | -24,668 | -29,987, -19,348 | <0.001 |
| Â Â Â Â Race/Ethnicity | -8,214 | -13,533, -2,895 | 0.003 |
| Â Â Â Â Region | 6,909 | -614, 14,432 | 0.072 |
| Characteristics | |||
|     20-24 years old | — | — | |
| Â Â Â Â 25-34 years old | 26,945 | 21,625, 32,264 | <0.001 |
| Â Â Â Â 35 years old or more | 11,111 | 5,791, 16,430 | <0.001 |
| Â Â Â Â Asian, non-Hispanic | -31,521 | -36,841, -26,202 | <0.001 |
| Â Â Â Â Black, non-Hispanic | -29,541 | -34,860, -24,221 | <0.001 |
| Â Â Â Â High school graduate | -34,399 | -39,719, -29,080 | <0.001 |
| Â Â Â Â Hispanic | -25,717 | -31,037, -20,398 | <0.001 |
| Â Â Â Â Less than 20 years old | -4,609 | -11,327, 2,109 | 0.2 |
| Â Â Â Â Less than high school | -39,588 | -44,907, -34,268 | <0.001 |
| Â Â Â Â More than high school | |||
| Â Â Â Â Not on Medicaid | 19,463 | 14,144, 24,782 | <0.001 |
| Â Â Â Â NYC | -14,608 | -23,821, -5,394 | 0.002 |
| Â Â Â Â NYS excl NYC | |||
| Â Â Â Â On Medicaid | |||
| Â Â Â Â Other, non-Hispanic | -34,041 | -40,239, -27,843 | <0.001 |
| Â Â Â Â White, non-Hispanic | |||
| Abbreviation: CI = Confidence Interval | |||
Interpretation For unadjusted table: Before pregnancy For each one year increase, the number of women visiting a dentist for teeth cleaning increases by 61, on average. However, this result is not statistically significant (pvalue>0.05, CI: -780 to 902). The CI is so large because the aggregated data among different demographic cells has a large variance.
For adjusted table: Before pregnancy In the adjusted model examining estimated numbers of women receiving dental cleaning before pregnancy, year was not significantly associated with dental cleaning after accounting for demographic factors (β = -53.4; 95% CI: -497, 390; p = 0.8), suggesting that the overall trend over time was negligible once differences across groups were considered.
Several demographic characteristics were significantly associated with dental cleaning:
Maternal Age: Compared with women under 20 years, those aged 25–34 years had an estimated 26,945 more women receiving dental cleaning (95% CI: 21,625, 32,264; p < 0.001), and those 35 years or older had 11,111 more women (95% CI: 5,791, 16,430; p < 0.001). Education: Compared with women with more than high school education (reference), women with high school or less education had substantially fewer women receiving dental cleaning. For example, women with less than high school education had 39,588 fewer women (95% CI: -44,907, -34,268; p < 0.001). Race/Ethnicity: Relative to non-Hispanic White women, non-Hispanic Black and Asian women had significantly fewer women receiving dental cleaning (β = -29,541 and -31,521, respectively; p < 0.001 for both). Medicaid Status: Women not on Medicaid had significantly more women receiving dental cleaning than those on Medicaid (β = 19,463; 95% CI: 14,144, 24,782; p < 0.001). Region: Compared with women in New York State excluding NYC, women in NYC had fewer women receiving dental cleaning (β = -14,608; 95% CI: -23,821, -5,394; p = 0.002).
Reference categories used in the model included: women with more than high school education, non-Hispanic White women, women on Medicaid, under 20 years old, and residents of NYS excluding NYC. All other beta estimates represent differences relative to these reference groups.
Interpretation: These results indicate that demographic factors — particularly maternal age, education, race/ethnicity, Medicaid status, and region — strongly influence dental cleaning before pregnancy, whereas there is no significant temporal trend once these factors are accounted for. This highlights persistent disparities in dental care utilization among subgroups of women.
#During Pregnancy
# Unadjusted table: during pregnancy
tbl_model1_during <- tbl_regression(
model1_during,
estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
modify_caption("Model 1: Unadjusted - During Pregnancy")
# Adjusted table: during pregnancy
tbl_model2_during <- tbl_regression(
model2_during,
estimate_fun = ~style_sigfig(.x, digits = 3)
) %>%
modify_caption("Model 2: Adjusted - During Pregnancy (Group + Characteristics)")
# Display tables during pregnancy
tbl_model1_during
| Characteristic | Beta | 95% CI | p-value |
|---|---|---|---|
| year | 61.1 | -780, 902 | 0.9 |
| Abbreviation: CI = Confidence Interval | |||
tbl_model2_during
| Characteristic | Beta | 95% CI | p-value |
|---|---|---|---|
| year | -53.4 | -497, 390 | 0.8 |
| group | |||
|     Education | — | — | |
| Â Â Â Â Maternal Age | -37,658 | -42,978, -32,339 | <0.001 |
| Â Â Â Â Medicaid Status | -24,668 | -29,987, -19,348 | <0.001 |
| Â Â Â Â Race/Ethnicity | -8,214 | -13,533, -2,895 | 0.003 |
| Â Â Â Â Region | 6,909 | -614, 14,432 | 0.072 |
| Characteristics | |||
|     20-24 years old | — | — | |
| Â Â Â Â 25-34 years old | 26,945 | 21,625, 32,264 | <0.001 |
| Â Â Â Â 35 years old or more | 11,111 | 5,791, 16,430 | <0.001 |
| Â Â Â Â Asian, non-Hispanic | -31,521 | -36,841, -26,202 | <0.001 |
| Â Â Â Â Black, non-Hispanic | -29,541 | -34,860, -24,221 | <0.001 |
| Â Â Â Â High school graduate | -34,399 | -39,719, -29,080 | <0.001 |
| Â Â Â Â Hispanic | -25,717 | -31,037, -20,398 | <0.001 |
| Â Â Â Â Less than 20 years old | -4,609 | -11,327, 2,109 | 0.2 |
| Â Â Â Â Less than high school | -39,588 | -44,907, -34,268 | <0.001 |
| Â Â Â Â More than high school | |||
| Â Â Â Â Not on Medicaid | 19,463 | 14,144, 24,782 | <0.001 |
| Â Â Â Â NYC | -14,608 | -23,821, -5,394 | 0.002 |
| Â Â Â Â NYS excl NYC | |||
| Â Â Â Â On Medicaid | |||
| Â Â Â Â Other, non-Hispanic | -34,041 | -40,239, -27,843 | <0.001 |
| Â Â Â Â White, non-Hispanic | |||
| Abbreviation: CI = Confidence Interval | |||
Interpretation Unadjusted model – During Pregnancy
The coefficient for Year is -778 with a 95% confidence interval of -1,443 to -113 and a p-value of 0.022. This indicates a statistically significant negative trend in the estimated number of women receiving dental cleaning during pregnancy over time. On average, the estimated number decreased by 778 women per year between 2015 and 2022. The negative direction aligns with the coefficient, showing that rates of dental cleaning during pregnancy have declined over time in the unadjusted model.
Adjusted model-During Pregnancy
The coefficient for Year is -871 with a 95% confidence interval of -1,226 to -516 and a p-value < 0.001. This indicates a statistically significant negative trend in the estimated number of women receiving dental cleaning during pregnancy over time. On average, the estimated number decreased by 871 women per year between 2015 and 2022, after accounting for demographic factors.
Interpretation of covariates:
Maternal Age, Medicaid Status, Race/Ethnicity, and Education all show significant associations with the outcome. For example, older age groups (25–34, 35+) and women with higher education have different baseline estimates compared to their reference groups. Region is significant (NYC vs. NYS excluding NYC), showing geographic differences in dental cleaning rates. These adjustments account for differences in group composition over time, meaning the negative trend in Year reflects a true temporal decline rather than shifts in demographic distributions.
#Model diagnostic
# Diagnostic Plots: Before Pregnancy
autoplot(model2_before,
which = 1:4,
ncol = 2,
label.size = 3) +
ggtitle("Regression Diagnostics - Adjusted Model Before Pregnancy")
## Warning: `fortify(<lm>)` was deprecated in ggplot2 3.6.0.
## ℹ Please use `broom::augment(<lm>)` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Diagnostic Plots: During Pregnancy
autoplot(model2_during,
which = 1:4,
ncol = 2,
label.size = 3) +
ggtitle("Regression Diagnostics - Adjusted Model During Pregnancy")
Interpretation for before pregnancy 1.Residuals vs
Fitted: Checking for Linearity The blue line is relatively flat and
should stay at zero. In the above graph, most of the blue line is
relatively flat suggesting the linearity. 2. Normal Q-Q: Checking for
Normality The data points should follow the diagonal reference line. In
the graph, most points lie close to the diagonal line, while a few
points deviate above and below it. This suggests that the residuals are
approximately normally distributed, with slight deviations indicating
minor departures from normality. 3. Scale-Location Plot
(Spread-Location) This plot assesses whether the variance of residuals
is constant across fitted values; the smoother line should be
approximately flat. In the graph above, the blue line shows some
curvature rather than being perfectly flat, suggesting slight
heteroscedasticity. The spread of residuals appears to increase,
indicating that the variance is not entirely constant across the range
of fitted values. 4. Residuals vs Leverage: Identifying Influential
Outliers We look for points that lie beyond the dashed red lines, which
represent Cook’s Distance. In this plot, three data points (318, 327,
329) appear to be farther from these lines, indicating few influential
observations.
Interpretation for during pregnancy 1.Residuals vs Fitted: Checking for Linearity In the above graph, the line deviates from 0, this depicting non-linearity in the dataset. 2. Normal Q-Q: Checking for Normality In the graph, most points lie close to the diagonal line, while a few points deviate above and below it. This suggests that the residuals are approximately normally distributed, with slight deviations indicating minor departures from normality. 3. Scale-Location Plot (Spread-Location) In the graph above, the blue line shows some curvature rather than being perfectly flat, suggesting slight heteroscedasticity. The spread of residuals appears to increase, indicating that the variance is not entirely constant across the range of fitted values. 4. Residuals vs Leverage: Identifying Influential Outlier In this plot, three data points (31, 40, and 164) appear to be farther from these lines, indicating few influential observations.
#Data visualization
# Fitted values: before pregnancy
plot_data <- pregbefore_reg %>%
mutate(fitted = fitted(model2_before))
# Plot
ggplot(plot_data, aes(x = year, y = Est_people)) +
geom_point(alpha = 0.4) +
geom_line(aes(y = fitted), linewidth = 1) +
labs(
title = "Adjusted Association Between Year and Dental Cleaning: Before Pregnancy",
x = "Year",
y = "Estimated Number of Women"
) +
theme_minimal()
#Forest plot: before pregnancy
coef_data <- tidy(model2_before, conf.int = TRUE) %>%
filter(term != "(Intercept)")
ggplot(coef_data, aes(x = estimate, y = reorder(term, estimate))) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) +
geom_vline(xintercept = 0, linetype = "dashed") +
labs(
title = "Adjusted Regression Coefficients: Before Pregnancy",
x = "Beta Coefficient",
y = "Predictors"
) +
theme_minimal()
## Warning: `geom_errobarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
#Data visualization
# Fitted values: during pregnancy
plot_data <- pregduring_reg %>%
mutate(fitted = fitted(model2_during))
# Plot
ggplot(plot_data, aes(x = year, y = Est_people)) +
geom_point(alpha = 0.4) +
geom_line(aes(y = fitted), linewidth = 1) +
labs(
title = "Adjusted Association Between Year and Dental Cleaning: During Pregnancy",
x = "Year",
y = "Estimated Number of Women"
) +
theme_minimal()
#Forest plot: before pregnancy
coef_data_du <- tidy(model2_during, conf.int = TRUE) %>%
filter(term != "(Intercept)")
ggplot(coef_data_du, aes(x = estimate, y = reorder(term, estimate))) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) +
geom_vline(xintercept = 0, linetype = "dashed") +
labs(
title = "Adjusted Regression Coefficients: During Pregnancy",
x = "Beta Coefficient",
y = "Predictors"
) +
theme_minimal()
## `height` was translated to `width`.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
```