# install packages
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(readr)
library(dplyr)
library(broom)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(conflicted)
conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
conflicts_prefer(dplyr::select)
## [conflicted] Will prefer dplyr::select over any other package.
options(conflicted.print = "plain")
# read in data
options(conflicted.print = "plain")
data <- read_csv("deathdata - Sheet1.csv")
## Rows: 1619 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Notes, Age of Father 11, Age of Father 11 Code, Age of Mother 10, A...
## dbl (4): WIC Code, Infertility Treatment Used Code, Fetal Deaths, Percent of...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# clean data
# collapse age categories
data <- data %>%
mutate(
AgeFatherCollapsed = case_when(
`Age of Father 11` %in% c("15-19 years", "20-24 years") ~ "Under 25",
`Age of Father 11` %in% c("25-29 years", "30-34 years") ~ "25-34",
`Age of Father 11` %in% c("35-39 years", "40-44 years") ~ "35-44",
`Age of Father 11` %in% c("45-49 years", "50-54 years", "55 years and older") ~ "45+",
`Age of Father 11` %in% c("Unknown or Not Stated", "Not Reported") ~ "Unknown",
TRUE ~ as.character(`Age of Father 11`)
),
AgeMotherCollapsed = case_when(
`Age of Mother 10` %in% c("Under 15 years", "15 - 17 years", "18 - 19 years") ~ "Under 20",
`Age of Mother 10` %in% c("20 - 24 years") ~ "20-24",
`Age of Mother 10` %in% c("25 - 29 years") ~ "25-29",
`Age of Mother 10` %in% c("30 - 34 years") ~ "30-34",
`Age of Mother 10` %in% c("35 - 39 years") ~ "35-39",
`Age of Mother 10` %in% c("40 - 44 years") ~ "40-44",
`Age of Mother 10` %in% c("45 - 49 years", "50 years and over") ~ "45+",
TRUE ~ as.character(`Age of Mother 10`)
),
EduMotherCollapsed = case_when(
`Mother's Education` %in% c("8th grade or less", "9th through 12th grade with no diploma") ~ "Less than high school",
`Mother's Education` %in% c("High school graduate or GED completed", "Some college credit, but no degree") ~ "High school or some college",
`Mother's Education` %in% c("Associate degree (AA, AS)", "Bachelor’s degree (BA, BS)", "Master’s degree (MA, MS)", "Doctorate or professional degree (PhD, MD, JD, etc.)") ~ "College or higher",
`Mother's Education` %in% c("Unknown", "Not Reported") ~ "Unknown",
TRUE ~ as.character(`Mother's Education`)
)
)
print(data)
## # A tibble: 1,619 × 16
## Notes `Age of Father 11` `Age of Father 11 Code` `Age of Mother 10`
## <chr> <chr> <chr> <chr>
## 1 <NA> 15-19 years 15-19 Under 15 years
## 2 <NA> 15-19 years 15-19 15 - 17 years
## 3 <NA> 15-19 years 15-19 15 - 17 years
## 4 <NA> 15-19 years 15-19 15 - 17 years
## 5 <NA> 15-19 years 15-19 15 - 17 years
## 6 <NA> 15-19 years 15-19 15 - 17 years
## 7 <NA> 15-19 years 15-19 15 - 17 years
## 8 <NA> 15-19 years 15-19 15 - 17 years
## 9 <NA> 15-19 years 15-19 15 - 17 years
## 10 <NA> 15-19 years 15-19 15 - 17 years
## # ℹ 1,609 more rows
## # ℹ 12 more variables: `Age of Mother 10 Code` <chr>,
## # `Mother's Education` <chr>, `Mother's Education Code` <chr>, WIC <chr>,
## # `WIC Code` <dbl>, `Infertility Treatment Used` <chr>,
## # `Infertility Treatment Used Code` <dbl>, `Fetal Deaths` <dbl>,
## # `Percent of Total Deaths` <dbl>, AgeFatherCollapsed <chr>,
## # AgeMotherCollapsed <chr>, EduMotherCollapsed <chr>
# linear regression
model <- lm(`Percent of Total Deaths` ~ `AgeFatherCollapsed` + `AgeMotherCollapsed` +
`EduMotherCollapsed` + `WIC` +
`Infertility Treatment Used`, data = data)
summary(model)
##
## Call:
## lm(formula = `Percent of Total Deaths` ~ AgeFatherCollapsed +
## AgeMotherCollapsed + EduMotherCollapsed + WIC + `Infertility Treatment Used`,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17837 -0.05526 -0.02007 0.02153 1.51563
##
## Coefficients:
## Estimate
## (Intercept) 0.172989
## AgeFatherCollapsed35-44 -0.023605
## AgeFatherCollapsed45+ -0.085390
## AgeFatherCollapsedUnder 25 -0.021579
## AgeFatherCollapsedUnknown -0.004141
## AgeMotherCollapsed25-29 years 0.003231
## AgeMotherCollapsed30-34 years 0.001377
## AgeMotherCollapsed35-39 years -0.018547
## AgeMotherCollapsed40-44 years -0.052961
## AgeMotherCollapsed45-49 years -0.084054
## AgeMotherCollapsedUnder 20 -0.053932
## EduMotherCollapsedCollege or higher -0.046066
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -0.069544
## EduMotherCollapsedHigh school or some college 0.033730
## EduMotherCollapsedLess than high school -0.044199
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) -0.033397
## EduMotherCollapsedSome college credit, but not a degree -0.002922
## EduMotherCollapsedUnknown 0.086482
## EduMotherCollapsedUnknown or Not Stated -0.005735
## WICNot Reported -0.106130
## WICUnknown or Not Stated -0.075295
## WICYes -0.055951
## `Infertility Treatment Used`Not Reported -0.079564
## `Infertility Treatment Used`Unknown or Not Stated -0.093051
## `Infertility Treatment Used`Yes -0.107034
## Std. Error
## (Intercept) 0.013906
## AgeFatherCollapsed35-44 0.009133
## AgeFatherCollapsed45+ 0.011817
## AgeFatherCollapsedUnder 25 0.011795
## AgeFatherCollapsedUnknown 0.008327
## AgeMotherCollapsed25-29 years 0.010581
## AgeMotherCollapsed30-34 years 0.010529
## AgeMotherCollapsed35-39 years 0.011049
## AgeMotherCollapsed40-44 years 0.012505
## AgeMotherCollapsed45-49 years 0.028774
## AgeMotherCollapsedUnder 20 0.012883
## EduMotherCollapsedCollege or higher 0.013964
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.021645
## EduMotherCollapsedHigh school or some college 0.011676
## EduMotherCollapsedLess than high school 0.011677
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.015197
## EduMotherCollapsedSome college credit, but not a degree 0.012246
## EduMotherCollapsedUnknown 0.022799
## EduMotherCollapsedUnknown or Not Stated 0.012430
## WICNot Reported 0.010450
## WICUnknown or Not Stated 0.008455
## WICYes 0.008387
## `Infertility Treatment Used`Not Reported 0.017659
## `Infertility Treatment Used`Unknown or Not Stated 0.009141
## `Infertility Treatment Used`Yes 0.012812
## t value
## (Intercept) 12.440
## AgeFatherCollapsed35-44 -2.585
## AgeFatherCollapsed45+ -7.226
## AgeFatherCollapsedUnder 25 -1.830
## AgeFatherCollapsedUnknown -0.497
## AgeMotherCollapsed25-29 years 0.305
## AgeMotherCollapsed30-34 years 0.131
## AgeMotherCollapsed35-39 years -1.679
## AgeMotherCollapsed40-44 years -4.235
## AgeMotherCollapsed45-49 years -2.921
## AgeMotherCollapsedUnder 20 -4.186
## EduMotherCollapsedCollege or higher -3.299
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -3.213
## EduMotherCollapsedHigh school or some college 2.889
## EduMotherCollapsedLess than high school -3.785
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) -2.198
## EduMotherCollapsedSome college credit, but not a degree -0.239
## EduMotherCollapsedUnknown 3.793
## EduMotherCollapsedUnknown or Not Stated -0.461
## WICNot Reported -10.156
## WICUnknown or Not Stated -8.905
## WICYes -6.671
## `Infertility Treatment Used`Not Reported -4.506
## `Infertility Treatment Used`Unknown or Not Stated -10.179
## `Infertility Treatment Used`Yes -8.354
## Pr(>|t|)
## (Intercept) < 2e-16
## AgeFatherCollapsed35-44 0.009840
## AgeFatherCollapsed45+ 7.77e-13
## AgeFatherCollapsedUnder 25 0.067506
## AgeFatherCollapsedUnknown 0.619043
## AgeMotherCollapsed25-29 years 0.760150
## AgeMotherCollapsed30-34 years 0.895976
## AgeMotherCollapsed35-39 years 0.093420
## AgeMotherCollapsed40-44 years 2.42e-05
## AgeMotherCollapsed45-49 years 0.003538
## AgeMotherCollapsedUnder 20 2.99e-05
## EduMotherCollapsedCollege or higher 0.000993
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.001341
## EduMotherCollapsedHigh school or some college 0.003921
## EduMotherCollapsedLess than high school 0.000159
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.028123
## EduMotherCollapsedSome college credit, but not a degree 0.811411
## EduMotherCollapsedUnknown 0.000154
## EduMotherCollapsedUnknown or Not Stated 0.644588
## WICNot Reported < 2e-16
## WICUnknown or Not Stated < 2e-16
## WICYes 3.52e-11
## `Infertility Treatment Used`Not Reported 7.11e-06
## `Infertility Treatment Used`Unknown or Not Stated < 2e-16
## `Infertility Treatment Used`Yes < 2e-16
##
## (Intercept) ***
## AgeFatherCollapsed35-44 **
## AgeFatherCollapsed45+ ***
## AgeFatherCollapsedUnder 25 .
## AgeFatherCollapsedUnknown
## AgeMotherCollapsed25-29 years
## AgeMotherCollapsed30-34 years
## AgeMotherCollapsed35-39 years .
## AgeMotherCollapsed40-44 years ***
## AgeMotherCollapsed45-49 years **
## AgeMotherCollapsedUnder 20 ***
## EduMotherCollapsedCollege or higher ***
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) **
## EduMotherCollapsedHigh school or some college **
## EduMotherCollapsedLess than high school ***
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) *
## EduMotherCollapsedSome college credit, but not a degree
## EduMotherCollapsedUnknown ***
## EduMotherCollapsedUnknown or Not Stated
## WICNot Reported ***
## WICUnknown or Not Stated ***
## WICYes ***
## `Infertility Treatment Used`Not Reported ***
## `Infertility Treatment Used`Unknown or Not Stated ***
## `Infertility Treatment Used`Yes ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1197 on 1557 degrees of freedom
## (37 observations deleted due to missingness)
## Multiple R-squared: 0.1921, Adjusted R-squared: 0.1797
## F-statistic: 15.43 on 24 and 1557 DF, p-value: < 2.2e-16
# exploratory data analysis
# group data by father's age category
data %>%
group_by(AgeFatherCollapsed) %>%
summarise(
# mean
mean_fetal_death_rate = mean(`Percent of Total Deaths`, na.rm = TRUE),
# std
sd_fetal_death_rate = sd(`Percent of Total Deaths`, na.rm = TRUE),
# number of categories
n = n()
)
# ANOVA
anova_model <- aov(`Percent of Total Deaths` ~ AgeFatherCollapsed, data = data)
# TukeyHSD
TukeyHSD(anova_model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = `Percent of Total Deaths` ~ AgeFatherCollapsed, data = data)
##
## $AgeFatherCollapsed
## diff lwr upr p adj
## 35-44-25-34 -0.029566177 -0.05603045 -0.003101908 0.0196242
## 45+-25-34 -0.066325927 -0.09923859 -0.033413265 0.0000004
## Under 25-25-34 -0.018157766 -0.05120785 0.014892320 0.5623602
## Unknown-25-34 -0.022293128 -0.04629661 0.001710349 0.0832483
## 45+-35-44 -0.036759749 -0.07038937 -0.003130126 0.0240217
## Under 25-35-44 0.011408411 -0.02235572 0.045172541 0.8881808
## Unknown-35-44 0.007273049 -0.01770444 0.032250543 0.9320515
## Under 25-45+ 0.048168161 0.00914378 0.087192541 0.0068689
## Unknown-45+ 0.044032799 0.01230330 0.075762296 0.0014689
## Unknown-Under 25 -0.004135362 -0.03600739 0.027736661 0.9966271
#interaction
# linear regression with all interaction effects
model1 <- lm(`Percent of Total Deaths` ~
(AgeFatherCollapsed + AgeMotherCollapsed +
EduMotherCollapsed + WIC +
`Infertility Treatment Used`)^2,
data = data)
# filtering out significant interaction terms
tidy(model1) %>%
filter(p.value < 0.05) %>%
dplyr::select(term, estimate, std.error, statistic, p.value)
#check correlations
# select and convert relevant columns to numeric
data_numeric <- data %>%
mutate(
AgeFatherCollapsed = as.numeric(factor(AgeFatherCollapsed)),
AgeMotherCollapsed = as.numeric(factor(AgeMotherCollapsed)),
EduMotherCollapsed = as.numeric(factor(EduMotherCollapsed)),
WIC = as.numeric(factor(WIC)),
InfertilityTreatmentUsed = as.numeric(factor(`Infertility Treatment Used`))
)
# correlation matrix
correlation_matrix <- cor(data_numeric %>%
dplyr::select(AgeFatherCollapsed, AgeMotherCollapsed,
EduMotherCollapsed, WIC, InfertilityTreatmentUsed),
use = "complete.obs")
print(correlation_matrix)
## AgeFatherCollapsed AgeMotherCollapsed
## AgeFatherCollapsed 1.00000000 0.14680609
## AgeMotherCollapsed 0.14680609 1.00000000
## EduMotherCollapsed 0.10848709 0.03814675
## WIC 0.01990136 -0.02033211
## InfertilityTreatmentUsed -0.04281391 -0.06578029
## EduMotherCollapsed WIC
## AgeFatherCollapsed 0.10848709 0.01990136
## AgeMotherCollapsed 0.03814675 -0.02033211
## EduMotherCollapsed 1.00000000 0.03105695
## WIC 0.03105695 1.00000000
## InfertilityTreatmentUsed 0.05504106 -0.23349141
## InfertilityTreatmentUsed
## AgeFatherCollapsed -0.04281391
## AgeMotherCollapsed -0.06578029
## EduMotherCollapsed 0.05504106
## WIC -0.23349141
## InfertilityTreatmentUsed 1.00000000
# vif
vif(model)
## GVIF Df GVIF^(1/(2*Df))
## AgeFatherCollapsed 1.577228 4 1.058612
## AgeMotherCollapsed 1.553854 6 1.037411
## EduMotherCollapsed 2.733819 8 1.064874
## WIC 1.644051 3 1.086390
## `Infertility Treatment Used` 2.219102 3 1.142079
print(vif)
## function (mod, ...)
## {
## UseMethod("vif")
## }
## <bytecode: 0x555f22e8aaa0>
## <environment: namespace:car>
# model validation
# residuals vs fitted plot
plot(model1$fitted.values, resid(model1),
xlab = "Fitted values", ylab = "Residuals",
main = "Residuals vs Fitted")
abline(h = 0, col = "red", lty = 2)

# QQ plot of residuals
qqnorm(resid(model1), main = "QQ Plot of Residuals")
qqline(resid(model1), col = "blue", lwd = 2)

# histogram of residuals
hist(resid(model1), breaks = 30,
main = "Histogram of Residuals",
xlab = "Residuals", col = "lightblue")

# trying a quadratic model to fix residual issues
# square term
data_numeric <- data_numeric %>%
mutate(AgeFatherCollapsed2 = AgeFatherCollapsed^2)
# regression
model_quad <- lm(
`Percent of Total Deaths` ~
(AgeFatherCollapsed2 + AgeMotherCollapsed +
EduMotherCollapsed + WIC +
InfertilityTreatmentUsed)^2,
data = data_numeric
)
# residuals vs. fitted plot
plot(model_quad$fitted.values, resid(model1),
xlab = "Fitted values", ylab = "Residuals",
main = "Residuals vs Fitted")
abline(h = 0, col = "red", lty = 2)

# QQ norm plot
qqnorm(resid(model_quad), main = "QQ Plot of Residuals")
qqline(resid(model_quad), col = "blue", lwd = 2)

sig <- tidy(model) %>%
dplyr::filter(str_detect(term, ":"), p.value < 0.05)
# running regression
tidy_model <- tidy(model_quad)
main_effects <- tidy_model %>%
filter(!str_detect(term, ":"), p.value < 0.05)
significant_interactions <- tidy(model_quad) %>%
filter(grepl(":", term), p.value < 0.05)
significant_terms <- bind_rows(main_effects, significant_interactions)
print(significant_terms)
## # A tibble: 10 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3.49e-1 0.0349 9.99 8.37e-23
## 2 AgeFatherCollapsed2 -3.90e-3 0.00133 -2.93 3.45e- 3
## 3 AgeMotherCollapsed -2.92e-2 0.00716 -4.07 4.89e- 5
## 4 EduMotherCollapsed -1.25e-2 0.00512 -2.44 1.47e- 2
## 5 WIC -5.58e-2 0.00963 -5.79 8.38e- 9
## 6 InfertilityTreatmentUsed -7.60e-2 0.0121 -6.27 4.68e-10
## 7 AgeFatherCollapsed2:EduMotherCollapsed 4.16e-4 0.000128 3.24 1.22e- 3
## 8 AgeMotherCollapsed:WIC 3.06e-3 0.00156 1.96 4.99e- 2
## 9 AgeMotherCollapsed:InfertilityTreatmen… 7.56e-3 0.00237 3.19 1.43e- 3
## 10 EduMotherCollapsed:WIC 2.83e-3 0.00120 2.37 1.81e- 2
# get specific relationships of significance
data$AgeFatherCollapsed <- factor(data$AgeFatherCollapsed)
data$AgeMotherCollapsed <- factor(data$AgeMotherCollapsed)
data$EduMotherCollapsed <- factor(data$EduMotherCollapsed)
data$WIC <- factor(data$WIC)
data$InfertilityTreatmentUsed <- factor(data$`Infertility Treatment Used`)
model2 <- lm(`Percent of Total Deaths` ~
(AgeFatherCollapsed + AgeMotherCollapsed +
EduMotherCollapsed + WIC +
InfertilityTreatmentUsed)^2,
data = data)
significant_terms <- tidy(model2) %>%
filter(p.value < 0.05) %>%
dplyr::select(term, estimate, std.error, statistic, p.value)
print(significant_terms)
## # A tibble: 112 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.261 0.0328 7.94 4.27e-15
## 2 AgeFatherCollapsed35-44 -0.200 0.0340 -5.89 4.94e- 9
## 3 AgeFatherCollapsed45+ -0.381 0.0524 -7.26 6.46e-13
## 4 AgeFatherCollapsedUnder 25 -0.0896 0.0446 -2.01 4.48e- 2
## 5 AgeFatherCollapsedUnknown -0.108 0.0291 -3.71 2.17e- 4
## 6 AgeMotherCollapsed25-29 years 0.213 0.0350 6.10 1.37e- 9
## 7 AgeMotherCollapsed30-34 years 0.207 0.0348 5.95 3.49e- 9
## 8 AgeMotherCollapsed40-44 years -0.119 0.0424 -2.82 4.91e- 3
## 9 AgeMotherCollapsed45-49 years -0.164 0.0796 -2.06 3.96e- 2
## 10 AgeMotherCollapsedUnder 20 -0.200 0.0419 -4.78 1.96e- 6
## # ℹ 102 more rows
# correlation
# correlation matrix
correlation_matrix <- cor(data_numeric %>%
dplyr::select(AgeFatherCollapsed, AgeMotherCollapsed,
EduMotherCollapsed, WIC, InfertilityTreatmentUsed),
use = "complete.obs")
print(correlation_matrix)
## AgeFatherCollapsed AgeMotherCollapsed
## AgeFatherCollapsed 1.00000000 0.14680609
## AgeMotherCollapsed 0.14680609 1.00000000
## EduMotherCollapsed 0.10848709 0.03814675
## WIC 0.01990136 -0.02033211
## InfertilityTreatmentUsed -0.04281391 -0.06578029
## EduMotherCollapsed WIC
## AgeFatherCollapsed 0.10848709 0.01990136
## AgeMotherCollapsed 0.03814675 -0.02033211
## EduMotherCollapsed 1.00000000 0.03105695
## WIC 0.03105695 1.00000000
## InfertilityTreatmentUsed 0.05504106 -0.23349141
## InfertilityTreatmentUsed
## AgeFatherCollapsed -0.04281391
## AgeMotherCollapsed -0.06578029
## EduMotherCollapsed 0.05504106
## WIC -0.23349141
## InfertilityTreatmentUsed 1.00000000
# VIF
vif <- vif(model)
print(vif)
## GVIF Df GVIF^(1/(2*Df))
## AgeFatherCollapsed 1.577228 4 1.058612
## AgeMotherCollapsed 1.553854 6 1.037411
## EduMotherCollapsed 2.733819 8 1.064874
## WIC 1.644051 3 1.086390
## `Infertility Treatment Used` 2.219102 3 1.142079
# reduced model
reduced <- glm( `Percent of Total Deaths` ~
AgeFatherCollapsed^2 +
AgeMotherCollapsed +
EduMotherCollapsed +
WIC +
InfertilityTreatmentUsed +
# significant interactions
AgeFatherCollapsed:AgeMotherCollapsed +
AgeFatherCollapsed:EduMotherCollapsed +
AgeFatherCollapsed:WIC +
AgeMotherCollapsed:AgeFatherCollapsed +
WIC:InfertilityTreatmentUsed,
data = data
)
# extract coefficients summary
coefs <- summary(reduced)$coefficients
# filter significant terms
sig_coefs <- coefs[coefs[, "Pr(>|t|)"] < 0.05, ]
# filter positives and negatives
positive_results <- sig_coefs[sig_coefs[, "Estimate"] > 0, ]
negative_results <- sig_coefs[sig_coefs[, "Estimate"] < 0, ]
print(positive_results)
## Estimate
## (Intercept) 0.25046836
## AgeMotherCollapsed25-29 years 0.07130901
## AgeMotherCollapsed30-34 years 0.05113566
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years 0.18036533
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years 0.19886203
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years 0.18746324
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years 0.07566745
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years 0.14478334
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college 0.06156282
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school 0.13279736
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school 0.09643252
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.07451658
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated 0.16396120
## AgeFatherCollapsedUnknown:WICNot Reported 0.10902976
## AgeFatherCollapsed35-44:WICUnknown or Not Stated 0.04796127
## AgeFatherCollapsed45+:WICUnknown or Not Stated 0.14678751
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated 0.08335834
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated 0.08249853
## AgeFatherCollapsed45+:WICYes 0.08657888
## AgeFatherCollapsedUnder 25:WICYes 0.07357059
## WICNot Reported:InfertilityTreatmentUsedNot Reported 0.37220752
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated 0.16057363
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated 0.13981350
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated 0.05824336
## Std. Error
## (Intercept) 0.02199397
## AgeMotherCollapsed25-29 years 0.01694177
## AgeMotherCollapsed30-34 years 0.01719512
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years 0.03176353
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years 0.03827651
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years 0.05303472
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years 0.03336647
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years 0.07122384
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college 0.02858583
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school 0.04765455
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school 0.02852209
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.03637753
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated 0.02991923
## AgeFatherCollapsedUnknown:WICNot Reported 0.02347646
## AgeFatherCollapsed35-44:WICUnknown or Not Stated 0.02155150
## AgeFatherCollapsed45+:WICUnknown or Not Stated 0.03147773
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated 0.02769763
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated 0.02024229
## AgeFatherCollapsed45+:WICYes 0.02597934
## AgeFatherCollapsedUnder 25:WICYes 0.02592145
## WICNot Reported:InfertilityTreatmentUsedNot Reported 0.03942157
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated 0.02303759
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated 0.02020377
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated 0.02475445
## t value
## (Intercept) 11.388046
## AgeMotherCollapsed25-29 years 4.209064
## AgeMotherCollapsed30-34 years 2.973846
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years 5.678379
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years 5.195406
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years 3.534727
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years 2.267769
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years 2.032793
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college 2.153613
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school 2.786667
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school 3.380976
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 2.048424
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated 5.480129
## AgeFatherCollapsedUnknown:WICNot Reported 4.644215
## AgeFatherCollapsed35-44:WICUnknown or Not Stated 2.225426
## AgeFatherCollapsed45+:WICUnknown or Not Stated 4.663218
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated 3.009584
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated 4.075553
## AgeFatherCollapsed45+:WICYes 3.332605
## AgeFatherCollapsedUnder 25:WICYes 2.838213
## WICNot Reported:InfertilityTreatmentUsedNot Reported 9.441721
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated 6.970071
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated 6.920169
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated 2.352844
## Pr(>|t|)
## (Intercept) 7.256739e-29
## AgeMotherCollapsed25-29 years 2.717974e-05
## AgeMotherCollapsed30-34 years 2.988323e-03
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years 1.632867e-08
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years 2.326277e-07
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years 4.207137e-04
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years 2.348633e-02
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years 4.225089e-02
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college 3.143093e-02
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school 5.393044e-03
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school 7.410212e-04
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 4.069389e-02
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated 4.985582e-08
## AgeFatherCollapsedUnknown:WICNot Reported 3.715078e-06
## AgeFatherCollapsed35-44:WICUnknown or Not Stated 2.620221e-02
## AgeFatherCollapsed45+:WICUnknown or Not Stated 3.392349e-06
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated 2.660111e-03
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated 4.833214e-05
## AgeFatherCollapsed45+:WICYes 8.815140e-04
## AgeFatherCollapsedUnder 25:WICYes 4.598348e-03
## WICNot Reported:InfertilityTreatmentUsedNot Reported 1.365591e-20
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated 4.747453e-12
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated 6.685740e-12
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated 1.876021e-02
print(negative_results)
## Estimate
## AgeFatherCollapsed35-44 -0.11872073
## AgeFatherCollapsed45+ -0.20132485
## AgeFatherCollapsedUnder 25 -0.09584690
## AgeFatherCollapsedUnknown -0.09392159
## AgeMotherCollapsed35-39 years -0.06147959
## AgeMotherCollapsed40-44 years -0.16257100
## AgeMotherCollapsed45-49 years -0.14831729
## AgeMotherCollapsedUnder 20 -0.12787741
## EduMotherCollapsedCollege or higher -0.07223809
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -0.13609652
## EduMotherCollapsedLess than high school -0.09100405
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) -0.08666991
## EduMotherCollapsedUnknown or Not Stated -0.07826217
## WICNot Reported -0.23964968
## WICUnknown or Not Stated -0.17745661
## WICYes -0.10579105
## InfertilityTreatmentUsedNot Reported -0.25716733
## InfertilityTreatmentUsedUnknown or Not Stated -0.20988375
## InfertilityTreatmentUsedYes -0.16750100
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years -0.17709187
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years -0.06448844
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years -0.20351826
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years -0.05797466
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years -0.16323340
## Std. Error
## AgeFatherCollapsed35-44 0.03530013
## AgeFatherCollapsed45+ 0.05355382
## AgeFatherCollapsedUnder 25 0.04751518
## AgeFatherCollapsedUnknown 0.03023218
## AgeMotherCollapsed35-39 years 0.01960944
## AgeMotherCollapsed40-44 years 0.02766331
## AgeMotherCollapsed45-49 years 0.04665700
## AgeMotherCollapsedUnder 20 0.02931195
## EduMotherCollapsedCollege or higher 0.02379288
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.03667513
## EduMotherCollapsedLess than high school 0.02094302
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.02561363
## EduMotherCollapsedUnknown or Not Stated 0.02253605
## WICNot Reported 0.01963781
## WICUnknown or Not Stated 0.01617566
## WICYes 0.01565797
## InfertilityTreatmentUsedNot Reported 0.02255790
## InfertilityTreatmentUsedUnknown or Not Stated 0.01354198
## InfertilityTreatmentUsedYes 0.01283327
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years 0.02981184
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years 0.02391540
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years 0.03516393
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years 0.02381780
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years 0.05538919
## t value
## AgeFatherCollapsed35-44 -3.363181
## AgeFatherCollapsed45+ -3.759300
## AgeFatherCollapsedUnder 25 -2.017185
## AgeFatherCollapsedUnknown -3.106676
## AgeMotherCollapsed35-39 years -3.135204
## AgeMotherCollapsed40-44 years -5.876774
## AgeMotherCollapsed45-49 years -3.178886
## AgeMotherCollapsedUnder 20 -4.362637
## EduMotherCollapsedCollege or higher -3.036122
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -3.710867
## EduMotherCollapsedLess than high school -4.345317
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) -3.383741
## EduMotherCollapsedUnknown or Not Stated -3.472754
## WICNot Reported -12.203481
## WICUnknown or Not Stated -10.970595
## WICYes -6.756370
## InfertilityTreatmentUsedNot Reported -11.400322
## InfertilityTreatmentUsedUnknown or Not Stated -15.498753
## InfertilityTreatmentUsedYes -13.052088
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years -5.940320
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years -2.696524
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years -5.787700
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years -2.434090
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years -2.947026
## Pr(>|t|)
## AgeFatherCollapsed35-44 7.900912e-04
## AgeFatherCollapsed45+ 1.769919e-04
## AgeFatherCollapsedUnder 25 4.385575e-02
## AgeFatherCollapsedUnknown 1.927850e-03
## AgeMotherCollapsed35-39 years 1.750972e-03
## AgeMotherCollapsed40-44 years 5.155605e-09
## AgeMotherCollapsed45-49 years 1.508892e-03
## AgeMotherCollapsedUnder 20 1.373639e-05
## EduMotherCollapsedCollege or higher 2.438101e-03
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 2.141681e-04
## EduMotherCollapsedLess than high school 1.485144e-05
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 7.336571e-04
## EduMotherCollapsedUnknown or Not Stated 5.299230e-04
## WICNot Reported 1.033602e-32
## WICUnknown or Not Stated 5.558313e-27
## WICYes 2.025084e-11
## InfertilityTreatmentUsedNot Reported 6.374559e-29
## InfertilityTreatmentUsedUnknown or Not Stated 2.463073e-50
## InfertilityTreatmentUsedYes 6.107116e-37
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years 3.536970e-09
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years 7.085872e-03
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years 8.689375e-09
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years 1.504640e-02
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years 3.258484e-03
# original model
# extract coefficients summary
coefs <- summary(model)$coefficients
# filter significant terms
significant_terms <- coefs[coefs[, "Pr(>|t|)"] < 0.05, ]
print(significant_terms)
## Estimate
## (Intercept) 0.17298855
## AgeFatherCollapsed35-44 -0.02360548
## AgeFatherCollapsed45+ -0.08538972
## AgeMotherCollapsed40-44 years -0.05296061
## AgeMotherCollapsed45-49 years -0.08405359
## AgeMotherCollapsedUnder 20 -0.05393178
## EduMotherCollapsedCollege or higher -0.04606648
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -0.06954444
## EduMotherCollapsedHigh school or some college 0.03372967
## EduMotherCollapsedLess than high school -0.04419937
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) -0.03339732
## EduMotherCollapsedUnknown 0.08648237
## WICNot Reported -0.10612983
## WICUnknown or Not Stated -0.07529499
## WICYes -0.05595147
## `Infertility Treatment Used`Not Reported -0.07956380
## `Infertility Treatment Used`Unknown or Not Stated -0.09305102
## `Infertility Treatment Used`Yes -0.10703437
## Std. Error
## (Intercept) 0.013905917
## AgeFatherCollapsed35-44 0.009133179
## AgeFatherCollapsed45+ 0.011817440
## AgeMotherCollapsed40-44 years 0.012504773
## AgeMotherCollapsed45-49 years 0.028774416
## AgeMotherCollapsedUnder 20 0.012882985
## EduMotherCollapsedCollege or higher 0.013964262
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.021644925
## EduMotherCollapsedHigh school or some college 0.011676263
## EduMotherCollapsedLess than high school 0.011677021
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.015196993
## EduMotherCollapsedUnknown 0.022798577
## WICNot Reported 0.010450359
## WICUnknown or Not Stated 0.008455219
## WICYes 0.008387039
## `Infertility Treatment Used`Not Reported 0.017659001
## `Infertility Treatment Used`Unknown or Not Stated 0.009141024
## `Infertility Treatment Used`Yes 0.012812242
## t value
## (Intercept) 12.439924
## AgeFatherCollapsed35-44 -2.584585
## AgeFatherCollapsed45+ -7.225738
## AgeMotherCollapsed40-44 years -4.235232
## AgeMotherCollapsed45-49 years -2.921123
## AgeMotherCollapsedUnder 20 -4.186280
## EduMotherCollapsedCollege or higher -3.298884
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -3.212967
## EduMotherCollapsedHigh school or some college 2.888738
## EduMotherCollapsedLess than high school -3.785158
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) -2.197627
## EduMotherCollapsedUnknown 3.793323
## WICNot Reported -10.155616
## WICUnknown or Not Stated -8.905150
## WICYes -6.671183
## `Infertility Treatment Used`Not Reported -4.505566
## `Infertility Treatment Used`Unknown or Not Stated -10.179496
## `Infertility Treatment Used`Yes -8.354070
## Pr(>|t|)
## (Intercept) 6.136448e-34
## AgeFatherCollapsed35-44 9.839907e-03
## AgeFatherCollapsed45+ 7.773506e-13
## AgeMotherCollapsed40-44 years 2.416805e-05
## AgeMotherCollapsed45-49 years 3.537966e-03
## AgeMotherCollapsedUnder 20 2.994269e-05
## EduMotherCollapsedCollege or higher 9.926116e-04
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 1.340589e-03
## EduMotherCollapsedHigh school or some college 3.921405e-03
## EduMotherCollapsedLess than high school 1.594390e-04
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 2.812253e-02
## EduMotherCollapsedUnknown 1.543308e-04
## WICNot Reported 1.658498e-23
## WICUnknown or Not Stated 1.451290e-18
## WICYes 3.515657e-11
## `Infertility Treatment Used`Not Reported 7.113530e-06
## `Infertility Treatment Used`Unknown or Not Stated 1.317933e-23
## `Infertility Treatment Used`Yes 1.440464e-16
par(mfrow = c(2, 3))
# original model
plot(model1$fitted.values, resid(model1),
xlab = "Fitted values", ylab = "Residuals",
main = "Residuals vs Fitted (Original)")
abline(h = 0, col = "red", lty = 2)
# QQ plot of residuals - original model
qqnorm(resid(model1), main = "QQ Plot (Original)")
qqline(resid(model1), col = "blue", lwd = 2)
# histogram of residuals - original model
hist(resid(model1), breaks = 30,
main = "Histogram (Original)",
xlab = "Residuals", col = "lightblue")
# quadratic model
plot(model_quad$fitted.values, resid(model_quad),
xlab = "Fitted values", ylab = "Residuals",
main = "Residuals vs Fitted (Quadratic)")
abline(h = 0, col = "red", lty = 2)
# QQ plot of residuals - quadratic model
qqnorm(resid(model_quad), main = "QQ Plot (Quadratic)")
qqline(resid(model_quad), col = "blue", lwd = 2)
# QQ plot of residuals - quadratic model
hist(resid(model_quad), breaks = 30,
main = "Histogram (Quadratic)",
xlab = "Residuals", col = "lightblue")

#imputing the data
#loading in mice package
library(mice)
data <- data %>%
mutate(
AgeFatherCollapsed = ifelse(AgeFatherCollapsed == "Unknown", NA, AgeFatherCollapsed),
AgeFatherCollapsed = factor(AgeFatherCollapsed),
AgeMotherCollapsed = factor(AgeMotherCollapsed),
EduMotherCollapsed = factor(EduMotherCollapsed),
WIC = factor(WIC),
`Infertility Treatment Used` = factor(`Infertility Treatment Used`)
)
impute_data <- data %>%
select(
AgeFatherCollapsed,
AgeMotherCollapsed,
EduMotherCollapsed,
WIC,
`Infertility Treatment Used`,
`Percent of Total Deaths`
)
meth <- make.method(impute_data)
meth["AgeFatherCollapsed"] <- "polyreg"
meth[c("AgeMotherCollapsed",
"EduMotherCollapsed",
"WIC",
"Infertility Treatment Used",
"Percent of Total Deaths")] <- ""
imp <- mice(
impute_data,
method = meth,
m = 5,
seed = 123
)
##
## iter imp variable
## 1 1 AgeFatherCollapsed
## 1 2 AgeFatherCollapsed
## 1 3 AgeFatherCollapsed
## 1 4 AgeFatherCollapsed
## 1 5 AgeFatherCollapsed
## 2 1 AgeFatherCollapsed
## 2 2 AgeFatherCollapsed
## 2 3 AgeFatherCollapsed
## 2 4 AgeFatherCollapsed
## 2 5 AgeFatherCollapsed
## 3 1 AgeFatherCollapsed
## 3 2 AgeFatherCollapsed
## 3 3 AgeFatherCollapsed
## 3 4 AgeFatherCollapsed
## 3 5 AgeFatherCollapsed
## 4 1 AgeFatherCollapsed
## 4 2 AgeFatherCollapsed
## 4 3 AgeFatherCollapsed
## 4 4 AgeFatherCollapsed
## 4 5 AgeFatherCollapsed
## 5 1 AgeFatherCollapsed
## 5 2 AgeFatherCollapsed
## 5 3 AgeFatherCollapsed
## 5 4 AgeFatherCollapsed
## 5 5 AgeFatherCollapsed
fit <- with(
imp,
lm(`Percent of Total Deaths` ~
AgeFatherCollapsed +
AgeMotherCollapsed +
EduMotherCollapsed +
WIC +
`Infertility Treatment Used`)
)
summary(pool(fit))
# checking proportion of missing paternal age by maternal age
data <- data %>%
mutate(
AgeFatherCollapsed = case_when(
`Age of Father 11` %in% c("15-19 years", "20-24 years") ~ "Under 25",
`Age of Father 11` %in% c("25-29 years", "30-34 years") ~ "25-34",
`Age of Father 11` %in% c("35-39 years", "40-44 years") ~ "35-44",
`Age of Father 11` %in% c("45-49 years", "50-54 years", "55 years and older") ~ "45+",
`Age of Father 11` %in% c("Unknown or Not Stated", "Not Reported") ~ "Unknown",
TRUE ~ as.character(`Age of Father 11`)
),
AgeMotherCollapsed = case_when(
`Age of Mother 10` %in% c("Under 15 years", "15 - 17 years", "18 - 19 years") ~ "Under 20",
`Age of Mother 10` == "20-24 years" ~ "20-24",
`Age of Mother 10` == "25-29 years" ~ "25-29",
`Age of Mother 10` == "30-34 years" ~ "30-34",
`Age of Mother 10` == "35-39 years" ~ "35-39",
`Age of Mother 10` == "40-44 years" ~ "40-44",
`Age of Mother 10` == "45-49 years" ~ "45+",
TRUE ~ as.character(`Age of Mother 10`)
),
FatherAgeMissing = ifelse(AgeFatherCollapsed == "Unknown", 1, 0),
FatherAgeMissing = factor(FatherAgeMissing)
)
proportion_table <- data %>%
group_by(AgeMotherCollapsed) %>%
summarise(
n_missing = sum(FatherAgeMissing == 1),
n_total = n(),
prop_missing = n_missing / n_total
) %>%
arrange(factor(AgeMotherCollapsed,
levels = c("Under 20", "20-24", "25-29", "30-34",
"35-39", "40-44", "45+")))
print(proportion_table)
## # A tibble: 8 × 4
## AgeMotherCollapsed n_missing n_total prop_missing
## <chr> <int> <int> <dbl>
## 1 Under 20 72 151 0.477
## 2 20-24 75 228 0.329
## 3 25-29 94 315 0.298
## 4 30-34 107 362 0.296
## 5 35-39 96 316 0.304
## 6 40-44 62 190 0.326
## 7 45+ 6 20 0.3
## 8 <NA> NA 37 NA
ggplot(data, aes(x = factor(AgeMotherCollapsed,
levels = c("Under 20", "20-24", "25-29", "30-34",
"35-39", "40-44", "45+")),
fill = FatherAgeMissing)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "Proportion of Missing Paternal Age by Maternal Age",
x = "Maternal Age Group",
y = "Proportion",
fill = "Father Age Missing"
) +
theme_minimal()

# checking proportion of missing paternal age by WIC status
data <- data %>%
mutate(WIC = factor(WIC))
proportion_table_wic <- data %>%
group_by(WIC) %>%
summarise(
n_missing = sum(FatherAgeMissing == 1),
n_total = n(),
prop_missing = n_missing / n_total
) %>%
arrange(WIC)
print(proportion_table_wic)
## # A tibble: 5 × 4
## WIC n_missing n_total prop_missing
## <fct> <int> <int> <dbl>
## 1 No 163 628 0.260
## 2 Not Reported 136 262 0.519
## 3 Unknown or Not Stated 118 339 0.348
## 4 Yes 95 353 0.269
## 5 <NA> NA 37 NA
ggplot(data, aes(x = WIC, fill = FatherAgeMissing)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "Proportion of Missing Paternal Age by WIC Participation",
x = "WIC Participation",
y = "Proportion",
fill = "Father Age Missing"
) +
theme_minimal()

# checking for blocks of missingnes in WIC data
data <- data %>%
mutate(FatherAgeMissing = ifelse(AgeFatherCollapsed == "Unknown", 1, 0))
wic_missing_summary <- data %>%
group_by(WIC) %>%
summarise(
total = n(),
missing_father = sum(FatherAgeMissing),
known_father = total - missing_father
) %>%
arrange(desc(total))
wic_missing_summary