# install packages 
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(readr)
library(dplyr)
library(broom)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(conflicted)
conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
conflicts_prefer(dplyr::select)
## [conflicted] Will prefer dplyr::select over any other package.
options(conflicted.print = "plain")
# read in data 
options(conflicted.print = "plain")
data <- read_csv("deathdata - Sheet1.csv")
## Rows: 1619 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Notes, Age of Father 11, Age of Father 11 Code, Age of Mother 10, A...
## dbl (4): WIC Code, Infertility Treatment Used Code, Fetal Deaths, Percent of...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# clean data 
# collapse age categories 
data <- data %>%
  mutate(
    AgeFatherCollapsed = case_when(
      `Age of Father 11` %in% c("15-19 years", "20-24 years") ~ "Under 25",
      `Age of Father 11` %in% c("25-29 years", "30-34 years") ~ "25-34",
      `Age of Father 11` %in% c("35-39 years", "40-44 years") ~ "35-44",
      `Age of Father 11` %in% c("45-49 years", "50-54 years", "55 years and older") ~ "45+",
      `Age of Father 11` %in% c("Unknown or Not Stated", "Not Reported") ~ "Unknown",
      TRUE ~ as.character(`Age of Father 11`)
    ),

    AgeMotherCollapsed = case_when(
      `Age of Mother 10` %in% c("Under 15 years", "15 - 17 years", "18 - 19 years") ~ "Under 20",
      `Age of Mother 10` %in% c("20 - 24 years") ~ "20-24",
      `Age of Mother 10` %in% c("25 - 29 years") ~ "25-29",
      `Age of Mother 10` %in% c("30 - 34 years") ~ "30-34",
      `Age of Mother 10` %in% c("35 - 39 years") ~ "35-39",
      `Age of Mother 10` %in% c("40 - 44 years") ~ "40-44",
      `Age of Mother 10` %in% c("45 - 49 years", "50 years and over") ~ "45+",
      TRUE ~ as.character(`Age of Mother 10`)
    ),

    EduMotherCollapsed = case_when(
      `Mother's Education` %in% c("8th grade or less", "9th through 12th grade with no diploma") ~ "Less than high school",
      `Mother's Education` %in% c("High school graduate or GED completed", "Some college credit, but no degree") ~ "High school or some college",
      `Mother's Education` %in% c("Associate degree (AA, AS)", "Bachelor’s degree (BA, BS)", "Master’s degree (MA, MS)", "Doctorate or professional degree (PhD, MD, JD, etc.)") ~ "College or higher",
      `Mother's Education` %in% c("Unknown", "Not Reported") ~ "Unknown",
      TRUE ~ as.character(`Mother's Education`)
    )
  )

print(data)
## # A tibble: 1,619 × 16
##    Notes `Age of Father 11` `Age of Father 11 Code` `Age of Mother 10`
##    <chr> <chr>              <chr>                   <chr>             
##  1 <NA>  15-19 years        15-19                   Under 15 years    
##  2 <NA>  15-19 years        15-19                   15 - 17 years     
##  3 <NA>  15-19 years        15-19                   15 - 17 years     
##  4 <NA>  15-19 years        15-19                   15 - 17 years     
##  5 <NA>  15-19 years        15-19                   15 - 17 years     
##  6 <NA>  15-19 years        15-19                   15 - 17 years     
##  7 <NA>  15-19 years        15-19                   15 - 17 years     
##  8 <NA>  15-19 years        15-19                   15 - 17 years     
##  9 <NA>  15-19 years        15-19                   15 - 17 years     
## 10 <NA>  15-19 years        15-19                   15 - 17 years     
## # ℹ 1,609 more rows
## # ℹ 12 more variables: `Age of Mother 10 Code` <chr>,
## #   `Mother's Education` <chr>, `Mother's Education Code` <chr>, WIC <chr>,
## #   `WIC Code` <dbl>, `Infertility Treatment Used` <chr>,
## #   `Infertility Treatment Used Code` <dbl>, `Fetal Deaths` <dbl>,
## #   `Percent of Total Deaths` <dbl>, AgeFatherCollapsed <chr>,
## #   AgeMotherCollapsed <chr>, EduMotherCollapsed <chr>
# linear regression  
model <- lm(`Percent of Total Deaths` ~ `AgeFatherCollapsed` + `AgeMotherCollapsed` + 
            `EduMotherCollapsed` + `WIC` + 
            `Infertility Treatment Used`, data = data)

summary(model)
## 
## Call:
## lm(formula = `Percent of Total Deaths` ~ AgeFatherCollapsed + 
##     AgeMotherCollapsed + EduMotherCollapsed + WIC + `Infertility Treatment Used`, 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17837 -0.05526 -0.02007  0.02153  1.51563 
## 
## Coefficients:
##                                                                                        Estimate
## (Intercept)                                                                            0.172989
## AgeFatherCollapsed35-44                                                               -0.023605
## AgeFatherCollapsed45+                                                                 -0.085390
## AgeFatherCollapsedUnder 25                                                            -0.021579
## AgeFatherCollapsedUnknown                                                             -0.004141
## AgeMotherCollapsed25-29 years                                                          0.003231
## AgeMotherCollapsed30-34 years                                                          0.001377
## AgeMotherCollapsed35-39 years                                                         -0.018547
## AgeMotherCollapsed40-44 years                                                         -0.052961
## AgeMotherCollapsed45-49 years                                                         -0.084054
## AgeMotherCollapsedUnder 20                                                            -0.053932
## EduMotherCollapsedCollege or higher                                                   -0.046066
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -0.069544
## EduMotherCollapsedHigh school or some college                                          0.033730
## EduMotherCollapsedLess than high school                                               -0.044199
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       -0.033397
## EduMotherCollapsedSome college credit, but not a degree                               -0.002922
## EduMotherCollapsedUnknown                                                              0.086482
## EduMotherCollapsedUnknown or Not Stated                                               -0.005735
## WICNot Reported                                                                       -0.106130
## WICUnknown or Not Stated                                                              -0.075295
## WICYes                                                                                -0.055951
## `Infertility Treatment Used`Not Reported                                              -0.079564
## `Infertility Treatment Used`Unknown or Not Stated                                     -0.093051
## `Infertility Treatment Used`Yes                                                       -0.107034
##                                                                                       Std. Error
## (Intercept)                                                                             0.013906
## AgeFatherCollapsed35-44                                                                 0.009133
## AgeFatherCollapsed45+                                                                   0.011817
## AgeFatherCollapsedUnder 25                                                              0.011795
## AgeFatherCollapsedUnknown                                                               0.008327
## AgeMotherCollapsed25-29 years                                                           0.010581
## AgeMotherCollapsed30-34 years                                                           0.010529
## AgeMotherCollapsed35-39 years                                                           0.011049
## AgeMotherCollapsed40-44 years                                                           0.012505
## AgeMotherCollapsed45-49 years                                                           0.028774
## AgeMotherCollapsedUnder 20                                                              0.012883
## EduMotherCollapsedCollege or higher                                                     0.013964
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)   0.021645
## EduMotherCollapsedHigh school or some college                                           0.011676
## EduMotherCollapsedLess than high school                                                 0.011677
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                         0.015197
## EduMotherCollapsedSome college credit, but not a degree                                 0.012246
## EduMotherCollapsedUnknown                                                               0.022799
## EduMotherCollapsedUnknown or Not Stated                                                 0.012430
## WICNot Reported                                                                         0.010450
## WICUnknown or Not Stated                                                                0.008455
## WICYes                                                                                  0.008387
## `Infertility Treatment Used`Not Reported                                                0.017659
## `Infertility Treatment Used`Unknown or Not Stated                                       0.009141
## `Infertility Treatment Used`Yes                                                         0.012812
##                                                                                       t value
## (Intercept)                                                                            12.440
## AgeFatherCollapsed35-44                                                                -2.585
## AgeFatherCollapsed45+                                                                  -7.226
## AgeFatherCollapsedUnder 25                                                             -1.830
## AgeFatherCollapsedUnknown                                                              -0.497
## AgeMotherCollapsed25-29 years                                                           0.305
## AgeMotherCollapsed30-34 years                                                           0.131
## AgeMotherCollapsed35-39 years                                                          -1.679
## AgeMotherCollapsed40-44 years                                                          -4.235
## AgeMotherCollapsed45-49 years                                                          -2.921
## AgeMotherCollapsedUnder 20                                                             -4.186
## EduMotherCollapsedCollege or higher                                                    -3.299
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)  -3.213
## EduMotherCollapsedHigh school or some college                                           2.889
## EduMotherCollapsedLess than high school                                                -3.785
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                        -2.198
## EduMotherCollapsedSome college credit, but not a degree                                -0.239
## EduMotherCollapsedUnknown                                                               3.793
## EduMotherCollapsedUnknown or Not Stated                                                -0.461
## WICNot Reported                                                                       -10.156
## WICUnknown or Not Stated                                                               -8.905
## WICYes                                                                                 -6.671
## `Infertility Treatment Used`Not Reported                                               -4.506
## `Infertility Treatment Used`Unknown or Not Stated                                     -10.179
## `Infertility Treatment Used`Yes                                                        -8.354
##                                                                                       Pr(>|t|)
## (Intercept)                                                                            < 2e-16
## AgeFatherCollapsed35-44                                                               0.009840
## AgeFatherCollapsed45+                                                                 7.77e-13
## AgeFatherCollapsedUnder 25                                                            0.067506
## AgeFatherCollapsedUnknown                                                             0.619043
## AgeMotherCollapsed25-29 years                                                         0.760150
## AgeMotherCollapsed30-34 years                                                         0.895976
## AgeMotherCollapsed35-39 years                                                         0.093420
## AgeMotherCollapsed40-44 years                                                         2.42e-05
## AgeMotherCollapsed45-49 years                                                         0.003538
## AgeMotherCollapsedUnder 20                                                            2.99e-05
## EduMotherCollapsedCollege or higher                                                   0.000993
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.001341
## EduMotherCollapsedHigh school or some college                                         0.003921
## EduMotherCollapsedLess than high school                                               0.000159
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       0.028123
## EduMotherCollapsedSome college credit, but not a degree                               0.811411
## EduMotherCollapsedUnknown                                                             0.000154
## EduMotherCollapsedUnknown or Not Stated                                               0.644588
## WICNot Reported                                                                        < 2e-16
## WICUnknown or Not Stated                                                               < 2e-16
## WICYes                                                                                3.52e-11
## `Infertility Treatment Used`Not Reported                                              7.11e-06
## `Infertility Treatment Used`Unknown or Not Stated                                      < 2e-16
## `Infertility Treatment Used`Yes                                                        < 2e-16
##                                                                                          
## (Intercept)                                                                           ***
## AgeFatherCollapsed35-44                                                               ** 
## AgeFatherCollapsed45+                                                                 ***
## AgeFatherCollapsedUnder 25                                                            .  
## AgeFatherCollapsedUnknown                                                                
## AgeMotherCollapsed25-29 years                                                            
## AgeMotherCollapsed30-34 years                                                            
## AgeMotherCollapsed35-39 years                                                         .  
## AgeMotherCollapsed40-44 years                                                         ***
## AgeMotherCollapsed45-49 years                                                         ** 
## AgeMotherCollapsedUnder 20                                                            ***
## EduMotherCollapsedCollege or higher                                                   ***
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) ** 
## EduMotherCollapsedHigh school or some college                                         ** 
## EduMotherCollapsedLess than high school                                               ***
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       *  
## EduMotherCollapsedSome college credit, but not a degree                                  
## EduMotherCollapsedUnknown                                                             ***
## EduMotherCollapsedUnknown or Not Stated                                                  
## WICNot Reported                                                                       ***
## WICUnknown or Not Stated                                                              ***
## WICYes                                                                                ***
## `Infertility Treatment Used`Not Reported                                              ***
## `Infertility Treatment Used`Unknown or Not Stated                                     ***
## `Infertility Treatment Used`Yes                                                       ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1197 on 1557 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.1921, Adjusted R-squared:  0.1797 
## F-statistic: 15.43 on 24 and 1557 DF,  p-value: < 2.2e-16
# exploratory data analysis 

# group data by father's age category 
data %>%
  group_by(AgeFatherCollapsed) %>%
  summarise(
    # mean
    mean_fetal_death_rate = mean(`Percent of Total Deaths`, na.rm = TRUE),
    # std 
    sd_fetal_death_rate = sd(`Percent of Total Deaths`, na.rm = TRUE),
    # number of categories 
    n = n()
  )
# ANOVA 
anova_model <- aov(`Percent of Total Deaths` ~ AgeFatherCollapsed, data = data)

# TukeyHSD 
TukeyHSD(anova_model)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = `Percent of Total Deaths` ~ AgeFatherCollapsed, data = data)
## 
## $AgeFatherCollapsed
##                          diff         lwr          upr     p adj
## 35-44-25-34      -0.029566177 -0.05603045 -0.003101908 0.0196242
## 45+-25-34        -0.066325927 -0.09923859 -0.033413265 0.0000004
## Under 25-25-34   -0.018157766 -0.05120785  0.014892320 0.5623602
## Unknown-25-34    -0.022293128 -0.04629661  0.001710349 0.0832483
## 45+-35-44        -0.036759749 -0.07038937 -0.003130126 0.0240217
## Under 25-35-44    0.011408411 -0.02235572  0.045172541 0.8881808
## Unknown-35-44     0.007273049 -0.01770444  0.032250543 0.9320515
## Under 25-45+      0.048168161  0.00914378  0.087192541 0.0068689
## Unknown-45+       0.044032799  0.01230330  0.075762296 0.0014689
## Unknown-Under 25 -0.004135362 -0.03600739  0.027736661 0.9966271
#interaction 

# linear regression with all interaction effects 
model1 <- lm(`Percent of Total Deaths` ~ 
              (AgeFatherCollapsed + AgeMotherCollapsed + 
               EduMotherCollapsed + WIC + 
               `Infertility Treatment Used`)^2,
            data = data)

# filtering out significant interaction terms 

tidy(model1) %>%
  filter(p.value < 0.05) %>%
  dplyr::select(term, estimate, std.error, statistic, p.value)
#check correlations 

# select and convert relevant columns to numeric
data_numeric <- data %>%
  mutate(
    AgeFatherCollapsed = as.numeric(factor(AgeFatherCollapsed)),
    AgeMotherCollapsed = as.numeric(factor(AgeMotherCollapsed)),
    EduMotherCollapsed = as.numeric(factor(EduMotherCollapsed)),
    WIC = as.numeric(factor(WIC)),
    InfertilityTreatmentUsed = as.numeric(factor(`Infertility Treatment Used`))
  )

# correlation matrix 
correlation_matrix <- cor(data_numeric %>%
                            dplyr::select(AgeFatherCollapsed, AgeMotherCollapsed, 
                                          EduMotherCollapsed, WIC, InfertilityTreatmentUsed),
                          use = "complete.obs")
print(correlation_matrix)
##                          AgeFatherCollapsed AgeMotherCollapsed
## AgeFatherCollapsed               1.00000000         0.14680609
## AgeMotherCollapsed               0.14680609         1.00000000
## EduMotherCollapsed               0.10848709         0.03814675
## WIC                              0.01990136        -0.02033211
## InfertilityTreatmentUsed        -0.04281391        -0.06578029
##                          EduMotherCollapsed         WIC
## AgeFatherCollapsed               0.10848709  0.01990136
## AgeMotherCollapsed               0.03814675 -0.02033211
## EduMotherCollapsed               1.00000000  0.03105695
## WIC                              0.03105695  1.00000000
## InfertilityTreatmentUsed         0.05504106 -0.23349141
##                          InfertilityTreatmentUsed
## AgeFatherCollapsed                    -0.04281391
## AgeMotherCollapsed                    -0.06578029
## EduMotherCollapsed                     0.05504106
## WIC                                   -0.23349141
## InfertilityTreatmentUsed               1.00000000
# vif 
vif(model)
##                                  GVIF Df GVIF^(1/(2*Df))
## AgeFatherCollapsed           1.577228  4        1.058612
## AgeMotherCollapsed           1.553854  6        1.037411
## EduMotherCollapsed           2.733819  8        1.064874
## WIC                          1.644051  3        1.086390
## `Infertility Treatment Used` 2.219102  3        1.142079
print(vif)
## function (mod, ...) 
## {
##     UseMethod("vif")
## }
## <bytecode: 0x555f22e8aaa0>
## <environment: namespace:car>
# model validation 

# residuals vs fitted plot 
plot(model1$fitted.values, resid(model1),
     xlab = "Fitted values", ylab = "Residuals",
     main = "Residuals vs Fitted")
abline(h = 0, col = "red", lty = 2)

# QQ plot of residuals
qqnorm(resid(model1), main = "QQ Plot of Residuals")
qqline(resid(model1), col = "blue", lwd = 2)

# histogram of residuals 
hist(resid(model1), breaks = 30, 
     main = "Histogram of Residuals", 
     xlab = "Residuals", col = "lightblue")

# trying a quadratic model to fix residual issues

# square term 
data_numeric <- data_numeric %>%
  mutate(AgeFatherCollapsed2 = AgeFatherCollapsed^2)

# regression 
model_quad <- lm(
  `Percent of Total Deaths` ~ 
    (AgeFatherCollapsed2 + AgeMotherCollapsed +
     EduMotherCollapsed + WIC +
     InfertilityTreatmentUsed)^2,
  data = data_numeric
)

# residuals vs. fitted plot 
plot(model_quad$fitted.values, resid(model1),
     xlab = "Fitted values", ylab = "Residuals",
     main = "Residuals vs Fitted")
abline(h = 0, col = "red", lty = 2)

# QQ norm plot 
qqnorm(resid(model_quad), main = "QQ Plot of Residuals")
qqline(resid(model_quad), col = "blue", lwd = 2)

sig <- tidy(model) %>%
  dplyr::filter(str_detect(term, ":"), p.value < 0.05)
# running regression 

tidy_model <- tidy(model_quad)

main_effects <- tidy_model %>%
  filter(!str_detect(term, ":"), p.value < 0.05)

significant_interactions <- tidy(model_quad) %>%
  filter(grepl(":", term), p.value < 0.05)

significant_terms <- bind_rows(main_effects, significant_interactions)

print(significant_terms)
## # A tibble: 10 × 5
##    term                                    estimate std.error statistic  p.value
##    <chr>                                      <dbl>     <dbl>     <dbl>    <dbl>
##  1 (Intercept)                              3.49e-1  0.0349        9.99 8.37e-23
##  2 AgeFatherCollapsed2                     -3.90e-3  0.00133      -2.93 3.45e- 3
##  3 AgeMotherCollapsed                      -2.92e-2  0.00716      -4.07 4.89e- 5
##  4 EduMotherCollapsed                      -1.25e-2  0.00512      -2.44 1.47e- 2
##  5 WIC                                     -5.58e-2  0.00963      -5.79 8.38e- 9
##  6 InfertilityTreatmentUsed                -7.60e-2  0.0121       -6.27 4.68e-10
##  7 AgeFatherCollapsed2:EduMotherCollapsed   4.16e-4  0.000128      3.24 1.22e- 3
##  8 AgeMotherCollapsed:WIC                   3.06e-3  0.00156       1.96 4.99e- 2
##  9 AgeMotherCollapsed:InfertilityTreatmen…  7.56e-3  0.00237       3.19 1.43e- 3
## 10 EduMotherCollapsed:WIC                   2.83e-3  0.00120       2.37 1.81e- 2
# get specific relationships of significance 

data$AgeFatherCollapsed <- factor(data$AgeFatherCollapsed)
data$AgeMotherCollapsed <- factor(data$AgeMotherCollapsed)
data$EduMotherCollapsed <- factor(data$EduMotherCollapsed)
data$WIC <- factor(data$WIC)
data$InfertilityTreatmentUsed <- factor(data$`Infertility Treatment Used`)

model2 <- lm(`Percent of Total Deaths` ~ 
                (AgeFatherCollapsed + AgeMotherCollapsed + 
                 EduMotherCollapsed + WIC + 
                 InfertilityTreatmentUsed)^2,
             data = data)

significant_terms <- tidy(model2) %>%
  filter(p.value < 0.05) %>%
  dplyr::select(term, estimate, std.error, statistic, p.value)

print(significant_terms)
## # A tibble: 112 × 5
##    term                          estimate std.error statistic  p.value
##    <chr>                            <dbl>     <dbl>     <dbl>    <dbl>
##  1 (Intercept)                     0.261     0.0328      7.94 4.27e-15
##  2 AgeFatherCollapsed35-44        -0.200     0.0340     -5.89 4.94e- 9
##  3 AgeFatherCollapsed45+          -0.381     0.0524     -7.26 6.46e-13
##  4 AgeFatherCollapsedUnder 25     -0.0896    0.0446     -2.01 4.48e- 2
##  5 AgeFatherCollapsedUnknown      -0.108     0.0291     -3.71 2.17e- 4
##  6 AgeMotherCollapsed25-29 years   0.213     0.0350      6.10 1.37e- 9
##  7 AgeMotherCollapsed30-34 years   0.207     0.0348      5.95 3.49e- 9
##  8 AgeMotherCollapsed40-44 years  -0.119     0.0424     -2.82 4.91e- 3
##  9 AgeMotherCollapsed45-49 years  -0.164     0.0796     -2.06 3.96e- 2
## 10 AgeMotherCollapsedUnder 20     -0.200     0.0419     -4.78 1.96e- 6
## # ℹ 102 more rows
# correlation 

# correlation matrix
correlation_matrix <- cor(data_numeric %>%
                            dplyr::select(AgeFatherCollapsed, AgeMotherCollapsed, 
                                          EduMotherCollapsed, WIC, InfertilityTreatmentUsed),
                          use = "complete.obs")
print(correlation_matrix)
##                          AgeFatherCollapsed AgeMotherCollapsed
## AgeFatherCollapsed               1.00000000         0.14680609
## AgeMotherCollapsed               0.14680609         1.00000000
## EduMotherCollapsed               0.10848709         0.03814675
## WIC                              0.01990136        -0.02033211
## InfertilityTreatmentUsed        -0.04281391        -0.06578029
##                          EduMotherCollapsed         WIC
## AgeFatherCollapsed               0.10848709  0.01990136
## AgeMotherCollapsed               0.03814675 -0.02033211
## EduMotherCollapsed               1.00000000  0.03105695
## WIC                              0.03105695  1.00000000
## InfertilityTreatmentUsed         0.05504106 -0.23349141
##                          InfertilityTreatmentUsed
## AgeFatherCollapsed                    -0.04281391
## AgeMotherCollapsed                    -0.06578029
## EduMotherCollapsed                     0.05504106
## WIC                                   -0.23349141
## InfertilityTreatmentUsed               1.00000000
# VIF
vif <- vif(model)
print(vif)
##                                  GVIF Df GVIF^(1/(2*Df))
## AgeFatherCollapsed           1.577228  4        1.058612
## AgeMotherCollapsed           1.553854  6        1.037411
## EduMotherCollapsed           2.733819  8        1.064874
## WIC                          1.644051  3        1.086390
## `Infertility Treatment Used` 2.219102  3        1.142079
# reduced model 

reduced <- glm( `Percent of Total Deaths` ~ 
  AgeFatherCollapsed^2 + 
  AgeMotherCollapsed + 
  EduMotherCollapsed + 
  WIC + 
  InfertilityTreatmentUsed +

  # significant interactions
  AgeFatherCollapsed:AgeMotherCollapsed +
  AgeFatherCollapsed:EduMotherCollapsed +
  AgeFatherCollapsed:WIC +
  AgeMotherCollapsed:AgeFatherCollapsed +
  WIC:InfertilityTreatmentUsed,

  data = data

)

# extract coefficients summary
coefs <- summary(reduced)$coefficients

# filter significant terms
sig_coefs <- coefs[coefs[, "Pr(>|t|)"] < 0.05, ]

# filter positives and negatives
positive_results <- sig_coefs[sig_coefs[, "Estimate"] > 0, ]
negative_results <- sig_coefs[sig_coefs[, "Estimate"] < 0, ]

print(positive_results)
##                                                                                             Estimate
## (Intercept)                                                                               0.25046836
## AgeMotherCollapsed25-29 years                                                             0.07130901
## AgeMotherCollapsed30-34 years                                                             0.05113566
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years                                     0.18036533
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years                                     0.19886203
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years                                       0.18746324
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years                                   0.07566745
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years                                       0.14478334
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college                   0.06156282
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school                        0.13279736
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school                         0.09643252
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.07451658
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated                         0.16396120
## AgeFatherCollapsedUnknown:WICNot Reported                                                 0.10902976
## AgeFatherCollapsed35-44:WICUnknown or Not Stated                                          0.04796127
## AgeFatherCollapsed45+:WICUnknown or Not Stated                                            0.14678751
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated                                       0.08335834
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated                                        0.08249853
## AgeFatherCollapsed45+:WICYes                                                              0.08657888
## AgeFatherCollapsedUnder 25:WICYes                                                         0.07357059
## WICNot Reported:InfertilityTreatmentUsedNot Reported                                      0.37220752
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated                             0.16057363
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated                    0.13981350
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated                                      0.05824336
##                                                                                           Std. Error
## (Intercept)                                                                               0.02199397
## AgeMotherCollapsed25-29 years                                                             0.01694177
## AgeMotherCollapsed30-34 years                                                             0.01719512
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years                                     0.03176353
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years                                     0.03827651
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years                                       0.05303472
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years                                   0.03336647
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years                                       0.07122384
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college                   0.02858583
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school                        0.04765455
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school                         0.02852209
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 0.03637753
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated                         0.02991923
## AgeFatherCollapsedUnknown:WICNot Reported                                                 0.02347646
## AgeFatherCollapsed35-44:WICUnknown or Not Stated                                          0.02155150
## AgeFatherCollapsed45+:WICUnknown or Not Stated                                            0.03147773
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated                                       0.02769763
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated                                        0.02024229
## AgeFatherCollapsed45+:WICYes                                                              0.02597934
## AgeFatherCollapsedUnder 25:WICYes                                                         0.02592145
## WICNot Reported:InfertilityTreatmentUsedNot Reported                                      0.03942157
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated                             0.02303759
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated                    0.02020377
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated                                      0.02475445
##                                                                                             t value
## (Intercept)                                                                               11.388046
## AgeMotherCollapsed25-29 years                                                              4.209064
## AgeMotherCollapsed30-34 years                                                              2.973846
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years                                      5.678379
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years                                      5.195406
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years                                        3.534727
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years                                    2.267769
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years                                        2.032793
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college                    2.153613
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school                         2.786667
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school                          3.380976
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)  2.048424
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated                          5.480129
## AgeFatherCollapsedUnknown:WICNot Reported                                                  4.644215
## AgeFatherCollapsed35-44:WICUnknown or Not Stated                                           2.225426
## AgeFatherCollapsed45+:WICUnknown or Not Stated                                             4.663218
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated                                        3.009584
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated                                         4.075553
## AgeFatherCollapsed45+:WICYes                                                               3.332605
## AgeFatherCollapsedUnder 25:WICYes                                                          2.838213
## WICNot Reported:InfertilityTreatmentUsedNot Reported                                       9.441721
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated                              6.970071
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated                     6.920169
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated                                       2.352844
##                                                                                               Pr(>|t|)
## (Intercept)                                                                               7.256739e-29
## AgeMotherCollapsed25-29 years                                                             2.717974e-05
## AgeMotherCollapsed30-34 years                                                             2.988323e-03
## AgeFatherCollapsed35-44:AgeMotherCollapsed35-39 years                                     1.632867e-08
## AgeFatherCollapsed35-44:AgeMotherCollapsed40-44 years                                     2.326277e-07
## AgeFatherCollapsed45+:AgeMotherCollapsed40-44 years                                       4.207137e-04
## AgeFatherCollapsedUnknown:AgeMotherCollapsed40-44 years                                   2.348633e-02
## AgeFatherCollapsed45+:AgeMotherCollapsed45-49 years                                       4.225089e-02
## AgeFatherCollapsedUnknown:EduMotherCollapsedHigh school or some college                   3.143093e-02
## AgeFatherCollapsedUnder 25:EduMotherCollapsedLess than high school                        5.393044e-03
## AgeFatherCollapsedUnknown:EduMotherCollapsedLess than high school                         7.410212e-04
## AgeFatherCollapsedUnknown:EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA) 4.069389e-02
## AgeFatherCollapsedUnknown:EduMotherCollapsedUnknown or Not Stated                         4.985582e-08
## AgeFatherCollapsedUnknown:WICNot Reported                                                 3.715078e-06
## AgeFatherCollapsed35-44:WICUnknown or Not Stated                                          2.620221e-02
## AgeFatherCollapsed45+:WICUnknown or Not Stated                                            3.392349e-06
## AgeFatherCollapsedUnder 25:WICUnknown or Not Stated                                       2.660111e-03
## AgeFatherCollapsedUnknown:WICUnknown or Not Stated                                        4.833214e-05
## AgeFatherCollapsed45+:WICYes                                                              8.815140e-04
## AgeFatherCollapsedUnder 25:WICYes                                                         4.598348e-03
## WICNot Reported:InfertilityTreatmentUsedNot Reported                                      1.365591e-20
## WICNot Reported:InfertilityTreatmentUsedUnknown or Not Stated                             4.747453e-12
## WICUnknown or Not Stated:InfertilityTreatmentUsedUnknown or Not Stated                    6.685740e-12
## WICYes:InfertilityTreatmentUsedUnknown or Not Stated                                      1.876021e-02
print(negative_results)
##                                                                                          Estimate
## AgeFatherCollapsed35-44                                                               -0.11872073
## AgeFatherCollapsed45+                                                                 -0.20132485
## AgeFatherCollapsedUnder 25                                                            -0.09584690
## AgeFatherCollapsedUnknown                                                             -0.09392159
## AgeMotherCollapsed35-39 years                                                         -0.06147959
## AgeMotherCollapsed40-44 years                                                         -0.16257100
## AgeMotherCollapsed45-49 years                                                         -0.14831729
## AgeMotherCollapsedUnder 20                                                            -0.12787741
## EduMotherCollapsedCollege or higher                                                   -0.07223809
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -0.13609652
## EduMotherCollapsedLess than high school                                               -0.09100405
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       -0.08666991
## EduMotherCollapsedUnknown or Not Stated                                               -0.07826217
## WICNot Reported                                                                       -0.23964968
## WICUnknown or Not Stated                                                              -0.17745661
## WICYes                                                                                -0.10579105
## InfertilityTreatmentUsedNot Reported                                                  -0.25716733
## InfertilityTreatmentUsedUnknown or Not Stated                                         -0.20988375
## InfertilityTreatmentUsedYes                                                           -0.16750100
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years                              -0.17709187
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years                               -0.06448844
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years                              -0.20351826
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years                               -0.05797466
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years                              -0.16323340
##                                                                                       Std. Error
## AgeFatherCollapsed35-44                                                               0.03530013
## AgeFatherCollapsed45+                                                                 0.05355382
## AgeFatherCollapsedUnder 25                                                            0.04751518
## AgeFatherCollapsedUnknown                                                             0.03023218
## AgeMotherCollapsed35-39 years                                                         0.01960944
## AgeMotherCollapsed40-44 years                                                         0.02766331
## AgeMotherCollapsed45-49 years                                                         0.04665700
## AgeMotherCollapsedUnder 20                                                            0.02931195
## EduMotherCollapsedCollege or higher                                                   0.02379288
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.03667513
## EduMotherCollapsedLess than high school                                               0.02094302
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       0.02561363
## EduMotherCollapsedUnknown or Not Stated                                               0.02253605
## WICNot Reported                                                                       0.01963781
## WICUnknown or Not Stated                                                              0.01617566
## WICYes                                                                                0.01565797
## InfertilityTreatmentUsedNot Reported                                                  0.02255790
## InfertilityTreatmentUsedUnknown or Not Stated                                         0.01354198
## InfertilityTreatmentUsedYes                                                           0.01283327
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years                              0.02981184
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years                               0.02391540
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years                              0.03516393
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years                               0.02381780
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years                              0.05538919
##                                                                                          t value
## AgeFatherCollapsed35-44                                                                -3.363181
## AgeFatherCollapsed45+                                                                  -3.759300
## AgeFatherCollapsedUnder 25                                                             -2.017185
## AgeFatherCollapsedUnknown                                                              -3.106676
## AgeMotherCollapsed35-39 years                                                          -3.135204
## AgeMotherCollapsed40-44 years                                                          -5.876774
## AgeMotherCollapsed45-49 years                                                          -3.178886
## AgeMotherCollapsedUnder 20                                                             -4.362637
## EduMotherCollapsedCollege or higher                                                    -3.036122
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)  -3.710867
## EduMotherCollapsedLess than high school                                                -4.345317
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                        -3.383741
## EduMotherCollapsedUnknown or Not Stated                                                -3.472754
## WICNot Reported                                                                       -12.203481
## WICUnknown or Not Stated                                                              -10.970595
## WICYes                                                                                 -6.756370
## InfertilityTreatmentUsedNot Reported                                                  -11.400322
## InfertilityTreatmentUsedUnknown or Not Stated                                         -15.498753
## InfertilityTreatmentUsedYes                                                           -13.052088
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years                               -5.940320
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years                                -2.696524
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years                               -5.787700
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years                                -2.434090
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years                               -2.947026
##                                                                                           Pr(>|t|)
## AgeFatherCollapsed35-44                                                               7.900912e-04
## AgeFatherCollapsed45+                                                                 1.769919e-04
## AgeFatherCollapsedUnder 25                                                            4.385575e-02
## AgeFatherCollapsedUnknown                                                             1.927850e-03
## AgeMotherCollapsed35-39 years                                                         1.750972e-03
## AgeMotherCollapsed40-44 years                                                         5.155605e-09
## AgeMotherCollapsed45-49 years                                                         1.508892e-03
## AgeMotherCollapsedUnder 20                                                            1.373639e-05
## EduMotherCollapsedCollege or higher                                                   2.438101e-03
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 2.141681e-04
## EduMotherCollapsedLess than high school                                               1.485144e-05
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       7.336571e-04
## EduMotherCollapsedUnknown or Not Stated                                               5.299230e-04
## WICNot Reported                                                                       1.033602e-32
## WICUnknown or Not Stated                                                              5.558313e-27
## WICYes                                                                                2.025084e-11
## InfertilityTreatmentUsedNot Reported                                                  6.374559e-29
## InfertilityTreatmentUsedUnknown or Not Stated                                         2.463073e-50
## InfertilityTreatmentUsedYes                                                           6.107116e-37
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed25-29 years                              3.536970e-09
## AgeFatherCollapsedUnknown:AgeMotherCollapsed25-29 years                               7.085872e-03
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed30-34 years                              8.689375e-09
## AgeFatherCollapsedUnknown:AgeMotherCollapsed30-34 years                               1.504640e-02
## AgeFatherCollapsedUnder 25:AgeMotherCollapsed35-39 years                              3.258484e-03
# original model 

# extract coefficients summary
coefs <- summary(model)$coefficients

# filter significant terms
significant_terms <- coefs[coefs[, "Pr(>|t|)"] < 0.05, ]
print(significant_terms)
##                                                                                          Estimate
## (Intercept)                                                                            0.17298855
## AgeFatherCollapsed35-44                                                               -0.02360548
## AgeFatherCollapsed45+                                                                 -0.08538972
## AgeMotherCollapsed40-44 years                                                         -0.05296061
## AgeMotherCollapsed45-49 years                                                         -0.08405359
## AgeMotherCollapsedUnder 20                                                            -0.05393178
## EduMotherCollapsedCollege or higher                                                   -0.04606648
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) -0.06954444
## EduMotherCollapsedHigh school or some college                                          0.03372967
## EduMotherCollapsedLess than high school                                               -0.04419937
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       -0.03339732
## EduMotherCollapsedUnknown                                                              0.08648237
## WICNot Reported                                                                       -0.10612983
## WICUnknown or Not Stated                                                              -0.07529499
## WICYes                                                                                -0.05595147
## `Infertility Treatment Used`Not Reported                                              -0.07956380
## `Infertility Treatment Used`Unknown or Not Stated                                     -0.09305102
## `Infertility Treatment Used`Yes                                                       -0.10703437
##                                                                                        Std. Error
## (Intercept)                                                                           0.013905917
## AgeFatherCollapsed35-44                                                               0.009133179
## AgeFatherCollapsed45+                                                                 0.011817440
## AgeMotherCollapsed40-44 years                                                         0.012504773
## AgeMotherCollapsed45-49 years                                                         0.028774416
## AgeMotherCollapsedUnder 20                                                            0.012882985
## EduMotherCollapsedCollege or higher                                                   0.013964262
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 0.021644925
## EduMotherCollapsedHigh school or some college                                         0.011676263
## EduMotherCollapsedLess than high school                                               0.011677021
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       0.015196993
## EduMotherCollapsedUnknown                                                             0.022798577
## WICNot Reported                                                                       0.010450359
## WICUnknown or Not Stated                                                              0.008455219
## WICYes                                                                                0.008387039
## `Infertility Treatment Used`Not Reported                                              0.017659001
## `Infertility Treatment Used`Unknown or Not Stated                                     0.009141024
## `Infertility Treatment Used`Yes                                                       0.012812242
##                                                                                          t value
## (Intercept)                                                                            12.439924
## AgeFatherCollapsed35-44                                                                -2.584585
## AgeFatherCollapsed45+                                                                  -7.225738
## AgeMotherCollapsed40-44 years                                                          -4.235232
## AgeMotherCollapsed45-49 years                                                          -2.921123
## AgeMotherCollapsedUnder 20                                                             -4.186280
## EduMotherCollapsedCollege or higher                                                    -3.298884
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)  -3.212967
## EduMotherCollapsedHigh school or some college                                           2.888738
## EduMotherCollapsedLess than high school                                                -3.785158
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                        -2.197627
## EduMotherCollapsedUnknown                                                               3.793323
## WICNot Reported                                                                       -10.155616
## WICUnknown or Not Stated                                                               -8.905150
## WICYes                                                                                 -6.671183
## `Infertility Treatment Used`Not Reported                                               -4.505566
## `Infertility Treatment Used`Unknown or Not Stated                                     -10.179496
## `Infertility Treatment Used`Yes                                                        -8.354070
##                                                                                           Pr(>|t|)
## (Intercept)                                                                           6.136448e-34
## AgeFatherCollapsed35-44                                                               9.839907e-03
## AgeFatherCollapsed45+                                                                 7.773506e-13
## AgeMotherCollapsed40-44 years                                                         2.416805e-05
## AgeMotherCollapsed45-49 years                                                         3.537966e-03
## AgeMotherCollapsedUnder 20                                                            2.994269e-05
## EduMotherCollapsedCollege or higher                                                   9.926116e-04
## EduMotherCollapsedDoctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD) 1.340589e-03
## EduMotherCollapsedHigh school or some college                                         3.921405e-03
## EduMotherCollapsedLess than high school                                               1.594390e-04
## EduMotherCollapsedMaster's degree (MA, MS, MEng, MEd, MSW, MBA)                       2.812253e-02
## EduMotherCollapsedUnknown                                                             1.543308e-04
## WICNot Reported                                                                       1.658498e-23
## WICUnknown or Not Stated                                                              1.451290e-18
## WICYes                                                                                3.515657e-11
## `Infertility Treatment Used`Not Reported                                              7.113530e-06
## `Infertility Treatment Used`Unknown or Not Stated                                     1.317933e-23
## `Infertility Treatment Used`Yes                                                       1.440464e-16
par(mfrow = c(2, 3)) 

# original model 
plot(model1$fitted.values, resid(model1),
     xlab = "Fitted values", ylab = "Residuals",
     main = "Residuals vs Fitted (Original)")
abline(h = 0, col = "red", lty = 2)

# QQ plot of residuals - original model 
qqnorm(resid(model1), main = "QQ Plot (Original)")
qqline(resid(model1), col = "blue", lwd = 2)

# histogram of residuals - original model 
hist(resid(model1), breaks = 30,
     main = "Histogram (Original)",
     xlab = "Residuals", col = "lightblue")

# quadratic model
plot(model_quad$fitted.values, resid(model_quad),
     xlab = "Fitted values", ylab = "Residuals",
     main = "Residuals vs Fitted (Quadratic)")
abline(h = 0, col = "red", lty = 2)

# QQ plot of residuals - quadratic model 
qqnorm(resid(model_quad), main = "QQ Plot (Quadratic)")
qqline(resid(model_quad), col = "blue", lwd = 2)

# QQ plot of residuals - quadratic model 
hist(resid(model_quad), breaks = 30,
     main = "Histogram (Quadratic)",
     xlab = "Residuals", col = "lightblue")

#imputing the data 

#loading in mice package 
library(mice)


data <- data %>%
  mutate(
    AgeFatherCollapsed = ifelse(AgeFatherCollapsed == "Unknown", NA, AgeFatherCollapsed),
    AgeFatherCollapsed = factor(AgeFatherCollapsed),
    AgeMotherCollapsed = factor(AgeMotherCollapsed),
    EduMotherCollapsed = factor(EduMotherCollapsed),
    WIC = factor(WIC),
    `Infertility Treatment Used` = factor(`Infertility Treatment Used`)
  )

impute_data <- data %>%
  select(
    AgeFatherCollapsed,
    AgeMotherCollapsed,
    EduMotherCollapsed,
    WIC,
    `Infertility Treatment Used`,
    `Percent of Total Deaths`
  )

meth <- make.method(impute_data)
meth["AgeFatherCollapsed"] <- "polyreg"  
meth[c("AgeMotherCollapsed",
       "EduMotherCollapsed",
       "WIC",
       "Infertility Treatment Used",
       "Percent of Total Deaths")] <- ""  

imp <- mice(
  impute_data,
  method = meth,
  m = 5,
  seed = 123
)
## 
##  iter imp variable
##   1   1  AgeFatherCollapsed
##   1   2  AgeFatherCollapsed
##   1   3  AgeFatherCollapsed
##   1   4  AgeFatherCollapsed
##   1   5  AgeFatherCollapsed
##   2   1  AgeFatherCollapsed
##   2   2  AgeFatherCollapsed
##   2   3  AgeFatherCollapsed
##   2   4  AgeFatherCollapsed
##   2   5  AgeFatherCollapsed
##   3   1  AgeFatherCollapsed
##   3   2  AgeFatherCollapsed
##   3   3  AgeFatherCollapsed
##   3   4  AgeFatherCollapsed
##   3   5  AgeFatherCollapsed
##   4   1  AgeFatherCollapsed
##   4   2  AgeFatherCollapsed
##   4   3  AgeFatherCollapsed
##   4   4  AgeFatherCollapsed
##   4   5  AgeFatherCollapsed
##   5   1  AgeFatherCollapsed
##   5   2  AgeFatherCollapsed
##   5   3  AgeFatherCollapsed
##   5   4  AgeFatherCollapsed
##   5   5  AgeFatherCollapsed
fit <- with(
  imp,
  lm(`Percent of Total Deaths` ~
       AgeFatherCollapsed +
       AgeMotherCollapsed +
       EduMotherCollapsed +
       WIC +
       `Infertility Treatment Used`)
)

summary(pool(fit))
# checking proportion of missing paternal age by maternal age 

data <- data %>%
  mutate(
    AgeFatherCollapsed = case_when(
      `Age of Father 11` %in% c("15-19 years", "20-24 years") ~ "Under 25",
      `Age of Father 11` %in% c("25-29 years", "30-34 years") ~ "25-34",
      `Age of Father 11` %in% c("35-39 years", "40-44 years") ~ "35-44",
      `Age of Father 11` %in% c("45-49 years", "50-54 years", "55 years and older") ~ "45+",
      `Age of Father 11` %in% c("Unknown or Not Stated", "Not Reported") ~ "Unknown",
      TRUE ~ as.character(`Age of Father 11`)
    ),
    AgeMotherCollapsed = case_when(
      `Age of Mother 10` %in% c("Under 15 years", "15 - 17 years", "18 - 19 years") ~ "Under 20",
      `Age of Mother 10` == "20-24 years" ~ "20-24",
      `Age of Mother 10` == "25-29 years" ~ "25-29",
      `Age of Mother 10` == "30-34 years" ~ "30-34",
      `Age of Mother 10` == "35-39 years" ~ "35-39",
      `Age of Mother 10` == "40-44 years" ~ "40-44",
      `Age of Mother 10` == "45-49 years" ~ "45+",
      TRUE ~ as.character(`Age of Mother 10`)
    ),
    FatherAgeMissing = ifelse(AgeFatherCollapsed == "Unknown", 1, 0),
    FatherAgeMissing = factor(FatherAgeMissing)
  )

proportion_table <- data %>%
  group_by(AgeMotherCollapsed) %>%
  summarise(
    n_missing = sum(FatherAgeMissing == 1),
    n_total   = n(),
    prop_missing = n_missing / n_total
  ) %>%
  arrange(factor(AgeMotherCollapsed, 
                 levels = c("Under 20", "20-24", "25-29", "30-34", 
                            "35-39", "40-44", "45+")))
print(proportion_table)
## # A tibble: 8 × 4
##   AgeMotherCollapsed n_missing n_total prop_missing
##   <chr>                  <int>   <int>        <dbl>
## 1 Under 20                  72     151        0.477
## 2 20-24                     75     228        0.329
## 3 25-29                     94     315        0.298
## 4 30-34                    107     362        0.296
## 5 35-39                     96     316        0.304
## 6 40-44                     62     190        0.326
## 7 45+                        6      20        0.3  
## 8 <NA>                      NA      37       NA
ggplot(data, aes(x = factor(AgeMotherCollapsed, 
                            levels = c("Under 20", "20-24", "25-29", "30-34", 
                                       "35-39", "40-44", "45+")), 
                 fill = FatherAgeMissing)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Proportion of Missing Paternal Age by Maternal Age",
    x = "Maternal Age Group",
    y = "Proportion",
    fill = "Father Age Missing"
  ) +
  theme_minimal()

# checking proportion of missing paternal age by WIC status 

data <- data %>%
  mutate(WIC = factor(WIC))

proportion_table_wic <- data %>%
  group_by(WIC) %>%
  summarise(
    n_missing = sum(FatherAgeMissing == 1),
    n_total   = n(),
    prop_missing = n_missing / n_total
  ) %>%
  arrange(WIC)
print(proportion_table_wic)
## # A tibble: 5 × 4
##   WIC                   n_missing n_total prop_missing
##   <fct>                     <int>   <int>        <dbl>
## 1 No                          163     628        0.260
## 2 Not Reported                136     262        0.519
## 3 Unknown or Not Stated       118     339        0.348
## 4 Yes                          95     353        0.269
## 5 <NA>                         NA      37       NA
ggplot(data, aes(x = WIC, fill = FatherAgeMissing)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Proportion of Missing Paternal Age by WIC Participation",
    x = "WIC Participation",
    y = "Proportion",
    fill = "Father Age Missing"
  ) +
  theme_minimal()

# checking for blocks of missingnes in WIC data 

data <- data %>%
  mutate(FatherAgeMissing = ifelse(AgeFatherCollapsed == "Unknown", 1, 0))

wic_missing_summary <- data %>%
  group_by(WIC) %>%
  summarise(
    total = n(),
    missing_father = sum(FatherAgeMissing),
    known_father = total - missing_father
  ) %>%
  arrange(desc(total))

wic_missing_summary