getwd()
[1] "C:/Users/jacob/University of Texas at San Antonio/TEAM - Cossman Crew - General"
setwd("C:/Users/jacob/Downloads")mhas0121dummies<-read.csv("MHAS0121 NO LABELS NO NEGATIVE TIMES (2).csv")mhas0121noNA<-mhas0121dummies#update.packages()#Split data by person-monthlibrary(survival)library(dplyr)
# Possible max time is 296 months, but using an even higher threshold in case things change
#rm(mhas_personmonth)

# Code adapted from Mills (2011): section 3.5
# Expands data into person-months, up to the maximum age in months observed in the data
mhas_personmonth <- survSplit(mhas0121noNA, cut = c(1:2000),
                              start = "startmo", end = "endmo", event = "dead")

# Sorting data by ID
mhas_personmonth <- mhas_personmonth[order(mhas_personmonth$id, mhas_personmonth$ageintmo), ]

# Creating time-varying age variables for different model specifications
mhas_personmonth <- mhas_personmonth %>%
  mutate(age = startmo/12,  # could truncate if one wanted to, but I don't
         agesq = age^2,
         agectr = age - 50,
         ageln = logb(age),
         .after = "died0121")

library(dplyr)
library(ggplot2)
#install.packages("AMR")library(AMR)
mhas_personmonth <- mhas_personmonth %>%
  mutate(
    # Create 5-year age groups, top-coded at 85+
    age5 = dplyr::case_when(
      age >= 50 & age < 55 ~ "50-54",
      age >= 55 & age < 60 ~ "55-59",
      age >= 60 & age < 65 ~ "60-64",
      age >= 65 & age < 70 ~ "65-69",
      age >= 70 & age < 75 ~ "70-74",
      age >= 75 & age < 80 ~ "75-79",
      age >= 80 & age < 85 ~ "80-84",
      age >= 85 ~ "85+"
    ),
    # Convert to factor
    age5 = factor(age5, levels = c("50-54", "55-59", "60-64", "65-69",
                                   "70-74", "75-79", "80-84", "85+"))
  )

mhas_personmonth <- mhas_personmonth %>%
  mutate(
    age5b = case_when(
      age >= 50 & age < 55 ~ "50-54",
      age >= 55 & age < 60 ~ "55-59",
      age >= 60 & age < 65 ~ "60-64",
      age >= 65 & age < 70 ~ "65-69",
      age >= 70 & age < 75 ~ "70-74",
      age >= 75 & age < 80 ~ "75-79",
      age >= 80 & age < 85 ~ "80-84",
      age >= 85 ~ "85+"
    ),
    age5b = factor(age5b, levels = c("50-54", "55-59", "60-64", "65-69",
                                     "70-74", "75-79", "80-84", "85+"))
  )

#write.csv(mhas_personmonth, "C:\\Users\\eay621\\OneDrive - University of Texas at San Antonio\\3. TEACHING\\EVENT HISTORY ANALYSIS\\EHA Fall 2024\\4. Problem sets\\MHAS0121_PERSONMONTHS.csv", row.names = TRUE)

# Export to CSV to explore more easily (at least for me)
#testPMdata <- mhas_personmonth[which(mhas_personmonth$id < 20), ]
#testPMdata <- subset(mhas_personmonth, id < 50, select = c(id, ageint, agecensor, startmo, endmo, age, age5, died0121, dead))
#write.csv(testPMdata, "C:\\Users\\eay621\\OneDrive - University of Texas at San Antonio\\3. TEACHING\\EVENT HISTORY ANALYSIS\\EHA Fall 2024\\4. Problem sets\\MHAS0121_PERSONMONTHS_TEST.csv", row.names = TRUE)
A. Models
i. Not controlling for age.
model_i <- glm(dead ~ female + schooling + locsize01, family = binomial, data = mhas_personmonth)
summary(model_i)
Call:
glm(formula = dead ~ female + schooling + locsize01, family = binomial,
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.341879 0.040457 -132.038 < 2e-16 ***
female -0.169605 0.029420 -5.765 8.16e-09 ***
schooling -0.063895 0.003981 -16.050 < 2e-16 ***
locsize01 -0.044814 0.013057 -3.432 0.000599 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64671 on 1548755 degrees of freedom
Residual deviance: 64370 on 1548752 degrees of freedom
(2761 observations deleted due to missingness)
AIC: 64378
Number of Fisher Scoring iterations: 9
ii. Controlling for age linearly.
model_ii <- glm(dead ~ female + schooling + locsize01 + age, family = binomial, data = mhas_personmonth)
summary(model_ii)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age, family = binomial,
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -11.590878 0.124633 -93.000 < 2e-16 ***
female -0.249253 0.029513 -8.445 < 2e-16 ***
schooling -0.016573 0.003927 -4.220 2.44e-05 ***
locsize01 -0.043019 0.013166 -3.267 0.00109 **
age 0.082480 0.001475 55.907 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64671 on 1548755 degrees of freedom
Residual deviance: 61321 on 1548751 degrees of freedom
(2761 observations deleted due to missingness)
AIC: 61331
Number of Fisher Scoring iterations: 9
iii. Controlling for age and age-squared.
mhas_personmonth <- mhas_personmonth %>% mutate(age_squared = age^2)
model_iii <- glm(dead ~ female + schooling + locsize01 + age + age_squared, family = binomial, data = mhas_personmonth)
summary(model_iii)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age + age_squared,
family = binomial, data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -12.950667 0.670706 -19.309 < 2e-16 ***
female -0.251018 0.029522 -8.503 < 2e-16 ***
schooling -0.016543 0.003926 -4.213 2.51e-05 ***
locsize01 -0.043204 0.013167 -3.281 0.00103 **
age 0.117773 0.017160 6.863 6.73e-12 ***
age_squared -0.000225 0.000109 -2.064 0.03902 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64671 on 1548755 degrees of freedom
Residual deviance: 61317 on 1548750 degrees of freedom
(2761 observations deleted due to missingness)
AIC: 61329
Number of Fisher Scoring iterations: 9
iv. Controlling for 5-year age groups (top-coded at 85+).
mhas_personmonth <- mhas_personmonth %>%
  mutate(age_group = cut(age, breaks = seq(0, 90, by = 5), right = FALSE))
# Note: these breaks yield half-open intervals ending at [85,90), not an open-ended
# 85+ group, so person-months at ages 90+ are coded NA and dropped (hence the larger
# count of observations deleted due to missingness in this model).
model_iv <- glm(dead ~ female + schooling + locsize01 + age + age_group, family = binomial, data = mhas_personmonth)
summary(model_iv)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age + age_group,
family = binomial, data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -12.212054 0.609769 -20.027 < 2e-16 ***
female -0.275696 0.031532 -8.743 < 2e-16 ***
schooling -0.019635 0.004127 -4.758 1.95e-06 ***
locsize01 -0.048479 0.014189 -3.417 0.000634 ***
age 0.096076 0.010892 8.821 < 2e-16 ***
age_group[55,60) -0.378618 0.199139 -1.901 0.057266 .
age_group[60,65) -0.177407 0.204161 -0.869 0.384869
age_group[65,70) -0.136494 0.231969 -0.588 0.556253
age_group[70,75) -0.383645 0.269430 -1.424 0.154471
age_group[75,80) -0.438829 0.312462 -1.404 0.160192
age_group[80,85) -0.485129 0.358462 -1.353 0.175940
age_group[85,90) -0.488751 0.406437 -1.203 0.229160
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 57413 on 1512938 degrees of freedom
Residual deviance: 55138 on 1512927 degrees of freedom
(38578 observations deleted due to missingness)
AIC: 55162
Number of Fisher Scoring iterations: 10
v. Any other(s) you may want to try, if any at all.
va. Cubic age term.
mhas_personmonth <- mhas_personmonth %>% mutate(age_cubed = age^3)
model_va <- glm(dead ~ female + schooling + locsize01 + age + age_cubed, family = binomial, data = mhas_personmonth)
summary(model_va)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age + age_cubed,
family = binomial, data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.258e+01 4.584e-01 -27.448 < 2e-16 ***
female -2.513e-01 2.952e-02 -8.511 < 2e-16 ***
schooling -1.654e-02 3.926e-03 -4.213 2.52e-05 ***
locsize01 -4.325e-02 1.317e-02 -3.285 0.00102 **
age 1.018e-01 8.715e-03 11.680 < 2e-16 ***
age_cubed -1.031e-06 4.590e-07 -2.246 0.02468 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64671 on 1548755 degrees of freedom
Residual deviance: 61316 on 1548750 degrees of freedom
(2761 observations deleted due to missingness)
AIC: 61328
Number of Fisher Scoring iterations: 9
vb. Female × age interaction.
model_vb <- glm(dead ~ female * age + schooling + locsize01, family = binomial, data = mhas_personmonth)
summary(model_vb)
Call:
glm(formula = dead ~ female * age + schooling + locsize01, family = binomial,
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -11.267276 0.168743 -66.772 < 2e-16 ***
female -0.881373 0.225373 -3.911 9.20e-05 ***
age 0.078291 0.002093 37.408 < 2e-16 ***
schooling -0.017041 0.003931 -4.335 1.46e-05 ***
locsize01 -0.042486 0.013166 -3.227 0.00125 **
female:age 0.008112 0.002868 2.829 0.00467 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64671 on 1548755 degrees of freedom
Residual deviance: 61313 on 1548750 degrees of freedom
(2761 observations deleted due to missingness)
AIC: 61325
Number of Fisher Scoring iterations: 9
B. Assumptions of Baseline Hazard
Model I. No Age Control:
The baseline hazard is assumed constant with respect to age: the monthly probability of dying is the same at every age, conditional on the covariates. This assumption is unrealistic and runs counter to the mortality literature, so it is important to control for age.
Model II. Controlling for Age Linearly:
The baseline hazard changes linearly with age on the log-odds scale: each additional year of age has a fixed, additive effect on the log-odds of death. The model assumes mortality risk rises (or falls) at a constant rate per year of age, which may not capture more complex risk patterns that vary over the life course.
Model III. Controlling for Age and Age-Squared:
In this model, the baseline hazard has a quadratic relationship with age, allowing for non-linear effects. This can capture a U-shaped or inverted U-shaped relationship, where mortality risk changes at different rates across ages: for example, risk might accelerate as people age, or rise more slowly at the oldest ages. This approach is more flexible than the linear term alone and can better capture curvilinear patterns.
Model IV. Controlling for 5-Year Age Groups (Top-Coded at 85+):
The baseline hazard changes non-linearly, with a distinct level of risk for each age group. This approach allows discrete shifts in risk between age groups without imposing a continuous functional form. It is flexible enough to capture sudden increases or plateaus in risk within specific age brackets and is especially useful when mortality risk differs markedly at certain ages. (As noted in the code above, the cut() breaks used here end at [85,90) rather than forming an open-ended 85+ group, so ages 90+ are dropped from this model.)
Models Va and Vb. Other Specifications (Cubic Age Term, Interactions):
A cubic age term (Model Va) permits additional curvature, letting the hazard accelerate or decelerate at different points in the life course. Including interactions (Model Vb, age × sex) means the baseline hazard is not only age-dependent but also varies by other covariates: for example, it can capture differences between men and women in how the risk of death changes with age. The sketch below plots the baseline shapes these specifications imply.
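To make these assumptions concrete, here is a minimal sketch (assuming the fitted models above are still in the workspace) that plots the implied log-odds of death by age for a reference profile with female, schooling, and locsize01 all set to zero:

ages <- seq(50, 95, by = 1)
ref <- data.frame(female = 0, schooling = 0, locsize01 = 0,
                  age = ages, age_squared = ages^2, age_cubed = ages^3)
# default predict() type is "link", i.e., the log-odds scale
plot(ages, predict(model_ii, newdata = ref), type = "l",
     xlab = "Age", ylab = "Log-odds of death (reference profile)")
lines(ages, predict(model_iii, newdata = ref), lty = 2)  # quadratic (Model III)
lines(ages, predict(model_va, newdata = ref), lty = 3)   # cubic (Model Va)
abline(h = coef(model_i)[1], lty = 4)                    # constant (Model I)
legend("topleft", legend = c("linear", "quadratic", "cubic", "constant"), lty = 1:4)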
C. Model Comparison
Model AIC BIC
1 Model I 64377.68 64426.69
2 Model II 61331.44 61392.70
3 Model III 61329.07 61402.59
4 Model IV 55162.42 55309.18
5 Model V_a 61328.25 61401.77
6 Model V_b 61325.42 61398.94
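The chunk that produced this table is not shown above; a minimal way to reproduce it, assuming the six fitted models are in the workspace, is:

models_tbl <- list("Model I" = model_i, "Model II" = model_ii, "Model III" = model_iii,
                   "Model IV" = model_iv, "Model V_a" = model_va, "Model V_b" = model_vb)
data.frame(Model = names(models_tbl),
           AIC = sapply(models_tbl, AIC),
           BIC = sapply(models_tbl, BIC),
           row.names = NULL)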
library(tidyverse)
library(broom)  # needed for tidy()

# Create a list of models
models <- list(model_i, model_ii, model_iii, model_iv, model_va, model_vb)

# Extract coefficients, standard errors, and odds ratios from each model
model_summaries <- lapply(seq_along(models), function(i) {
  model <- models[[i]]
  tidy(model) %>%
    mutate(Odds_Ratio = exp(estimate),
           Model = paste("Model", i))
})

# Combine summaries into a single table
comparison_table <- bind_rows(model_summaries) %>%
  select(Model, term, estimate, std.error, Odds_Ratio, p.value)

# Pivot the data wider
df_wide <- comparison_table %>%
  pivot_wider(names_from = term,
              values_from = c(estimate, std.error, Odds_Ratio, p.value),
              names_glue = "{term}_{.value}")
df_wide
# A tibble: 6 × 61
Model `(Intercept)_estimate` female_estimate schooling_estimate
<chr> <dbl> <dbl> <dbl>
1 Model 1 -5.34 -0.170 -0.0639
2 Model 2 -11.6 -0.249 -0.0166
3 Model 3 -13.0 -0.251 -0.0165
4 Model 4 -12.2 -0.276 -0.0196
5 Model 5 -12.6 -0.251 -0.0165
6 Model 6 -11.3 -0.881 -0.0170
# ℹ 57 more variables: locsize01_estimate <dbl>, age_estimate <dbl>,
# age_squared_estimate <dbl>, `age_group[55,60)_estimate` <dbl>,
# `age_group[60,65)_estimate` <dbl>, `age_group[65,70)_estimate` <dbl>,
# `age_group[70,75)_estimate` <dbl>, `age_group[75,80)_estimate` <dbl>,
# `age_group[80,85)_estimate` <dbl>, `age_group[85,90)_estimate` <dbl>,
# age_cubed_estimate <dbl>, `female:age_estimate` <dbl>,
# `(Intercept)_std.error` <dbl>, female_std.error <dbl>, …
Most models produce consistent estimates for female, schooling, and locsize01. Model IV has the lowest AIC (55162.42) and BIC (55309.18), suggesting the best fit among the models listed, with an appropriate balance of fit and complexity; one caveat is that Model IV is estimated on a smaller sample (ages 90+ fall outside its age groups), so its AIC and BIC are not strictly comparable to the others. Model III and Model V_b have similar AIC values (61329.07 and 61325.42, respectively) and slightly different BIC values, indicating comparable fits with subtle differences in complexity penalties. Model I has the highest AIC and BIC (64377.68 and 64426.69), making it the least favorable model in terms of fit and complexity.

In Model IV, being female decreases the odds of death, more schooling reduces the odds, and smaller or more rural localities are associated with reduced odds. The odds of death increase with age. The age-group dummies capture deviations from the linear age trend, but most are not statistically significant; only the [55,60) group shows a marginally significant reduction.
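To read the Model IV estimates on the odds scale, a minimal sketch (assuming model_iv is still in the workspace) is to exponentiate the coefficients alongside Wald confidence intervals:

# Odds ratios and 95% Wald CIs for Model IV
exp(cbind(OR = coef(model_iv), confint.default(model_iv)))

For example, exp(-0.276) ≈ 0.76: roughly 24% lower odds of death in a given person-month for women, holding the other covariates constant.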
D. Logistic and Complementary Log-Log Models
Da. Logistic Model
logit_model <- glm(dead ~ age_group, data = mhas_personmonth, family = binomial(link = "logit"))
summary(logit_model)
Call:
glm(formula = dead ~ age_group, family = binomial(link = "logit"),
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.40036 0.16908 -43.768 < 2e-16 ***
age_group[55,60) 0.03536 0.19350 0.183 0.855
age_group[60,65) 0.71102 0.17793 3.996 6.44e-05 ***
age_group[65,70) 1.22275 0.17364 7.042 1.90e-12 ***
age_group[70,75) 1.44536 0.17293 8.358 < 2e-16 ***
age_group[75,80) 1.86759 0.17262 10.819 < 2e-16 ***
age_group[80,85) 2.29911 0.17272 13.311 < 2e-16 ***
age_group[85,90) 2.77515 0.17355 15.991 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 57551 on 1514413 degrees of freedom
Residual deviance: 55439 on 1514406 degrees of freedom
(37103 observations deleted due to missingness)
AIC: 55455
Number of Fisher Scoring iterations: 10
Db. Clog-Log Model
cloglog_model <- glm(dead ~ age_group, data = mhas_personmonth, family = binomial(link = "cloglog"))
summary(cloglog_model)
Call:
glm(formula = dead ~ age_group, family = binomial(link = "cloglog"),
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.40066 0.16903 -43.783 < 2e-16 ***
age_group[55,60) 0.03535 0.19344 0.183 0.855
age_group[60,65) 0.71070 0.17787 3.996 6.45e-05 ***
age_group[65,70) 1.22202 0.17358 7.040 1.92e-12 ***
age_group[70,75) 1.44437 0.17287 8.355 < 2e-16 ***
age_group[75,80) 1.86592 0.17256 10.813 < 2e-16 ***
age_group[80,85) 2.29638 0.17265 13.301 < 2e-16 ***
age_group[85,90) 2.77057 0.17345 15.973 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 57551 on 1514413 degrees of freedom
Residual deviance: 55439 on 1514406 degrees of freedom
(37103 observations deleted due to missingness)
AIC: 55455
Number of Fisher Scoring iterations: 10
# Predicted probabilities for logistic regression
logit_preds <- predict(logit_model, type = "response")

# Predicted probabilities for complementary log-log regression
cloglog_preds <- predict(cloglog_model, type = "response")
# # Combine the predicted values with the age group variable for comparison
# comparison_df <- data.frame(age_group = mhas_personmonth$age_group,
#                             logit_preds = logit_preds,
#                             cloglog_preds = cloglog_preds)
#
# # View the comparison dataframe
# head(comparison_df)
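The commented code above would fail on a length mismatch, since the fitted values cover only the estimation sample (rows with a non-missing age_group). A working sketch of the intended comparison, averaging predicted monthly death probabilities by age group under each link:

# Restrict to the estimation sample, then compare mean predictions by age group
est <- subset(mhas_personmonth, !is.na(age_group))
comparison_df <- data.frame(age_group = est$age_group,
                            logit_preds = predict(logit_model, newdata = est, type = "response"),
                            cloglog_preds = predict(cloglog_model, newdata = est, type = "response"))
aggregate(cbind(logit_preds, cloglog_preds) ~ age_group, data = comparison_df, FUN = mean)

Because monthly death probabilities are small, the logit and cloglog links should produce nearly identical predictions here, consistent with the two models' identical deviance and AIC above.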