setwd("C:\\Users\\anami\\OneDrive\\Documents\\EHA\\Assignment 5 Revised")
data1 <-read.csv("C:\\Users\\anami\\OneDrive\\Documents\\EHA\\Assignment 5 Revised\\MHAS0121 NO LABELS NO NEGATIVE TIMES.csv")
data2<-na.omit(data1)
library(survival)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data2<-data2 %>%
mutate(
startmo = ageint*12,
endmo = agecensor*12,
dead = died0121
)
summary(data2)
## id locsize01 perwght01 died18 died21
## Min. : 2 Min. :1.000 Min. : 17 Min. :0 Min. :0.0000
## 1st Qu.: 2770 1st Qu.:1.000 1st Qu.: 204 1st Qu.:0 1st Qu.:0.0000
## Median : 5425 Median :1.000 Median : 466 Median :0 Median :0.0000
## Mean : 5487 Mean :1.898 Mean : 1100 Mean :0 Mean :0.1896
## 3rd Qu.: 8308 3rd Qu.:3.000 3rd Qu.: 1193 3rd Qu.:0 3rd Qu.:0.0000
## Max. :10992 Max. :4.000 Max. :44177 Max. :0 Max. :1.0000
## mobirth yrbirth female moint
## Min. : 1.000 Min. :1907 Min. :0.0000 Min. : 3.000
## 1st Qu.: 4.000 1st Qu.:1937 1st Qu.:0.0000 1st Qu.: 7.000
## Median : 6.000 Median :1943 Median :1.0000 Median : 7.000
## Mean : 6.544 Mean :1942 Mean :0.5586 Mean : 7.516
## 3rd Qu.:10.000 3rd Qu.:1947 3rd Qu.:1.0000 3rd Qu.: 8.000
## Max. :12.000 Max. :1950 Max. :1.0000 Max. :11.000
## yrint schooling died0103 refused0103 lost0103
## Min. :2001 Min. : 0.00 Min. :0 Min. :0.00000 Min. :0
## 1st Qu.:2001 1st Qu.: 1.00 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0
## Median :2001 Median : 4.00 Median :0 Median :0.00000 Median :0
## Mean :2001 Mean : 4.61 Mean :0 Mean :0.01174 Mean :0
## 3rd Qu.:2001 3rd Qu.: 6.00 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0
## Max. :2001 Max. :19.00 Max. :0 Max. :1.00000 Max. :0
## followup0103 died0312 refused0312 lost0312
## Min. :0.0000 Min. :0.0000000 Min. :0.00000 Min. :0.000000
## 1st Qu.:1.0000 1st Qu.:0.0000000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :1.0000 Median :0.0000000 Median :0.00000 Median :0.000000
## Mean :0.9883 Mean :0.0002498 Mean :0.01474 Mean :0.005246
## 3rd Qu.:1.0000 3rd Qu.:0.0000000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000 Max. :1.0000000 Max. :1.00000 Max. :1.000000
## followup0312 died1215 refused1215 lost1215
## Min. :0.0000 Min. :0 Min. :0.000000 Min. :0.000000
## 1st Qu.:1.0000 1st Qu.:0 1st Qu.:0.000000 1st Qu.:0.000000
## Median :1.0000 Median :0 Median :0.000000 Median :0.000000
## Mean :0.9798 Mean :0 Mean :0.007494 Mean :0.006495
## 3rd Qu.:1.0000 3rd Qu.:0 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.0000 Max. :0 Max. :1.000000 Max. :1.000000
## followup1215 died1518 refused1518 lost1518
## Min. :0.000 Min. :0.0000000 Min. :0.000000 Min. :0.0000000
## 1st Qu.:1.000 1st Qu.:0.0000000 1st Qu.:0.000000 1st Qu.:0.0000000
## Median :1.000 Median :0.0000000 Median :0.000000 Median :0.0000000
## Mean :0.986 Mean :0.0002498 Mean :0.004746 Mean :0.0002498
## 3rd Qu.:1.000 3rd Qu.:0.0000000 3rd Qu.:0.000000 3rd Qu.:0.0000000
## Max. :1.000 Max. :1.0000000 Max. :1.000000 Max. :1.0000000
## followup1518 died1821 refused1821 lost1821
## Min. :0.0000 Min. :0.0000 Min. :0.000000 Min. :0.0000000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.0000000
## Median :1.0000 Median :0.0000 Median :0.000000 Median :0.0000000
## Mean :0.9948 Mean :0.2051 Mean :0.001499 Mean :0.0002498
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.0000000
## Max. :1.0000 Max. :1.0000 Max. :1.000000 Max. :1.0000000
## followup1821 died0121 mocensor yrcensor
## Min. :0.0000 Min. :0.0000 Min. : 1.000 Min. :2008
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.: 3.000 1st Qu.:2021
## Median :1.0000 Median :0.0000 Median :12.000 Median :2021
## Mean :0.7932 Mean :0.2051 Mean : 8.628 Mean :2021
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:12.000 3rd Qu.:2021
## Max. :1.0000 Max. :1.0000 Max. :99.000 Max. :2022
## ageint agecensor ageintexact exposure_months
## Min. :50.50 Min. : 67.42 Min. :50.50 Min. : 78.0
## 1st Qu.:54.00 1st Qu.: 74.00 1st Qu.:54.00 1st Qu.:243.0
## Median :57.92 Median : 77.92 Median :57.92 Median :244.0
## Mean :59.49 Mean : 79.18 Mean :59.49 Mean :236.3
## 3rd Qu.:63.75 3rd Qu.: 83.17 3rd Qu.:63.75 3rd Qu.:245.0
## Max. :94.17 Max. :114.42 Max. :94.17 Max. :296.0
## educlevel ageintmo agecensormo educ1
## Min. : 0.0 Min. : 606.0 Min. : 809.0 Min. :0.0000
## 1st Qu.: 15.0 1st Qu.: 648.0 1st Qu.: 888.0 1st Qu.:0.0000
## Median : 15.0 Median : 695.0 Median : 935.0 Median :0.0000
## Mean :187.7 Mean : 713.8 Mean : 950.1 Mean :0.2323
## 3rd Qu.: 68.0 3rd Qu.: 765.0 3rd Qu.: 998.0 3rd Qu.:0.0000
## Max. :919.0 Max. :1130.0 Max. :1373.0 Max. :1.0000
## educ2 educ3 educ4 locsize011
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.3507 Mean :0.2358 Mean :0.1811 Mean :0.5656
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## locsize012 locsize013 locsize014 startmo
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. : 606.0
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 648.0
## Median :0.0000 Median :0.00000 Median :0.0000 Median : 695.0
## Mean :0.1549 Mean :0.09518 Mean :0.1844 Mean : 713.8
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.: 765.0
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1130.0
## endmo dead
## Min. : 809.0 Min. :0.0000
## 1st Qu.: 888.0 1st Qu.:0.0000
## Median : 935.0 Median :0.0000
## Mean : 950.1 Mean :0.2051
## 3rd Qu.: 998.0 3rd Qu.:0.0000
## Max. :1373.0 Max. :1.0000
max(data2$exposure_months)
## [1] 296
# Expands data into person-months, up to maximum number observed in
mhas_personmonth<-survSplit(data2, cut=c(1:1000), start="startmo", end="endmo", event="dead")
# Sorting data by ID
mhas_personmonth<-mhas_personmonth[order (mhas_personmonth$id, mhas_personmonth$startmo),]
mhas_personmonth<-mhas_personmonth %>%
mutate(
age=startmo/12,
agesq=age^2,
.after = "died0121"
)
library(dplyr)
library(ggplot2)
#install.packages("AMR")
mhas_personmonth <- mhas_personmonth %>%
mutate(
# Create age groups
age5 = dplyr::case_when(
age >= 50 & age < 55 ~ "50-54",
age >= 55 & age < 60 ~ "55-59",
age >= 60 & age < 65 ~ "60-64",
age >= 65 & age < 70 ~ "65-69",
age >= 70 & age < 75 ~ "70-74",
age >= 75 & age < 80 ~ "75-79",
age >= 80 & age < 85 ~ "80-84",
age >= 85 ~ "85+"
),
# Convert to factor
age5 = factor(
age5,
level = c("50-54", "55-59","60-64","65-69","70-74","75-79","80-84","85+")
)
)
testPMdata1 <- subset(mhas_personmonth, id < 50, select=c(id,ageint,agecensor,startmo,endmo,age,age5,died0121,dead))
# Load the writexl library
library(writexl)
# Write the data frame to an Excel file
write_xlsx(testPMdata1, "testPMdata.xlsx")
# i. Not controlling for age.
fit.constant<-glm(dead ~ female + as.factor(educlevel) + as.factor(locsize01), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.constant)
##
## Call:
## glm(formula = dead ~ female + as.factor(educlevel) + as.factor(locsize01),
## family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.56988 0.09168 -71.665 < 2e-16 ***
## female -0.13260 0.07111 -1.865 0.062210 .
## as.factor(educlevel)15 -0.25956 0.08869 -2.926 0.003428 **
## as.factor(educlevel)68 -0.36804 0.10111 -3.640 0.000273 ***
## as.factor(educlevel)919 -0.66249 0.11996 -5.523 3.34e-08 ***
## as.factor(locsize01)2 -0.04905 0.09983 -0.491 0.623155
## as.factor(locsize01)3 -0.12125 0.12449 -0.974 0.330074
## as.factor(locsize01)4 -0.21389 0.10013 -2.136 0.032661 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 13080 on 888728 degrees of freedom
## AIC: 13096
##
## Number of Fisher Scoring iterations: 10
exp(coef(fit.constant))
## (Intercept) female as.factor(educlevel)15
## 0.001401965 0.875815926 0.771393552
## as.factor(educlevel)68 as.factor(educlevel)919 as.factor(locsize01)2
## 0.692090427 0.515565108 0.952129561
## as.factor(locsize01)3 as.factor(locsize01)4
## 0.885811648 0.807433892
#ii. Controlling for age linearly.
fit.linear<-glm(dead ~ age + female + as.factor(educlevel) + as.factor(locsize01), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.linear)
##
## Call:
## glm(formula = dead ~ age + female + as.factor(educlevel) + as.factor(locsize01),
## family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -25.967008 0.622183 -41.735 < 2e-16 ***
## age 0.260193 0.007847 33.158 < 2e-16 ***
## female -0.317962 0.071264 -4.462 8.13e-06 ***
## as.factor(educlevel)15 -0.093447 0.089087 -1.049 0.294
## as.factor(educlevel)68 0.071873 0.101798 0.706 0.480
## as.factor(educlevel)919 -0.014564 0.120864 -0.120 0.904
## as.factor(locsize01)2 0.018055 0.100613 0.179 0.858
## as.factor(locsize01)3 -0.176475 0.125201 -1.410 0.159
## as.factor(locsize01)4 -0.135233 0.100748 -1.342 0.180
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11322 on 888727 degrees of freedom
## AIC: 11340
##
## Number of Fisher Scoring iterations: 11
exp(coef(fit.linear))
## (Intercept) age female
## 5.280460e-12 1.297181e+00 7.276301e-01
## as.factor(educlevel)15 as.factor(educlevel)68 as.factor(educlevel)919
## 9.107866e-01 1.074519e+00 9.855414e-01
## as.factor(locsize01)2 as.factor(locsize01)3 as.factor(locsize01)4
## 1.018219e+00 8.382194e-01 8.735121e-01
#iii. Controlling for age and age-squared .
fit.quadratic<-glm(dead ~ age + agesq + female + as.factor(educlevel) + as.factor(locsize01), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.quadratic)
##
## Call:
## glm(formula = dead ~ age + agesq + female + as.factor(educlevel) +
## as.factor(locsize01), family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.0588558 4.3248237 2.326 0.020 *
## age -0.7130010 0.1180136 -6.042 1.53e-09 ***
## agesq 0.0065284 0.0008011 8.149 3.66e-16 ***
## female -0.3174340 0.0713422 -4.449 8.61e-06 ***
## as.factor(educlevel)15 -0.0863476 0.0891911 -0.968 0.333
## as.factor(educlevel)68 0.0827675 0.1019707 0.812 0.417
## as.factor(educlevel)919 0.0003919 0.1211784 0.003 0.997
## as.factor(locsize01)2 0.0190345 0.1006706 0.189 0.850
## as.factor(locsize01)3 -0.1765767 0.1252922 -1.409 0.159
## as.factor(locsize01)4 -0.1283252 0.1008291 -1.273 0.203
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11271 on 888726 degrees of freedom
## AIC: 11291
##
## Number of Fisher Scoring iterations: 12
exp(coef(fit.quadratic))
## (Intercept) age agesq
## 2.336176e+04 4.901710e-01 1.006550e+00
## female as.factor(educlevel)15 as.factor(educlevel)68
## 7.280147e-01 9.172753e-01 1.086289e+00
## as.factor(educlevel)919 as.factor(locsize01)2 as.factor(locsize01)3
## 1.000392e+00 1.019217e+00 8.381345e-01
## as.factor(locsize01)4
## 8.795673e-01
#iv. Controlling for 5-year age groups (top-coded at 85+).
fit.pch5<-glm(dead ~ as.factor(age5) + as.factor(female) + as.factor(educlevel) + as.factor(locsize01), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.pch5)
##
## Call:
## glm(formula = dead ~ as.factor(age5) + as.factor(female) + as.factor(educlevel) +
## as.factor(locsize01), family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.352208 428.235696 -0.055 0.957
## as.factor(age5)55-59 0.007880 488.897896 0.000 1.000
## as.factor(age5)60-64 0.016416 469.715144 0.000 1.000
## as.factor(age5)65-69 15.841814 428.235699 0.037 0.970
## as.factor(age5)70-74 16.600548 428.235693 0.039 0.969
## as.factor(age5)75-79 16.983793 428.235694 0.040 0.968
## as.factor(age5)80-84 18.751888 428.235690 0.044 0.965
## as.factor(age5)85+ 23.232129 428.236669 0.054 0.957
## as.factor(female)1 -0.299973 0.071326 -4.206 2.6e-05 ***
## as.factor(educlevel)15 -0.101122 0.089217 -1.133 0.257
## as.factor(educlevel)68 0.033160 0.101993 0.325 0.745
## as.factor(educlevel)919 -0.082844 0.121098 -0.684 0.494
## as.factor(locsize01)2 0.008283 0.100552 0.082 0.934
## as.factor(locsize01)3 -0.166632 0.125166 -1.331 0.183
## as.factor(locsize01)4 -0.144852 0.100738 -1.438 0.150
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11433 on 888721 degrees of freedom
## AIC: 11463
##
## Number of Fisher Scoring iterations: 22
exp(coef(fit.pch5))
## (Intercept) as.factor(age5)55-59 as.factor(age5)60-64
## 7.215477e-11 1.007911e+00 1.016552e+00
## as.factor(age5)65-69 as.factor(age5)70-74 as.factor(age5)75-79
## 7.585989e+06 1.620043e+07 2.376663e+07
## as.factor(age5)80-84 as.factor(age5)85+ as.factor(female)1
## 1.392648e+08 1.229095e+10 7.408379e-01
## as.factor(educlevel)15 as.factor(educlevel)68 as.factor(educlevel)919
## 9.038225e-01 1.033716e+00 9.204943e-01
## as.factor(locsize01)2 as.factor(locsize01)3 as.factor(locsize01)4
## 1.008317e+00 8.465109e-01 8.651504e-01
#v. Any other(s) you may want to try, if any at all.
fit.interaction<-glm(dead ~ age*female + as.factor(educlevel) + as.factor(locsize01), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.interaction)
##
## Call:
## glm(formula = dead ~ age * female + as.factor(educlevel) + as.factor(locsize01),
## family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.84624 0.80707 -29.547 < 2e-16 ***
## age 0.23278 0.01035 22.489 < 2e-16 ***
## female -4.91331 1.23472 -3.979 6.91e-05 ***
## as.factor(educlevel)15 -0.08695 0.08912 -0.976 0.329241
## as.factor(educlevel)68 0.07055 0.10186 0.693 0.488560
## as.factor(educlevel)919 -0.02418 0.12104 -0.200 0.841662
## as.factor(locsize01)2 0.01767 0.10058 0.176 0.860543
## as.factor(locsize01)3 -0.17015 0.12520 -1.359 0.174145
## as.factor(locsize01)4 -0.13444 0.10075 -1.334 0.182102
## age:female 0.05891 0.01579 3.731 0.000191 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11308 on 888726 degrees of freedom
## AIC: 11328
##
## Number of Fisher Scoring iterations: 11
exp(coef(fit.interaction))
## (Intercept) age female
## 4.402609e-11 1.262104e+00 7.348116e-03
## as.factor(educlevel)15 as.factor(educlevel)68 as.factor(educlevel)919
## 9.167204e-01 1.073100e+00 9.761110e-01
## as.factor(locsize01)2 as.factor(locsize01)3 as.factor(locsize01)4
## 1.017828e+00 8.435395e-01 8.742068e-01
## age:female
## 1.060676e+00
Each model specification makes different assumptions about how age influences the baseline hazard:
No Age Control (model_no_age): By excluding age, this model assumes that the baseline hazard is constant across ages, implying that age does not affect mortality risk. This is a simplistic and unrealistic assumption, as mortality risk generally increases with age.
Linear Age Control (model_linear_age): This model assumes a linear relationship between age and the hazard rate, capturing a general increase in risk as age rises. While it reflects the increasing mortality risk with age, it may not fully capture the accelerating risk seen in older age groups, where mortality tends to rise at a faster rate.
Quadratic Age Control (age and age-squared): Adding an age-squared term allows the model to capture a non-linear, accelerating increase in hazard with age, aligning more closely with mortality trends. This specification accounts for the rapid increase in mortality risk in older age groups.
Categorical Age Groups (5-year age groups): By treating age in 5-year intervals, this model does not impose a specific functional form on the age-hazard relationship. Each age group has its own baseline hazard, allowing for more flexibility to capture variations in risk across age groups. This approach models hazard changes in a stepwise manner rather than as a continuous function.
These distinctions highlight the importance of choosing an age specification that aligns with the mortality patterns in the data.
# Calculate AIC and BIC for each model
model_comparison <- data.frame(
Model = c("Without Age Control", "Linear Age Control", "Age + Age^2", "5-Year Age Groups"),
AIC = c(AIC(fit.constant), AIC(fit.linear), AIC(fit.quadratic),AIC(fit.pch5)),
BIC = c(BIC(fit.constant), BIC(fit.linear), BIC(fit.quadratic),BIC(fit.pch5))
)
print(model_comparison)
## Model AIC BIC
## 1 Without Age Control 13095.50 13189.09
## 2 Linear Age Control 11339.99 11445.27
## 3 Age + Age^2 11290.54 11407.51
## 4 5-Year Age Groups 11463.44 11638.90
While age + age² captures a nonlinear trend, interpreting the exact impact of age on mortality is straightforward. In contrast, 5-year age groups provide age-specific estimates but may be harder to analyze for trend analyses. If specific age thresholds are clinically meaningful, 5-year age groups might offer practical insights for age-related mortality patterns despite a slightly worse fit.
# Load necessary libraries
library(ggplot2)
library(scales)
# Fit the logistic and complementary log-log models
fit.pch5_noctrls1 <- glm(dead ~ as.factor(age5) + female + as.factor(educlevel) + as.factor(locsize01),
data = mhas_personmonth, family = binomial(link = "logit"))
fit.pch5_noctrls2 <- glm(dead ~ as.factor(age5) + female + as.factor(educlevel) + as.factor(locsize01),
data = mhas_personmonth, family = binomial(link = "cloglog"))
# Summarize the models
summary(fit.pch5_noctrls1)
##
## Call:
## glm(formula = dead ~ as.factor(age5) + female + as.factor(educlevel) +
## as.factor(locsize01), family = binomial(link = "logit"),
## data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.352208 428.235696 -0.055 0.957
## as.factor(age5)55-59 0.007880 488.897896 0.000 1.000
## as.factor(age5)60-64 0.016416 469.715144 0.000 1.000
## as.factor(age5)65-69 15.841814 428.235699 0.037 0.970
## as.factor(age5)70-74 16.600548 428.235693 0.039 0.969
## as.factor(age5)75-79 16.983793 428.235694 0.040 0.968
## as.factor(age5)80-84 18.751888 428.235690 0.044 0.965
## as.factor(age5)85+ 23.232129 428.236669 0.054 0.957
## female -0.299973 0.071326 -4.206 2.6e-05 ***
## as.factor(educlevel)15 -0.101122 0.089217 -1.133 0.257
## as.factor(educlevel)68 0.033160 0.101993 0.325 0.745
## as.factor(educlevel)919 -0.082844 0.121098 -0.684 0.494
## as.factor(locsize01)2 0.008283 0.100552 0.082 0.934
## as.factor(locsize01)3 -0.166632 0.125166 -1.331 0.183
## as.factor(locsize01)4 -0.144852 0.100738 -1.438 0.150
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11433 on 888721 degrees of freedom
## AIC: 11463
##
## Number of Fisher Scoring iterations: 22
summary(fit.pch5_noctrls2)
##
## Call:
## glm(formula = dead ~ as.factor(age5) + female + as.factor(educlevel) +
## as.factor(locsize01), family = binomial(link = "cloglog"),
## data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.264571 409.439593 -0.057 0.955
## as.factor(age5)55-59 0.007863 467.439263 0.000 1.000
## as.factor(age5)60-64 0.016368 449.098475 0.000 1.000
## as.factor(age5)65-69 15.751740 409.439596 0.038 0.969
## as.factor(age5)70-74 16.510187 409.439589 0.040 0.968
## as.factor(age5)75-79 16.893154 409.439590 0.041 0.967
## as.factor(age5)80-84 18.657862 409.439586 0.046 0.964
## as.factor(age5)85+ 22.845754 409.440224 0.056 0.956
## female -0.297978 0.071138 -4.189 2.81e-05 ***
## as.factor(educlevel)15 -0.099404 0.088984 -1.117 0.264
## as.factor(educlevel)68 0.033667 0.101769 0.331 0.741
## as.factor(educlevel)919 -0.081931 0.120869 -0.678 0.498
## as.factor(locsize01)2 0.010016 0.100222 0.100 0.920
## as.factor(locsize01)3 -0.165974 0.124907 -1.329 0.184
## as.factor(locsize01)4 -0.144493 0.100499 -1.438 0.151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11434 on 888721 degrees of freedom
## AIC: 11464
##
## Number of Fisher Scoring iterations: 22
# Convert coefficients to regular numbers for logistic model
logit_coefficients <- coef(fit.pch5_noctrls1)
logit_coefficients_formatted <- format(logit_coefficients, scientific = FALSE)
# Convert coefficients to regular numbers for complementary log-log model
cloglog_coefficients <- coef(fit.pch5_noctrls2)
cloglog_coefficients_formatted <- format(cloglog_coefficients, scientific = FALSE)
# Print formatted coefficients for both models
print("Logistic Model Coefficients (Formatted):")
## [1] "Logistic Model Coefficients (Formatted):"
print(logit_coefficients_formatted)
## (Intercept) as.factor(age5)55-59 as.factor(age5)60-64
## "-23.352207664" " 0.007879531" " 0.016416364"
## as.factor(age5)65-69 as.factor(age5)70-74 as.factor(age5)75-79
## " 15.841813539" " 16.600548153" " 16.983792970"
## as.factor(age5)80-84 as.factor(age5)85+ female
## " 18.751887931" " 23.232129211" " -0.299973388"
## as.factor(educlevel)15 as.factor(educlevel)68 as.factor(educlevel)919
## " -0.101122272" " 0.033160230" " -0.082844479"
## as.factor(locsize01)2 as.factor(locsize01)3 as.factor(locsize01)4
## " 0.008282517" " -0.166632208" " -0.144851967"
print("Complementary Log-Log Model Coefficients (Formatted):")
## [1] "Complementary Log-Log Model Coefficients (Formatted):"
print(cloglog_coefficients_formatted)
## (Intercept) as.factor(age5)55-59 as.factor(age5)60-64
## "-23.264571436" " 0.007862525" " 0.016367777"
## as.factor(age5)65-69 as.factor(age5)70-74 as.factor(age5)75-79
## " 15.751739841" " 16.510186636" " 16.893153669"
## as.factor(age5)80-84 as.factor(age5)85+ female
## " 18.657862048" " 22.845754485" " -0.297977860"
## as.factor(educlevel)15 as.factor(educlevel)68 as.factor(educlevel)919
## " -0.099403506" " 0.033666597" " -0.081931344"
## as.factor(locsize01)2 as.factor(locsize01)3 as.factor(locsize01)4
## " 0.010016181" " -0.165974487" " -0.144492938"
# Exponentiate coefficients for female, education levels, and locality size for both models
logit_exp_coefficients <- exp(logit_coefficients[c("female", "as.factor(educlevel)15", "as.factor(educlevel)68",
"as.factor(educlevel)919", "as.factor(locsize01)2",
"as.factor(locsize01)3", "as.factor(locsize01)4")])
cloglog_exp_coefficients <- exp(cloglog_coefficients[c("female", "as.factor(educlevel)15", "as.factor(educlevel)68",
"as.factor(educlevel)919", "as.factor(locsize01)2",
"as.factor(locsize01)3", "as.factor(locsize01)4")])
# Print exponentiated coefficients for both models
print("Exponentiated Coefficients (Odds Ratios) - Logistic Model:")
## [1] "Exponentiated Coefficients (Odds Ratios) - Logistic Model:"
print(logit_exp_coefficients)
## female as.factor(educlevel)15 as.factor(educlevel)68
## 0.7408379 0.9038225 1.0337162
## as.factor(educlevel)919 as.factor(locsize01)2 as.factor(locsize01)3
## 0.9204943 1.0083169 0.8465109
## as.factor(locsize01)4
## 0.8651504
print("Exponentiated Coefficients (Odds Ratios) - Complementary Log-Log Model:")
## [1] "Exponentiated Coefficients (Odds Ratios) - Complementary Log-Log Model:"
print(cloglog_exp_coefficients)
## female as.factor(educlevel)15 as.factor(educlevel)68
## 0.7423178 0.9053773 1.0342397
## as.factor(educlevel)919 as.factor(locsize01)2 as.factor(locsize01)3
## 0.9213352 1.0100665 0.8470678
## as.factor(locsize01)4
## 0.8654610
# Obtain predicted values for each age group in both models
predicted_logit <- predict(fit.pch5_noctrls1, type = "response")
predicted_cloglog <- predict(fit.pch5_noctrls2, type = "response")
# Combine the predictions with age groups for comparison
predictions <- data.frame(
Age_Group = mhas_personmonth$age5,
Logistic_Prediction = predicted_logit,
Complementary_LogLog_Prediction = predicted_cloglog
)
# Aggregate predicted values by age group for comparison
predictions_summary <- aggregate(. ~ Age_Group, data = predictions, mean)
# Plotting the predictions with readable y-axis labels
ggplot(predictions_summary, aes(x = Age_Group)) +
geom_line(aes(y = Logistic_Prediction, color = "Logistic Model", group = 1), size = 1) +
geom_line(aes(y = Complementary_LogLog_Prediction, color = "Complementary Log-Log Model", group = 1), size = 1) +
geom_point(aes(y = Logistic_Prediction, color = "Logistic Model"), size = 2) +
geom_point(aes(y = Complementary_LogLog_Prediction, color = "Complementary Log-Log Model"), size = 2) +
scale_y_log10(labels = label_number()) + # Use label_number() for readable labels
labs(
title = "Model Comparison of Predicted Probabilities by Age Group",
x = "Age Group",
y = "Predicted Probability (Log Scale)",
color = "Model"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Fit the model
fit.pch5_noctrlsa <- glm(dead ~ as.factor(age5), data = mhas_personmonth, family = binomial(link = "logit"))
summary(fit.pch5_noctrlsa)
##
## Call:
## glm(formula = dead ~ as.factor(age5), family = binomial(link = "logit"),
## data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.357e+01 4.289e+02 -0.055 0.956
## as.factor(age5)55-59 2.622e-07 4.897e+02 0.000 1.000
## as.factor(age5)60-64 2.601e-07 4.705e+02 0.000 1.000
## as.factor(age5)65-69 1.582e+01 4.289e+02 0.037 0.971
## as.factor(age5)70-74 1.657e+01 4.289e+02 0.039 0.969
## as.factor(age5)75-79 1.694e+01 4.289e+02 0.039 0.968
## as.factor(age5)80-84 1.870e+01 4.289e+02 0.044 0.965
## as.factor(age5)85+ 2.316e+01 4.289e+02 0.054 0.957
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11456 on 888728 degrees of freedom
## AIC: 11472
##
## Number of Fisher Scoring iterations: 22
AIC(fit.pch5_noctrlsa)
## [1] 11472.07
BIC(fit.pch5_noctrlsa)
## [1] 11565.65
# Create a new data frame with only the relevant age groups for prediction
xvalues <- data.frame(age5 = c("50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+"))
xvalues$phat <- predict(fit.pch5_noctrlsa, xvalues, type = "response")
# Plotting the predicted probabilities
library(ggplot2)
ggplot(xvalues, aes(x = age5, y = phat, group = 1)) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Predicted Probability of Death", title = "Predicted Death Probabilities for 5-Year Categories") +
theme_minimal()
# Fit the model
fit.pch5_noctrlsb <- glm(dead ~ as.factor(age5), data = mhas_personmonth, family = binomial(link = "cloglog"))
summary(fit.pch5_noctrlsb)
##
## Call:
## glm(formula = dead ~ as.factor(age5), family = binomial(link = "cloglog"),
## data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.348e+01 4.101e+02 -0.057 0.954
## as.factor(age5)55-59 5.306e-07 4.682e+02 0.000 1.000
## as.factor(age5)60-64 5.283e-07 4.498e+02 0.000 1.000
## as.factor(age5)65-69 1.573e+01 4.101e+02 0.038 0.969
## as.factor(age5)70-74 1.648e+01 4.101e+02 0.040 0.968
## as.factor(age5)75-79 1.685e+01 4.101e+02 0.041 0.967
## as.factor(age5)80-84 1.861e+01 4.101e+02 0.045 0.964
## as.factor(age5)85+ 2.280e+01 4.101e+02 0.056 0.956
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11456 on 888728 degrees of freedom
## AIC: 11472
##
## Number of Fisher Scoring iterations: 22
AIC(fit.pch5_noctrlsb)
## [1] 11472.07
BIC(fit.pch5_noctrlsb)
## [1] 11565.65
# Create a new data frame with only the relevant age groups for prediction
xvalues <- data.frame(age5 = c("50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+"))
xvalues$phat <- predict(fit.pch5_noctrlsb, xvalues, type = "response")
# Plotting the predicted probabilities
library(ggplot2)
ggplot(xvalues, aes(x = age5, y = phat, group = 1)) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Predicted Probability of Death", title = "Predicted Death Probabilities for 5-Year Categories") +
theme_minimal()
# Load knitr package for displaying tables
library(knitr)
# Create the data frame
odds_ratios_table <- data.frame(
Variable = c("Female", "Schooling 1-5 years", "Schooling 6-8 years",
"Schooling 9 years or more", "locsize2",
"locsize3", "locsize4"),
`Logistic Model (Odds Ratio)` = c(0.7408, 0.9038, 1.0337, 0.9205, 1.0083, 0.8465, 0.8652),
`Complementary Log-Log Model (Odds Ratio)` = c(0.7424, 0.9054, 1.0342, 0.9213, 1.0101, 0.8471, 0.8655)
)
# Display the table
kable(odds_ratios_table, caption = "Exponentiated Coefficients (Odds Ratios) for Gender, Education Levels, and Locality Size")
Variable | Logistic.Model..Odds.Ratio. | Complementary.Log.Log.Model..Odds.Ratio. |
---|---|---|
Female | 0.7408 | 0.7424 |
Schooling 1-5 years | 0.9038 | 0.9054 |
Schooling 6-8 years | 1.0337 | 1.0342 |
Schooling 9 years or more | 0.9205 | 0.9213 |
locsize2 | 1.0083 | 1.0101 |
locsize3 | 0.8465 | 0.8471 |
locsize4 | 0.8652 | 0.8655 |
Logit Model Interpretation Intercept (-23.35, p = 0.957) is not significant; represents the baseline log-odds of mortality for the reference categories. None of the age groups are substantial (p > 0.05), indicating no significant difference in mortality across age groups relative to the 50-54 reference group. Females have significantly lower odds of mortality than males. The odds ratio is approximately 0.74, meaning females have a 26% lower risk of death than males, holding other factors constant. No education levels are significant. This suggests that educational attainment does not significantly influence mortality in this model. None of the locality size categories are significant, indicating that locality size does not meaningfully impact mortality risk in this model. Complementary Log-Log Model Interpretation Intercept (-23.26, p = 0.955) is not significant; represents the baseline hazard of mortality for the reference categories. None of the age groups have a significant effect (p > 0.05), meaning mortality risk does not differ significantly by age relative to the 50-54 reference group. Females have significantly lower mortality risk than males. The hazard ratio suggests that being female is associated with a roughly 26% reduction in the risk of death compared to males. Similar to the logit model, no education levels are significant, implying no strong association between educational attainment and mortality in this dataset. None of the locality size categories are significant, indicating that locality size does not significantly affect mortality risk in this model. Both models consistently find that being female is associated with a lower mortality risk, while age, education, and locality size do not have significant impacts on mortality within this dataset.
The odds ratios show that gender has a significant effect on mortality, with females having a 26% lower odds of death than males in both models, indicating a consistent protective effect. Education levels provide some protective effect, particularly for those with 9 or more years of schooling, who show around an 8% lower mortality risk compared to those with no schooling. However, the impact of education is generally modest, with odds ratios close to 1, indicating minimal variation in mortality risk across different schooling levels.
For locality size, larger communities (locsize3 and locsize4) are associated with slightly lower mortality odds compared to the smallest locality (locsize1), suggesting a modest protective effect of residing in larger areas. Specifically, residents in locsize3 and locsize4 have approximately 15% and 13.5% lower odds of mortality, respectively. Overall, gender and locality size show consistent protective associations with mortality, while education has a more nuanced, modest effect.
The odds ratios are very similar across both the logistic and complementary log-log models, indicating that the relationship between gender, education levels, and locality size with mortality is consistent regardless of the model used.
fit.pch5phtest1<-glm(dead ~ as.factor(age5) + female + as.factor(educlevel) + as.factor(locsize01) + as.factor(age5):female, data=mhas_personmonth, family=binomial(link=logit))
summary(fit.pch5phtest1)
##
## Call:
## glm(formula = dead ~ as.factor(age5) + female + as.factor(educlevel) +
## as.factor(locsize01) + as.factor(age5):female, family = binomial(link = logit),
## data = mhas_personmonth)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.348e+01 6.008e+02 -0.039 0.969
## as.factor(age5)55-59 1.596e-03 6.887e+02 0.000 1.000
## as.factor(age5)60-64 3.720e-03 6.636e+02 0.000 1.000
## as.factor(age5)65-69 1.612e+01 6.008e+02 0.027 0.979
## as.factor(age5)70-74 1.678e+01 6.008e+02 0.028 0.978
## as.factor(age5)75-79 1.720e+01 6.008e+02 0.029 0.977
## as.factor(age5)80-84 1.877e+01 6.008e+02 0.031 0.975
## as.factor(age5)85+ -8.300e-02 7.946e+04 0.000 1.000
## female -1.160e-02 8.576e+02 0.000 1.000
## as.factor(educlevel)15 -9.778e-02 8.926e-02 -1.096 0.273
## as.factor(educlevel)68 3.028e-02 1.020e-01 0.297 0.767
## as.factor(educlevel)919 -9.155e-02 1.212e-01 -0.755 0.450
## as.factor(locsize01)2 6.124e-03 1.006e-01 0.061 0.951
## as.factor(locsize01)3 -1.632e-01 1.252e-01 -1.304 0.192
## as.factor(locsize01)4 -1.463e-01 1.008e-01 -1.452 0.147
## as.factor(age5)55-59:female 2.135e-03 9.791e+02 0.000 1.000
## as.factor(age5)60-64:female 1.698e-03 9.410e+02 0.000 1.000
## as.factor(age5)65-69:female -6.323e-01 8.576e+02 -0.001 0.999
## as.factor(age5)70-74:female -3.844e-01 8.576e+02 0.000 1.000
## as.factor(age5)75-79:female -4.561e-01 8.576e+02 -0.001 1.000
## as.factor(age5)80-84:female -9.534e-02 8.576e+02 0.000 1.000
## as.factor(age5)85+:female 2.364e+01 7.947e+04 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11424 on 888714 degrees of freedom
## AIC: 11468
##
## Number of Fisher Scoring iterations: 22
AIC(fit.pch5phtest1)
## [1] 11468.38
BIC(fit.pch5phtest1)
## [1] 11725.72
fit.pch5phtest2<-glm(dead ~ as.factor(age5) + female + as.factor(educlevel) + as.factor(locsize01) + as.factor(age5):as.factor(educlevel), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.pch5phtest2)
##
## Call:
## glm(formula = dead ~ as.factor(age5) + female + as.factor(educlevel) +
## as.factor(locsize01) + as.factor(age5):as.factor(educlevel),
## family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) -2.335e+01 1.160e+03 -0.020
## as.factor(age5)55-59 7.619e-03 1.293e+03 0.000
## as.factor(age5)60-64 1.272e-02 1.241e+03 0.000
## as.factor(age5)65-69 1.532e+01 1.160e+03 0.013
## as.factor(age5)70-74 1.636e+01 1.160e+03 0.014
## as.factor(age5)75-79 1.716e+01 1.160e+03 0.015
## as.factor(age5)80-84 1.880e+01 1.160e+03 0.016
## as.factor(age5)85+ 2.250e+01 1.160e+03 0.019
## female -2.908e-01 7.145e-02 -4.070
## as.factor(educlevel)15 -1.405e-02 1.392e+03 0.000
## as.factor(educlevel)68 -5.774e-02 1.405e+03 0.000
## as.factor(educlevel)919 -1.061e-01 1.431e+03 0.000
## as.factor(locsize01)2 1.728e-03 1.005e-01 0.017
## as.factor(locsize01)3 -1.716e-01 1.252e-01 -1.371
## as.factor(locsize01)4 -1.417e-01 1.007e-01 -1.408
## as.factor(age5)55-59:as.factor(educlevel)15 -6.591e-03 1.561e+03 0.000
## as.factor(age5)60-64:as.factor(educlevel)15 -7.156e-03 1.498e+03 0.000
## as.factor(age5)65-69:as.factor(educlevel)15 2.540e-01 1.392e+03 0.000
## as.factor(age5)70-74:as.factor(educlevel)15 4.641e-02 1.392e+03 0.000
## as.factor(age5)75-79:as.factor(educlevel)15 -3.459e-01 1.392e+03 0.000
## as.factor(age5)80-84:as.factor(educlevel)15 -5.425e-02 1.392e+03 0.000
## as.factor(age5)85+:as.factor(educlevel)15 2.472e+01 7.947e+04 0.000
## as.factor(age5)55-59:as.factor(educlevel)68 -6.752e-03 1.583e+03 0.000
## as.factor(age5)60-64:as.factor(educlevel)68 -9.084e-03 1.521e+03 0.000
## as.factor(age5)65-69:as.factor(educlevel)68 9.307e-01 1.405e+03 0.001
## as.factor(age5)70-74:as.factor(educlevel)68 4.017e-01 1.405e+03 0.000
## as.factor(age5)75-79:as.factor(educlevel)68 -4.424e-02 1.405e+03 0.000
## as.factor(age5)80-84:as.factor(educlevel)68 -1.732e-01 1.405e+03 0.000
## as.factor(age5)85+:as.factor(educlevel)68 NA NA NA
## as.factor(age5)55-59:as.factor(educlevel)919 1.966e-03 1.616e+03 0.000
## as.factor(age5)60-64:as.factor(educlevel)919 7.540e-03 1.554e+03 0.000
## as.factor(age5)65-69:as.factor(educlevel)919 6.467e-01 1.431e+03 0.000
## as.factor(age5)70-74:as.factor(educlevel)919 5.560e-01 1.431e+03 0.000
## as.factor(age5)75-79:as.factor(educlevel)919 -4.876e-01 1.431e+03 0.000
## as.factor(age5)80-84:as.factor(educlevel)919 -2.137e-01 1.431e+03 0.000
## as.factor(age5)85+:as.factor(educlevel)919 NA NA NA
## Pr(>|z|)
## (Intercept) 0.984
## as.factor(age5)55-59 1.000
## as.factor(age5)60-64 1.000
## as.factor(age5)65-69 0.989
## as.factor(age5)70-74 0.989
## as.factor(age5)75-79 0.988
## as.factor(age5)80-84 0.987
## as.factor(age5)85+ 0.985
## female 4.7e-05 ***
## as.factor(educlevel)15 1.000
## as.factor(educlevel)68 1.000
## as.factor(educlevel)919 1.000
## as.factor(locsize01)2 0.986
## as.factor(locsize01)3 0.170
## as.factor(locsize01)4 0.159
## as.factor(age5)55-59:as.factor(educlevel)15 1.000
## as.factor(age5)60-64:as.factor(educlevel)15 1.000
## as.factor(age5)65-69:as.factor(educlevel)15 1.000
## as.factor(age5)70-74:as.factor(educlevel)15 1.000
## as.factor(age5)75-79:as.factor(educlevel)15 1.000
## as.factor(age5)80-84:as.factor(educlevel)15 1.000
## as.factor(age5)85+:as.factor(educlevel)15 1.000
## as.factor(age5)55-59:as.factor(educlevel)68 1.000
## as.factor(age5)60-64:as.factor(educlevel)68 1.000
## as.factor(age5)65-69:as.factor(educlevel)68 0.999
## as.factor(age5)70-74:as.factor(educlevel)68 1.000
## as.factor(age5)75-79:as.factor(educlevel)68 1.000
## as.factor(age5)80-84:as.factor(educlevel)68 1.000
## as.factor(age5)85+:as.factor(educlevel)68 NA
## as.factor(age5)55-59:as.factor(educlevel)919 1.000
## as.factor(age5)60-64:as.factor(educlevel)919 1.000
## as.factor(age5)65-69:as.factor(educlevel)919 1.000
## as.factor(age5)70-74:as.factor(educlevel)919 1.000
## as.factor(age5)75-79:as.factor(educlevel)919 1.000
## as.factor(age5)80-84:as.factor(educlevel)919 1.000
## as.factor(age5)85+:as.factor(educlevel)919 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11407 on 888702 degrees of freedom
## AIC: 11475
##
## Number of Fisher Scoring iterations: 22
AIC(fit.pch5phtest2)
## [1] 11475.28
BIC(fit.pch5phtest2)
## [1] 11873
fit.pch5phtest3<-glm(dead ~ as.factor(age5) + female + as.factor(educlevel) + as.factor(locsize01) + as.factor(age5):as.factor(locsize01), data=mhas_personmonth, family=binomial(link=logit))
summary(fit.pch5phtest3)
##
## Call:
## glm(formula = dead ~ as.factor(age5) + female + as.factor(educlevel) +
## as.factor(locsize01) + as.factor(age5):as.factor(locsize01),
## family = binomial(link = logit), data = mhas_personmonth)
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) -2.338e+01 5.564e+02 -0.042
## as.factor(age5)55-59 6.353e-03 6.367e+02 0.000
## as.factor(age5)60-64 1.462e-02 6.118e+02 0.000
## as.factor(age5)65-69 1.582e+01 5.564e+02 0.028
## as.factor(age5)70-74 1.669e+01 5.564e+02 0.030
## as.factor(age5)75-79 1.701e+01 5.564e+02 0.031
## as.factor(age5)80-84 1.876e+01 5.564e+02 0.034
## as.factor(age5)85+ 2.291e+01 5.564e+02 0.041
## female -3.005e-01 7.135e-02 -4.212
## as.factor(educlevel)15 -9.915e-02 8.926e-02 -1.111
## as.factor(educlevel)68 3.458e-02 1.021e-01 0.339
## as.factor(educlevel)919 -8.435e-02 1.212e-01 -0.696
## as.factor(locsize01)2 -1.699e-02 1.176e+03 0.000
## as.factor(locsize01)3 -1.850e-02 1.633e+03 0.000
## as.factor(locsize01)4 -1.899e-02 1.210e+03 0.000
## as.factor(age5)55-59:as.factor(locsize01)2 -2.791e-03 1.350e+03 0.000
## as.factor(age5)60-64:as.factor(locsize01)2 -4.289e-03 1.299e+03 0.000
## as.factor(age5)65-69:as.factor(locsize01)2 3.790e-01 1.176e+03 0.000
## as.factor(age5)70-74:as.factor(locsize01)2 -3.034e-01 1.176e+03 0.000
## as.factor(age5)75-79:as.factor(locsize01)2 -1.741e-01 1.176e+03 0.000
## as.factor(age5)80-84:as.factor(locsize01)2 1.285e-01 1.176e+03 0.000
## as.factor(age5)85+:as.factor(locsize01)2 2.435e+01 7.947e+04 0.000
## as.factor(age5)55-59:as.factor(locsize01)3 1.847e-02 1.853e+03 0.000
## as.factor(age5)60-64:as.factor(locsize01)3 1.146e-02 1.775e+03 0.000
## as.factor(age5)65-69:as.factor(locsize01)3 1.179e-01 1.633e+03 0.000
## as.factor(age5)70-74:as.factor(locsize01)3 -2.234e-01 1.633e+03 0.000
## as.factor(age5)75-79:as.factor(locsize01)3 -2.590e-01 1.633e+03 0.000
## as.factor(age5)80-84:as.factor(locsize01)3 -1.291e-01 1.633e+03 0.000
## as.factor(age5)85+:as.factor(locsize01)3 NA NA NA
## as.factor(age5)55-59:as.factor(locsize01)4 -1.320e-02 1.372e+03 0.000
## as.factor(age5)60-64:as.factor(locsize01)4 -1.831e-02 1.318e+03 0.000
## as.factor(age5)65-69:as.factor(locsize01)4 -4.757e-01 1.210e+03 0.000
## as.factor(age5)70-74:as.factor(locsize01)4 -1.693e-01 1.210e+03 0.000
## as.factor(age5)75-79:as.factor(locsize01)4 6.390e-02 1.210e+03 0.000
## as.factor(age5)80-84:as.factor(locsize01)4 -1.287e-01 1.210e+03 0.000
## as.factor(age5)85+:as.factor(locsize01)4 -2.278e+01 7.947e+04 0.000
## Pr(>|z|)
## (Intercept) 0.966
## as.factor(age5)55-59 1.000
## as.factor(age5)60-64 1.000
## as.factor(age5)65-69 0.977
## as.factor(age5)70-74 0.976
## as.factor(age5)75-79 0.976
## as.factor(age5)80-84 0.973
## as.factor(age5)85+ 0.967
## female 2.54e-05 ***
## as.factor(educlevel)15 0.267
## as.factor(educlevel)68 0.735
## as.factor(educlevel)919 0.487
## as.factor(locsize01)2 1.000
## as.factor(locsize01)3 1.000
## as.factor(locsize01)4 1.000
## as.factor(age5)55-59:as.factor(locsize01)2 1.000
## as.factor(age5)60-64:as.factor(locsize01)2 1.000
## as.factor(age5)65-69:as.factor(locsize01)2 1.000
## as.factor(age5)70-74:as.factor(locsize01)2 1.000
## as.factor(age5)75-79:as.factor(locsize01)2 1.000
## as.factor(age5)80-84:as.factor(locsize01)2 1.000
## as.factor(age5)85+:as.factor(locsize01)2 1.000
## as.factor(age5)55-59:as.factor(locsize01)3 1.000
## as.factor(age5)60-64:as.factor(locsize01)3 1.000
## as.factor(age5)65-69:as.factor(locsize01)3 1.000
## as.factor(age5)70-74:as.factor(locsize01)3 1.000
## as.factor(age5)75-79:as.factor(locsize01)3 1.000
## as.factor(age5)80-84:as.factor(locsize01)3 1.000
## as.factor(age5)85+:as.factor(locsize01)3 NA
## as.factor(age5)55-59:as.factor(locsize01)4 1.000
## as.factor(age5)60-64:as.factor(locsize01)4 1.000
## as.factor(age5)65-69:as.factor(locsize01)4 1.000
## as.factor(age5)70-74:as.factor(locsize01)4 1.000
## as.factor(age5)75-79:as.factor(locsize01)4 1.000
## as.factor(age5)80-84:as.factor(locsize01)4 1.000
## as.factor(age5)85+:as.factor(locsize01)4 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13114 on 888735 degrees of freedom
## Residual deviance: 11422 on 888701 degrees of freedom
## AIC: 11492
##
## Number of Fisher Scoring iterations: 22
AIC(fit.pch5phtest3)
## [1] 11491.64
BIC(fit.pch5phtest3)
## [1] 11901.05
#install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Add your more complex model second
lrtest(fit.pch5, fit.pch5phtest1)
lrtest(fit.pch5, fit.pch5phtest2)
lrtest(fit.pch5, fit.pch5phtest3)
The likelihood ratio tests indicate that the proportional hazards assumption holds for gender, education levels, and locality size, as none of the interactions with age were statistically significant: Gender: The interaction test between age and gender yielded a chi-square of 9.0593 (p = 0.2484), suggesting no significant interaction. Education Level: The age and education level interaction test produced a chi-square of 26.153 (p = 0.126), indicating no significant effect. Locality Size: The test for interaction between age and locality size showed a chi-square of 11.796 (p = 0.9229), also non-significant. Since the proportional hazards assumption holds, no further adjustments are needed. If it had not, we could address non-proportionality by using Cox models with time-varying coefficients or stratified Cox models, which allow for different baseline hazards across strata (e.g., by gender or education level).
Females have a significantly lower risk of mortality than males. The interaction between age and females is significant, showing that the protective effect of being female decreases with age. Higher education levels are generally associated with lower mortality risk, though this effect diminishes when adjusting for age as a continuous variable, suggesting age may mediate this association. No significant association with mortality was found for locality size, and the proportional hazards assumption holds for this variable. Gender and education levels are associated with mortality risk, with gender effects varying by age. Locality size does not significantly impact mortality in this dataset. The proportional hazards assumption largely holds, but if it did not, time-varying coefficients or stratified models could address non-proportionality.