Warning: package 'ggplot2' was built under R version 4.3.2
Warning: package 'readr' was built under R version 4.3.2
Warning: package 'purrr' was built under R version 4.3.2
Warning: package 'dplyr' was built under R version 4.3.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.3 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(AER)
Warning: package 'AER' was built under R version 4.3.3
Loading required package: car
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
Loading required package: lmtest
Warning: package 'lmtest' was built under R version 4.3.3
Loading required package: zoo
Warning: package 'zoo' was built under R version 4.3.2
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
Loading required package: sandwich
Warning: package 'sandwich' was built under R version 4.3.2
Loading required package: survival
setwd("C:/Users/jacob/Downloads")mhas0121dummies<-read.csv("MHAS0121 NO LABELS NO NEGATIVE TIMES.csv")mhas0121noNA<-na.omit(mhas0121dummies)getwd()
[1] "C:/Users/jacob/Downloads"
mhas0121noNA<-mhas0121noNA %>%mutate(startmo = ageint*12,endmo = agecensor*12,dead = died0121,ageintmo = startmo - mobirth # Calculate age in months )max(mhas0121noNA$exposure_months)
[1] 296
#Possible max time is 296 months, but using an even higher threshold in case things change#rm(mhas_personmonth)# Code adapted from Mills (2011): section 3.5# Expands data into person-months, up to maximum number observed in mhas_personmonth<-survSplit(mhas0121noNA, cut=c(1:1000), start="startmo", end="endmo", event="dead")# Sorting data by IDmhas_personmonth<-mhas_personmonth[order (mhas_personmonth$id, mhas_personmonth$ageintmo),]#Creating time-varying age variables for different model specifications0mhas_personmonth<-mhas_personmonth %>%mutate(age=startmo/12, #could truncate if one wanted to, but I don't agesq=age^2,.after ="died0121" )mhas0121noNA <- mhas0121noNA %>%mutate(ageintmo = startmo - mobirth # Calculate age in months )# Create person-months datamhas_personmonth <-survSplit(mhas0121noNA, cut =c(1:1000), start ="startmo", end ="endmo", event ="dead")# Sorting data by ID and agemhas_personmonth <- mhas_personmonth %>%arrange(id, ageintmo)# Creating time-varying age variablesmhas_personmonth <- mhas_personmonth %>%mutate(age = startmo /12,agesq = age^2,.after = died0121 # Ensure died0121 is a column in your data )# Create age groupsmhas_personmonth <- mhas_personmonth %>%mutate(age5 =case_when( age >=50& age <55~"50-54", age >=55& age <60~"55-59", age >=60& age <65~"60-64", age >=65& age <70~"65-69", age >=70& age <75~"70-74", age >=75& age <80~"75-79", age >=80& age <85~"80-84", age >=85~"85+",TRUE~NA_character_# Handle cases outside of these ranges ),age5 =factor( age5,levels =c("50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+") ) )# Check the structure of the resulting data frame
model1 <-glm(dead ~ female + schooling + locsize01, family = binomial, data = mhas_personmonth)summary(model1)
Call:
glm(formula = dead ~ female + schooling + locsize01, family = binomial,
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.545591 0.100200 -65.325 < 2e-16 ***
female -0.139351 0.071153 -1.958 0.0502 .
schooling -0.052074 0.009389 -5.546 2.92e-08 ***
locsize01 -0.072584 0.031632 -2.295 0.0218 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 13114 on 888735 degrees of freedom
Residual deviance: 13080 on 888732 degrees of freedom
AIC: 13088
Number of Fisher Scoring iterations: 10
model2 <-glm(dead ~ female + schooling + locsize01 + age, family = binomial, data = mhas_personmonth)summary(model2)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age, family = binomial,
data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -25.904271 0.623359 -41.556 < 2e-16 ***
female -0.317068 0.071144 -4.457 8.32e-06 ***
schooling 0.003073 0.009162 0.335 0.7374
locsize01 -0.055694 0.031897 -1.746 0.0808 .
age 0.259811 0.007846 33.114 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 13114 on 888735 degrees of freedom
Residual deviance: 11326 on 888731 degrees of freedom
AIC: 11336
Number of Fisher Scoring iterations: 11
mhas_personmonth <- mhas_personmonth %>%mutate(age_group =cut(age, breaks =seq(0, 90, by =5), right =FALSE))model4 <-glm(dead ~ female + schooling + locsize01 + age + age_group, family = binomial, data = mhas_personmonth)summary(model4)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age + age_group,
family = binomial, data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -55.046798 421.380640 -0.131 0.8961
female -0.316194 0.071299 -4.435 9.22e-06 ***
schooling 0.003498 0.009183 0.381 0.7033
locsize01 -0.054457 0.031983 -1.703 0.0886 .
age 0.590257 0.031888 18.511 < 2e-16 ***
age_group[55,60) -2.604669 479.442519 -0.005 0.9957
age_group[60,65) -5.468869 460.888238 -0.012 0.9905
age_group[65,70) 7.366106 421.377371 0.017 0.9861
age_group[70,75) 5.285063 421.377563 0.013 0.9900
age_group[75,80) 2.761722 421.377824 0.007 0.9948
age_group[80,85) 2.171142 421.378055 0.005 0.9959
age_group[85,90) 4.610681 421.379545 0.011 0.9913
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 13114 on 888734 degrees of freedom
Residual deviance: 11045 on 888723 degrees of freedom
(1 observation deleted due to missingness)
AIC: 11069
Number of Fisher Scoring iterations: 22
mhas_personmonth <- mhas_personmonth %>%mutate(age_squared = age^2)model3 <-glm(dead ~ female + schooling + locsize01 + age + age_squared, family = binomial, data = mhas_personmonth)summary(model3)
Call:
glm(formula = dead ~ female + schooling + locsize01 + age + age_squared,
family = binomial, data = mhas_personmonth)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 10.1093398 4.3238314 2.338 0.0194 *
female -0.3164928 0.0712216 -4.444 8.84e-06 ***
schooling 0.0041593 0.0091741 0.453 0.6503
locsize01 -0.0539427 0.0319334 -1.689 0.0912 .
age -0.7130371 0.1179780 -6.044 1.51e-09 ***
age_squared 0.0065261 0.0008009 8.149 3.67e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 13114 on 888735 degrees of freedom
Residual deviance: 11275 on 888730 degrees of freedom
AIC: 11287
Number of Fisher Scoring iterations: 12