#1. Load Data and create Variables
library(readr)
mhas <- read_csv("MHAS0121.csv")
## Rows: 9636 Columns: 39
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): locsize01, died18, died21
## dbl (36): id, perwght01, mobirth, yrbirth, female, moint, yrint, schooling, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(survival)
library(asaur)
library(eha)
library(flextable)
library(officer)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'ggpubr'
##
## The following objects are masked from 'package:flextable':
##
## border, font, rotate
##
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
summary(mhas$yrcensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2001 2008 2018 2015 2021 2022 847
summary(mhas$yrint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2001 2001 2001 2001 2001 2001
summary(mhas$mocensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 4.000 9.000 8.822 12.000 99.000 102
summary(mhas$moint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 7.000 7.000 7.494 8.000 11.000
table(mhas$mocensor)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 88 99
## 1387 327 385 333 343 546 874 435 387 481 1171 2755 12 98
mhas <- mhas[!is.na(mhas$yrcensor), ]
mhas <- mhas[!is.na(mhas$mocensor), ]
mhas <- mhas[mhas$mocensor != 88, ]
mhas <- mhas[mhas$mocensor != 99, ]
mhas <- mhas[mhas$yrcensor > mhas$yrint | (mhas$yrcensor == mhas$yrint & mhas$mocensor >= mhas$moint), ]
mhas$time_exposed <- with(mhas, ((yrcensor + mocensor/12) - (yrint + moint/12)) * 12 + 1)
summary(mhas$time_exposed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 89.0 201.0 165.3 245.0 248.0
summary(mhas$died0121)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 1.0000 0.5223 1.0000 1.0000 1224
mhas <- mhas[!is.na(mhas$died0121), ]
mhas$event_occurred <- mhas$died0121
summary(mhas$ageint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.50 55.25 61.25 63.42 69.50 106.08
summary(mhas$agecensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 51.17 72.75 77.58 78.20 83.67 114.42
mhas$entry_time <- mhas$ageint
mhas$exit_time <-mhas$agecensor
#2. Problem Set 3 ## Part I
##Part I. Multivariate parametric proportional hazards models
### a. Estimate parametric proportional hazards model with an exponential (i.e., constant) with no covariates. Instead of hazard ratios, obtain coefficients. Exponentiate the intercept of the model. (Later on, we will compare this to the crude death rate for people 50+ we will calculate in a later problem set).
model <- survreg(Surv(time_exposed, event_occurred) ~ 1, data = mhas, dist = "exponential")
summary(model)
##
## Call:
## survreg(formula = Surv(time_exposed, event_occurred) ~ 1, data = mhas,
## dist = "exponential")
## Value Std. Error z p
## (Intercept) 5.834 0.016 365 <2e-16
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -26679.5 Loglik(intercept only)= -26679.5
## Number of Newton-Raphson Iterations: 4
## n= 7475
intercept_log <- coef(model)
baseline_hazard_rate <- exp(intercept_log)
baseline_hazard_rate
## (Intercept)
## 341.6837
###b. Estimate another exponential, proportional hazards model, using FEMALE as the only covariate. Obtain coefficients, not hazard ratios.
#Interpret the coefficient for FEMALE after exponentiating it. Exponentiate the intercept of this model and interpret this figure.
model_female <- survreg(Surv(time_exposed, event_occurred) ~ female, data = mhas, dist = "exponential")
summary(model_female)
##
## Call:
## survreg(formula = Surv(time_exposed, event_occurred) ~ female,
## data = mhas, dist = "exponential")
## Value Std. Error z p
## (Intercept) 5.7728 0.0232 248.77 <2e-16
## female 0.1135 0.0320 3.54 4e-04
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -26673.2 Loglik(intercept only)= -26679.5
## Chisq= 12.52 on 1 degrees of freedom, p= 4e-04
## Number of Newton-Raphson Iterations: 4
## n= 7475
coef_female_log <- coef(model_female)
exp_coef_female <- exp(coef_female_log)
exp_coef_female
## (Intercept) female
## 321.423802 1.120213
#Sum the constant and the coefficient for female, and exponentiate this number.
intercept_log_female <- coef(model_female)
sum_log <- intercept_log_female + coef_female_log
hazard_rate_female <- exp(sum_log)
hazard_rate_female
## (Intercept) female
## 1.033133e+05 1.254876e+00
###c. Estimate another PH parametric model controlling for FEMALE, SCHOOLING (or categories of EDUCLEVEL, whatever you prefer, justifying your choice), and LOCSIZE01. Assuming the baseline hazard is distributed…
# Exponential
model_exp <- survreg(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas, dist = "exponential")
summary(model_exp)
##
## Call:
## survreg(formula = Surv(time_exposed, event_occurred) ~ female +
## schooling + locsize01, data = mhas, dist = "exponential")
## Value Std. Error z p
## (Intercept) 5.64443 0.04212 134.00 < 2e-16
## female 0.17639 0.03248 5.43 5.6e-08
## schooling 0.06251 0.00442 14.13 < 2e-16
## locsize01100,000 - + -0.21179 0.04600 -4.60 4.1e-06
## locsize0115,000 - 99,999 -0.15607 0.05638 -2.77 0.0056
## locsize012,500 - 14,999 -0.13834 0.06332 -2.18 0.0289
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -26500.9 Loglik(intercept only)= -26617.6
## Chisq= 233.35 on 5 degrees of freedom, p= 2e-48
## Number of Newton-Raphson Iterations: 4
## n=7463 (12 observations deleted due to missingness)
# Gompertz
library(flexsurv)
##
## Attaching package: 'flexsurv'
## The following objects are masked from 'package:eha':
##
## dgompertz, dllogis, hgompertz, Hgompertz, hllogis, Hllogis, hlnorm,
## Hlnorm, hweibull, Hweibull, pgompertz, pllogis, qgompertz, qllogis,
## rgompertz, rllogis
model_gompertz <- flexsurvreg(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas, dist = "gompertz")
summary(model_gompertz)
## female=0.544151145651883,schooling=4.28018223234624,locsize01100,000 - +=0.57523784001072,locsize0115,000 - 99,999=0.154629505560766,locsize012,500 - 14,999=0.0972799142436018
## time est lcl ucl
## 1 1 0.9978860 0.9977595 0.9980190
## 2 2 0.9957706 0.9955185 0.9960356
## 3 2 0.9957706 0.9955185 0.9960356
## 4 3 0.9936539 0.9932771 0.9940498
## 5 3 0.9936539 0.9932771 0.9940498
## 6 4 0.9915359 0.9910351 0.9920616
## 7 5 0.9894165 0.9887927 0.9900709
## 8 5 0.9894165 0.9887927 0.9900709
## 9 6 0.9872959 0.9865499 0.9880778
## 10 6 0.9872959 0.9865499 0.9880778
## 11 7 0.9851740 0.9843066 0.9860824
## 12 8 0.9830508 0.9820629 0.9840846
## 13 8 0.9830508 0.9820629 0.9840846
## 14 9 0.9809264 0.9798188 0.9820844
## 15 9 0.9809264 0.9798188 0.9820844
## 16 10 0.9788007 0.9775744 0.9800819
## 17 11 0.9766738 0.9753296 0.9780770
## 18 11 0.9766738 0.9753296 0.9780770
## 19 12 0.9745457 0.9730844 0.9760697
## 20 12 0.9745457 0.9730844 0.9760697
## 21 13 0.9724163 0.9708413 0.9740602
## 22 14 0.9702858 0.9685991 0.9720483
## 23 14 0.9702858 0.9685991 0.9720483
## 24 15 0.9681541 0.9663570 0.9700341
## 25 15 0.9681541 0.9663570 0.9700341
## 26 16 0.9660212 0.9641153 0.9680177
## 27 17 0.9638871 0.9618737 0.9659989
## 28 17 0.9638871 0.9618737 0.9659989
## 29 18 0.9617519 0.9596323 0.9639779
## 30 18 0.9617519 0.9596323 0.9639779
## 31 19 0.9596155 0.9573915 0.9619546
## 32 20 0.9574781 0.9551532 0.9599297
## 33 20 0.9574781 0.9551532 0.9599297
## 34 21 0.9553395 0.9529153 0.9579081
## 35 21 0.9553395 0.9529153 0.9579081
## 36 22 0.9531998 0.9506780 0.9558790
## 37 23 0.9510590 0.9484344 0.9538478
## 38 23 0.9510590 0.9484344 0.9538478
## 39 24 0.9489172 0.9461902 0.9518160
## 40 24 0.9489172 0.9461902 0.9518160
## 41 25 0.9467743 0.9439492 0.9497864
## 42 26 0.9446303 0.9417064 0.9477562
## 43 26 0.9446303 0.9417064 0.9477562
## 44 27 0.9424854 0.9394584 0.9457243
## 45 27 0.9424854 0.9394584 0.9457243
## 46 28 0.9403394 0.9372125 0.9436909
## 47 29 0.9381923 0.9349678 0.9416559
## 48 29 0.9381923 0.9349678 0.9416559
## 49 30 0.9360443 0.9327232 0.9396156
## 50 30 0.9360443 0.9327232 0.9396156
## 51 31 0.9338953 0.9304785 0.9375687
## 52 32 0.9317454 0.9282338 0.9355195
## 53 32 0.9317454 0.9282338 0.9355195
## 54 33 0.9295945 0.9259891 0.9334681
## 55 33 0.9295945 0.9259891 0.9334681
## 56 34 0.9274426 0.9237446 0.9314151
## 57 35 0.9252898 0.9215003 0.9293651
## 58 35 0.9252898 0.9215003 0.9293651
## 59 36 0.9231361 0.9192560 0.9273102
## 60 36 0.9231361 0.9192560 0.9273102
## 61 37 0.9209815 0.9170118 0.9252485
## 62 38 0.9188260 0.9147677 0.9231846
## 63 38 0.9188260 0.9147677 0.9231846
## 64 39 0.9166696 0.9125260 0.9211185
## 65 39 0.9166696 0.9125260 0.9211185
## 66 40 0.9145124 0.9102889 0.9190526
## 67 41 0.9123543 0.9080523 0.9169895
## 68 41 0.9123543 0.9080523 0.9169895
## 69 42 0.9101953 0.9058161 0.9149247
## 70 42 0.9101953 0.9058161 0.9149247
## 71 43 0.9080356 0.9035804 0.9128581
## 72 44 0.9058750 0.9013453 0.9107899
## 73 44 0.9058750 0.9013453 0.9107899
## 74 45 0.9037137 0.8990982 0.9087155
## 75 45 0.9037137 0.8990982 0.9087155
## 76 46 0.9015516 0.8968455 0.9066446
## 77 47 0.8993887 0.8945926 0.9045731
## 78 47 0.8993887 0.8945926 0.9045731
## 79 48 0.8972250 0.8923395 0.9024921
## 80 48 0.8972250 0.8923395 0.9024921
## 81 49 0.8950607 0.8900973 0.9004092
## 82 50 0.8928956 0.8878637 0.8983305
## 83 50 0.8928956 0.8878637 0.8983305
## 84 51 0.8907297 0.8856307 0.8962532
## 85 51 0.8907297 0.8856307 0.8962532
## 86 52 0.8885632 0.8833983 0.8941744
## 87 53 0.8863960 0.8811607 0.8921014
## 88 53 0.8863960 0.8811607 0.8921014
## 89 54 0.8842282 0.8789134 0.8900172
## 90 54 0.8842282 0.8789134 0.8900172
## 91 55 0.8820597 0.8766595 0.8879310
## 92 56 0.8798905 0.8744013 0.8858446
## 93 56 0.8798905 0.8744013 0.8858446
## 94 57 0.8777208 0.8721608 0.8837582
## 95 57 0.8777208 0.8721608 0.8837582
## 96 58 0.8755504 0.8699206 0.8816628
## 97 59 0.8733794 0.8676767 0.8795706
## 98 59 0.8733794 0.8676767 0.8795706
## 99 60 0.8712079 0.8654295 0.8774769
## 100 61 0.8690357 0.8631825 0.8753816
## 101 62 0.8668631 0.8609356 0.8732850
## 102 62 0.8668631 0.8609356 0.8732850
## 103 63 0.8646899 0.8586889 0.8711868
## 104 63 0.8646899 0.8586889 0.8711868
## 105 64 0.8625162 0.8564423 0.8690872
## 106 65 0.8603419 0.8541964 0.8669862
## 107 65 0.8603419 0.8541964 0.8669862
## 108 66 0.8581672 0.8519512 0.8648838
## 109 66 0.8581672 0.8519512 0.8648838
## 110 67 0.8559920 0.8497063 0.8627800
## 111 68 0.8538164 0.8474616 0.8606784
## 112 68 0.8538164 0.8474616 0.8606784
## 113 69 0.8516403 0.8452172 0.8585867
## 114 69 0.8516403 0.8452172 0.8585867
## 115 70 0.8494638 0.8429731 0.8564938
## 116 71 0.8472868 0.8407446 0.8543911
## 117 71 0.8472868 0.8407446 0.8543911
## 118 72 0.8451095 0.8385253 0.8522750
## 119 72 0.8451095 0.8385253 0.8522750
## 120 73 0.8429318 0.8363070 0.8501720
## 121 74 0.8407537 0.8340897 0.8480745
## 122 74 0.8407537 0.8340897 0.8480745
## 123 75 0.8385753 0.8318581 0.8459761
## 124 75 0.8385753 0.8318581 0.8459761
## 125 76 0.8363965 0.8296138 0.8438717
## 126 77 0.8342174 0.8273697 0.8417740
## 127 77 0.8342174 0.8273697 0.8417740
## 128 78 0.8320380 0.8251264 0.8396756
## 129 78 0.8320380 0.8251264 0.8396756
## 130 79 0.8298583 0.8228835 0.8375699
## 131 80 0.8276783 0.8206411 0.8354558
## 132 80 0.8276783 0.8206411 0.8354558
## 133 81 0.8254981 0.8183990 0.8333406
## 134 81 0.8254981 0.8183990 0.8333406
## 135 82 0.8233176 0.8161629 0.8312243
## 136 83 0.8211369 0.8139332 0.8291070
## 137 83 0.8211369 0.8139332 0.8291070
## 138 84 0.8189560 0.8117224 0.8269883
## 139 84 0.8189560 0.8117224 0.8269883
## 140 85 0.8167749 0.8095127 0.8248681
## 141 86 0.8145936 0.8072763 0.8227469
## 142 86 0.8145936 0.8072763 0.8227469
## 143 87 0.8124122 0.8050194 0.8206124
## 144 87 0.8124122 0.8050194 0.8206124
## 145 88 0.8102306 0.8027898 0.8184667
## 146 89 0.8080488 0.8005560 0.8163356
## 147 89 0.8080488 0.8005560 0.8163356
## 148 90 0.8058670 0.7983163 0.8142101
## 149 90 0.8058670 0.7983163 0.8142101
## 150 91 0.8036851 0.7960771 0.8120747
## 151 92 0.8015030 0.7938384 0.8099105
## 152 92 0.8015030 0.7938384 0.8099105
## 153 93 0.7993209 0.7916003 0.8078004
## 154 93 0.7993209 0.7916003 0.8078004
## 155 94 0.7971388 0.7893627 0.8056907
## 156 95 0.7949566 0.7871256 0.8035709
## 157 95 0.7949566 0.7871256 0.8035709
## 158 96 0.7927744 0.7848892 0.8014411
## 159 96 0.7927744 0.7848892 0.8014411
## 160 97 0.7905922 0.7826533 0.7993105
## 161 98 0.7884101 0.7804183 0.7971786
## 162 98 0.7884101 0.7804183 0.7971786
## 163 99 0.7862279 0.7781845 0.7950459
## 164 99 0.7862279 0.7781845 0.7950459
## 165 100 0.7840458 0.7759516 0.7929124
## 166 101 0.7818638 0.7737433 0.7907782
## 167 101 0.7818638 0.7737433 0.7907782
## 168 102 0.7796819 0.7715461 0.7886427
## 169 102 0.7796819 0.7715461 0.7886427
## 170 103 0.7775000 0.7693502 0.7864987
## 171 104 0.7753183 0.7671340 0.7843258
## 172 104 0.7753183 0.7671340 0.7843258
## 173 105 0.7731367 0.7648983 0.7821513
## 174 105 0.7731367 0.7648983 0.7821513
## 175 106 0.7709553 0.7626632 0.7799754
## 176 107 0.7687740 0.7604287 0.7777981
## 177 107 0.7687740 0.7604287 0.7777981
## 178 108 0.7665929 0.7581950 0.7756196
## 179 108 0.7665929 0.7581950 0.7756196
## 180 109 0.7644120 0.7559619 0.7734399
## 181 110 0.7622313 0.7537295 0.7712589
## 182 110 0.7622313 0.7537295 0.7712589
## 183 111 0.7600509 0.7514978 0.7690767
## 184 111 0.7600509 0.7514978 0.7690767
## 185 112 0.7578707 0.7492669 0.7669155
## 186 113 0.7556908 0.7470367 0.7647643
## 187 113 0.7556908 0.7470367 0.7647643
## 188 114 0.7535112 0.7448073 0.7626124
## 189 114 0.7535112 0.7448073 0.7626124
## 190 115 0.7513319 0.7425787 0.7604600
## 191 116 0.7491529 0.7403509 0.7583069
## 192 116 0.7491529 0.7403509 0.7583069
## 193 117 0.7469742 0.7381239 0.7561534
## 194 117 0.7469742 0.7381239 0.7561534
## 195 118 0.7447959 0.7358978 0.7539992
## 196 119 0.7426180 0.7336725 0.7518420
## 197 119 0.7426180 0.7336725 0.7518420
## 198 120 0.7404404 0.7314482 0.7496673
## 199 120 0.7404404 0.7314482 0.7496673
## 200 121 0.7382633 0.7292339 0.7474920
## 201 122 0.7360866 0.7270252 0.7453666
## 202 122 0.7360866 0.7270252 0.7453666
## 203 123 0.7339103 0.7248177 0.7432416
## 204 123 0.7339103 0.7248177 0.7432416
## 205 124 0.7317345 0.7226113 0.7411222
## 206 125 0.7295592 0.7204064 0.7390029
## 207 125 0.7295592 0.7204064 0.7390029
## 208 126 0.7273844 0.7182027 0.7368763
## 209 126 0.7273844 0.7182027 0.7368763
## 210 127 0.7252101 0.7160003 0.7347087
## 211 128 0.7230363 0.7137991 0.7325405
## 212 128 0.7230363 0.7137991 0.7325405
## 213 129 0.7208631 0.7115990 0.7303988
## 214 129 0.7208631 0.7115990 0.7303988
## 215 130 0.7186905 0.7094000 0.7282754
## 216 131 0.7165184 0.7072072 0.7260830
## 217 131 0.7165184 0.7072072 0.7260830
## 218 132 0.7143469 0.7050301 0.7238781
## 219 132 0.7143469 0.7050301 0.7238781
## 220 133 0.7121761 0.7028435 0.7217293
## 221 134 0.7100059 0.7006583 0.7196064
## 222 134 0.7100059 0.7006583 0.7196064
## 223 135 0.7078363 0.6984745 0.7174154
## 224 135 0.7078363 0.6984745 0.7174154
## 225 136 0.7056675 0.6962921 0.7152237
## 226 137 0.7034993 0.6941112 0.7130315
## 227 137 0.7034993 0.6941112 0.7130315
## 228 138 0.7013318 0.6919318 0.7108497
## 229 138 0.7013318 0.6919318 0.7108497
## 230 140 0.6969991 0.6875774 0.7065406
## 231 146 0.6840200 0.6745396 0.6935642
## 232 146 0.6840200 0.6745396 0.6935642
## 233 148 0.6797005 0.6701060 0.6892434
## 234 149 0.6775420 0.6678908 0.6870827
## 235 149 0.6775420 0.6678908 0.6870827
## 236 150 0.6753846 0.6657245 0.6849441
## 237 161 0.6517191 0.6420383 0.6616482
## 238 162 0.6495743 0.6399536 0.6595332
## 239 162 0.6495743 0.6399536 0.6595332
## 240 163 0.6474306 0.6377829 0.6573880
## 241 164 0.6452882 0.6356789 0.6551990
## 242 164 0.6452882 0.6356789 0.6551990
## 243 165 0.6431469 0.6335770 0.6530122
## 244 165 0.6431469 0.6335770 0.6530122
## 245 166 0.6410069 0.6314787 0.6509251
## 246 167 0.6388681 0.6293827 0.6487954
## 247 167 0.6388681 0.6293827 0.6487954
## 248 168 0.6367306 0.6272344 0.6466972
## 249 168 0.6367306 0.6272344 0.6466972
## 250 169 0.6345944 0.6250416 0.6445978
## 251 170 0.6324595 0.6228503 0.6424993
## 252 170 0.6324595 0.6228503 0.6424993
## 253 171 0.6303259 0.6206603 0.6403976
## 254 171 0.6303259 0.6206603 0.6403976
## 255 172 0.6281937 0.6184347 0.6382435
## 256 173 0.6260629 0.6162480 0.6361207
## 257 173 0.6260629 0.6162480 0.6361207
## 258 174 0.6239334 0.6140963 0.6339783
## 259 175 0.6218053 0.6119173 0.6319000
## 260 179 0.6133074 0.6035029 0.6234583
## 261 196 0.5774737 0.5671515 0.5877691
## 262 197 0.5753813 0.5650235 0.5857148
## 263 198 0.5732908 0.5629180 0.5836620
## 264 198 0.5732908 0.5629180 0.5836620
## 265 199 0.5712022 0.5608713 0.5816109
## 266 200 0.5691154 0.5587696 0.5795615
## 267 200 0.5691154 0.5587696 0.5795615
## 268 201 0.5670305 0.5566452 0.5775137
## 269 201 0.5670305 0.5566452 0.5775137
## 270 202 0.5649476 0.5545653 0.5754677
## 271 203 0.5628666 0.5524892 0.5734233
## 272 203 0.5628666 0.5524892 0.5734233
## 273 204 0.5607876 0.5504157 0.5713807
## 274 204 0.5607876 0.5504157 0.5713807
## 275 205 0.5587105 0.5483447 0.5693311
## 276 206 0.5566354 0.5462713 0.5672799
## 277 206 0.5566354 0.5462713 0.5672799
## 278 207 0.5545624 0.5441753 0.5652305
## 279 207 0.5545624 0.5441753 0.5652305
## 280 208 0.5524914 0.5421188 0.5631827
## 281 209 0.5504224 0.5400670 0.5611368
## 282 209 0.5504224 0.5400670 0.5611368
## 283 210 0.5483556 0.5380262 0.5590926
## 284 210 0.5483556 0.5380262 0.5590926
## 285 211 0.5462908 0.5359701 0.5570501
## 286 212 0.5442281 0.5339249 0.5550095
## 287 212 0.5442281 0.5339249 0.5550095
## 288 213 0.5421676 0.5318825 0.5529934
## 289 213 0.5421676 0.5318825 0.5529934
## 290 214 0.5401092 0.5298163 0.5510231
## 291 215 0.5380530 0.5277690 0.5490355
## 292 216 0.5359990 0.5257270 0.5470168
## 293 217 0.5339472 0.5236838 0.5449985
## 294 218 0.5318977 0.5216255 0.5429849
## 295 218 0.5318977 0.5216255 0.5429849
## 296 219 0.5298504 0.5195556 0.5409863
## 297 219 0.5298504 0.5195556 0.5409863
## 298 220 0.5278053 0.5174895 0.5389585
## 299 221 0.5257626 0.5154258 0.5369503
## 300 221 0.5257626 0.5154258 0.5369503
## 301 242 0.4834273 0.4723510 0.4952719
## 302 242 0.4834273 0.4723510 0.4952719
## 303 243 0.4814397 0.4702397 0.4932875
## 304 243 0.4814397 0.4702397 0.4932875
## 305 244 0.4794549 0.4682065 0.4913247
## 306 245 0.4774728 0.4662321 0.4893690
## 307 245 0.4774728 0.4662321 0.4893690
## 308 246 0.4754934 0.4642393 0.4874158
## 309 246 0.4754934 0.4642393 0.4874158
## 310 247 0.4735169 0.4621664 0.4855135
## 311 248 0.4715431 0.4601385 0.4836078
# Weibull
model_weibull <- survreg(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas, dist = "weibull")
summary(model_weibull)
##
## Call:
## survreg(formula = Surv(time_exposed, event_occurred) ~ female +
## schooling + locsize01, data = mhas, dist = "weibull")
## Value Std. Error z p
## (Intercept) 5.58183 0.03503 159.33 < 2e-16
## female 0.15176 0.02689 5.64 1.7e-08
## schooling 0.05354 0.00372 14.40 < 2e-16
## locsize01100,000 - + -0.18185 0.03805 -4.78 1.8e-06
## locsize0115,000 - 99,999 -0.13379 0.04659 -2.87 0.0041
## locsize012,500 - 14,999 -0.11934 0.05231 -2.28 0.0225
## Log(scale) -0.19140 0.01476 -12.97 < 2e-16
##
## Scale= 0.826
##
## Weibull distribution
## Loglik(model)= -26422.5 Loglik(intercept only)= -26547.6
## Chisq= 250.2 on 5 degrees of freedom, p= 5e-52
## Number of Newton-Raphson Iterations: 5
## n=7463 (12 observations deleted due to missingness)
# Log logistic
model_loglogistic <- survreg(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas, dist = "loglogistic")
summary(model_loglogistic)
##
## Call:
## survreg(formula = Surv(time_exposed, event_occurred) ~ female +
## schooling + locsize01, data = mhas, dist = "loglogistic")
## Value Std. Error z p
## (Intercept) 5.28347 0.03953 133.67 < 2e-16
## female 0.16658 0.03042 5.48 4.3e-08
## schooling 0.05818 0.00397 14.66 < 2e-16
## locsize01100,000 - + -0.20983 0.04301 -4.88 1.1e-06
## locsize0115,000 - 99,999 -0.14261 0.05262 -2.71 0.0067
## locsize012,500 - 14,999 -0.13227 0.05937 -2.23 0.0259
## Log(scale) -0.35098 0.01425 -24.63 < 2e-16
##
## Scale= 0.704
##
## Log logistic distribution
## Loglik(model)= -26458.3 Loglik(intercept only)= -26580.6
## Chisq= 244.56 on 5 degrees of freedom, p= 8.1e-51
## Number of Newton-Raphson Iterations: 4
## n=7463 (12 observations deleted due to missingness)
# Log Normal
model_lognormal <- survreg(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas, dist = "lognormal")
summary(model_lognormal)
##
## Call:
## survreg(formula = Surv(time_exposed, event_occurred) ~ female +
## schooling + locsize01, data = mhas, dist = "lognormal")
## Value Std. Error z p
## (Intercept) 5.29451 0.04534 116.77 < 2e-16
## female 0.18573 0.03456 5.37 7.7e-08
## schooling 0.06491 0.00437 14.86 < 2e-16
## locsize01100,000 - + -0.24280 0.04898 -4.96 7.2e-07
## locsize0115,000 - 99,999 -0.15424 0.06015 -2.56 0.01
## locsize012,500 - 14,999 -0.13286 0.06789 -1.96 0.05
## Log(scale) 0.28647 0.01240 23.11 < 2e-16
##
## Scale= 1.33
##
## Log Normal distribution
## Loglik(model)= -26590.8 Loglik(intercept only)= -26710.3
## Chisq= 238.99 on 5 degrees of freedom, p= 1.3e-49
## Number of Newton-Raphson Iterations: 3
## n=7463 (12 observations deleted due to missingness)
## Part II Cox regression
###a.Estimate a non-parametric model via Cox regression. Add these HRs to the table.
cox_model <- coxph(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas)
summary(cox_model)
## Call:
## coxph(formula = Surv(time_exposed, event_occurred) ~ female +
## schooling + locsize01, data = mhas)
##
## n= 7463, number of events= 3894
## (12 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## female -0.185390 0.830780 0.032496 -5.705 1.16e-08 ***
## schooling -0.065714 0.936399 0.004442 -14.795 < 2e-16 ***
## locsize01100,000 - + 0.216463 1.241677 0.046011 4.705 2.54e-06 ***
## locsize0115,000 - 99,999 0.163791 1.177968 0.056379 2.905 0.00367 **
## locsize012,500 - 14,999 0.145022 1.156064 0.063324 2.290 0.02201 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## female 0.8308 1.2037 0.7795 0.8854
## schooling 0.9364 1.0679 0.9283 0.9446
## locsize01100,000 - + 1.2417 0.8054 1.1346 1.3589
## locsize0115,000 - 99,999 1.1780 0.8489 1.0547 1.3156
## locsize012,500 - 14,999 1.1561 0.8650 1.0211 1.3088
##
## Concordance= 0.578 (se = 0.005 )
## Likelihood ratio test= 257.1 on 5 df, p=<2e-16
## Wald test = 237.3 on 5 df, p=<2e-16
## Score (logrank) test = 238.8 on 5 df, p=<2e-16
coef_cox <- coef(cox_model)
hr_cox <- exp(coef_cox)
hr_cox
## female schooling locsize01100,000 - +
## 0.8307803 0.9363990 1.2416773
## locsize0115,000 - 99,999 locsize012,500 - 14,999
## 1.1779677 1.1560645
##III. Prediction, diagnostics, time-varying covariates.
### a. Choosing whatever model you prefer from I or II, assess whether the proportionality of hazards assumption is violated for FEMALE and SCHOOLING (separately). Does it hold for either/both?
cox_model <- coxph(Surv(time_exposed, event_occurred) ~ female + schooling + locsize01, data = mhas)
summary(cox_model)
## Call:
## coxph(formula = Surv(time_exposed, event_occurred) ~ female +
## schooling + locsize01, data = mhas)
##
## n= 7463, number of events= 3894
## (12 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## female -0.185390 0.830780 0.032496 -5.705 1.16e-08 ***
## schooling -0.065714 0.936399 0.004442 -14.795 < 2e-16 ***
## locsize01100,000 - + 0.216463 1.241677 0.046011 4.705 2.54e-06 ***
## locsize0115,000 - 99,999 0.163791 1.177968 0.056379 2.905 0.00367 **
## locsize012,500 - 14,999 0.145022 1.156064 0.063324 2.290 0.02201 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## female 0.8308 1.2037 0.7795 0.8854
## schooling 0.9364 1.0679 0.9283 0.9446
## locsize01100,000 - + 1.2417 0.8054 1.1346 1.3589
## locsize0115,000 - 99,999 1.1780 0.8489 1.0547 1.3156
## locsize012,500 - 14,999 1.1561 0.8650 1.0211 1.3088
##
## Concordance= 0.578 (se = 0.005 )
## Likelihood ratio test= 257.1 on 5 df, p=<2e-16
## Wald test = 237.3 on 5 df, p=<2e-16
## Score (logrank) test = 238.8 on 5 df, p=<2e-16
ph_test <- cox.zph(cox_model)
print(ph_test)
## chisq df p
## female 0.10 1 0.7517
## schooling 9.11 1 0.0025
## locsize01 2.07 3 0.5583
## GLOBAL 13.60 5 0.0184
plot(ph_test)
###b. Estimate a “Cox-stratified” model using a new dummy variable that divides people into whether they have less than 9 vs. 9 or more years of formal schooling as the stratification variable. Would your conclusions about the effect of education (linearly anyway) ###change doing this?
mhas$schooling_strata <- ifelse(mhas$schooling < 9, "<9 years", ">=9 years")
cox_stratified_model <- coxph(Surv(time_exposed, event_occurred) ~ female + locsize01 + strata(schooling_strata), data = mhas)
summary(cox_stratified_model)
## Call:
## coxph(formula = Surv(time_exposed, event_occurred) ~ female +
## locsize01 + strata(schooling_strata), data = mhas)
##
## n= 7463, number of events= 3894
## (12 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## female -0.14619 0.86399 0.03235 -4.519 6.2e-06 ***
## locsize01100,000 - + 0.10516 1.11089 0.04508 2.333 0.0197 *
## locsize0115,000 - 99,999 0.08491 1.08862 0.05601 1.516 0.1296
## locsize012,500 - 14,999 0.10939 1.11560 0.06327 1.729 0.0838 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## female 0.864 1.1574 0.8109 0.9205
## locsize01100,000 - + 1.111 0.9002 1.0169 1.2135
## locsize0115,000 - 99,999 1.089 0.9186 0.9754 1.2149
## locsize012,500 - 14,999 1.116 0.8964 0.9855 1.2629
##
## Concordance= 0.521 (se = 0.005 )
## Likelihood ratio test= 24.38 on 4 df, p=7e-05
## Wald test = 24.38 on 4 df, p=7e-05
## Score (logrank) test = 24.42 on 4 df, p=7e-05
### c.“Recover” and graph the baseline hazard under the Cox model.
# Recover the baseline hazard
baseline_hazard <- basehaz(cox_model, centered = FALSE)
# Plot the baseline hazard
plot(baseline_hazard$time, baseline_hazard$hazard, type = "l", xlab = "Time", ylab = "Baseline Hazard", main = "Baseline Hazard under Cox Model")
### d.Estimate predicted survival curves or smoothed hazards (whichever you prefer or find more intuitive) for
mhas$locsize01 <- as.factor(mhas$locsize01)
surv_men_0 <- survfit(cox_model, newdata = data.frame(female = 0, schooling = 0, locsize01 = factor("100,000 - +", levels = levels(mhas$locsize01))))
surv_women_0 <- survfit(cox_model, newdata = data.frame(female = 1, schooling = 0, locsize01 = factor("100,000 - +", levels = levels(mhas$locsize01))))
surv_men_6 <- survfit(cox_model, newdata = data.frame(female = 0, schooling = 6, locsize01 = factor("100,000 - +", levels = levels(mhas$locsize01))))
surv_women_6 <- survfit(cox_model, newdata = data.frame(female = 1, schooling = 6, locsize01 = factor("100,000 - +", levels = levels(mhas$locsize01))))
plot(surv_men_0, xlab = "Time", ylab = "Survival Probability", col = "blue", lwd = 2, main = "Survival Curve", conf.int = FALSE)
lines(surv_women_0, col = "red", lwd = 2, conf.int = FALSE)
lines(surv_men_6, col = "green", lwd = 2, conf.int = FALSE)
lines(surv_women_6, col = "orange", lwd = 2, conf.int = FALSE)
legend("bottomleft",
legend = c("male_0", "female_0", "male_6", "female_6"),
col = c("blue", "red", "green", "orange"),
lwd = 2)