DEM 7223 - Event History Analysis - Discrete Time Hazard Model
Import the Dataset
Code
tracts.texas.pharmacy <- import("tracts.texas.pharmacy.rds") %>%
mutate(phy_type=ifelse(phy_type %in%c("Community Indep","Community Independent"), "Community Indep",
ifelse(phy_type=="", "Unknown",
ifelse(phy_type=="Comm Multi 5","Community Multi",
ifelse(phy_type%in%c("Hospital (Independent)"),"Indep Hospital",
ifelse(phy_type=="Hospital (Multiple/Chain)", "Multi Hospital",
ifelse(phy_type=="Ambulatory Surgical Center","Amb Surg Center",phy_type))))))) %>%
mutate(days=(as.numeric(status_date - lic_orig_date)), rurality=as.factor(rurality)) %>%
mutate(Otherpop=totPop19 - rowSums(.[39:41]),
majoritypop= pmax(whitepop,blackpop,Hispop,Otherpop, na.rm = TRUE),
pctminority=round((totPop19-majoritypop)/totPop19*100,1),
phy_type=as.factor(ifelse(phy_type %in%c("Community Indep","Community Multi"),phy_type, NA)),
rurality=as.factor(rurality)) %>%
filter(complete.cases(.))
Age at Pharmacy Closure in Texas
This analysis uses pharmacies’ age-at-closure as the outcome variable. I calculated age at pharmacy closure as the difference between the Pharmacy date of closure (Status_date) and their first date of operation(lic_orig_date). However, in the case of active pharmacies, I calculated their age using the censoring date of pharmacy closure follow-up. In this case, the latest follow-up date was December 31, 2021
Code
tracts.texas.pharmacy <- tracts.texas.pharmacy %>%
mutate(death_age =ifelse(age_months>0, age_months, NA),
d.event = ifelse(status == "Closed", 1, 0)) %>%
filter(complete.cases(.))
head(tracts.texas.pharmacy)
To use a discrete-time variable rather than a continuous time variable to determine the hazard rate of pharmacy closure, I will transform my data from a case-duration data format to a person-period format.
Creating a Person Period Dataset
Code
psn_prd <- survSplit(Surv(death_age,d.event)~ .,
data=tracts.texas.pharmacy,
cut = seq(0,120,12),
episode = "year_closure") %>%
mutate(year=year_closure-1)
check <- psn_prd %>%
group_by(lic_nbr) %>%
arrange(year, .by_group = TRUE) %>%
select(lic_nbr, death_age, d.event,year, phy_type, rurality) %>%
knitr::kable()
head(check, n=40)
[1] "| lic_nbr| death_age| d.event| year|phy_type |rurality |"
[2] "|-------:|---------:|-------:|----:|:---------------|:---------|"
[3] "| 6761| 12| 0| 1|Community Indep |Non-Metro |"
[4] "| 6761| 24| 0| 2|Community Indep |Non-Metro |"
[5] "| 6761| 36| 0| 3|Community Indep |Non-Metro |"
[6] "| 6761| 48| 0| 4|Community Indep |Non-Metro |"
[7] "| 6761| 60| 0| 5|Community Indep |Non-Metro |"
[8] "| 6761| 72| 0| 6|Community Indep |Non-Metro |"
[9] "| 6761| 84| 0| 7|Community Indep |Non-Metro |"
[10] "| 6761| 96| 0| 8|Community Indep |Non-Metro |"
[11] "| 6761| 108| 0| 9|Community Indep |Non-Metro |"
[12] "| 6761| 120| 0| 10|Community Indep |Non-Metro |"
[13] "| 6761| 468| 0| 11|Community Indep |Non-Metro |"
[14] "| 6781| 12| 0| 1|Community Indep |Metro |"
[15] "| 6781| 24| 0| 2|Community Indep |Metro |"
[16] "| 6781| 36| 0| 3|Community Indep |Metro |"
[17] "| 6781| 48| 0| 4|Community Indep |Metro |"
[18] "| 6781| 60| 0| 5|Community Indep |Metro |"
[19] "| 6781| 72| 0| 6|Community Indep |Metro |"
[20] "| 6781| 84| 0| 7|Community Indep |Metro |"
[21] "| 6781| 96| 0| 8|Community Indep |Metro |"
[22] "| 6781| 108| 0| 9|Community Indep |Metro |"
[23] "| 6781| 120| 0| 10|Community Indep |Metro |"
[24] "| 6781| 468| 0| 11|Community Indep |Metro |"
[25] "| 6782| 12| 0| 1|Community Indep |Metro |"
[26] "| 6782| 24| 0| 2|Community Indep |Metro |"
[27] "| 6782| 36| 0| 3|Community Indep |Metro |"
[28] "| 6782| 48| 0| 4|Community Indep |Metro |"
[29] "| 6782| 60| 0| 5|Community Indep |Metro |"
[30] "| 6782| 72| 0| 6|Community Indep |Metro |"
[31] "| 6782| 84| 0| 7|Community Indep |Metro |"
[32] "| 6782| 96| 0| 8|Community Indep |Metro |"
[33] "| 6782| 108| 0| 9|Community Indep |Metro |"
[34] "| 6782| 120| 0| 10|Community Indep |Metro |"
[35] "| 6782| 436| 1| 11|Community Indep |Metro |"
[36] "| 6806| 12| 0| 1|Community Indep |Non-Metro |"
[37] "| 6806| 24| 0| 2|Community Indep |Non-Metro |"
[38] "| 6806| 36| 0| 3|Community Indep |Non-Metro |"
[39] "| 6806| 48| 0| 4|Community Indep |Non-Metro |"
[40] "| 6806| 60| 0| 5|Community Indep |Non-Metro |"
Discriptive Analysis- Hazard Function For Pharmacy Closure
Code
psn_prd %>%
group_by(year) %>%
summarise(prop_closure=mean(d.event, na.rm=T)) %>%
ggplot(aes(x=year, y=prop_closure))+
geom_line()+
theme_minimal()+
labs(title = "Hazard for Pharmacy Closure",
caption = "Source: Texas Pharmacy Board, 2021 \n Calculations by Authors",
x = "Years",
y = "Proportion of Pharmacy Closure")+
theme(axis.line = element_line(linetype = "solid"),
axis.ticks = element_line(linetype = "blank"),
panel.grid.major = element_line(colour = "gray80"),
panel.grid.minor = element_line(colour = "gray94",
linetype = "blank"), axis.title = element_text(family = "serif",
face = "bold"), axis.text = element_text(face = "bold"),
plot.title = element_text(family = "serif",
size = 14, face = "bold"), legend.text = element_text(face = "bold",
family = "serif"), legend.title = element_text(face = "bold",
family = "serif"), panel.background = element_rect(fill = "gray97"),
legend.key = element_rect(fill = NA,
linetype = "solid"), legend.background = element_rect(linetype = "solid"))
Code
psn_prd %>%
group_by(year, phy_type) %>%
summarise(prop_closure=mean(d.event, na.rm=T)) %>%
ggplot(aes(x=year, y=prop_closure))+
geom_line(aes(group=phy_type, color=phy_type))+
theme_minimal()+
labs(title = "Hazard for Pharmacy Closure by Pharmacy Types",
caption = "Source: Texas Pharmacy Board, 2021 \n Calculations by Authors",
x = "Years",
y = "Proportion of Pharmacy Closure")+
theme(axis.line = element_line(linetype = "solid"),
axis.ticks = element_line(linetype = "blank"),
panel.grid.major = element_line(colour = "gray80"),
panel.grid.minor = element_line(colour = "gray94",
linetype = "blank"), axis.title = element_text(family = "serif",
face = "bold"), axis.text = element_text(face = "bold"),
plot.title = element_text(family = "serif",
size = 14, face = "bold"), legend.text = element_text(face = "bold",
family = "serif"), legend.title = element_text(face = "bold",
family = "serif"), panel.background = element_rect(fill = "gray97"),
legend.key = element_rect(fill = NA,
linetype = "solid"), legend.background = element_rect(linetype = "solid")) +labs(colour = "Types of Pharmacy", fill = "Types of Pharmacy")
Discrete Time model - General Model
Code
Call:
glm(formula = d.event ~ factor(year_closure) - 1 + phy_type +
rurality, family = binomial(link = "cloglog"), data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.7165 -0.2492 -0.1144 -0.0993 3.5808
Coefficients:
Estimate Std. Error z value Pr(>|z|)
factor(year_closure)2 -3.39377 0.09255 -36.670 < 2e-16 ***
factor(year_closure)3 -2.93653 0.07793 -37.680 < 2e-16 ***
factor(year_closure)4 -3.24184 0.09599 -33.772 < 2e-16 ***
factor(year_closure)5 -3.48165 0.11510 -30.250 < 2e-16 ***
factor(year_closure)6 -3.19765 0.10622 -30.103 < 2e-16 ***
factor(year_closure)7 -3.32909 0.11926 -27.914 < 2e-16 ***
factor(year_closure)8 -3.47186 0.13374 -25.961 < 2e-16 ***
factor(year_closure)9 -3.83789 0.16779 -22.873 < 2e-16 ***
factor(year_closure)10 -3.64185 0.15941 -22.846 < 2e-16 ***
factor(year_closure)11 -3.77825 0.17816 -21.207 < 2e-16 ***
factor(year_closure)12 -1.35997 0.06255 -21.743 < 2e-16 ***
phy_typeCommunity Multi -1.83080 0.07238 -25.295 < 2e-16 ***
ruralityNon-Metro -0.74167 0.11826 -6.271 3.58e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 107164 on 54185 degrees of freedom
Residual deviance: 9435 on 54172 degrees of freedom
AIC: 9461
Number of Fisher Scoring iterations: 8
Code
# Plotting the hazard Function on a probability Scale
haz_time <- data.frame(haz<-1/(1+exp(-coef(fit.0))),
time<-seq(1,13,1))
haz_time%>%
ggplot(aes(x=time, y=haz))+
geom_line()+
theme_minimal()+
labs(title = "Hazard for Pharmacy Closure on Probability Scale",
caption = "Source: Texas Pharmacy Board, 2021 \n Calculations by Authors",
x = "Years",
y = "Risk of Pharmacy Closure")+
theme(axis.line = element_line(linetype = "solid"),
axis.ticks = element_line(linetype = "blank"),
panel.grid.major = element_line(colour = "gray80"),
panel.grid.minor = element_line(colour = "gray94",
linetype = "blank"), axis.title = element_text(family = "serif",
face = "bold"), axis.text = element_text(face = "bold"),
plot.title = element_text(family = "serif",
size = 14, face = "bold"), legend.text = element_text(face = "bold",
family = "serif"), legend.title = element_text(face = "bold",
family = "serif"), panel.background = element_rect(fill = "gray97"),
legend.key = element_rect(fill = NA,
linetype = "solid"), legend.background = element_rect(linetype = "solid"))
Alternative Time Specifications- Constant Term for Time
Code
Call:
glm(formula = d.event ~ phy_type + rurality, family = binomial(link = "cloglog"),
data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.3010 -0.3010 -0.1269 -0.1269 3.3221
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.09443 0.03508 -88.208 < 2e-16 ***
phy_typeCommunity Multi -1.72717 0.07161 -24.118 < 2e-16 ***
ruralityNon-Metro -0.69470 0.11818 -5.878 4.15e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 10859 on 54184 degrees of freedom
Residual deviance: 10100 on 54182 degrees of freedom
AIC: 10106
Number of Fisher Scoring iterations: 7
Alternative Time Specifications- Linear Term for Time
Code
Call:
glm(formula = d.event ~ year_closure + phy_type + rurality, family = binomial(link = "cloglog"),
data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4134 -0.2507 -0.1406 -0.1086 3.5027
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.793671 0.073610 -51.538 < 2e-16 ***
year_closure 0.111164 0.009462 11.748 < 2e-16 ***
phy_typeCommunity Multi -1.823498 0.072067 -25.303 < 2e-16 ***
ruralityNon-Metro -0.738551 0.118218 -6.247 4.17e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 10859.5 on 54184 degrees of freedom
Residual deviance: 9964.2 on 54181 degrees of freedom
AIC: 9972.2
Number of Fisher Scoring iterations: 7
Alternative Time Specifications- Quadratic Term for Time
Code
Call:
glm(formula = d.event ~ year_closure + I(year_closure^2) + phy_type +
rurality, family = binomial(link = "cloglog"), data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5680 -0.2312 -0.1318 -0.0987 3.5335
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.949865 0.130161 -14.980 < 2e-16 ***
year_closure -0.587908 0.046118 -12.748 < 2e-16 ***
I(year_closure^2) 0.049864 0.003238 15.401 < 2e-16 ***
phy_typeCommunity Multi -1.820137 0.072255 -25.191 < 2e-16 ***
ruralityNon-Metro -0.739680 0.118171 -6.259 3.86e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 10859.5 on 54184 degrees of freedom
Residual deviance: 9729.2 on 54180 degrees of freedom
AIC: 9739.2
Number of Fisher Scoring iterations: 7
Alternative Time Specifications- Cubic Term for Time
Code
Call:
glm(formula = d.event ~ year_closure + I(year^2) + I(year_closure^3) +
phy_type + rurality, family = binomial(link = "cloglog"),
data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6743 -0.2535 -0.1272 -0.0967 3.5970
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.307683 0.287835 -18.440 < 2e-16 ***
year_closure 1.026159 0.118920 8.629 < 2e-16 ***
I(year^2) -0.326203 0.027780 -11.742 < 2e-16 ***
I(year_closure^3) 0.017930 0.001318 13.605 < 2e-16 ***
phy_typeCommunity Multi -1.826392 0.072321 -25.254 < 2e-16 ***
ruralityNon-Metro -0.739956 0.118216 -6.259 3.87e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 10859.5 on 54184 degrees of freedom
Residual deviance: 9528.7 on 54179 degrees of freedom
AIC: 9540.7
Number of Fisher Scoring iterations: 8
Alternative Time Specifications- Spline Term for Time
Code
Call:
glm(formula = d.event ~ ns(year_closure, df = 3) + phy_type +
rurality, family = binomial(link = "cloglog"), data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6272 -0.2521 -0.1277 -0.0972 3.6189
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.37831 0.08137 -41.520 < 2e-16 ***
ns(year_closure, df = 3)1 -1.91632 0.15679 -12.222 < 2e-16 ***
ns(year_closure, df = 3)2 1.30100 0.19847 6.555 5.56e-11 ***
ns(year_closure, df = 3)3 1.28933 0.07492 17.210 < 2e-16 ***
phy_typeCommunity Multi -1.82304 0.07228 -25.221 < 2e-16 ***
ruralityNon-Metro -0.73900 0.11820 -6.252 4.04e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 10859.5 on 54184 degrees of freedom
Residual deviance: 9612.5 on 54179 degrees of freedom
AIC: 9624.5
Number of Fisher Scoring iterations: 8
Generate Hazard Plots for different cases
Code
newdat <- expand.grid(d.event=unique(psn_prd$d.event),
year_closure=unique(psn_prd$year_closure),
phy_type=unique(psn_prd$phy_type),
rurality=unique(psn_prd$rurality))%>%
filter(complete.cases(.))
newdat$haz_0<-as.numeric(predict(fit.0, newdata=newdat, type="response"))
newdat$haz_1<-as.numeric(predict(fit.c, newdata=newdat, type="response"))
newdat$haz_2<-as.numeric(predict(fit.l, newdata=newdat, type="response"))
newdat$haz_3<-as.numeric(predict(fit.q, newdata=newdat, type="response"))
newdat$haz_4<-as.numeric(predict(fit.sp, newdata=newdat, type="response"))
head(newdat, n=15)
Code
out <- newdat %>%
pivot_longer(cols = c(-d.event,-year_closure,-phy_type,-rurality),
names_to = c( ".value","hazard"),
names_sep ="_") %>%
mutate(hazard= car::Recode(hazard, recodes="0='General';1='Constant'; 2='Linear';
3='Quadratic'; 4='Spline'; else= NA", as.factor=T))
out %>%
ggplot(aes(x = year_closure-1, y=haz ))+
geom_line(aes(group=rurality, color=rurality) )+
theme_minimal()+
labs(title = "Alternative Model Specifications-Hazard Function for Pharmacy Closure",
caption = "Source: Texas Pharmacy Board, 2021 \n Calculations by Authors",
x = "Years",
y = "Risk of Pharmacy Closure")+
theme(axis.line = element_line(linetype = "solid"),
axis.ticks = element_line(linetype = "blank"),
panel.grid.major = element_line(colour = "gray80"),
panel.grid.minor = element_line(colour = "gray94",
linetype = "blank"), axis.title = element_text(family = "serif",
face = "bold"), axis.text = element_text(face = "bold"),
plot.title = element_text(family = "serif",
size = 14, face = "bold"), legend.text = element_text(face = "bold",
family = "serif"), legend.title = element_text(face = "bold",
family = "serif"), panel.background = element_rect(fill = "gray97"),
legend.key = element_rect(fill = NA,
linetype = "solid"), legend.background = element_rect(linetype = "solid")) +labs(colour = "Pharmacy Rurality", fill = "Pharmacy Rurality")+
facet_wrap(~hazard)+
labs(color="Rurality")
Code
AIC0<-AIC(fit.0)
AIC1<-AIC(fit.c)
AIC2<-AIC(fit.l)
AIC3<-AIC(fit.q)
AIC4<-AIC(fit.sp)
AIC <- c(AIC0,AIC1,AIC2,AIC3,AIC4)
Mod <-c("General", "Constant", "linear", "Quadratic", "Spline")
AICS<-data.frame(AIC,Mod) %>%
mutate(AIC=round(AIC,2),DeltaAIC=round(AIC-AIC0),1) %>%
select(Mod, AIC,DeltaAIC) %>%
knitr::kable( caption = "Relative AIC for alternative time specifications")
head(AICS, n=20)
[1] "Table: Relative AIC for alternative time specifications"
[2] ""
[3] "|Mod | AIC| DeltaAIC|"
[4] "|:---------|--------:|--------:|"
[5] "|General | 9460.99| 0|"
[6] "|Constant | 10106.00| 645|"
[7] "|linear | 9972.23| 511|"
[8] "|Quadratic | 9739.21| 278|"
[9] "|Spline | 9624.50| 164|"
Discrete Time model - General Model With Interaction
Code
Call:
glm(formula = d.event ~ factor(year_closure) - 1 + phy_type +
rurality + phy_type * rurality, family = binomial(link = "cloglog"),
data = psn_prd)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.7211 -0.2518 -0.1121 -0.0978 3.4440
Coefficients:
Estimate Std. Error z value Pr(>|z|)
factor(year_closure)2 -3.38336 0.09256 -36.552 < 2e-16
factor(year_closure)3 -2.92576 0.07795 -37.532 < 2e-16
factor(year_closure)4 -3.23063 0.09601 -33.650 < 2e-16
factor(year_closure)5 -3.46984 0.11512 -30.142 < 2e-16
factor(year_closure)6 -3.18556 0.10624 -29.984 < 2e-16
factor(year_closure)7 -3.31663 0.11928 -27.805 < 2e-16
factor(year_closure)8 -3.45943 0.13375 -25.865 < 2e-16
factor(year_closure)9 -3.82512 0.16781 -22.795 < 2e-16
factor(year_closure)10 -3.62904 0.15942 -22.764 < 2e-16
factor(year_closure)11 -3.76543 0.17818 -21.133 < 2e-16
factor(year_closure)12 -1.34707 0.06265 -21.503 < 2e-16
phy_typeCommunity Multi -1.88414 0.07584 -24.842 < 2e-16
ruralityNon-Metro -0.92218 0.14296 -6.450 1.12e-10
phy_typeCommunity Multi:ruralityNon-Metro 0.70218 0.25478 2.756 0.00585
factor(year_closure)2 ***
factor(year_closure)3 ***
factor(year_closure)4 ***
factor(year_closure)5 ***
factor(year_closure)6 ***
factor(year_closure)7 ***
factor(year_closure)8 ***
factor(year_closure)9 ***
factor(year_closure)10 ***
factor(year_closure)11 ***
factor(year_closure)12 ***
phy_typeCommunity Multi ***
ruralityNon-Metro ***
phy_typeCommunity Multi:ruralityNon-Metro **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 107164 on 54185 degrees of freedom
Residual deviance: 9428 on 54171 degrees of freedom
AIC: 9456
Number of Fisher Scoring iterations: 8