## Homework 7: Generalized Linear Models
(GLMs)
# load data
data <- dvisits %>% filter(hospdays > 0)
# explore the response variable
hist(data$hospdays)
# fit poisson regression model
poisson_model <- glm(hospdays ~ age + sex + illness, data = data, family = poisson)
summary(poisson_model)
##
## Call:
## glm(formula = hospdays ~ age + sex + illness, family = poisson,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.412074 0.034064 41.453 < 2e-16 ***
## age 1.738076 0.061077 28.457 < 2e-16 ***
## sex -0.115799 0.025219 -4.592 4.40e-06 ***
## illness 0.059204 0.007921 7.474 7.76e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8641.7 on 698 degrees of freedom
## Residual deviance: 7577.5 on 695 degrees of freedom
## AIC: 10060
##
## Number of Fisher Scoring iterations: 5
# fit negative binomial model
negbin_model <- glm.nb(hospdays ~ age + sex + illness, data = data)
summary(negbin_model)
##
## Call:
## glm.nb(formula = hospdays ~ age + sex + illness, data = data,
## init.theta = 1.111056706, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.42968 0.09470 15.096 < 2e-16 ***
## age 1.75183 0.18785 9.325 < 2e-16 ***
## sex -0.20117 0.07962 -2.527 0.01152 *
## illness 0.07109 0.02622 2.711 0.00671 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.1111) family taken to be 1)
##
## Null deviance: 850.19 on 698 degrees of freedom
## Residual deviance: 740.03 on 695 degrees of freedom
## AIC: 4577.9
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.1111
## Std. Err.: 0.0616
##
## 2 x log-likelihood: -4567.8660
# fit gamma model
gamma_model <- glm(hospdays ~ age + sex + illness, data = data, family = Gamma(link = "log"))
summary(gamma_model)
##
## Call:
## glm(formula = hospdays ~ age + sex + illness, family = Gamma(link = "log"),
## data = data)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.43080 0.13509 10.592 < 2e-16 ***
## age 1.75668 0.27138 6.473 1.81e-10 ***
## sex -0.20901 0.11476 -1.821 0.0690 .
## illness 0.07159 0.03808 1.880 0.0606 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 2.115837)
##
## Null deviance: 902.32 on 698 degrees of freedom
## Residual deviance: 791.16 on 695 degrees of freedom
## AIC: 4510.7
##
## Number of Fisher Scoring iterations: 7
# compare AIC across models
AIC(poisson_model, negbin_model, gamma_model)
## df AIC
## poisson_model 4 10059.539
## negbin_model 5 4577.866
## gamma_model 5 4510.712
# compare BIC across models
BIC(poisson_model, negbin_model, gamma_model)
## df BIC
## poisson_model 4 10077.738
## negbin_model 5 4600.614
## gamma_model 5 4533.461
The Gamma model has the lowest AIC/BIC, indicating it is the best model and suggesting that a continuous distribution fits better.