HL-test on lowbwt data
lowbwt <- read.csv("lowbwt.csv")
library(uwIntroStats) # has dummy() and other useful functions. Available at http://www.emersonstatistics.com/R/
library(ResourceSelection) # has hoslem.test() function. Available on CRAN
library(Hmisc)
library(dplyr)
lowbwt$LOW <- relevel(lowbwt$LOW, ref = "BWT>2500g")
lowbwt <- mutate(lowbwt, lownum = as.numeric(LOW)-1)
lowbwt$RACE <- relevel(lowbwt$RACE, ref = "White")
fit <- glm(lownum ~ LWT + dummy(RACE),data=lowbwt, family=binomial)
summary(fit)
##
## Call:
## glm(formula = lownum ~ LWT + dummy(RACE), family = binomial,
## data = lowbwt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3491 -0.8919 -0.7196 1.2526 2.0993
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.805753 0.845167 0.953 0.3404
## LWT -0.015223 0.006439 -2.364 0.0181 *
## dummy(RACE)Black vs White 1.081066 0.488052 2.215 0.0268 *
## dummy(RACE)Other vs White 0.480603 0.356674 1.347 0.1778
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 223.26 on 185 degrees of freedom
## AIC: 231.26
##
## Number of Fisher Scoring iterations: 4
hl <- hoslem.test(fit$y, fitted(fit), g=10) # note: needs fit$y even if response variable is otherwise labelled.
hl
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: fit$y, fitted(fit)
## X-squared = 9.8031, df = 8, p-value = 0.2791
cbind(hl$expected, hl$observed)
## yhat0 yhat1 y0 y1
## [0.0589,0.168] 16.633482 2.366518 17 2
## (0.168,0.223] 16.751683 4.248317 17 4
## (0.223,0.254] 12.969796 4.030204 12 5
## (0.254,0.271] 13.990085 5.009915 15 4
## (0.271,0.296] 13.579677 5.420323 11 8
## (0.296,0.333] 12.894935 6.105065 13 6
## (0.333,0.368] 14.768642 8.231358 17 6
## (0.368,0.406] 8.561261 5.438739 12 2
## (0.406,0.467] 10.691350 8.308650 7 12
## (0.467,0.597] 9.159089 9.840911 9 10
HL-test on ICU data
ICU <- read.csv("icu.csv")
ICU$SYS4 <- cut2(ICU$SYS, g=4) # create quartiles for SYS
ICU$SYS4gp <- ifelse(as.numeric(ICU$SYS4) == 4, 1, 0) # binary code for quartile 4 = 1
ICU$LOCD <- ifelse(ICU$LOC==0, 0, 1) # binary code for no coma vs coma/stupor = 1
table(ICU$SYS4gp)
##
## 0 1
## 153 47
fit <- glm(STA ~ AGE + CAN + SYS4gp + TYP + LOCD, data= ICU, family = binomial)
summary(fit)
##
## Call:
## glm(formula = STA ~ AGE + CAN + SYS4gp + TYP + LOCD, family = binomial,
## data = ICU)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.30702 -0.55912 -0.31767 -0.09501 2.46999
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.68053 1.32066 -5.058 4.23e-07 ***
## AGE 0.04063 0.01286 3.159 0.00158 **
## CAN 2.07875 0.82957 2.506 0.01222 *
## SYS4gp -1.51115 0.72047 -2.097 0.03595 *
## TYP 2.90668 0.92574 3.140 0.00169 **
## LOCD 3.96553 0.98203 4.038 5.39e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 200.16 on 199 degrees of freedom
## Residual deviance: 133.52 on 194 degrees of freedom
## AIC: 145.52
##
## Number of Fisher Scoring iterations: 6
hl <- hoslem.test(fit$y, fitted(fit), g=10)
hl
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: fit$y, fitted(fit)
## X-squared = 4.0018, df = 8, p-value = 0.857
cbind(hl$expected, hl$observed)
## yhat0 yhat1 y0 y1
## [0.00258,0.0107] 19.903106 0.09689425 20 0
## (0.0107,0.0297] 19.614705 0.38529498 20 0
## (0.0297,0.0492] 22.003897 0.99610307 21 2
## (0.0492,0.0671] 16.022829 0.97717147 17 0
## (0.0671,0.108] 19.168652 1.83134826 19 2
## (0.108,0.167] 16.381883 2.61811685 17 2
## (0.167,0.224] 16.060362 3.93963849 15 5
## (0.224,0.312] 14.498863 5.50113734 16 4
## (0.312,0.451] 12.415781 7.58421931 12 8
## (0.451,0.962] 3.929924 16.07007599 3 17