These are methods that deliberately estimate biased regression coefficients, which can lead to better predictions than those obtained with traditional (unbiased) methods.
Predictions from multivariable models for future subjects improve if they are shrunk towards the average, because shrunken predictions have lower variance despite being slightly biased.
Uniform shrinkage: Shrinkage after estimation
Penalized maximum likelihood: Shrinkage during estimation
Least absolute shrinkage and selection operator (Lasso): Shrinkage for selection (zeroing of coefficients)
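As a toy illustration of the idea (the shrinkage factor and linear predictor values below are made up, not taken from the GUSTO data), shrinking a linear predictor towards its average with a factor s < 1 makes extreme predictions less extreme:
## Toy example: uniform shrinkage pulls predictions towards their average
s  <- 0.85                       # hypothetical shrinkage factor
lp <- c(-2.5, -1.0, 0.5)         # hypothetical linear predictor values
mean(lp) + s * (lp - mean(lp))   # shrunk values lie closer to mean(lp) = -1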
## Load foreign package
library(foreign)
## Load gusto data (n = 785)
gusto <- read.spss("./gusto/sample4.sav", to.data.frame = TRUE)
## Lower case variable names
names(gusto) <- tolower(names(gusto))
## Standard logistic regression
res.logit <- glm(formula = day30 ~ sho + a65 + hig + dia + hyp + hrt + ttr + sex,
                 family = binomial(link = "logit"),
                 data = gusto)
res.df <- data.frame(Original = coef(res.logit)[-1])
## Null model
res.null <- glm(formula = day30 ~ 1,
                family = binomial(link = "logit"),
                data = gusto)
## Show results
round(res.df, 2)
Original
sho 1.12
a65 1.49
hig 0.84
dia 0.43
hyp 0.99
hrt 0.96
ttr 0.59
sex 0.07
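For reference, the predicted 30-day mortality risks of the unshrunken model can be inspected directly; a minimal sketch using predict():
## Distribution of predicted 30-day mortality risks from the original model
summary(predict(res.logit, type = "response"))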
Heuristic method

The heuristic shrinkage factor of van Houwelingen and Le Cessie is (model chi-square - model df) / model chi-square, where the model chi-square is the likelihood-ratio statistic of the fitted model against the null model.
## -2 log likelihood difference between the null model and full model
model.chisq <- deviance(res.null) - deviance(res.logit)
## Additional degrees of freedom used in the full model
model.df <- df.residual(res.null) - df.residual(res.logit)
## Shrinkage factor
shrinkage.factor <- (model.chisq - model.df) / model.chisq
shrinkage.factor
[1] 0.8722
## Show results
res.df$Shrunk.heuristic <- res.df$Original * shrinkage.factor
round(res.df, 2)
Original Shrunk.heuristic
sho 1.12 0.97
a65 1.49 1.30
hig 0.84 0.74
dia 0.43 0.38
hyp 0.99 0.86
hrt 0.96 0.84
ttr 0.59 0.51
sex 0.07 0.06
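Note that after uniform shrinkage the intercept should in principle be re-estimated so that the average predicted risk again matches the observed risk. A minimal sketch in base R, analogous to the offset trick used for the bootstrap method below:
## Re-estimate the intercept, with the heuristically shrunk linear predictor as offset
lp.heur <- drop(model.matrix(res.logit)[, -1] %*% res.df$Shrunk.heuristic)
coef(glm(res.logit$y ~ offset(lp.heur), family = binomial))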
Bootstrap method (from the website)
## Fit a model with 8 predictors
library(rms)
full8 <- lrm(day30 ~ sho + a65 + hig + dia + hyp + hrt + ttr + sex,
             data = gusto, x = TRUE, y = TRUE, linear.predictors = TRUE)
## Validate with 200 bootstraps
## validate with 200 bootstraps
val.full8 <- validate(full8, B = 200)
Divergence or singularity in 1 samples
val.full8
index.orig training test optimism index.corrected n
Dxy 0.5789 0.6076 0.5582 0.0494 0.5295 199
R2 0.1987 0.2241 0.1774 0.0467 0.1520 199
Intercept 0.0000 0.0000 -0.2438 0.2438 -0.2438 199
Slope 1.0000 1.0000 0.8795 0.1205 0.8795 199
Emax 0.0000 0.0000 0.0773 0.0773 0.0773 199
D 0.0785 0.0893 0.0697 0.0197 0.0588 199
U -0.0025 -0.0025 0.0025 -0.0050 0.0025 199
Q 0.0810 0.0919 0.0672 0.0247 0.0563 199
B 0.0546 0.0532 0.0559 -0.0028 0.0574 199
g 1.3091 1.4191 1.2138 0.2052 1.1039 199
gp 0.0734 0.0767 0.0694 0.0072 0.0662 199
## copy original model fit to shrunk model
full8.shrunk <- full8
## use result from bootstrapping to shrink coefficients (coefs multiplied by corrected slope)
full8.shrunk$coef <- full8.shrunk$coef * val.full8["Slope", "index.corrected"]
## Estimate new intercept, with shrunk lp as offset variable, i.e. coef fixed at unity
full8.shrunk$coef[1] <- lrm.fit(y = full8$y, offset = full8$x %*% full8.shrunk$coef[2:9])$coef[1]
## Show results
res.df$Shrunk.boot <- full8.shrunk$coef[-1]
round(res.df, 2)
Original Shrunk.heuristic Shrunk.boot
sho 1.12 0.97 0.98
a65 1.49 1.30 1.31
hig 0.84 0.74 0.74
dia 0.43 0.38 0.38
hyp 0.99 0.86 0.87
hrt 0.96 0.84 0.85
ttr 0.59 0.51 0.52
sex 0.07 0.06 0.06
Penalized maximum likelihood

Penalized maximum likelihood estimation shrinks the coefficients during estimation by maximizing the log-likelihood minus a penalty proportional to the sum of the squared (scaled) coefficients. It is a generalization of the ridge regression method used for linear regression.
## Fit logistic regression (actually already done in the previous step)
library(rms)
full8 <- lrm(formula = day30 ~ sho + a65 + hig + dia + hyp + hrt + ttr + sex,
             data = gusto,
             x = TRUE, y = TRUE) # Storing x and y is required by pentrace()
## Find the best penalty
p8 <- pentrace(fit = full8, penalty = 0:20)
p8
Best penalty:
penalty df
8 6.919
penalty df aic bic aic.c
0 8.000 46.62 9.29 46.43
1 7.838 46.91 10.34 46.73
2 7.686 47.14 11.28 46.97
3 7.542 47.32 12.13 47.16
4 7.406 47.46 12.90 47.30
5 7.276 47.55 13.60 47.40
6 7.152 47.61 14.24 47.46
7 7.033 47.64 14.83 47.50
8 6.919 47.65 15.37 47.51
9 6.810 47.64 15.87 47.50
10 6.705 47.61 16.32 47.48
11 6.604 47.56 16.75 47.43
12 6.507 47.50 17.14 47.37
13 6.413 47.42 17.50 47.30
14 6.322 47.34 17.84 47.22
15 6.234 47.24 18.16 47.13
16 6.149 47.14 18.45 47.03
17 6.066 47.03 18.72 46.92
18 5.986 46.91 18.98 46.80
19 5.909 46.78 19.22 46.68
20 5.833 46.65 19.44 46.55
plot(p8)
plot(p8, which = c("aic.c","aic","bic")[1:2], ylim = c(45, 48))
## Perform penalized maximum likelihood estimation
full8.penal <- update(full8, penalty = p8$penalty)
## Show results
res.df$Penalized <- coef(full8.penal)[-1]
round(res.df, 2)
Original Shrunk.heuristic Shrunk.boot Penalized
sho 1.12 0.97 0.98 1.17
a65 1.49 1.30 1.31 1.21
hig 0.84 0.74 0.74 0.72
dia 0.43 0.38 0.38 0.36
hyp 0.99 0.86 0.87 0.83
hrt 0.96 0.84 0.85 0.84
ttr 0.59 0.51 0.52 0.49
sex 0.07 0.06 0.06 0.11
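Unlike the two uniform methods, penalization shrinks each coefficient by its own amount; here the estimates for sho and sex even move away from zero. A quick sketch of the per-coefficient shrinkage ratios:
## Ratio of penalized to original estimates, per coefficient
round(res.df$Penalized / res.df$Original, 2)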
Lasso

## Load glmpath (L1 Regularization Path for Generalized Linear Models and Cox Proportional Hazards Model)
library(glmpath)
## model matrix x, and outcome vector y
gustosd <- list(x = full8$x, y = full8$y)
## Fit logistic models over a range of L1 penalties
gustopath <- glmpath(data = gustosd)
## Plot results
par(mar = c(5.1, 4.1, 4.1, 5.1))
plot(gustopath, type = c("coefficients", "aic", "bic")[1]) # coefficients
plot(gustopath, type = c("coefficients", "aic", "bic")[2]) # AIC
plot(gustopath, type = c("coefficients", "aic", "bic")[3]) # BIC
## List b.predictor (matrix of coefficient estimates from the predictor steps) with aic at each L1 bound
b.pred.aic <- cbind(gustopath$b.predictor, aic = gustopath$aic)
b.pred.aic
Intercept sho a65 hig dia hyp hrt ttr sex aic
1 -2.646 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 384.8
2 -2.696 0.0000 0.1196 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 382.6
3 -2.804 0.6054 0.3236 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 373.4
4 -2.809 0.4692 0.3244 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 373.4
5 -3.001 0.7958 0.5466 0.0000 0.0000 0.0000 0.2500 0.00000 0.00000 362.2
6 -3.012 0.7322 0.5512 0.0000 0.0000 0.0000 0.2404 0.00000 0.00000 362.1
7 -3.012 0.7322 0.5512 0.0000 0.0000 0.0000 0.2404 0.00000 0.00000 362.1
8 -3.441 1.0368 0.8613 0.3153 0.0000 0.0000 0.5180 0.00000 0.00000 347.1
9 -3.664 1.0361 0.9717 0.4005 0.0000 0.0000 0.5803 0.08943 0.00000 345.0
10 -3.915 1.0743 1.0803 0.5041 0.0000 0.3099 0.6730 0.20156 0.00000 341.8
11 -4.552 1.0880 1.3601 0.7280 0.3489 0.8746 0.8716 0.46091 0.00000 336.7
12 -4.912 1.1139 1.4840 0.8405 0.4310 0.9894 0.9583 0.58291 0.07163 338.2
## Show coefficients with the smallest AIC
b.pred.aic[which.min(gustopath$aic), , drop = FALSE]
Intercept sho a65 hig dia hyp hrt ttr sex aic
11 -4.552 1.088 1.36 0.728 0.3489 0.8746 0.8716 0.4609 0 336.7
## Extract coefficients for which the AIC is the smallest
res.df$Lasso <- gustopath$b.predictor[which.min(gustopath$aic), -1]
## Show the results (sex coefficient is set to zero)
round(res.df, 2)
Original Shrunk.heuristic Shrunk.boot Penalized Lasso
sho 1.12 0.97 0.98 1.17 1.09
a65 1.49 1.30 1.31 1.21 1.36
hig 0.84 0.74 0.74 0.72 0.73
dia 0.43 0.38 0.38 0.36 0.35
hyp 0.99 0.86 0.87 0.83 0.87
hrt 0.96 0.84 0.85 0.84 0.87
ttr 0.59 0.51 0.52 0.49 0.46
sex 0.07 0.06 0.06 0.11 0.00
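Instead of AIC, the L1 bound could also be chosen by cross-validation. A minimal sketch assuming the defaults of glmpath's cv.glmpath() (10-fold cross-validation; the seed is arbitrary):
## Cross-validated prediction error along the regularization path
set.seed(123) # arbitrary seed for reproducibility
cv.gusto <- cv.glmpath(x = gustosd$x, y = gustosd$y, nfold = 10)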