These are methods that deliberately estimate biased regression coefficients, which can lead to better predictions than those obtained with traditional (unbiased) methods.
Predictions from multivariable models for future subjects improve if they are shrunk towards the average, because shrunken predictions have lower variance despite being slightly biased.
Uniform shrinkage: Shrinkage after estimation
Penalized maximum likelihood: Shrinkage during estimation
Least absolute shrinkage and selection operator (Lasso): Shrinkage for selection (zeroing of coefficients)
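As a toy illustration of the idea (the shrinkage factor and linear predictor values below are made up, not taken from the GUSTO data), shrinking a linear predictor towards its average with a factor s < 1 makes extreme predictions less extreme:
## Toy example: uniform shrinkage pulls predictions towards their average
s  <- 0.85                       # hypothetical shrinkage factor
lp <- c(-2.5, -1.0, 0.5)         # hypothetical linear predictor values
mean(lp) + s * (lp - mean(lp))   # shrunk values lie closer to mean(lp) = -1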
## Load foreign package
library(foreign)
## Load gusto data (n = 785)
gusto <- read.spss("./gusto/sample4.sav", to.data.frame = TRUE)
## Lower case variable names
names(gusto) <- tolower(names(gusto))
## Standard logistic regression
res.logit <- glm(formula = day30 ~ sho + a65 + hig + dia + hyp + hrt + ttr + sex,
                 family = binomial(link = "logit"),
                 data = gusto)
res.df <- data.frame(Original = coef(res.logit)[-1])
## Null model
res.null <- glm(formula = day30 ~ 1,
                family = binomial(link = "logit"),
                data = gusto)
## Show results
round(res.df, 2)
Original
sho 1.12
a65 1.49
hig 0.84
dia 0.43
hyp 0.99
hrt 0.96
ttr 0.59
sex 0.07
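For reference, the predicted 30-day mortality risks of the unshrunken model can be inspected directly; a minimal sketch using predict():
## Distribution of predicted 30-day mortality risks from the original model
summary(predict(res.logit, type = "response"))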
Heuristic method

The heuristic shrinkage factor of van Houwelingen and Le Cessie is (model chi-square - model df) / model chi-square, where the model chi-square is the likelihood-ratio statistic of the fitted model against the null model.
## -2 log likelihood difference between the null model and full model
model.chisq <- deviance(res.null) - deviance(res.logit)
## Additional degrees of freedom used in the full model
model.df <- df.residual(res.null) - df.residual(res.logit)
## Shrinkage factor
shrinkage.factor <- (model.chisq - model.df) / model.chisq
shrinkage.factor
[1] 0.8722
## Show results
res.df$Shrunk.heuristic <- res.df$Original * shrinkage.factor
round(res.df, 2)
Original Shrunk.heuristic
sho 1.12 0.97
a65 1.49 1.30
hig 0.84 0.74
dia 0.43 0.38
hyp 0.99 0.86
hrt 0.96 0.84
ttr 0.59 0.51
sex 0.07 0.06
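Note that after uniform shrinkage the intercept should in principle be re-estimated so that the average predicted risk again matches the observed risk. A minimal sketch in base R, analogous to the offset trick used for the bootstrap method below:
## Re-estimate the intercept, with the heuristically shrunk linear predictor as offset
lp.heur <- drop(model.matrix(res.logit)[, -1] %*% res.df$Shrunk.heuristic)
coef(glm(res.logit$y ~ offset(lp.heur), family = binomial))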
Bootstrap method (from the website)
## Fit a model with 8 predictors
library(rms)
full8 <- lrm(day30 ~ sho + a65 + hig + dia + hyp + hrt + ttr + sex,
             data = gusto, x = TRUE, y = TRUE, linear.predictors = TRUE)
## Validate with 200 bootstraps
## validate with 200 bootstraps
val.full8 <- validate(full8, B = 200)
Divergence or singularity in 1 samples
val.full8
index.orig training test optimism index.corrected n
Dxy 0.5789 0.6076 0.5582 0.0494 0.5295 199
R2 0.1987 0.2241 0.1774 0.0467 0.1520 199
Intercept 0.0000 0.0000 -0.2438 0.2438 -0.2438 199
Slope 1.0000 1.0000 0.8795 0.1205 0.8795 199
Emax 0.0000 0.0000 0.0773 0.0773 0.0773 199
D 0.0785 0.0893 0.0697 0.0197 0.0588 199
U -0.0025 -0.0025 0.0025 -0.0050 0.0025 199
Q 0.0810 0.0919 0.0672 0.0247 0.0563 199
B 0.0546 0.0532 0.0559 -0.0028 0.0574 199
g 1.3091 1.4191 1.2138 0.2052 1.1039 199
gp 0.0734 0.0767 0.0694 0.0072 0.0662 199
## copy original model fit to shrunk model
full8.shrunk <- full8
## use result from bootstrapping to shrink coefficients (coefs multiplied by corrected slope)
full8.shrunk$coef <- full8.shrunk$coef * val.full8["Slope", "index.corrected"]
## Estimate new intercept, with shrunk lp as offset variable, i.e. coef fixed at unity
full8.shrunk$coef[1] <- lrm.fit(y = full8$y, offset = full8$x %*% full8.shrunk$coef[2:9])$coef[1]
## Show results
res.df$Shrunk.boot <- full8.shrunk$coef[-1]
round(res.df, 2)
Original Shrunk.heuristic Shrunk.boot
sho 1.12 0.97 0.98
a65 1.49 1.30 1.31
hig 0.84 0.74 0.74
dia 0.43 0.38 0.38
hyp 0.99 0.86 0.87
hrt 0.96 0.84 0.85
ttr 0.59 0.51 0.52
sex 0.07 0.06 0.06
Penalized maximum likelihood

Penalized maximum likelihood estimation shrinks the coefficients during estimation by maximizing the log-likelihood minus a penalty proportional to the sum of the squared (scaled) coefficients. It is a generalization of the ridge regression method used for linear regression.
## Fit logistic regression (actually already done in the previous step)
library(rms)
full8 <- lrm(formula = day30 ~ sho + a65 + hig + dia + hyp + hrt + ttr + sex,
             data = gusto,
             x = TRUE, y = TRUE) # Storing x and y is required by pentrace()
## Find the best penalty
p8 <- pentrace(fit = full8, penalty = 0:20)
p8
Best penalty:
penalty df
8 6.919
penalty df aic bic aic.c
0 8.000 46.62 9.29 46.43
1 7.838 46.91 10.34 46.73
2 7.686 47.14 11.28 46.97
3 7.542 47.32 12.13 47.16
4 7.406 47.46 12.90 47.30
5 7.276 47.55 13.60 47.40
6 7.152 47.61 14.24 47.46
7 7.033 47.64 14.83 47.50
8 6.919 47.65 15.37 47.51
9 6.810 47.64 15.87 47.50
10 6.705 47.61 16.32 47.48
11 6.604 47.56 16.75 47.43
12 6.507 47.50 17.14 47.37
13 6.413 47.42 17.50 47.30
14 6.322 47.34 17.84 47.22
15 6.234 47.24 18.16 47.13
16 6.149 47.14 18.45 47.03
17 6.066 47.03 18.72 46.92
18 5.986 46.91 18.98 46.80
19 5.909 46.78 19.22 46.68
20 5.833 46.65 19.44 46.55
plot(p8)
plot(p8, which = c("aic.c","aic","bic")[1:2], ylim = c(45, 48))
## Perform penalized maximum likelihood estimation
full8.penal <- update(full8, penalty = p8$penalty)
## Show results
res.df$Penalized <- coef(full8.penal)[-1]
round(res.df, 2)
Original Shrunk.heuristic Shrunk.boot Penalized
sho 1.12 0.97 0.98 1.17
a65 1.49 1.30 1.31 1.21
hig 0.84 0.74 0.74 0.72
dia 0.43 0.38 0.38 0.36
hyp 0.99 0.86 0.87 0.83
hrt 0.96 0.84 0.85 0.84
ttr 0.59 0.51 0.52 0.49
sex 0.07 0.06 0.06 0.11
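Unlike the two uniform methods, penalization shrinks each coefficient by its own amount; here the estimates for sho and sex even move away from zero. A quick sketch of the per-coefficient shrinkage ratios:
## Ratio of penalized to original estimates, per coefficient
round(res.df$Penalized / res.df$Original, 2)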
Lasso

## Load glmpath (L1 Regularization Path for Generalized Linear Models and Cox Proportional Hazards Model)
library(glmpath)
## model matrix x, and outcome vector y
gustosd <- list(x = full8$x, y = full8$y)
## Fit logistic models over a range of L1 penalties
gustopath <- glmpath(data = gustosd)
## Plot results
par(mar = c(5.1, 4.1, 4.1, 5.1))
plot(gustopath, type = c("coefficients", "aic", "bic")[1]) # coefficients
plot(gustopath, type = c("coefficients", "aic", "bic")[2]) # AIC
plot(gustopath, type = c("coefficients", "aic", "bic")[3]) # BIC
## List b.predictor (matrix of coefficient estimates from the predictor steps) with aic at each L1 bound
b.pred.aic <- cbind(gustopath$b.predictor, aic = gustopath$aic)
b.pred.aic
Intercept sho a65 hig dia hyp hrt ttr sex aic
1 -2.646 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 384.8
2 -2.696 0.0000 0.1196 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 382.6
3 -2.804 0.6054 0.3236 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 373.4
4 -2.809 0.4692 0.3244 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 373.4
5 -3.001 0.7958 0.5466 0.0000 0.0000 0.0000 0.2500 0.00000 0.00000 362.2
6 -3.012 0.7322 0.5512 0.0000 0.0000 0.0000 0.2404 0.00000 0.00000 362.1
7 -3.012 0.7322 0.5512 0.0000 0.0000 0.0000 0.2404 0.00000 0.00000 362.1
8 -3.441 1.0368 0.8613 0.3153 0.0000 0.0000 0.5180 0.00000 0.00000 347.1
9 -3.664 1.0361 0.9717 0.4005 0.0000 0.0000 0.5803 0.08943 0.00000 345.0
10 -3.915 1.0743 1.0803 0.5041 0.0000 0.3099 0.6730 0.20156 0.00000 341.8
11 -4.552 1.0880 1.3601 0.7280 0.3489 0.8746 0.8716 0.46091 0.00000 336.7
12 -4.912 1.1139 1.4840 0.8405 0.4310 0.9894 0.9583 0.58291 0.07163 338.2
## Show coefficients with the smallest AIC
b.pred.aic[which.min(gustopath$aic), , drop = FALSE]
Intercept sho a65 hig dia hyp hrt ttr sex aic
11 -4.552 1.088 1.36 0.728 0.3489 0.8746 0.8716 0.4609 0 336.7
## Extract coefficients for which the AIC is the smallest
res.df$Lasso <- gustopath$b.predictor[which.min(gustopath$aic), -1]
## Show the results (sex coefficient is set to zero)
round(res.df, 2)
Original Shrunk.heuristic Shrunk.boot Penalized Lasso
sho 1.12 0.97 0.98 1.17 1.09
a65 1.49 1.30 1.31 1.21 1.36
hig 0.84 0.74 0.74 0.72 0.73
dia 0.43 0.38 0.38 0.36 0.35
hyp 0.99 0.86 0.87 0.83 0.87
hrt 0.96 0.84 0.85 0.84 0.87
ttr 0.59 0.51 0.52 0.49 0.46
sex 0.07 0.06 0.06 0.11 0.00
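Instead of AIC, the L1 bound could also be chosen by cross-validation. A minimal sketch assuming the defaults of glmpath's cv.glmpath() (10-fold cross-validation; the seed is arbitrary):
## Cross-validated prediction error along the regularization path
set.seed(123) # arbitrary seed for reproducibility
cv.gusto <- cv.glmpath(x = gustosd$x, y = gustosd$y, nfold = 10)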