Replicate the figure 3.8 in ESL

library(MASS)
dat = read.table('http://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/prostate.data')

dat_train = dat[dat$train,1:9]
dat_test = dat[-dat$train,1:9]

Normalize data. Note testing data use the same center and scale as training data.

X_train = scale(dat_train[,names(dat_train)!='lpsa'])
y_train = dat_train$lpsa - mean(dat_train$lpsa)

X_test = scale(dat_test[,names(dat_train)!='lpsa'],
               center = attr(X_train,'scaled:center'),
               scale = attr(X_train,'scaled:scale'))
y_test = dat_test$lpsa - mean(dat_train$lpsa)

Use SVD to save computation time since the calculation has to be repeated with different \(\lambda\)’s.

sv_dec = svd(X_train)
t_u = t(sv_dec$u)
t_v = t(sv_dec$v)
lambda_grid = seq(0,10000,length.out = 1000)
betas = matrix(NA,nrow=1000,ncol=8)
dof = numeric(1000)

for(i in seq_along(lambda_grid)){
    lambda = lambda_grid[i]
    ds = sv_dec$d/(sv_dec$d^2+lambda)    
    
    betas[i,] = ginv(t_v)%*%diag(ds)%*%t_u%*%y_train
    dof[i] = sum(ds*sv_dec$d)
}

Following the equations: \[ \beta = \left( V'(D^2+\lambda I)V\right)^{-1}VDU'y \]

and

\[ df(\lambda) = tr\left(X(X'X+\lambda I)X'\right) = \sum^{p=8}_{i=1}\left(\frac{d_i^2}{d_i^2+\lambda}\right) \]

Then plot:

matplot(dof,betas,type = 'l',xlim=c(min(dof),max(dof)+1),lty=1)
text(max(dof+0.1),y=betas[1,],labels = names(dat_train)[1:8],adj = 0)
abline(h = 0,lty=2)
abline(v = 5,lty=2)