Step 1: Load and clean up data
# Load the data
dat <- ElemStatLearn::SAheart
vo <- c('sbp','tobacco','ldl','famhist','obesity','alcohol','age')
X <- dat[,vo]
# Re-code famhist
X <- cbind(intercept=1,famhist=model.matrix(~-1+famhist,data=dat)[,2],X[,-which(colnames(X)=='famhist')])
X <- as.matrix(X)[,c('intercept',vo)]
y <- dat$chd
# Define the p(x_i;beta) function
p <- function(xi,beta) { 1/(1+exp(-(sum(xi*beta)))) }
head(cbind(y,X))
y intercept sbp tobacco ldl famhist obesity alcohol age
1 1 1 160 12.00 5.73 1 25.30 97.20 52
2 1 1 144 0.01 4.41 0 28.87 2.06 63
3 0 1 118 0.08 3.48 1 29.14 3.81 46
4 1 1 170 7.50 6.41 1 31.99 24.26 58
5 1 1 134 13.60 3.50 1 25.99 57.34 49
6 0 1 132 6.20 6.47 1 30.77 14.14 45
Step 2: Estimate coefficients with Newton-Raphson
# Set up optimization parameters
beta.old <- rep(0,ncol(X))
tol <- 10e-3
dist <- 1
i <- 0
# Begin the loop!
while(dist > tol) {
i <- i + 1
# Get the p-vector
p.vec <- apply(X,1,p,beta=beta.old)
# Get the W weight matrix
W <- diag(p.vec*(1-p.vec))
# Get the Score vector (i.e. gradient)
dldbeta <- t(X) %*% (y-p.vec)
# Get the Hessian
dl2dbeta2 <- -t(X) %*% W %*% X
# Update beta
beta.new <- beta.old - solve(dl2dbeta2) %*% dldbeta
# Calculate distance
dist <- sqrt(sum((beta.new-beta.old)^2))
# Reset
beta.old <- beta.new
}
paste('Convergence achieved after',i,'steps',sep=' ')
[1] "Convergence achieved after 4 steps"
round(beta.new,3)
[,1]
intercept -4.130
sbp 0.006
tobacco 0.080
ldl 0.185
famhist 0.939
obesity -0.035
alcohol 0.001
age 0.043
Step 3: Estimate coefficients with IRLS algorithm
# Set up optimization parameters
beta.old <- rep(0,ncol(X))
tol <- 10e-3
dist <- 1
i <- 0
# Begin the loop!
while(dist > tol) {
i <- i + 1
# Get the p-vector
p.vec <- apply(X,1,p,beta=beta.old)
# Get the W weight matrix
W <- diag(p.vec*(1-p.vec))
# Get W inverse
Winv <- diag(1/(p.vec*(1-p.vec)))
# Get z
z <- (X %*% beta.old) + Winv %*% (y-p.vec)
# Update beta
beta.new <- solve(t(X) %*% W %*% X) %*% t(X) %*% W %*% z
# Calculate distance
dist <- sqrt(sum((beta.new-beta.old)^2))
# Reset
beta.old <- beta.new
}
paste('Convergence achieved after',i,'steps',sep=' ')
[1] "Convergence achieved after 4 steps"
round(beta.new,3)
[,1]
intercept -4.130
sbp 0.006
tobacco 0.080
ldl 0.185
famhist 0.939
obesity -0.035
alcohol 0.001
age 0.043
LS0tDQp0aXRsZTogIkxvZ2lzdGljIFJlZ3Jlc3Npb246IFNUQVQgOTYzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMjIFN0ZXAgMTogTG9hZCBhbmQgY2xlYW4gdXAgZGF0YQ0KDQpgYGB7cn0NCiMgTG9hZCB0aGUgZGF0YQ0KZGF0IDwtIEVsZW1TdGF0TGVhcm46OlNBaGVhcnQNCnZvIDwtIGMoJ3NicCcsJ3RvYmFjY28nLCdsZGwnLCdmYW1oaXN0Jywnb2Jlc2l0eScsJ2FsY29ob2wnLCdhZ2UnKQ0KWCA8LSBkYXRbLHZvXQ0KIyBSZS1jb2RlIGZhbWhpc3QNClggPC0gY2JpbmQoaW50ZXJjZXB0PTEsZmFtaGlzdD1tb2RlbC5tYXRyaXgofi0xK2ZhbWhpc3QsZGF0YT1kYXQpWywyXSxYWywtd2hpY2goY29sbmFtZXMoWCk9PSdmYW1oaXN0JyldKQ0KWCA8LSBhcy5tYXRyaXgoWClbLGMoJ2ludGVyY2VwdCcsdm8pXQ0KeSA8LSBkYXQkY2hkDQojIERlZmluZSB0aGUgcCh4X2k7YmV0YSkgZnVuY3Rpb24NCnAgPC0gZnVuY3Rpb24oeGksYmV0YSkgeyAxLygxK2V4cCgtKHN1bSh4aSpiZXRhKSkpKSB9DQpoZWFkKGNiaW5kKHksWCkpDQpgYGANCg0KIyMjIFN0ZXAgMjogRXN0aW1hdGUgY29lZmZpY2llbnRzIHdpdGggTmV3dG9uLVJhcGhzb24NCg0KYGBge3J9DQojIFNldCB1cCBvcHRpbWl6YXRpb24gcGFyYW1ldGVycw0KYmV0YS5vbGQgPC0gcmVwKDAsbmNvbChYKSkNCnRvbCA8LSAxMGUtMw0KZGlzdCA8LSAxDQppIDwtIDANCiMgQmVnaW4gdGhlIGxvb3AhDQp3aGlsZShkaXN0ID4gdG9sKSB7DQogIGkgPC0gaSArIDENCiAgIyBHZXQgdGhlIHAtdmVjdG9yDQogIHAudmVjIDwtIGFwcGx5KFgsMSxwLGJldGE9YmV0YS5vbGQpDQogICMgR2V0IHRoZSBXIHdlaWdodCBtYXRyaXgNCiAgVyA8LSBkaWFnKHAudmVjKigxLXAudmVjKSkNCiAgIyBHZXQgdGhlIFNjb3JlIHZlY3RvciAoaS5lLiBncmFkaWVudCkNCiAgZGxkYmV0YSA8LSB0KFgpICUqJSAoeS1wLnZlYykNCiAgIyBHZXQgdGhlIEhlc3NpYW4NCiAgZGwyZGJldGEyIDwtIC10KFgpICUqJSBXICUqJSBYDQogICMgVXBkYXRlIGJldGENCiAgYmV0YS5uZXcgPC0gYmV0YS5vbGQgLSBzb2x2ZShkbDJkYmV0YTIpICUqJSBkbGRiZXRhDQogICMgQ2FsY3VsYXRlIGRpc3RhbmNlDQogIGRpc3QgPC0gc3FydChzdW0oKGJldGEubmV3LWJldGEub2xkKV4yKSkNCiAgIyBSZXNldA0KICBiZXRhLm9sZCA8LSBiZXRhLm5ldw0KfQ0KcGFzdGUoJ0NvbnZlcmdlbmNlIGFjaGlldmVkIGFmdGVyJyxpLCdzdGVwcycsc2VwPScgJykNCnJvdW5kKGJldGEubmV3LDMpDQpgYGANCg0KIyMjIFN0ZXAgMzogRXN0aW1hdGUgY29lZmZpY2llbnRzIHdpdGggSVJMUyBhbGdvcml0aG0NCg0KYGBge3J9DQojIFNldCB1cCBvcHRpbWl6YXRpb24gcGFyYW1ldGVycw0KYmV0YS5vbGQgPC0gcmVwKDAsbmNvbChYKSkNCnRvbCA8LSAxMGUtMw0KZGlzdCA8LSAxDQppIDwtIDANCiMgQmVnaW4gdGhlIGxvb3AhDQp3aGlsZShkaXN0ID4gdG9sKSB7DQogIGkgPC0gaSArIDENCiAgIyBHZXQgdGhlIHAtdmVjdG9yDQogIHAudmVjIDwtIGFwcGx5KFgsMSxwLGJldGE9YmV0YS5vbGQpDQogICMgR2V0IHRoZSBXIHdlaWdodCBtYXRyaXgNCiAgVyA8LSBkaWFnKHAudmVjKigxLXAudmVjKSkNCiAgIyBHZXQgVyBpbnZlcnNlDQogIFdpbnYgPC0gZGlhZygxLyhwLnZlYyooMS1wLnZlYykpKQ0KICAjIEdldCB6DQogIHogPC0gKFggJSolIGJldGEub2xkKSArIFdpbnYgJSolICh5LXAudmVjKQ0KICAjIFVwZGF0ZSBiZXRhDQogIGJldGEubmV3IDwtIHNvbHZlKHQoWCkgJSolIFcgJSolIFgpICUqJSB0KFgpICUqJSBXICUqJSB6DQogICMgQ2FsY3VsYXRlIGRpc3RhbmNlDQogIGRpc3QgPC0gc3FydChzdW0oKGJldGEubmV3LWJldGEub2xkKV4yKSkNCiAgIyBSZXNldA0KICBiZXRhLm9sZCA8LSBiZXRhLm5ldw0KfQ0KcGFzdGUoJ0NvbnZlcmdlbmNlIGFjaGlldmVkIGFmdGVyJyxpLCdzdGVwcycsc2VwPScgJykNCnJvdW5kKGJldGEubmV3LDMpDQpgYGANCg0K