# Randomly pick half of the College rows as the training set; `train` holds
# the selected row indices and the remaining rows become the test set.
train <- sample(nrow(College), nrow(College) / 2)
College.train <- College[train, ]
College.test <- College[-train, ]

# Least-squares fit of Apps on all other columns, using the training rows only.
lm.fit <- lm(Apps ~ ., data = College.train)
lm.pred <- predict(lm.fit, newdata = College.test)

# Mean squared prediction error on the held-out rows.
mean((lm.pred - College.test[["Apps"]])^2)
## [1] 1511450
# Linear regression test error (test-set MSE) = 1511450
# Design matrices for glmnet, built with the same formula on each split.
train.mat <- model.matrix(Apps ~ ., data = College.train)
test.mat <- model.matrix(Apps ~ ., data = College.test)

# Candidate penalties: 100 values, log-spaced from 10^10 down to 10^-2.
grid <- 10^seq(10, -2, length.out = 100)

# Ridge regression (alpha = 0), cross-validated over the penalty grid.
ridge.mod <- cv.glmnet(
  x = train.mat,
  y = College.train[["Apps"]],
  alpha = 0,
  lambda = grid
)

# Penalty with the smallest cross-validated error.
lambda.best <- ridge.mod$lambda.min

# Test-set MSE of the ridge fit at that penalty.
ridge.pred <- predict(ridge.mod, newx = test.mat, s = lambda.best)
mean((College.test[["Apps"]] - ridge.pred)^2)
## [1] 1589018
# Ridge regression test error (test-set MSE) = 1589018
# Lasso (alpha = 1), cross-validated over the same penalty grid and the same
# training design matrix that the ridge fit used.
mod.lasso <- cv.glmnet(
  x = train.mat,
  y = College.train[["Apps"]],
  alpha = 1,
  lambda = grid
)

# Penalty with the smallest cross-validated error (overwrites the ridge value).
lambda.best <- mod.lasso$lambda.min

# Test-set MSE of the lasso fit at that penalty.
lasso.pred <- predict(mod.lasso, newx = test.mat, s = lambda.best)
mean((College.test[["Apps"]] - lasso.pred)^2)
## [1] 1535419
# Lasso test error (test-set MSE) = 1535419
# Principal components regression on the training rows, with scaled
# predictors and cross-validation results stored on the fit.
pcr.fit <- pcr(
  Apps ~ .,
  data = College.train,
  scale = TRUE,
  validation = "CV"
)

# NOTE(review): ncomp = 10 is hard-coded; the cross-validation results
# requested above are not consulted here when choosing it.
pcr.pred <- predict(pcr.fit, newdata = College.test, ncomp = 10)

# Test-set MSE of the 10-component PCR fit.
mean((College.test[["Apps"]] - pcr.pred)^2)
## [1] 3093142
# PCR test error (test-set MSE, 10 components) = 3093142
# Partial least squares regression on the training rows, with scaled
# predictors and cross-validation results stored on the fit.
pls.fit <- plsr(
  Apps ~ .,
  data = College.train,
  scale = TRUE,
  validation = "CV"
)

# NOTE(review): ncomp = 10 is hard-coded; the cross-validation results
# requested above are not consulted here when choosing it.
pls.pred <- predict(pls.fit, newdata = College.test, ncomp = 10)

# Test-set MSE of the 10-component PLS fit.
mean((College.test[["Apps"]] - pls.pred)^2)
## [1] 1554293
# PLS test error (test-set MSE, 10 components) = 1554293
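# The PCR model underperformed compared to the others: its test error
# (3093142) is roughly double that of the linear, ridge, lasso, and PLS fits
# (all between about 1.51 and 1.59 million).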
# Randomly pick half of the Boston rows as the training set; the remaining
# rows become the test set.
train <- sample(nrow(Boston), nrow(Boston) / 2)
Boston.train <- Boston[train, ]
Boston.test <- Boston[-train, ]

# Design matrices for glmnet. The test matrix must be built from Boston.test:
# it was previously built from Boston.train, so the "test" errors below
# compared predictions for training rows against test-row responses.
train.mat <- model.matrix(crim ~ ., data = Boston.train)
test.mat <- model.matrix(crim ~ ., data = Boston.test)

# Candidate penalties: 100 values, log-spaced from 10^10 down to 10^-2.
grid <- 10^seq(10, -2, length.out = 100)

# Ridge regression (alpha = 0), cross-validated over the penalty grid.
ridge.mod <- cv.glmnet(train.mat, Boston.train[, "crim"], alpha = 0, lambda = grid)
lambda.best <- ridge.mod$lambda.min
ridge.pred <- predict(ridge.mod, newx = test.mat, s = lambda.best)
mean((Boston.test[, "crim"] - ridge.pred)^2)
# NOTE: the value below was recorded before test.mat was corrected to use
# Boston.test; re-run to obtain the actual ridge test error.
## [1] 130.8663

# Lasso (alpha = 1), cross-validated over the same penalty grid.
mod.lasso <- cv.glmnet(train.mat, Boston.train[, "crim"], alpha = 1, lambda = grid)
lambda.best <- mod.lasso$lambda.min
lasso.pred <- predict(mod.lasso, newx = test.mat, s = lambda.best)
mean((Boston.test[, "crim"] - lasso.pred)^2)
# NOTE: the value below was recorded before test.mat was corrected to use
# Boston.test; re-run to obtain the actual lasso test error.
## [1] 129.3982
# Principal components regression of crim on the other Boston columns, fit on
# the training rows with scaled predictors and cross-validation results stored.
pcr.fit <- pcr(
  crim ~ .,
  data = Boston.train,
  scale = TRUE,
  validation = "CV"
)

# NOTE(review): ncomp = 10 is hard-coded; the cross-validation results
# requested above are not consulted here when choosing it.
pcr.pred <- predict(pcr.fit, newdata = Boston.test, ncomp = 10)

# Test-set MSE of the 10-component PCR fit.
mean((Boston.test[["crim"]] - pcr.pred)^2)
## [1] 71.10062
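# PCR has the lowest recorded test error (71.10062), so I'd choose that one.
# Caveat: the recorded ridge and lasso errors (130.8663 and 129.3982) came from
# predictions on a matrix built from Boston.train rather than Boston.test, so
# this comparison should be re-checked.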
???