library(ISLR2)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(leaps)
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
## 
##     loadings
# Load the Boston data set (resolved from the packages attached above).
data("Boston")
# View() opens a spreadsheet-style viewer window, which is only useful (and
# only reliably available) in an interactive session; skip it in batch/knit runs.
if (interactive()) {
  View(Boston)
}
# Drop any rows that contain missing values before modelling.
Boston <- na.omit(Boston)

# Least squares regression
# Draw a 90% training sample; seeding makes the split reproducible.
set.seed(1)
n_obs <- nrow(Boston)
# seq_len() is safe for a zero-row data frame (1:0 would yield c(1, 0)), and
# floor() makes the integer sample size explicit instead of relying on
# sample() truncating a fractional size.
train <- sample(seq_len(n_obs), floor(n_obs * 0.90))
# Negative indices select every row that is NOT in the training sample.
test <- -train
y.test <- Boston$crim[test]

# Regress crim on all remaining columns, using the training rows only.
lm.fit <- lm(crim ~ ., data = Boston, subset = train)
summary(lm.fit)
## 
## Call:
## lm(formula = crim ~ ., data = Boston, subset = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.772 -2.329 -0.341  1.044 73.453 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.2943760  7.7759724   1.838 0.066693 .  
## zn           0.0445855  0.0207506   2.149 0.032206 *  
## indus       -0.0693981  0.0902700  -0.769 0.442433    
## chas        -0.9142474  1.2970512  -0.705 0.481265    
## nox         -9.8495028  5.7626291  -1.709 0.088115 .  
## rm           0.7508779  0.6588809   1.140 0.255059    
## age         -0.0008849  0.0196652  -0.045 0.964127    
## dis         -1.0570654  0.3149226  -3.357 0.000857 ***
## rad          0.6306703  0.0942514   6.691 6.71e-11 ***
## tax         -0.0042698  0.0055458  -0.770 0.441760    
## ptratio     -0.3304103  0.2053000  -1.609 0.108242    
## lstat        0.1332271  0.0822788   1.619 0.106114    
## medv        -0.2375039  0.0648955  -3.660 0.000283 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.756 on 442 degrees of freedom
## Multiple R-squared:  0.438,  Adjusted R-squared:  0.4228 
## F-statistic: 28.71 on 12 and 442 DF,  p-value: < 2.2e-16
# Predict crim for the held-out rows and report the test-set mean squared error.
lm.pred <- predict(lm.fit, newdata = Boston[test, ])
test_residuals <- y.test - lm.pred
lm.error <- mean(test_residuals^2)
lm.error
## [1] 8.179267
# Lasso regression
# glmnet() takes a numeric predictor matrix rather than a formula; [, -1] drops
# the intercept column that model.matrix() adds.
x <- model.matrix(crim ~ ., Boston)[, -1]
y <- Boston$crim

# Coefficient paths over glmnet's lambda sequence (alpha = 1 is the lasso penalty).
lasso_fit <- glmnet(x[train, ], y[train], alpha = 1)
plot(lasso_fit)

# A plain glmnet() fit has no `lambda.min` component, so `lasso_fit$lambda.min`
# is NULL; predict() with s = NULL then returns one column per lambda and the
# "test error" becomes an average over the whole path. Select lambda by
# cross-validation on the training rows instead.
# The CV folds are random, so seed for reproducibility (this also resets the
# RNG stream for any randomised step that runs later in the script).
set.seed(1)
lasso_cv <- cv.glmnet(x[train, ], y[train], alpha = 1)
lambda_lasso <- lasso_cv$lambda.min

# Test-set mean squared error at the cross-validated lambda.
lasso_pred <- predict(lasso_fit, s = lambda_lasso, newx = x[test, ])
lasso_error <- mean((lasso_pred - y[test])^2)
lasso_error
## (re-run to record the new value; the earlier 8.846331 was computed with
##  s = NULL, i.e. averaged over every lambda on the path)
# Ridge regression
# Coefficient paths over glmnet's lambda sequence (alpha = 0 is the ridge penalty).
ridge_fit <- glmnet(x[train, ], y[train], alpha = 0)
summary(ridge_fit)
##           Length Class     Mode   
## a0         100   -none-    numeric
## beta      1200   dgCMatrix S4     
## df         100   -none-    numeric
## dim          2   -none-    numeric
## lambda     100   -none-    numeric
## dev.ratio  100   -none-    numeric
## nulldev      1   -none-    numeric
## npasses      1   -none-    numeric
## jerr         1   -none-    numeric
## offset       1   -none-    logical
## call         4   -none-    call   
## nobs         1   -none-    numeric
plot(ridge_fit)

# The component list above shows that a glmnet() fit carries no `lambda.min`,
# so `ridge_fit$lambda.min` is NULL and predict() would return predictions for
# all 100 lambdas at once. Select lambda by cross-validation on the training
# rows instead.
# The CV folds are random, so seed for reproducibility (this also resets the
# RNG stream for any randomised step that runs later in the script).
set.seed(1)
ridge_cv <- cv.glmnet(x[train, ], y[train], alpha = 0)
lambda_ridge <- ridge_cv$lambda.min
lambda_ridge
## (re-run to record the selected lambda; this previously printed NULL)

# Test-set mean squared error at the cross-validated lambda.
ridge_pred <- predict(ridge_fit, s = lambda_ridge, newx = x[test, ])
ridge_error <- mean((ridge_pred - y[test])^2)
ridge_error
## (re-run to record the new value; the earlier 16.34827 was computed with
##  s = NULL, i.e. averaged over every lambda on the path)
# Partial least squares
# Fit on the training rows with standardised predictors; validation = "CV"
# asks plsr() to cross-validate the fit so summary() can report RMSEP per
# number of components.
pls.fit <- plsr(
  crim ~ .,
  data = Boston,
  subset = train,
  scale = TRUE,
  validation = "CV"
)
summary(pls.fit)
## Data:    X dimension: 455 12 
##  Y dimension: 455 1
## Fit method: kernelpls
## Number of components considered: 12
## 
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV           8.902    7.350    6.934    6.838    6.819    6.802    6.782
## adjCV        8.902    7.349    6.931    6.835    6.813    6.796    6.776
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps
## CV       6.778    6.770    6.769     6.770     6.770     6.770
## adjCV    6.771    6.764    6.763     6.764     6.764     6.764
## 
## TRAINING: % variance explained
##       1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X        49.5    58.77    64.68    74.38    80.71    83.44    86.28    91.11
## crim     31.9    39.99    42.15    42.88    43.27    43.66    43.76    43.79
##       9 comps  10 comps  11 comps  12 comps
## X       94.52     96.23      98.2     100.0
## crim    43.80     43.80      43.8      43.8