# Load the built-in swiss dataset into the workspace
data("swiss", package = "datasets")
# Display the structure of the dataset (dimensions, column names and types)
str(object = swiss)
## 'data.frame': 47 obs. of 6 variables:
## $ Fertility : num 80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ...
## $ Agriculture : num 17 45.1 39.7 36.5 43.5 35.3 70.2 67.8 53.3 45.2 ...
## $ Examination : int 15 6 5 12 17 9 16 14 12 16 ...
## $ Education : int 12 9 5 7 15 7 7 8 7 13 ...
## $ Catholic : num 9.96 84.84 93.4 33.77 5.16 ...
## $ Infant.Mortality: num 22.2 22.2 20.2 20.3 20.6 26.6 23.6 24.9 21 24.4 ...
# Fit a multiple linear regression of Fertility on the five other indicators
fittedModel <- lm(
  formula = Fertility ~ Agriculture + Examination + Education +
    Catholic + Infant.Mortality,
  data = swiss
)
# Print coefficient estimates, residual quantiles and overall fit statistics
summary(fittedModel)
##
## Call:
## lm(formula = Fertility ~ Agriculture + Examination + Education +
## Catholic + Infant.Mortality, data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2743 -5.2617 0.5032 4.1198 15.3213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.91518 10.70604 6.250 1.91e-07 ***
## Agriculture -0.17211 0.07030 -2.448 0.01873 *
## Examination -0.25801 0.25388 -1.016 0.31546
## Education -0.87094 0.18303 -4.758 2.43e-05 ***
## Catholic 0.10412 0.03526 2.953 0.00519 **
## Infant.Mortality 1.07705 0.38172 2.822 0.00734 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.671
## F-statistic: 19.76 on 5 and 41 DF, p-value: 5.594e-10
# Draw the standard lm diagnostic plots on a 2 x 2 grid, then restore the
# previous graphics settings so later plots are not forced into the same grid.
old_par <- par(mfrow = c(2, 2))
plot(fittedModel)
par(old_par)
# Quartiles, mean and extremes of the residuals of the full model
summary(residuals(fittedModel))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -15.2743 -5.2617 0.5032 0.0000 4.1198 15.3213
# Backward elimination by AIC, starting from the full model
fittedModel_backward <- step(
  object = fittedModel,
  direction = "backward"
)
## Start: AIC=190.69
## Fertility ~ Agriculture + Examination + Education + Catholic +
## Infant.Mortality
##
## Df Sum of Sq RSS AIC
## - Examination 1 53.03 2158.1 189.86
## <none> 2105.0 190.69
## - Agriculture 1 307.72 2412.8 195.10
## - Infant.Mortality 1 408.75 2513.8 197.03
## - Catholic 1 447.71 2552.8 197.75
## - Education 1 1162.56 3267.6 209.36
##
## Step: AIC=189.86
## Fertility ~ Agriculture + Education + Catholic + Infant.Mortality
##
## Df Sum of Sq RSS AIC
## <none> 2158.1 189.86
## - Agriculture 1 264.18 2422.2 193.29
## - Infant.Mortality 1 409.81 2567.9 196.03
## - Catholic 1 956.57 3114.6 205.10
## - Education 1 2249.97 4408.0 221.43
# Coefficient table and fit statistics for the model kept by backward elimination
summary(fittedModel_backward)
##
## Call:
## lm(formula = Fertility ~ Agriculture + Education + Catholic +
## Infant.Mortality, data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.6765 -6.0522 0.7514 3.1664 16.1422
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.10131 9.60489 6.466 8.49e-08 ***
## Agriculture -0.15462 0.06819 -2.267 0.02857 *
## Education -0.98026 0.14814 -6.617 5.14e-08 ***
## Catholic 0.12467 0.02889 4.315 9.50e-05 ***
## Infant.Mortality 1.07844 0.38187 2.824 0.00722 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared: 0.6993, Adjusted R-squared: 0.6707
## F-statistic: 24.42 on 4 and 42 DF, p-value: 1.717e-10
# Intercept-only model: the starting point for forward selection
null_model <- lm(formula = Fertility ~ 1, data = swiss)
# Forward selection by AIC; candidate terms are bounded by the intercept-only
# model (lower) and the model with all five predictors (upper).
forward_model <- step(
  object = null_model,
  direction = "forward",
  scope = list(
    lower = ~ 1,
    upper = ~ Agriculture + Examination + Education + Catholic +
      Infant.Mortality
  )
)
## Start: AIC=238.35
## Fertility ~ 1
##
## Df Sum of Sq RSS AIC
## + Education 1 3162.7 4015.2 213.04
## + Examination 1 2994.4 4183.6 214.97
## + Catholic 1 1543.3 5634.7 228.97
## + Infant.Mortality 1 1245.5 5932.4 231.39
## + Agriculture 1 894.8 6283.1 234.09
## <none> 7178.0 238.34
##
## Step: AIC=213.04
## Fertility ~ Education
##
## Df Sum of Sq RSS AIC
## + Catholic 1 961.07 3054.2 202.18
## + Infant.Mortality 1 891.25 3124.0 203.25
## + Examination 1 465.63 3549.6 209.25
## <none> 4015.2 213.04
## + Agriculture 1 61.97 3953.3 214.31
##
## Step: AIC=202.18
## Fertility ~ Education + Catholic
##
## Df Sum of Sq RSS AIC
## + Infant.Mortality 1 631.92 2422.2 193.29
## + Agriculture 1 486.28 2567.9 196.03
## <none> 3054.2 202.18
## + Examination 1 2.46 3051.7 204.15
##
## Step: AIC=193.29
## Fertility ~ Education + Catholic + Infant.Mortality
##
## Df Sum of Sq RSS AIC
## + Agriculture 1 264.176 2158.1 189.86
## <none> 2422.2 193.29
## + Examination 1 9.486 2412.8 195.10
##
## Step: AIC=189.86
## Fertility ~ Education + Catholic + Infant.Mortality + Agriculture
##
## Df Sum of Sq RSS AIC
## <none> 2158.1 189.86
## + Examination 1 53.027 2105.0 190.69
# Coefficient table and fit statistics for the model built by forward selection
summary(forward_model)
##
## Call:
## lm(formula = Fertility ~ Education + Catholic + Infant.Mortality +
## Agriculture, data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.6765 -6.0522 0.7514 3.1664 16.1422
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.10131 9.60489 6.466 8.49e-08 ***
## Education -0.98026 0.14814 -6.617 5.14e-08 ***
## Catholic 0.12467 0.02889 4.315 9.50e-05 ***
## Infant.Mortality 1.07844 0.38187 2.824 0.00722 **
## Agriculture -0.15462 0.06819 -2.267 0.02857 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared: 0.6993, Adjusted R-squared: 0.6707
## F-statistic: 24.42 on 4 and 42 DF, p-value: 1.717e-10
# Stepwise selection by AIC from the full model; each step may drop a term
# or add back a previously dropped one
stepwise_model <- step(
  object = fittedModel,
  direction = "both"
)
## Start: AIC=190.69
## Fertility ~ Agriculture + Examination + Education + Catholic +
## Infant.Mortality
##
## Df Sum of Sq RSS AIC
## - Examination 1 53.03 2158.1 189.86
## <none> 2105.0 190.69
## - Agriculture 1 307.72 2412.8 195.10
## - Infant.Mortality 1 408.75 2513.8 197.03
## - Catholic 1 447.71 2552.8 197.75
## - Education 1 1162.56 3267.6 209.36
##
## Step: AIC=189.86
## Fertility ~ Agriculture + Education + Catholic + Infant.Mortality
##
## Df Sum of Sq RSS AIC
## <none> 2158.1 189.86
## + Examination 1 53.03 2105.0 190.69
## - Agriculture 1 264.18 2422.2 193.29
## - Infant.Mortality 1 409.81 2567.9 196.03
## - Catholic 1 956.57 3114.6 205.10
## - Education 1 2249.97 4408.0 221.43
# Coefficient table and fit statistics for the model kept by stepwise selection
summary(stepwise_model)
##
## Call:
## lm(formula = Fertility ~ Agriculture + Education + Catholic +
## Infant.Mortality, data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.6765 -6.0522 0.7514 3.1664 16.1422
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.10131 9.60489 6.466 8.49e-08 ***
## Agriculture -0.15462 0.06819 -2.267 0.02857 *
## Education -0.98026 0.14814 -6.617 5.14e-08 ***
## Catholic 0.12467 0.02889 4.315 9.50e-05 ***
## Infant.Mortality 1.07844 0.38187 2.824 0.00722 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.168 on 42 degrees of freedom
## Multiple R-squared: 0.6993, Adjusted R-squared: 0.6707
## F-statistic: 24.42 on 4 and 42 DF, p-value: 1.717e-10
# Install glmnet only when it is missing, so re-running the script does not
# reinstall the package (or need network access) every time.
if (!requireNamespace("glmnet", quietly = TRUE)) {
  install.packages("glmnet")
}
## Installing package into '/home/donatient/R/x86_64-pc-linux-gnu-library/4.6'
## (as 'lib' is unspecified)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 5.0
# Predictor matrix for glmnet: every column except Fertility, with the
# intercept column of the design matrix dropped
x <- model.matrix(Fertility ~ ., data = swiss)[, -1]
y <- swiss$Fertility
# cv.glmnet assigns observations to folds at random; fix the RNG seed so the
# selected lambda and the coefficients are the same on every run.
set.seed(123)
# alpha = 1 selects the lasso penalty
cv_lasso <- cv.glmnet(x, y, alpha = 1)
# Coefficients at the lambda with the lowest cross-validated error
lasso_model <- coef(cv_lasso, s = "lambda.min")
# summary() of the sparse coefficient matrix lists (i, j, x) triplets; row i
# corresponds to the i-th entry of rownames(lasso_model)
summary(lasso_model)
## 6 x 1 sparse Matrix of class "dgCMatrix", with 6 entries
## i j x
## 1 1 1 66.2945347
## 2 2 1 -0.1624899
## 3 3 1 -0.2525326
## 4 4 1 -0.8554684
## 5 5 1 0.1018710
## 6 6 1 1.0753076
# Compare the full model with the three selected models on AIC (lower is
# better). These values are on a different scale from the AIC printed by
# step(), which relies on extractAIC() and omits an additive constant.
AIC(
  fittedModel,
  fittedModel_backward,
  forward_model,
  stepwise_model
)
## df AIC
## fittedModel 7 326.0716
## fittedModel_backward 6 325.2408
## forward_model 6 325.2408
## stepwise_model 6 325.2408
# THANKS!