R Markdown
mussels <- read.csv("mussels.csv")
attach(mussels)
library(caret)
## Warning: package 'caret' was built under R version 4.0.4
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.4
library(MASS)
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.0.4
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
#Exercise 1:
plot(mussels)

par(mfrow=c(2,2))
#AIC:
modelqua <- lm(Mass~Height+Width+Length+I(Height^2)+I(Width^2)+I(Length^2)
+Height:Width+Height:Length+Width:Length)
plot(modelqua)

boxcox(modelqua)
yn <- (Mass^(-0.5)-1)/(-0.5)
modelquan <- lm(yn~Height+Width+Length+I(Height^2)+I(Width^2)+I(Length^2)
+Height:Width+Height:Length+Width:Length)
plot(modelquan)

modelquan.stepped <-step(modelquan)
## Start: AIC=-880.78
## yn ~ Height + Width + Length + I(Height^2) + I(Width^2) + I(Length^2) +
## Height:Width + Height:Length + Width:Length
##
## Df Sum of Sq RSS AIC
## - I(Width^2) 1 3.2770e-06 0.0013933 -882.59
## - I(Height^2) 1 6.5710e-06 0.0013966 -882.39
## - Width:Length 1 3.1872e-05 0.0014219 -880.92
## <none> 0.0013901 -880.78
## - Height:Width 1 1.1382e-04 0.0015039 -876.33
## - I(Length^2) 1 1.5052e-04 0.0015406 -874.35
## - Height:Length 1 1.7420e-04 0.0015643 -873.10
##
## Step: AIC=-882.59
## yn ~ Height + Width + Length + I(Height^2) + I(Length^2) + Height:Width +
## Height:Length + Width:Length
##
## Df Sum of Sq RSS AIC
## - I(Height^2) 1 6.1370e-06 0.0013995 -884.23
## <none> 0.0013933 -882.59
## - Width:Length 1 1.1113e-04 0.0015045 -878.29
## - Height:Width 1 1.1211e-04 0.0015054 -878.24
## - Height:Length 1 1.7302e-04 0.0015664 -874.99
## - I(Length^2) 1 1.8810e-04 0.0015814 -874.20
##
## Step: AIC=-884.23
## yn ~ Height + Width + Length + I(Length^2) + Height:Width + Height:Length +
## Width:Length
##
## Df Sum of Sq RSS AIC
## <none> 0.0013995 -884.23
## - Width:Length 1 0.00010524 0.0015047 -880.28
## - Height:Width 1 0.00011073 0.0015102 -879.98
## - I(Length^2) 1 0.00019277 0.0015922 -875.64
## - Height:Length 1 0.00025729 0.0016568 -872.39
modelquan.stepped
##
## Call:
## lm(formula = yn ~ Height + Width + Length + I(Length^2) + Height:Width +
## Height:Length + Width:Length)
##
## Coefficients:
## (Intercept) Height Width Length I(Length^2)
## 1.593e+00 3.687e-03 -1.320e-04 -1.221e-04 7.221e-06
## Height:Width Height:Length Width:Length
## 5.054e-05 -2.078e-05 -2.030e-05
plot(modelquan.stepped)

anova(modelquan.stepped,modelquan)
## Analysis of Variance Table
##
## Model 1: yn ~ Height + Width + Length + I(Length^2) + Height:Width + Height:Length +
## Width:Length
## Model 2: yn ~ Height + Width + Length + I(Height^2) + I(Width^2) + I(Length^2) +
## Height:Width + Height:Length + Width:Length
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 74 0.0013995
## 2 72 0.0013901 2 9.4147e-06 0.2438 0.7843
modellin <- lm(Mass~Height+Width+Length)
plot(modellin)

boxcox(modellin)
modellinn <- lm(log(Mass)~Height+Width+Length)
plot(modellinn)

modellinn.stepped <-step(modellinn)
## Start: AIC=-427.22
## log(Mass) ~ Height + Width + Length
##
## Df Sum of Sq RSS AIC
## <none> 0.40620 -427.22
## - Length 1 0.06702 0.47322 -416.70
## - Width 1 0.19292 0.59912 -397.36
## - Height 1 0.26001 0.66621 -388.66
modellinn.stepped
##
## Call:
## lm(formula = log(Mass) ~ Height + Width + Length)
##
## Coefficients:
## (Intercept) Height Width Length
## 3.135117 0.009421 0.014326 0.002363
plot(modellinn.stepped)

AIC(modelquan.stepped, modellinn)
## df AIC
## modelquan.stepped 9 -649.5209
## modellinn 5 -192.5189
#Quadratic model is better in AIC
#Cross-validation:
train.control <- trainControl(method = "cv", number = 10)
model.lincv <- train(log(Mass)~Height+Width+Length, data=mussels, method = "lm",
trControl = train.control)
model.lincv$results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.07353902 0.9722304 0.05625326 0.02294634 0.02026655 0.0136949
model.quacv <- train(((Mass^(-0.5)-1)/(-0.5))~Height+Width+Length+I(Length^2)+Height:Width+Height:Length+Width:Length,
data=mussels, method = "lm",
trControl = train.control)
model.quacv$results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD
## 1 TRUE 0.004935906 0.9700964 0.003988631 0.001861945 0.03592856
## MAESD
## 1 0.001164207
#Mallow's Cp:
ols_mallows_cp(modelquan.stepped,modelquan)
## [1] 6.487645
plot(modelquan.stepped)

#b:
New.Values <- data.frame(Height=100,Width=100,Length=200)
predvalue <- predict(modelquan.stepped, New.Values,interval="prediction")
predmass <- ((predvalue*(-0.5))+1)^(-2)
predmass
## fit lwr upr
## 1 371.9043 204.6153 875.3266
#Exercise 2:
#a:
ex2 <- read.csv("ex2.csv")
attach(ex2)
ex2model1 <- lm(y~x1+x2)
ex2model2 <- lm(y~x1+x2+I(x1^2)+I(x2^2)+x1:x2)
anova(ex2model1,ex2model2)
## Analysis of Variance Table
##
## Model 1: y ~ x1 + x2
## Model 2: y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 77 1565.7
## 2 74 1479.7 3 85.989 1.4335 0.2399
plot(ex2model1)

plot(ex2model2)

