R Markdown

mussels <- read.csv("mussels.csv")
attach(mussels)

library(caret)
## Warning: package 'caret' was built under R version 4.0.4
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.4
library(MASS)
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.0.4
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
## 
##     cement
## The following object is masked from 'package:datasets':
## 
##     rivers
#Exercise 1:
plot(mussels)

par(mfrow=c(2,2))
#AIC:
modelqua <- lm(Mass~Height+Width+Length+I(Height^2)+I(Width^2)+I(Length^2)
                +Height:Width+Height:Length+Width:Length)
plot(modelqua)

boxcox(modelqua)

yn <- (Mass^(-0.5)-1)/(-0.5)

modelquan <- lm(yn~Height+Width+Length+I(Height^2)+I(Width^2)+I(Length^2)
                 +Height:Width+Height:Length+Width:Length)
plot(modelquan)

modelquan.stepped <-step(modelquan)
## Start:  AIC=-880.78
## yn ~ Height + Width + Length + I(Height^2) + I(Width^2) + I(Length^2) + 
##     Height:Width + Height:Length + Width:Length
## 
##                 Df  Sum of Sq       RSS     AIC
## - I(Width^2)     1 3.2770e-06 0.0013933 -882.59
## - I(Height^2)    1 6.5710e-06 0.0013966 -882.39
## - Width:Length   1 3.1872e-05 0.0014219 -880.92
## <none>                        0.0013901 -880.78
## - Height:Width   1 1.1382e-04 0.0015039 -876.33
## - I(Length^2)    1 1.5052e-04 0.0015406 -874.35
## - Height:Length  1 1.7420e-04 0.0015643 -873.10
## 
## Step:  AIC=-882.59
## yn ~ Height + Width + Length + I(Height^2) + I(Length^2) + Height:Width + 
##     Height:Length + Width:Length
## 
##                 Df  Sum of Sq       RSS     AIC
## - I(Height^2)    1 6.1370e-06 0.0013995 -884.23
## <none>                        0.0013933 -882.59
## - Width:Length   1 1.1113e-04 0.0015045 -878.29
## - Height:Width   1 1.1211e-04 0.0015054 -878.24
## - Height:Length  1 1.7302e-04 0.0015664 -874.99
## - I(Length^2)    1 1.8810e-04 0.0015814 -874.20
## 
## Step:  AIC=-884.23
## yn ~ Height + Width + Length + I(Length^2) + Height:Width + Height:Length + 
##     Width:Length
## 
##                 Df  Sum of Sq       RSS     AIC
## <none>                        0.0013995 -884.23
## - Width:Length   1 0.00010524 0.0015047 -880.28
## - Height:Width   1 0.00011073 0.0015102 -879.98
## - I(Length^2)    1 0.00019277 0.0015922 -875.64
## - Height:Length  1 0.00025729 0.0016568 -872.39
modelquan.stepped
## 
## Call:
## lm(formula = yn ~ Height + Width + Length + I(Length^2) + Height:Width + 
##     Height:Length + Width:Length)
## 
## Coefficients:
##   (Intercept)         Height          Width         Length    I(Length^2)  
##     1.593e+00      3.687e-03     -1.320e-04     -1.221e-04      7.221e-06  
##  Height:Width  Height:Length   Width:Length  
##     5.054e-05     -2.078e-05     -2.030e-05
plot(modelquan.stepped)

anova(modelquan.stepped,modelquan)
## Analysis of Variance Table
## 
## Model 1: yn ~ Height + Width + Length + I(Length^2) + Height:Width + Height:Length + 
##     Width:Length
## Model 2: yn ~ Height + Width + Length + I(Height^2) + I(Width^2) + I(Length^2) + 
##     Height:Width + Height:Length + Width:Length
##   Res.Df       RSS Df  Sum of Sq      F Pr(>F)
## 1     74 0.0013995                            
## 2     72 0.0013901  2 9.4147e-06 0.2438 0.7843
modellin <- lm(Mass~Height+Width+Length)

plot(modellin)

boxcox(modellin)

modellinn <- lm(log(Mass)~Height+Width+Length)

plot(modellinn)

modellinn.stepped <-step(modellinn)
## Start:  AIC=-427.22
## log(Mass) ~ Height + Width + Length
## 
##          Df Sum of Sq     RSS     AIC
## <none>                0.40620 -427.22
## - Length  1   0.06702 0.47322 -416.70
## - Width   1   0.19292 0.59912 -397.36
## - Height  1   0.26001 0.66621 -388.66
modellinn.stepped
## 
## Call:
## lm(formula = log(Mass) ~ Height + Width + Length)
## 
## Coefficients:
## (Intercept)       Height        Width       Length  
##    3.135117     0.009421     0.014326     0.002363
plot(modellinn.stepped)

AIC(modelquan.stepped, modellinn)
##                   df       AIC
## modelquan.stepped  9 -649.5209
## modellinn          5 -192.5189
#Quadratic model is better in AIC
#Cross-validation:

train.control <- trainControl(method = "cv", number = 10)

model.lincv <- train(log(Mass)~Height+Width+Length, data=mussels, method = "lm",
                   trControl = train.control)

model.lincv$results
##   intercept       RMSE  Rsquared        MAE     RMSESD RsquaredSD     MAESD
## 1      TRUE 0.07353902 0.9722304 0.05625326 0.02294634 0.02026655 0.0136949
model.quacv <- train(((Mass^(-0.5)-1)/(-0.5))~Height+Width+Length+I(Length^2)+Height:Width+Height:Length+Width:Length, 
                     data=mussels, method = "lm",
                     trControl = train.control)

model.quacv$results
##   intercept        RMSE  Rsquared         MAE      RMSESD RsquaredSD
## 1      TRUE 0.004935906 0.9700964 0.003988631 0.001861945 0.03592856
##         MAESD
## 1 0.001164207
#Mallow's Cp:

ols_mallows_cp(modelquan.stepped,modelquan)
## [1] 6.487645
plot(modelquan.stepped)

#b:

New.Values <- data.frame(Height=100,Width=100,Length=200)
predvalue <- predict(modelquan.stepped, New.Values,interval="prediction")

predmass <- ((predvalue*(-0.5))+1)^(-2)
predmass
##        fit      lwr      upr
## 1 371.9043 204.6153 875.3266
#Exercise 2:
#a:
ex2 <- read.csv("ex2.csv")
attach(ex2)

ex2model1 <- lm(y~x1+x2)
ex2model2 <- lm(y~x1+x2+I(x1^2)+I(x2^2)+x1:x2)

anova(ex2model1,ex2model2)
## Analysis of Variance Table
## 
## Model 1: y ~ x1 + x2
## Model 2: y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     77 1565.7                           
## 2     74 1479.7  3    85.989 1.4335 0.2399
plot(ex2model1)

plot(ex2model2)