library(caret);data(faithful); set.seed(333)
inTrain <- createDataPartition(y=faithful$waiting,
p=0.5, list=FALSE)
trainFaith <- faithful[inTrain,]; testFaith <- faithful[-inTrain,]
head(trainFaith)
## eruptions waiting
## 1 3.600 79
## 3 3.333 74
## 5 4.533 85
## 6 2.883 55
## 7 4.700 88
## 8 3.600 85
plot(trainFaith$waiting,trainFaith$eruptions,pch=19,col="blue",xlab="Waiting",ylab="Duration")
\[ ED_i = b_0 + b_1 WT_i + e_i \]
lm1 <- lm(eruptions ~ waiting,data=trainFaith)
summary(lm1)
##
## Call:
## lm(formula = eruptions ~ waiting, data = trainFaith)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26990 -0.34789 0.03979 0.36589 1.05020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.792739 0.227869 -7.867 1.04e-12 ***
## waiting 0.073901 0.003148 23.474 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.495 on 135 degrees of freedom
## Multiple R-squared: 0.8032, Adjusted R-squared: 0.8018
## F-statistic: 551 on 1 and 135 DF, p-value: < 2.2e-16
plot(trainFaith$waiting,trainFaith$eruptions,pch=19,col="blue",xlab="Waiting",ylab="Duration")
lines(trainFaith$waiting,lm1$fitted,lwd=3)
\[\hat{ED} = \hat{b}_0 + \hat{b}_1 WT\]
coef(lm1)[1] + coef(lm1)[2]*80
## (Intercept)
## 4.119307
newdata <- data.frame(waiting=c(80, 90, 100, 110, 120))
predict(lm1,newdata)
## 1 2 3 4 5
## 4.119307 4.858313 5.597319 6.336325 7.075331
par(mfrow=c(1,2))
plot(trainFaith$waiting,trainFaith$eruptions,pch=19,col="blue",xlab="Waiting",ylab="Duration")
lines(trainFaith$waiting,predict(lm1),lwd=3)
plot(testFaith$waiting,testFaith$eruptions,pch=19,col="blue",xlab="Waiting",ylab="Duration")
lines(testFaith$waiting,predict(lm1,newdata=testFaith),lwd=3)
# Calculate RMSE on training
sqrt(sum((lm1$fitted-trainFaith$eruptions)^2))
## [1] 5.75186
# Calculate RMSE on test
sqrt(sum((predict(lm1,newdata=testFaith)-testFaith$eruptions)^2))
## [1] 5.838559
pred1 <- predict(lm1,newdata=testFaith,interval="prediction")
ord <- order(testFaith$waiting)
plot(testFaith$waiting,testFaith$eruptions,pch=19,col="blue")
matlines(testFaith$waiting[ord],pred1[ord,],type="l",,col=c(1,2,2),lty = c(1,1,1), lwd=3)
modFit <- train(eruptions ~ waiting,data=trainFaith,method="lm")
summary(modFit$finalModel)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26990 -0.34789 0.03979 0.36589 1.05020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.792739 0.227869 -7.867 1.04e-12 ***
## waiting 0.073901 0.003148 23.474 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.495 on 135 degrees of freedom
## Multiple R-squared: 0.8032, Adjusted R-squared: 0.8018
## F-statistic: 551 on 1 and 135 DF, p-value: < 2.2e-16