Fit a Model
nfl<-read.csv("https://raw.githubusercontent.com/kitadasmalley/sp21_MATH376LMT/main/data/nlf1976.csv",
header=TRUE)
library(tidyverse)
ggplot(nfl, aes(x=x8, y))+
geom_point()+
geom_smooth(method="lm", se=FALSE)

mod<-lm(y~x8, data=nfl)
Confidence Interval for Slope
summary(mod)
##
## Call:
## lm(formula = y ~ x8, data = nfl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.804 -1.591 -0.647 2.032 4.580
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.788251 2.696233 8.081 1.46e-08 ***
## x8 -0.007025 0.001260 -5.577 7.38e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.393 on 26 degrees of freedom
## Multiple R-squared: 0.5447, Adjusted R-squared: 0.5272
## F-statistic: 31.1 on 1 and 26 DF, p-value: 7.381e-06
confint(mod)
## 2.5 % 97.5 %
## (Intercept) 16.246064040 27.330437725
## x8 -0.009614347 -0.004435854
90% Confidence Interval
# change the confidence level to 90%
predict(mod, newdata, interval="confidence",level=.9)
## fit lwr upr
## 1 9.14307 8.123803 10.16234
90% Prediction Interval
# change the prediction level to 90%
predict(mod, newdata, interval="predict", level=.9)
## fit lwr upr
## 1 9.14307 4.936392 13.34975
CHECK FROM SCRATCH
## from scratch
beta_0<-mod$coefficients[1]
beta_0
## (Intercept)
## 21.78825
beta_1<-mod$coefficients[2]
beta_1
## x8
## -0.0070251
x_0<-1800
pointEst<-beta_0+beta_1*x_0
pointEst
## (Intercept)
## 9.14307
ss_res<-sum(mod$residuals^2)
n<-length(mod$residuals)
ms_res<-ss_res/(n-2)
seConf<-sqrt(ms_res*(1 / n + (x_0 - mean(nfl$x8))^2 / (sum((nfl$x8 - mean(nfl$x8))^2))))
seConf
## [1] 0.597594
# 90%
pointEst+c(-1,1)*qt(0.95, n-2)*seConf
## [1] 8.123803 10.162337
sePred<-sqrt(ms_res*(1+(1 / n) + (x_0 - mean(nfl$x8))^2 / (sum((nfl$x8 - mean(nfl$x8))^2))))
sePred
## [1] 2.466366
pointEst+c(-1,1)*qt(0.95, n-2)*sePred
## [1] 4.936392 13.349749
Confidence and Prediction Bands
# BANDS
confBand<-predict(mod, interval="confidence", level=.9)
predBand<-predict(mod, interval="predict", level=.9)
## Warning in predict.lm(mod, interval = "predict", level = 0.9): predictions on current data refer to _future_ responses
# rename cols in predBand to be unique from confBand
colnames(predBand)<-c("fit2", "lwr2", "upr2")
# create a new dataframe with the data and new bands
newDF<-cbind(nfl, confBand, predBand)
ggplot(newDF, aes(x=x8, y=y))+
geom_point()+
xlab("Yards Gained Rushing by Opponents")+
ylab("Games Won")+
geom_abline(slope=mod$coefficients[2], intercept=mod$coefficients[1],
color="blue", lty=1, lwd=1)+
geom_line(aes(y=lwr), color="green", lty=1, lwd=1)+
geom_line(aes(y=upr), color="green", lty=1, lwd=1)+
geom_line(aes(y=lwr2), color="red", lty=1, lwd=1)+
geom_line(aes(y=upr2), color="red", lty=1, lwd=1)+
theme_bw()
