Homework #5: Oxygen

oxygen<-read.csv("https://raw.githubusercontent.com/kitadasmalley/sp21_MATH376LMT/main/data/oxygenPurity.csv", header = TRUE)

model<-lm(purity~hydro, data=oxygen)
summary(model)
## 
## Call:
## lm(formula = purity ~ hydro, data = oxygen)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6724 -3.2113 -0.0626  2.5783  7.3037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   77.863      4.199  18.544 3.54e-13 ***
## hydro         11.801      3.485   3.386  0.00329 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.597 on 18 degrees of freedom
## Multiple R-squared:  0.3891, Adjusted R-squared:  0.3552 
## F-statistic: 11.47 on 1 and 18 DF,  p-value: 0.003291
confint(model)
##                 2.5 %   97.5 %
## (Intercept) 69.041747 86.68482
## hydro        4.479066 19.12299
newdata<-data.frame(hydro=1)
predict(model, newdata, interval="confidence")
##        fit      lwr      upr
## 1 89.66431 87.51017 91.81845

Homework #5: NFL

Fit a Model

nfl<-read.csv("https://raw.githubusercontent.com/kitadasmalley/sp21_MATH376LMT/main/data/nlf1976.csv",
              header=TRUE)

library(tidyverse)

ggplot(nfl, aes(x=x8, y))+
  geom_point()+
  geom_smooth(method="lm", se=FALSE)

mod<-lm(y~x8, data=nfl)

Confidence Interval for Slope

summary(mod)
## 
## Call:
## lm(formula = y ~ x8, data = nfl)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.804 -1.591 -0.647  2.032  4.580 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21.788251   2.696233   8.081 1.46e-08 ***
## x8          -0.007025   0.001260  -5.577 7.38e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.393 on 26 degrees of freedom
## Multiple R-squared:  0.5447, Adjusted R-squared:  0.5272 
## F-statistic:  31.1 on 1 and 26 DF,  p-value: 7.381e-06
confint(mod)
##                    2.5 %       97.5 %
## (Intercept) 16.246064040 27.330437725
## x8          -0.009614347 -0.004435854

Point Estimate

newdata<-data.frame(x8=1800)

predict(mod, newdata)
##       1 
## 9.14307

90% Confidence Interval

# change the confidence level to 90%
predict(mod, newdata, interval="confidence",level=.9)
##       fit      lwr      upr
## 1 9.14307 8.123803 10.16234

90% Prediction Interval

# change the prediction level to 90%
predict(mod, newdata, interval="predict", level=.9)
##       fit      lwr      upr
## 1 9.14307 4.936392 13.34975

CHECK FROM SCRATCH

## from scratch
beta_0<-mod$coefficients[1]
beta_0
## (Intercept) 
##    21.78825
beta_1<-mod$coefficients[2]
beta_1
##         x8 
## -0.0070251
x_0<-1800

pointEst<-beta_0+beta_1*x_0
pointEst
## (Intercept) 
##     9.14307
ss_res<-sum(mod$residuals^2)
n<-length(mod$residuals)
ms_res<-ss_res/(n-2)  

seConf<-sqrt(ms_res*(1 / n + (x_0 - mean(nfl$x8))^2 / (sum((nfl$x8 - mean(nfl$x8))^2))))
seConf 
## [1] 0.597594
# 90% 
pointEst+c(-1,1)*qt(0.95, n-2)*seConf
## [1]  8.123803 10.162337
sePred<-sqrt(ms_res*(1+(1 / n) + (x_0 - mean(nfl$x8))^2 / (sum((nfl$x8 - mean(nfl$x8))^2))))
sePred 
## [1] 2.466366
pointEst+c(-1,1)*qt(0.95, n-2)*sePred
## [1]  4.936392 13.349749

Confidence and Prediction Bands

# BANDS
confBand<-predict(mod, interval="confidence", level=.9)
predBand<-predict(mod, interval="predict", level=.9)
## Warning in predict.lm(mod, interval = "predict", level = 0.9): predictions on current data refer to _future_ responses
# rename cols in predBand to be unique from confBand
colnames(predBand)<-c("fit2", "lwr2", "upr2")

# create a new dataframe with the data and new bands
newDF<-cbind(nfl, confBand, predBand)

ggplot(newDF, aes(x=x8, y=y))+
  geom_point()+
  xlab("Yards Gained Rushing by Opponents")+
  ylab("Games Won")+
  geom_abline(slope=mod$coefficients[2], intercept=mod$coefficients[1],
              color="blue", lty=1, lwd=1)+
  geom_line(aes(y=lwr), color="green", lty=1, lwd=1)+
  geom_line(aes(y=upr), color="green", lty=1, lwd=1)+
  geom_line(aes(y=lwr2), color="red", lty=1, lwd=1)+
  geom_line(aes(y=upr2), color="red", lty=1, lwd=1)+
  theme_bw()