Problem 2: NFL Data

library("scatterplot3d")
# Load Data ( make sure URL is on one line)
nfl<-read.csv("https://raw.githubusercontent.com/kitadasmalley/sp21_MATH376LMT/main/data/nlf1976.csv", header = TRUE)

games_won = nfl$y
rushing_yards = nfl$x1
passing_yards = nfl$x2

b) Make a 3D scatterplot with the least squares fitted plane.

sp3d<-scatterplot3d(x=rushing_yards, y=passing_yards, z=games_won, 
                    pch=16, angle = 60)

c) Report the hat matrix H and I - H .

xMat<-matrix(c(rep(1, length(games_won)), 
          rushing_yards,passing_yards), 
          nrow=length(games_won))
head(xMat)
##      [,1] [,2] [,3]
## [1,]    1 2113 1985
## [2,]    1 2003 2855
## [3,]    1 2957 1737
## [4,]    1 2285 2905
## [5,]    1 2971 1666
## [6,]    1 2309 2927
#Confirm that H(I-H) = 0
hMat<-xMat%*%solve(t(xMat)%*%xMat)%*%t(xMat)

I<-diag(length(games_won))

head(hMat%*%(I-hMat))
##               [,1]          [,2]          [,3]          [,4]          [,5]
## [1,] -6.375109e-17 -1.128654e-16 -1.708703e-16 -1.435484e-16 -1.721713e-16
## [2,] -5.833008e-17  2.629190e-17  7.372575e-18  6.765422e-17  2.602085e-17
## [3,] -1.613293e-16  9.107298e-17  9.107298e-17  2.055647e-16  1.127570e-17
## [4,] -1.092876e-16  1.272853e-16  1.509209e-16  2.229120e-16  1.353084e-16
## [5,] -1.478852e-16  8.413409e-17  2.211772e-17  2.203099e-16  4.683753e-17
## [6,] -1.201296e-16  1.162265e-16  1.543904e-16  2.307182e-16  1.682682e-16
##               [,6]          [,7]          [,8]          [,9]         [,10]
## [1,] -1.452831e-16 -1.357421e-16 -1.264180e-16  3.122502e-17 -1.201296e-16
## [2,]  6.006480e-17  2.623769e-17 -2.818926e-18 -2.055647e-16  1.192622e-18
## [3,]  2.584738e-16  8.196568e-17  1.194791e-16 -4.909267e-16  2.504507e-17
## [4,]  2.203099e-16  1.435484e-16  1.079865e-16 -4.076600e-16  3.057450e-17
## [5,]  2.177078e-16  1.127570e-16  1.077697e-16 -4.770490e-16 -1.087455e-16
## [6,]  2.402592e-16  1.491862e-16  1.353084e-16 -4.076600e-16  3.024924e-17
##              [,11]         [,12]         [,13]         [,14]         [,15]
## [1,] -8.196568e-17 -8.283305e-17 -1.008037e-16 -5.421011e-17 -8.738670e-17
## [2,] -8.738670e-17 -5.074066e-17 -4.272434e-17 -9.454243e-17 -1.913617e-17
## [3,] -2.320193e-16 -8.890458e-17 -2.149431e-17 -1.314053e-16 -4.531965e-17
## [4,] -1.370432e-16 -4.813858e-17  4.626833e-17 -1.032160e-16 -1.713039e-17
## [5,] -2.372234e-16 -8.716985e-17 -1.246832e-17 -1.452831e-16 -8.998878e-17
## [6,] -1.517883e-16 -1.778092e-17  4.008838e-17 -1.144917e-16 -1.539567e-17
##              [,16]         [,17]         [,18]         [,19]         [,20]
## [1,] -4.336809e-18  1.040834e-17 -9.557242e-17 -9.649399e-17 -3.295975e-17
## [2,] -1.517883e-16 -1.613293e-16  2.989687e-17 -3.621235e-17 -1.340074e-16
## [3,] -3.903128e-16 -3.400058e-16  1.117812e-16 -6.461845e-17 -2.775558e-16
## [4,] -3.295975e-16 -2.879641e-16  5.984796e-17 -1.799776e-17 -2.038300e-16
## [5,] -3.668940e-16 -3.001072e-16  6.136584e-17 -4.423545e-17 -2.827599e-16
## [6,] -3.434752e-16 -3.348016e-16  5.995638e-17 -2.320193e-17 -2.532696e-16
##              [,21]         [,22]         [,23]         [,24]         [,25]
## [1,] -2.688821e-17 -3.209238e-17 -3.729655e-17 -1.409463e-16 -8.597723e-17
## [2,] -1.071192e-16 -1.331400e-16 -1.279359e-16  1.669671e-17 -3.773024e-17
## [3,] -3.113829e-16 -3.426079e-16 -3.209238e-16  1.752071e-16 -4.076600e-17
## [4,] -2.203099e-16 -2.758210e-16 -2.723516e-16  1.847481e-16 -2.949030e-17
## [5,] -2.896988e-16 -3.547510e-16 -3.001072e-16  2.168404e-16 -2.298509e-17
## [6,] -2.419939e-16 -2.810252e-16 -2.489328e-16  2.185752e-16  1.496199e-17
##              [,26]         [,27]         [,28]
## [1,] -9.649399e-17 -2.341877e-17  4.163336e-17
## [2,] -3.252607e-17 -1.242496e-16 -2.151057e-16
## [3,] -1.233822e-16 -1.717376e-16 -5.551115e-16
## [4,] -7.546047e-17 -1.674008e-16 -4.787837e-16
## [5,] -1.838807e-16 -1.387779e-16 -4.996004e-16
## [6,] -8.890458e-17 -1.769418e-16 -4.787837e-16
head(round(hMat%*%(diag(length(games_won))-hMat),2))
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0
## [2,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0
## [3,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0
## [4,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0
## [5,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0
## [6,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## [1,]     0     0     0     0     0     0     0     0     0     0     0     0
## [2,]     0     0     0     0     0     0     0     0     0     0     0     0
## [3,]     0     0     0     0     0     0     0     0     0     0     0     0
## [4,]     0     0     0     0     0     0     0     0     0     0     0     0
## [5,]     0     0     0     0     0     0     0     0     0     0     0     0
## [6,]     0     0     0     0     0     0     0     0     0     0     0     0
##      [,27] [,28]
## [1,]     0     0
## [2,]     0     0
## [3,]     0     0
## [4,]     0     0
## [5,]     0     0
## [6,]     0     0

d) Construct the ANOVA table and test for significance of regression given the normality assumption

anova(mod)
## Analysis of Variance Table
## 
## Response: games_won
##               Df  Sum Sq Mean Sq F value    Pr(>F)    
## rushing_yards  1 115.068 115.068  22.378 7.492e-05 ***
## passing_yards  1  83.343  83.343  16.208 0.0004635 ***
## Residuals     25 128.553   5.142                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Response vect
Y<-games_won

n = length(games_won)

### SS(Res)
I<-diag(length(games_won))
ss_res<-t(Y)%*%(I-hMat)%*%Y
ss_res
##          [,1]
## [1,] 128.5531
### SS(Tot)
ones<-matrix(rep(1, length(games_won)),
             nrow=length(games_won))

J<-ones%*%t(ones)

ss_tot<-t(Y)%*%(I-(1/n)*J)%*%Y
ss_tot
##          [,1]
## [1,] 326.9643
### SS(Reg)

ss_reg<-t(Y)%*%(hMat-(1/n)*J)%*%Y
ss_reg
##          [,1]
## [1,] 198.4112
#Anova Table

p=2

### MS(Res)
ms_res<-ss_res/(n-p-1)
ms_res
##          [,1]
## [1,] 5.142124
### MS(Reg)
ms_reg<-ss_reg/p
ms_reg
##         [,1]
## [1,] 99.2056
### F-stat
f_stat<-ms_reg/ms_res
f_stat
##          [,1]
## [1,] 19.29273
### P-value
pf(f_stat, df1=p, df2=(n-p-1), lower.tail = FALSE)
##              [,1]
## [1,] 8.556137e-06

e) Report a good estimate of the nuisance parameter σ .

ms_res<-sum(mod$residuals^2)/(n-p-1)
sigma_hat<-sqrt(ms_res)
sigma_hat
## [1] 2.267625

f) Create the covariance matrix for β . Print to matrix

g) Report the standard errors of β0 , β1 , and β2 .

covMat<-ms_res*solve(t(xMat)%*%xMat)
covMat
##              [,1]          [,2]          [,3]
## [1,]  9.682627559 -2.782428e-03 -1.705263e-03
## [2,] -0.002782428  1.281857e-06  3.640215e-08
## [3,] -0.001705263  3.640215e-08  7.655303e-07
sqrt(diag(covMat))
## [1] 3.1116920733 0.0011321912 0.0008749459

h) Based on the sample, give an estimate of the covariance between β1 and βˆ2

cov(rushing_yards, passing_yards)
## [1] -7074.418

i) Perform t-tests for the slope parameters.

n<-dim(nfl)[1]

beta_1<-mod$coefficients[2]
beta_1
## rushing_yards 
##   0.005519703
beta_2<-mod$coefficients[3]
beta_2
## passing_yards 
##   0.003522447
ss_res<-sum(mod$residuals^2)

se_b1<-sqrt(ms_res/sum((rushing_yards-mean(rushing_yards))^2))
se_b1
## [1] 0.001131427
se_b2<-sqrt(ms_res/sum((passing_yards-mean(passing_yards))^2))
se_b2
## [1] 0.000874355
test_stat1<-beta_1/se_b1
test_stat2<-beta_2/se_b2


pt(abs(test_stat1), df=n-2, lower.tail=FALSE)*2
## rushing_yards 
##  4.630568e-05
pt(abs(test_stat2), df=n-2, lower.tail=FALSE)*2
## passing_yards 
##  0.0004338643

j) Interpret each of these tests on the context of the data.

Because both of the test statistics were less than alpha level 0.05, we can reject the null hypothesis and state that there is no difference between the means.