Problem 7.4
rm(list = ls())
setwd("~/Documents/Stats 525/Code")
Grocery <- read.delim("~/Documents/Stats 525/DATA/CH06PR09.txt", header=FALSE)
names(Grocery)<-c("hours", "cases", "labor", "holiday")
attach(Grocery)
Y <- hours
X1 <- cases
X2 <- labor
X3 <- holiday
(a)
#anova tables for SSR(X1), SSR(X3|X1), SSR(X2|X1, X3)
mod123 = lm(Y~X1+X2+X3)
anova(mod123)
Analysis of Variance Table
Response: Y
Df Sum Sq Mean Sq F value Pr(>F)
X1 1 136366 136366 6.6417 0.01309 *
X2 1 5726 5726 0.2789 0.59987
X3 1 2034514 2034514 99.0905 2.941e-13 ***
Residuals 48 985530 20532
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#SSR(X1) = 136366
#SSR(X3|X1) = 2033565
#SSR(X2|X1,X3) = 6675
(b)
Hypothesis: H0: B2 = 0 vs HA: B2 != 0 SSR(X2|X1,X3) = 667, SSE(X1,X2,X3) = 985530 Decision Rule: Reject H0 if Fstar > Fobs Fobs = F(0.95, 1, 48) = 4.04 Fstar = (6675/1)/(985530/(52-4)) = 0.325 Conclusion: Fstar < Fobs –> fail to reject H0 Pvalue = 0.57123
qf(0.95, 1, 48)
[1] 4.042652
(c)
anova(mod123)
Analysis of Variance Table
Response: Y
Df Sum Sq Mean Sq F value Pr(>F)
X1 1 136366 136366 6.6417 0.01309 *
X2 1 5726 5726 0.2789 0.59987
X3 1 2034514 2034514 99.0905 2.941e-13 ***
Residuals 48 985530 20532
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
mod213 = lm(Y~X2+X1+X3)
anova(mod213)
Analysis of Variance Table
Response: Y
Df Sum Sq Mean Sq F value Pr(>F)
X2 1 11395 11395 0.5550 0.4599
X1 1 130697 130697 6.3656 0.0150 *
X3 1 2034514 2034514 99.0905 2.941e-13 ***
Residuals 48 985530 20532
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
SSR(X1)+SSR(X2|X1) = 136366 + 5726 = 142092 SSR(X2)+SSR(X1|X2) = 11395 + 130697 = 142092 Yes, the two are equal and always will be.
Problem 7.6
Hypothesis: H0: Beta2 = beta3 = 0 HA: At least one Beta is not equal to zero Decision Rule: Reject H0 if the P-value is < 0.025 P-value = 0.02216 Conclusion: P-value < 0.025 so we reject H0 at alpha = 0.025 We cannot drop X2 and X3 and obtain a better model than the full model
rm(list = ls())
setwd("~/Documents/Stats 525/Code")
patientsatisfaction <- read.table("~/Documents/Stats 525/(MODIFIED) data sets for HW problems assigned from textbook-20210325/CH06PR15.txt", quote="\"", comment.char="")
names(patientsatisfaction)<-c("satisfaction", "age", "severity", "anxiety")
attach(patientsatisfaction)
Y<-patientsatisfaction$satisfaction
X1<-patientsatisfaction$age
X2<-patientsatisfaction$severity
X3<-patientsatisfaction$anxiety
mod_full = lm(Y~X1+X2+X3)
mod_reduced = lm(Y ~ X1)
anova(mod_reduced, mod_full)
Analysis of Variance Table
Model 1: Y ~ X1
Model 2: Y ~ X1 + X2 + X3
Res.Df RSS Df Sum of Sq F Pr(>F)
1 44 5093.9
2 42 4248.8 2 845.07 4.1768 0.02216 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Problem 7.11 (a)
X1 = c(4, 4, 4, 4, 6, 6, 6, 6)
X2 = c(2, 2, 3, 3, 2, 2, 3, 3)
Y = c(42, 39, 48, 51, 49, 53, 61, 60)
df <- cbind(Y, X1, X2)
n = length(Y)
X = cbind(rep(1,n),X1)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1,8,8)
SSR = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
SSTO = tYY - (1/n) * t(Y) %*% J %*% Y
SSR/SSTO
[,1]
[1,] 0.5504614
X1 = c(4, 4, 4, 4, 6, 6, 6, 6)
X2 = c(2, 2, 3, 3, 2, 2, 3, 3)
Y = c(42, 39, 48, 51, 49, 53, 61, 60)
df <- cbind(Y, X1, X2)
n = length(Y)
X = cbind(rep(1,n),X2)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1,8,8)
SSR = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
SSTO = tYY - (1/n) * t(Y) %*% J %*% Y
SSR/SSTO
[,1]
[1,] 0.4075618
n = length(X2)
X = cbind(rep(1,n),X1)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1,8,8)
SSR = t(b) %*% t(X) %*% Y - (1 / n) * t(X2) %*% J %*% X2
SSTO = tYY - (1/n) * t(X2) %*% J %*% X2
SSR/SSTO
[,1]
[1,] 0.9908689
#R2Y12
#total variance
n = length(Y)
X = cbind(rep(1,n),X1,X2)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1,8,8)
SSRX1X2 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
X = cbind(rep(1,n),X2)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
SSRX2 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
Yhat = X%*%b
e = Y - Yhat
SSEX2 = t(e) %*% e
(SSRX1X2 - SSRX2)/SSEX2
[,1]
[1,] 0.9291457
#R2Y12
n = length(Y)
X = cbind(rep(1,n), X2, X1)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1, 8, 8)
SSR_X2_X1 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
X = cbind(rep(1,n), X1)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
SSR_X1 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
Y_hat = X%*%b
e = Y - Y_hat
SSE_X1 = t(e) %*% e
(SSR_X2_X1 - SSR_X1)/SSE_X1
[,1]
[1,] 0.9066225
n = length(Y)
X = cbind(rep(1,n), X1, X2)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1, 8, 8)
SSR = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
SSTO = tYY - (1/n) * t(Y) %*% J %*% Y
SSR/SSTO
[,1]
[1,] 0.9580232
(b) Yes, the correlation between X1 and X2 is zero.
Problem 7.26 (a)
mod = lm(Y~X1+X2)
mod
Call:
lm(formula = Y ~ X1 + X2)
Coefficients:
(Intercept) X1 X2
156.6719 -1.2677 -0.9208
(b)
mod = lm(Y~X1+X2+X3)
mod
Call:
lm(formula = Y ~ X1 + X2 + X3)
Coefficients:
(Intercept) X1 X2 X3
158.491 -1.142 -0.442 -13.470
Yhat = 158.491 - 1.142X1 - 0.442X2 - 13.47X3 These coefficients are pretty close between 6.15c and parts a and b.
(c) SSR(X1) = 8275.389 SSR(X1 | X3) = 3483.891 SSR(X2) = 4860.26 SSR(X2 | X3) = 707.9971 Neither expressions are equal.
n = length(Y)
X = cbind(rep(1,n), X1)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
J = matrix(1, n, n)
SSR_X1 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
X = cbind(rep(1,n), X1, X3)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
SSR_X1_X3 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
X = cbind(rep(1,n), X3)
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
b = solve(tXX)%*%tXY
SSR_X3 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
SSR_X1
[,1]
[1,] 8275.389
SSR_X1_X3 - SSR_X3
[,1]
[1,] 3483.891
n = length(Y)
#Create a n by 3 matrix with 1's in the first column
X = cbind(rep(1,n), X2)
#t(X) is transpose. %*% is matrix multiplication
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
#solve(tXX) is the inverse of the matrix tXX.
b = solve(tXX)%*%tXY
J = matrix(1, n, n) #J
SSR_X2 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
#Create a n by 3 matrix with 1's in the first column
X = cbind(rep(1,n), X2, X3)
#t(X) is transpose. %*% is matrix multiplication
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
#solve(tXX) is the inverse of the matrix tXX.
b = solve(tXX)%*%tXY
SSR_X2_X3 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
#Create a n by 3 matrix with 1's in the first column
X = cbind(rep(1,n), X3)
#t(X) is transpose. %*% is matrix multiplication
tXX = t(X)%*%X
tXY = t(X)%*%Y
tYY = t(Y)%*%Y
#solve(tXX) is the inverse of the matrix tXX.
b = solve(tXX)%*%tXY
SSR_X3 = t(b) %*% t(X) %*% Y - (1 / n) * t(Y) %*% J %*% Y
SSR_X2
[,1]
[1,] 4860.26
SSR_X2_X3 - SSR_X3
[,1]
[1,] 707.9971
(d) r12 = 0.5680, r13 = 0.5697, r23 = 0.6705. This supports that there are some dependencies between some predicotr variables.
round(cor(patientsatisfaction), 4)
satisfaction age severity anxiety
satisfaction 1.0000 -0.7868 -0.6029 -0.6446
age -0.7868 1.0000 0.5680 0.5697
severity -0.6029 0.5680 1.0000 0.6705
anxiety -0.6446 0.5697 0.6705 1.0000