7.7 a

SSR(X4) = 67.775

SSR(X1|X4) = 42.2746

SSR(X2|X1,X4) = 27.85

SSR(X3|X1,X2,X4) = 0.419

# Load the data set. The file has no header row, so read.table() assigns the
# default column names V1..V5. In the analysis below V1 is the response Y and
# V2..V5 are the predictors X1..X4.
ch18 <- read.table("CH06PR18.txt")

# Preview the first rows once (the duplicate head() call was removed).
head(ch18)

##     V1 V2    V3   V4     V5
## 1 13.5  1  5.02 0.14 123000
## 2 12.0 14  8.19 0.27 104079
## 3 10.5 16  3.00 0.00  39998
## 4 15.0  4 10.70 0.05  57112
## 5 14.0 11  8.97 0.07  60000
## 6 10.5 15  9.45 0.24 101385

# i) SSR(X4): regress Y (V1) on X4 (V5) alone; SSR(X4) is the V5 "Sum Sq" entry
fit4 <- lm(V1 ~ V5, data = ch18)
anova(fit4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V5         1  67.775  67.775  31.723 2.628e-07 ***
## Residuals 79 168.782   2.136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

67.775 # SSR(X4), copied from the V5 "Sum Sq" entry of the ANOVA table above

## [1] 67.775

# ii) SSR(X1|X4) = SSE(X4) - SSE(X1,X4)
# SSE(X4) is the residual sum of squares of the X4-only (V5-only) model.
fit_x4 <- lm(V1 ~ V5, data = ch18)
anova(fit_x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V5         1  67.775  67.775  31.723 2.628e-07 ***
## Residuals 79 168.782   2.136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# SSE(X1,X4): residual sum of squares of the model with V2 (X1) and V5 (X4)
fit_x1x4 <- lm(V1 ~ V2 + V5, data = ch18)
anova(fit_x1x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V2         1  14.819  14.819  9.1365  0.003389 ** 
## V5         1  95.231  95.231 58.7160 4.225e-11 ***
## Residuals 78 126.508   1.622                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

168.782-126.508 # SSR(X1|X4) = SSE(X4) - SSE(X1,X4), from the two residual rows above

## [1] 42.274

# iii) SSR(X2|X1,X4) = SSE(X1,X4) - SSE(X1,X2,X4)
# SSE(X1,X2,X4) is the residual sum of squares of the model with V2, V3 and V5.
fit_x1x2x4 <- lm(V1 ~ V2 + V3 + V5, data = ch18)
anova(fit_x1x2x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## V2         1 14.819  14.819  11.566  0.001067 ** 
## V3         1 72.802  72.802  56.825 7.841e-11 ***
## V5         1 50.287  50.287  39.251 1.973e-08 ***
## Residuals 77 98.650   1.281                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

126.508 - 98.650 # SSR(X2|X1,X4) = SSE(X1,X4) - SSE(X1,X2,X4)

## [1] 27.858

# iv) SSR(X3|X1,X2,X4) = SSE(X1,X2,X4) - SSE(X1,X2,X3,X4)
# The full model regresses V1 on every other column of ch18 (V2..V5).
fit_full <- lm(V1 ~ ., data = ch18)
anova(fit_full)

## Analysis of Variance Table
## 
## Response: V1
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## V2         1 14.819  14.819 11.4649  0.001125 ** 
## V3         1 72.802  72.802 56.3262 9.699e-11 ***
## V4         1  8.381   8.381  6.4846  0.012904 *  
## V5         1 42.325  42.325 32.7464 1.976e-07 ***
## Residuals 76 98.231   1.293                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

98.650- 98.231 # SSR(X3|X1,X2,X4) = SSE(X1,X2,X4) - SSE(X1,X2,X3,X4)

## [1] 0.419

7.7 b

Hypothesis Test:

\(H_0: \beta_{3}= 0\)

\(H_A:\) \(\beta_{3} \neq 0\)

Decision Rule:

If \(F* > F\), then reject \(H_0\).

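Conclusion: Comparing the full model with the reduced model (the one without \(X_3\)), we see that F* = 0.3248 < F(0.99; 1, 76) = 6.9806. Thus, we fail to reject \(H_0\): \(X_3\) can be dropped from a model that already contains \(X_1\), \(X_2\) and \(X_4\).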

The p-value is 0.5707886

# Method 1: partial F test via anova(), comparing the reduced fit
# (V1 ~ V2 + V3 + V5, i.e. without X3 = V4) against the full fit.
anova(fit_x1x2x4, fit_full)

## Analysis of Variance Table
## 
## Model 1: V1 ~ V2 + V3 + V5
## Model 2: V1 ~ V2 + V3 + V4 + V5
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     77 98.650                           
## 2     76 98.231  1   0.41975 0.3248 0.5704

# Critical value F(0.99; 1, n - p) for the test of a single coefficient.
# p is the number of parameters in the full model (intercept + 4 slopes).
n <- nrow(ch18)
p <- 5
F_test <- qf(0.99, df1 = 1, df2 = n - p)
F_test

## [1] 6.980578

# Method 2: F* = [SSR(X3|X1,X2,X4) / 1] / [SSE(X1,X2,X3,X4) / 76],
# with both sums of squares typed in by hand (0.42 is a rounded SSR).
(0.42/1) / (98.2306/76)

## [1] 0.3249497

# Method 3: general linear test computed by hand.
# p = parameters in the full model, q = parameters in the reduced model.
n <- nrow(ch18)
p <- 5
q <- 4
SSE_X124 <- 98.650   # SSE(X1,X2,X4): residual SS of the reduced model
SSE_X1234 <- 98.231  # SSE(X1,X2,X3,X4): residual SS of the full model
# F* = [(SSE_reduced - SSE_full) / (p - q)] / [SSE_full / (n - p)]
Fstar <- (SSE_X124 - SSE_X1234) / (p - q) / (SSE_X1234 / (n - p))
Fstar

## [1] 0.3241746

# Upper-tail p-value of F* on (p - q, n - p) degrees of freedom
1 - pf(Fstar, df1 = p - q, df2 = n - p)

## [1] 0.5707886

7.8

Hypothesis Test:

\(H_0: \beta_{2} = \beta_{3} = 0\)

\(H_A:\) At least one of \(\beta_{2}\), \(\beta_{3}\) is not equal to 0

Decision Rule:

If \(F* > F\), then reject \(H_0\).

Conclusion: From the full model compared to our reduced model, we see that the F* value of 10.93 > F = 4.8958. Thus, we reject our \(H_0\).

The p-value is 6.682e-05.

# Method 1: partial F test via anova() for dropping V3 and V4 (X2 and X3)
# together: reduced fit V1 ~ V2 + V5 against the full fit.
anova(fit_x1x4, fit_full)

## Analysis of Variance Table
## 
## Model 1: V1 ~ V2 + V5
## Model 2: V1 ~ V2 + V3 + V4 + V5
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     78 126.508                                  
## 2     76  98.231  2    28.277 10.939 6.682e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Method 2
# SSR(X2,X3|X1,X4) = SSE(X1,X4) - SSE(X1,X2,X3,X4)
126.508 - 98.231 # = 28.277

## [1] 28.277

# F* = [SSR(X2,X3|X1,X4) / 2] / [SSE(X1,X2,X3,X4) / 76]
(28.277/2) / (98.2306/76)

## [1] 10.93881

# Critical value F(0.99; 2, n - p) for the joint test of two coefficients
F_test <- qf(0.99, df1 = 2, df2 = n - p)
F_test

## [1] 4.89584

7.15 Only calculate and interpret R^2{Y1|4}

\(R^2_{Y1|4}\) = 0.2505

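The coefficient of partial determination \(R^2_{Y1|4}\) = 0.2505 is the proportional reduction in the variation in Y remaining after X4 is in the model that is gained by also including X1. In other words, adding X1 to a model that already contains X4 reduces the error sum of squares by about 25.05%.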

# R^2_{Y1|4}: coefficient of partial determination between Y and X1 given X4
# SSR(X1|X4)/SSE(X4) = (SSE(X4) - SSE(X1,X4))/SSE(X4) = 1 - SSE(X1,X4)/SSE(X4)

anova(fit_x1x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V2         1  14.819  14.819  9.1365  0.003389 ** 
## V5         1  95.231  95.231 58.7160 4.225e-11 ***
## Residuals 78 126.508   1.622                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Refit the X4-only model (V5 is X4); its residual row gives SSE(X4)
fit_x4 <- lm(V1 ~ V5, data = ch18)
anova(fit_x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V5         1  67.775  67.775  31.723 2.628e-07 ***
## Residuals 79 168.782   2.136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# R^2_{Y1|4} = 1 - SSE(X1,X4)/SSE(X4), using the two residual sums of squares above
1 - (126.508/168.782)

## [1] 0.2504651

7.19 a,b,c

a.

The standardized regression model is \(\hat{Y}^*\) = -5.478526e-01\(X_1^*\) + 4.236468e-01\(X_2^*\) + 4.846136e-02\(X_3^*\) + 5.027571e-01\(X_4^*\)

b.

b2* is 4.236e-01 (0.4236), the X2star coefficient of the standardized model.

When \(X_2\) increases by one standard deviation, with the other predictors held fixed, the estimated mean of Y increases by 0.4236 standard deviations of Y.

c.

Standardized coefficients: (Intercept) = -3.364857e-16, X1star = -5.478526e-01,
X2star = 4.236468e-01, X3star = 4.846136e-02, X4star = 5.027571e-01

Original coefficients for fitted reg model, recovered from the standardized fit: b0 (Intercept) = 1.220059e+01,
b1 = -1.420336e-01,
b2 = 2.820165e-01,
b3 = 6.193435e-01, b4 = 7.924302e-06

Coefficients from 6.18c - Same coefficients \(\hat{Y} = 12.2006 − 0.1420X1+ 0.2820X2 + 0.6193X3 + 0.0000079X4\)

# Coefficient table of the full model (V1 on V2..V5) in the original units,
# for comparison with the back-transformed standardized coefficients.
summary(fit_full)

## 
## Call:
## lm(formula = V1 ~ ., data = ch18)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1872 -0.5911 -0.0910  0.5579  2.9441 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.220e+01  5.780e-01  21.110  < 2e-16 ***
## V2          -1.420e-01  2.134e-02  -6.655 3.89e-09 ***
## V3           2.820e-01  6.317e-02   4.464 2.75e-05 ***
## V4           6.193e-01  1.087e+00   0.570     0.57    
## V5           7.924e-06  1.385e-06   5.722 1.98e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.137 on 76 degrees of freedom
## Multiple R-squared:  0.5847, Adjusted R-squared:  0.5629 
## F-statistic: 26.76 on 4 and 76 DF,  p-value: 7.272e-14

# Correlation transformation of V2 (X1):
#   (1 / sqrt(n - 1)) * (X - mean(X)) / s_X,  with  s_X = sqrt(SS / (n - 1)).
# Fix: the divisor must be parenthesised. `sum(...)/n-1` evaluated as
# (SS / n) - 1 because `/` binds tighter than `-`, so `sd` was not the sample
# standard deviation.
# NOTE: the vector printed below this chunk was produced before the fix and is
# therefore slightly off in scale; re-run the chunk to refresh it.
n <- nrow(ch18)
v2 <- mean(ch18$V2)
# `sd` is kept as the variable name for compatibility. Calls such as sd(x)
# still reach stats::sd because R skips non-function objects when resolving
# a function call.
sd <- sqrt(sum((ch18$V2 - v2)^2) / (n - 1))
(1 / sqrt(n - 1)) * ((ch18$V2 - v2) / sd)

##  [1] -0.117788363  0.105289238  0.139608869 -0.066308916  0.053809792
##  [6]  0.122449053 -0.100628547 -0.117788363 -0.117788363  0.002330345
## [11]  0.070969607 -0.100628547 -0.100628547  0.088129422 -0.100628547
## [16] -0.117788363 -0.117788363  0.070969607  0.139608869 -0.083468732
## [21] -0.031989286 -0.083468732 -0.083468732 -0.083468732  0.105289238
## [26] -0.117788363  0.122449053  0.139608869 -0.100628547  0.139608869
## [31] -0.100628547 -0.083468732 -0.083468732  0.139608869  0.139608869
## [36]  0.088129422 -0.117788363  0.105289238 -0.083468732  0.122449053
## [41] -0.117788363  0.122449053 -0.117788363  0.122449053 -0.083468732
## [46] -0.083468732 -0.066308916 -0.117788363 -0.066308916 -0.066308916
## [51] -0.100628547 -0.100628547 -0.117788363 -0.100628547 -0.083468732
## [56]  0.122449053  0.156768684 -0.117788363  0.105289238 -0.100628547
## [61] -0.117788363 -0.134948178  0.173928499  0.139608869  0.088129422
## [66]  0.208248130  0.173928499  0.139608869 -0.117788363 -0.100628547
## [71]  0.139608869 -0.117788363  0.122449053 -0.066308916  0.208248130
## [76] -0.083468732 -0.083468732  0.173928499  0.122449053  0.053809792
## [81]  0.105289238

## Standardized MR
# Columns are taken from ch18 explicitly instead of attach(ch18), so nothing is
# added to the search path and no global names are masked.
Ystar <- scale(ch18$V1)
X1star <- scale(ch18$V2)
X2star <- scale(ch18$V3)
X3star <- scale(ch18$V4)
X4star <- scale(ch18$V5)

# NOTE: this rebinds fit4 (earlier the V1 ~ V5 fit) to the standardized model.
fit4 <- lm(Ystar ~ X1star + X2star + X3star + X4star)
fit4$coefficients

##   (Intercept)        X1star        X2star        X3star        X4star 
## -3.364857e-16 -5.478526e-01  4.236468e-01  4.846136e-02  5.027571e-01

# Back-transform to the original units:
#   b_k = (s_Y / s_Xk) * b_k*,   b_0 = mean(Y) - sum_k b_k * mean(X_k)
b1 <- sd(ch18$V1) / sd(ch18$V2) * fit4$coefficients[2]
b2 <- sd(ch18$V1) / sd(ch18$V3) * fit4$coefficients[3]
b3 <- sd(ch18$V1) / sd(ch18$V4) * fit4$coefficients[4]
b4 <- sd(ch18$V1) / sd(ch18$V5) * fit4$coefficients[5]
b0 <- mean(ch18$V1) - b1 * mean(ch18$V2) - b2 * mean(ch18$V3) -
  b3 * mean(ch18$V4) - b4 * mean(ch18$V5)
# Label the result by original term. Previously the names were inherited from
# the standardized coefficients, so the intercept was printed as "X1star".
setNames(c(b0, b1, b2, b3, b4), c("(Intercept)", "V2", "V3", "V4", "V5"))

##   (Intercept)            V2            V3            V4            V5 
##  1.220059e+01 -1.420336e-01  2.820165e-01  6.193435e-01  7.924302e-06

7.27 a

a First order linear regression model for relating rental rates to property age and size:

\(\hat{Y}\) = 14.36 - 0.1145\(X_1\) + 0.00001\(X_4\)

# First-order model using only V2 (X1) and V5 (X4) as predictors
reg_fit <- lm(V1 ~ V2 + V5, data = ch18)
summary(reg_fit)

## 
## Call:
## lm(formula = V1 ~ V2 + V5, data = ch18)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2032 -0.4593  0.0641  0.7730  2.5083 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.436e+01  2.771e-01  51.831  < 2e-16 ***
## V2          -1.145e-01  2.242e-02  -5.105 2.27e-06 ***
## V5           1.045e-05  1.363e-06   7.663 4.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.274 on 78 degrees of freedom
## Multiple R-squared:  0.4652, Adjusted R-squared:  0.4515 
## F-statistic: 33.93 on 2 and 78 DF,  p-value: 2.506e-11

7.27 b.

In 6.18c, the model was \(\hat{Y} = 12.2006 − 0.1420X1+ 0.2820X2 + 0.6193X3 + 0.0000079X4\)

The model here is \(\hat{Y}\) = 14.36 - 0.1145\(X_1\) + 0.00001\(X_4\)

The coefficients for X1 are -0.1420 (full model) and -0.1145 (reduced model): they are about the same, but the coefficient is a bit smaller in magnitude in the second, reduced model. The coefficients for X4 are 0.0000079 (full model) and 0.0000105 (reduced model): they are also about the same, but the coefficient is a bit higher in the second, reduced model.

7.27 c.

No. SSR(X4) = 67.775, whereas SSR(X4|X3) = 66.859.

No. SSR(X1) = 14.819, whereas SSR(X1|X3) = 13.775.

# SSR(X4): the V5 "Sum Sq" entry when V5 (X4) is the only predictor
anova(fit_x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V5         1  67.775  67.775  31.723 2.628e-07 ***
## Residuals 79 168.782   2.136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# SSR(X4|X3) = SSE(X3) - SSE(X3,X4)
# SSE(X3,X4) is the residual sum of squares of the model with V4 (X3) and V5 (X4).
fit_x3x4 <- lm(V1 ~ V4 + V5, data = ch18)
anova(fit_x3x4)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## V4         1   1.047   1.047  0.4842    0.4886    
## V5         1  66.858  66.858 30.9213 3.626e-07 ***
## Residuals 78 168.652   2.162                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# SSE(X3): residual sum of squares of the model with V4 (X3) alone
fit_x3 <- lm(V1 ~ V4, data = ch18)
anova(fit_x3)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value Pr(>F)
## V4         1   1.047  1.0470  0.3512 0.5551
## Residuals 79 235.511  2.9811

# SSR(X4|X3) = SSE(X3) - SSE(X3,X4), from the two residual rows above
235.511-168.652

## [1] 66.859

# SSR(X1): the V2 "Sum Sq" entry when V2 (X1) is the only predictor
fit_x1 <- lm(V1 ~ V2, data = ch18)
anova(fit_x1)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## V2         1  14.819 14.8185  5.2795 0.02422 *
## Residuals 79 221.739  2.8068                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# SSR(X1|X3) = SSE(X3) - SSE(X1,X3)
# SSE(X1,X3) is the residual sum of squares of the model with V2 (X1) and V4 (X3).
fit_x1x3 <- lm(V1 ~ V2 + V4, data = ch18)
anova(fit_x1x3)

## Analysis of Variance Table
## 
## Response: V1
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## V2         1  14.819 14.8185  5.2127 0.02515 *
## V4         1   0.003  0.0027  0.0010 0.97534  
## Residuals 78 221.736  2.8428                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# SSR(X1|X3) = SSE(X3) - SSE(X1,X3), from the residual rows of fit_x3 and fit_x1x3
235.511-221.736

## [1] 13.775

7.27 d

In parts b and c, we see the first model has more significant predictors. The value of \(r_{12}\) = 0.5680, \(r_{13}\) = 0.3228, and \(r_{23}\) = 0.2538.

STAT 408 HW 9

Kajal Chokshi

11/14/2018