# Read the CH06PR18 data file (whitespace-delimited, no header row) and label
# its five columns: the response first, then the four predictors.
CProp <- read.table(
  "http://users.stat.ufl.edu/~rrandles/sta4210/Rclassnotes/data/textdatasets/KutnerData/Chapter%20%206%20Data%20Sets/CH06PR18.txt"
)
names(CProp) <- c(
  "Rental_Rates", "Age", "Operating_Expenses", "Vacancy_Rates",
  "Square_Footage"
)
# Predictor numbering used throughout:
# x1 = Age, x2 = Operating_Expenses, x3 = Vacancy_Rates, x4 = Square_Footage
n <- nrow(CProp) # number of observations
p <- ncol(CProp) # number of columns (response + four predictors)

# Full model. Square_Footage is entered first, so the sequential sums of
# squares reported by anova() condition on it.
fit <- lm(
  Rental_Rates ~ Square_Footage + Age + Operating_Expenses + Vacancy_Rates,
  data = CProp
)
anova(fit)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Square_Footage 1 67.775 67.775 52.4369 3.073e-10 ***
## Age 1 42.275 42.275 32.7074 2.004e-07 ***
## Operating_Expenses 1 27.857 27.857 21.5531 1.412e-05 ***
## Vacancy_Rates 1 0.420 0.420 0.3248 0.5704
## Residuals 76 98.231 1.293
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduced model without x3 (Vacancy_Rates), compared against the full model
# with a general linear (partial F) test.
fit_r1 <- lm(
  Rental_Rates ~ Square_Footage + Age + Operating_Expenses,
  data = CProp
)
anova(fit_r1, fit)
## Analysis of Variance Table
##
## Model 1: Rental_Rates ~ Square_Footage + Age + Operating_Expenses
## Model 2: Rental_Rates ~ Square_Footage + Age + Operating_Expenses + Vacancy_Rates
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 77 98.650
## 2 76 98.231 1 0.41975 0.3248 0.5704
\[H_0: \beta_3 = 0 \] \[H_a: \beta_3 \neq 0 \] With an F statistic of 0.32 and a p-value of \(0.57>\alpha=0.01\), we fail to reject the null hypothesis and cannot conclude that \(\beta_3 \neq 0\); therefore Vacancy_Rates can be dropped from our model.
# Reduced model without x3 (Vacancy_Rates) and x2 (Operating_Expenses),
# compared against the full model to test both coefficients jointly.
fit_r2 <- lm(Rental_Rates ~ Square_Footage + Age, data = CProp)
anova(fit_r2, fit)
## Analysis of Variance Table
##
## Model 1: Rental_Rates ~ Square_Footage + Age
## Model 2: Rental_Rates ~ Square_Footage + Age + Operating_Expenses + Vacancy_Rates
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 78 126.508
## 2 76 98.231 2 28.277 10.939 6.682e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[H_0: \beta_2= \beta_3 = 0 \] \[H_a: \beta_2 \neq 0~ \text{or}~ \beta_3 \neq 0 \] With an F statistic of 10.94 and a p-value of \(6.682\times 10^{-5}<\alpha=0.01\), we reject the null hypothesis and conclude \(H_a\): \(\beta_2 \neq 0\) or \(\beta_3 \neq 0\) (at least one of them is non-zero); therefore Operating_Expenses and Vacancy_Rates cannot both be dropped from our model.
\(R^2_{Y 1|4}=\frac{SSR(X_1|X_4)}{SSE(X_4)}\)
# x1 = Age, x2 = Operating_Expenses, x3 = Vacancy_Rates, x4 = Square_Footage
# Full model with Square_Footage entered first and Age second, so the Age row
# of the sequential ANOVA table is the sum of squares for Age given
# Square_Footage.
fit <- lm(
  Rental_Rates ~ Square_Footage + Age + Operating_Expenses + Vacancy_Rates,
  data = CProp
)
anova(fit)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Square_Footage 1 67.775 67.775 52.4369 3.073e-10 ***
## Age 1 42.275 42.275 32.7074 2.004e-07 ***
## Operating_Expenses 1 27.857 27.857 21.5531 1.412e-05 ***
## Vacancy_Rates 1 0.420 0.420 0.3248 0.5704
## Residuals 76 98.231 1.293
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# SSR(X1 | X4): extra sum of squares for Age given Square_Footage.
# The row is selected by name rather than by position so that adding or
# reordering later terms in `fit` cannot silently pick a different predictor.
# The value is still a sequential sum of squares, so it is only SSR(X1 | X4)
# while Square_Footage is the sole term entered before Age in `fit`.
SSR <- anova(fit)["Age", "Sum Sq"]
SSR
## [1] 42.27457
# Model containing Square_Footage alone; its residual sum of squares is SSE(X4).
fit4 <- lm(Rental_Rates ~ Square_Footage, data = CProp)
anova(fit4)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Square_Footage 1 67.775 67.775 31.723 2.628e-07 ***
## Residuals 79 168.782 2.136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# SSE(X4): residual sum of squares of the Square_Footage-only model, selected
# by row name so the lookup does not depend on how many terms `fit4` has.
SSE <- anova(fit4)["Residuals", "Sum Sq"]
SSE
## [1] 168.7824
# Coefficient of partial determination R^2_{Y1|4} = SSR(X1 | X4) / SSE(X4).
R2Y14 <- SSR / SSE
R2Y14
## [1] 0.2504679
This is the proportion of the variation in Rental Rates left unexplained by Square Footage that is explained by adding Age to the model (about 25%).
# Standardized multiple regression.
# Columns are selected by name instead of by position, so the x1..x4 labels
# stay correct even if the column order of CProp changes.
# Single-bracket indexing keeps one-column data frames, matching what
# as.matrix() and scale() received originally.
Y <- as.matrix(CProp["Rental_Rates"])
X2 <- as.matrix(CProp["Operating_Expenses"])
# scale() centers each variable on its mean and divides by its standard
# deviation.
Ystar <- scale(CProp["Rental_Rates"])
X1star <- scale(CProp["Age"])
X2star <- scale(CProp["Operating_Expenses"])
X3star <- scale(CProp["Vacancy_Rates"])
X4star <- scale(CProp["Square_Footage"])
fitstar <- lm(Ystar ~ X1star + X2star + X3star + X4star)
# Standardized coefficient of Operating_Expenses, looked up by term name
# rather than by its position in the coefficient vector.
b2star <- coef(fitstar)["X2star"]
b2star
## X2star
## 0.4236468
summary(fitstar)
##
## Call:
## lm(formula = Ystar ~ X1star + X2star + X3star + X4star)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85346 -0.34372 -0.05289 0.32446 1.71213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.365e-16 7.346e-02 0.000 1.00
## X1star -5.479e-01 8.232e-02 -6.655 3.89e-09 ***
## X2star 4.236e-01 9.490e-02 4.464 2.75e-05 ***
## X3star 4.846e-02 8.504e-02 0.570 0.57
## X4star 5.028e-01 8.786e-02 5.722 1.98e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6611 on 76 degrees of freedom
## Multiple R-squared: 0.5847, Adjusted R-squared: 0.5629
## F-statistic: 26.76 on 4 and 76 DF, p-value: 7.272e-14
# Standard deviation of the response on its original scale.
sd(Y)
## [1] 1.719584
# Convert the standardized coefficient back to original units:
# b2 = (s_Y / s_X2) * b2star.
b2 <- (sd(Y) / sd(X2)) * b2star
b2
## X2star
## 0.2820165
# Change in mean Y, in original units, for a one-standard-deviation increase
# in X2.
sd(Y) * b2star
## X2star
## 0.7284963
b2star = 0.42 means that if X2 increases by one standard deviation of X2 while every other X stays constant, the mean response Y increases by 0.42 standard deviations of Y, i.e. by \(0.42 \times 1.72 \approx 0.73\).
# Model containing only x1 (Age) and x4 (Square_Footage).
# x1 = Age, x2 = Operating_Expenses, x3 = Vacancy_Rates, x4 = Square_Footage
fit14 <- lm(Rental_Rates ~ Age + Square_Footage, data = CProp)
coef(fit14)
## (Intercept) Age Square_Footage
## 1.436128e+01 -1.144670e-01 1.044493e-05
\(\widehat{RentalRates} = 14.36 - 0.1145Age + 0.00001044SquareFootage\)
or better written as: \(\hat{Y} = 14.36 - 0.1145X_1 + 0.00001044X_4\)
In 6.18c: \(\hat{Y} = 12.2 - 0.142X_1 + 0.282X_2 + 0.619X_3 + 0.00000792X_4\)
These estimates are pretty close to what we got in 6.18c. The intercept is slightly larger, \(b_1\) and \(b_4\) are about the same.
# Single-predictor models for x4 and x3, plus the model with both.
fit4 <- lm(Rental_Rates ~ Square_Footage, data = CProp)
fit3 <- lm(Rental_Rates ~ Vacancy_Rates, data = CProp)
# Vacancy_Rates is entered first, so the Square_Footage row of the sequential
# ANOVA table is the sum of squares for X4 given X3.
fit4.3 <- lm(Rental_Rates ~ Vacancy_Rates + Square_Footage, data = CProp)
anova(fit4.3)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Vacancy_Rates 1 1.047 1.047 0.4842 0.4886
## Square_Footage 1 66.858 66.858 30.9213 3.626e-07 ***
## Residuals 78 168.652 2.162
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the Square_Footage-only model; the Square_Footage row is SSR(X4).
anova(fit4)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Square_Footage 1 67.775 67.775 31.723 2.628e-07 ***
## Residuals 79 168.782 2.136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the Vacancy_Rates-only model; the Residuals row is SSE(X3).
anova(fit3)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Vacancy_Rates 1 1.047 1.0470 0.3512 0.5551
## Residuals 79 235.511 2.9811
\(SSR(X_4)=67.775\) \(SSR(X_4|X_3)= SSE(X_3)-SSE(X_3,X_4)=235.511-168.652=66.859\)
Since these are almost the same, we can say that Square Footage and Vacancy Rates are nearly uncorrelated.
# Single-predictor models for x1 and x3, plus the model with both.
fit1 <- lm(Rental_Rates ~ Age, data = CProp)
fit3 <- lm(Rental_Rates ~ Vacancy_Rates, data = CProp)
# Vacancy_Rates is entered first, so the Age row of anova(fit13) is the sum of
# squares for X1 given X3.
fit13 <- lm(Rental_Rates ~ Vacancy_Rates + Age, data = CProp)
anova(fit1)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 1 14.819 14.8185 5.2795 0.02422 *
## Residuals 79 221.739 2.8068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the Vacancy_Rates-only model; the Residuals row is SSE(X3).
anova(fit3)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Vacancy_Rates 1 1.047 1.0470 0.3512 0.5551
## Residuals 79 235.511 2.9811
# Sequential ANOVA for Vacancy_Rates + Age; the Residuals row is SSE(X1, X3).
anova(fit13)
## Analysis of Variance Table
##
## Response: Rental_Rates
## Df Sum Sq Mean Sq F value Pr(>F)
## Vacancy_Rates 1 1.047 1.0469 0.3683 0.54570
## Age 1 13.774 13.7743 4.8454 0.03068 *
## Residuals 78 221.736 2.8428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the nested models; the "Sum of Sq" entry is SSR(X1 | X3).
anova(fit3, fit13)
## Analysis of Variance Table
##
## Model 1: Rental_Rates ~ Vacancy_Rates
## Model 2: Rental_Rates ~ Vacancy_Rates + Age
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 79 235.51
## 2 78 221.74 1 13.774 4.8454 0.03068 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(SSR(X_1)=14.819\) \(SSR(X_1|X_3)= SSE(X_3)-SSE(X_1,X_3) = 235.511-221.736 = 13.775\)
These are also fairly close, so we suspect Age and Vacancy Rates are at most weakly correlated.
# Pairwise correlation matrix of all five columns (response and predictors).
cor(CProp)
## Rental_Rates Age Operating_Expenses
## Rental_Rates 1.00000000 -0.2502846 0.4137872
## Age -0.25028456 1.0000000 0.3888264
## Operating_Expenses 0.41378716 0.3888264 1.0000000
## Vacancy_Rates 0.06652647 -0.2526635 -0.3797617
## Square_Footage 0.53526237 0.2885835 0.4406971
## Vacancy_Rates Square_Footage
## Rental_Rates 0.06652647 0.53526237
## Age -0.25266347 0.28858350
## Operating_Expenses -0.37976174 0.44069713
## Vacancy_Rates 1.00000000 0.08061073
## Square_Footage 0.08061073 1.00000000
# Scatterplot matrix of every pair of columns in CProp.
plot(CProp)
Age and Square Footage (Size) do not appear to be strongly correlated (r = 0.29). The same is true of Vacancy Rates and Square Footage (r = 0.08).