Question (1)
load("C:/Users/Sima/Desktop/Regression/hw4p1.RData")
# Part a) Fit the correct model and check residuals: Assumptions correct
plot(dat1)
fit1=lm(y~x1+x2,data=dat1)
summary(fit1)
##
## Call:
## lm(formula = y ~ x1 + x2, data = dat1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -149.155 -13.544 9.408 19.619 36.950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.4580 41.1994 8.191 1.05e-12 ***
## x1 8.0581 0.5566 14.478 < 2e-16 ***
## x2 -101.0442 1.8601 -54.321 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.51 on 97 degrees of freedom
## Multiple R-squared: 0.9685, Adjusted R-squared: 0.9679
## F-statistic: 1492 on 2 and 97 DF, p-value: < 2.2e-16
plot(fit1)
library(gvlma)
gvmodel <- gvlma(fit1)
summary(gvmodel)
##
## Call:
## lm(formula = y ~ x1 + x2, data = dat1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -149.155 -13.544 9.408 19.619 36.950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.4580 41.1994 8.191 1.05e-12 ***
## x1 8.0581 0.5566 14.478 < 2e-16 ***
## x2 -101.0442 1.8601 -54.321 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.51 on 97 degrees of freedom
## Multiple R-squared: 0.9685, Adjusted R-squared: 0.9679
## F-statistic: 1492 on 2 and 97 DF, p-value: < 2.2e-16
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = fit1)
##
## Value p-value Decision
## Global Stat 248.971 0.000e+00 Assumptions NOT satisfied!
## Skewness 65.686 5.551e-16 Assumptions NOT satisfied!
## Kurtosis 114.756 0.000e+00 Assumptions NOT satisfied!
## Link Function 62.366 2.887e-15 Assumptions NOT satisfied!
## Heteroscedasticity 6.164 1.304e-02 Assumptions NOT satisfied!
# The are Null plots.The more similar residual plots looks to these, the better
# Fit the correct model and check residuals: Assumptions Incorrect
fit2=lm(y~x1+x2+I(x2^2),data=dat1)
summary(fit2)
##
## Call:
## lm(formula = y ~ x1 + x2 + I(x2^2), data = dat1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.982 -9.470 1.723 10.026 24.820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 219.6997 18.4444 11.91 <2e-16 ***
## x1 7.7676 0.2377 32.68 <2e-16 ***
## x2 -37.9525 3.1183 -12.17 <2e-16 ***
## I(x2^2) -6.2923 0.3008 -20.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.43 on 96 degrees of freedom
## Multiple R-squared: 0.9943, Adjusted R-squared: 0.9942
## F-statistic: 5620 on 3 and 96 DF, p-value: < 2.2e-16
plot(fit2)
# The estimate for the quadratic term is near 0, so our assumptions are not horribly violated here
fit3=lm(y~x1+I(x2^2),data=dat1)
summary(fit3)
##
## Call:
## lm(formula = y ~ x1 + I(x2^2), data = dat1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.621 -15.021 -2.051 10.160 89.443
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 150.3572 27.8303 5.403 4.69e-07 ***
## x1 7.4861 0.3753 19.947 < 2e-16 ***
## I(x2^2) -9.8326 0.1213 -81.027 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.31 on 97 degrees of freedom
## Multiple R-squared: 0.9856, Adjusted R-squared: 0.9853
## F-statistic: 3320 on 2 and 97 DF, p-value: < 2.2e-16
plot(fit3)
# This time, the lack of a linear term causes our linearity assumption to be invalid,
# resulting in clear violations in the first plot
# Part b
fit2=lm(y~x1+x2+x2^2+x1:x2,data=dat1)
summary(fit2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x2^2 + x1:x2, data = dat1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -178.28 -10.22 4.01 15.65 34.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -141.74687 108.39146 -1.308 0.194
## x1 14.53134 1.46421 9.924 < 2e-16 ***
## x2 0.08339 21.54103 0.004 0.997
## x1:x2 -1.35986 0.28877 -4.709 8.38e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.55 on 96 degrees of freedom
## Multiple R-squared: 0.9744, Adjusted R-squared: 0.9736
## F-statistic: 1219 on 3 and 96 DF, p-value: < 2.2e-16
plot(fit2)
Question (2)
# Part a
library(alr3)
## Warning: package 'alr3' was built under R version 3.3.3
## Loading required package: car
## Warning: package 'car' was built under R version 3.3.3
d= BGSall
fit2a=lm(Soma~WT9+ST9+HT9+LG9+Sex,d=BGSall)
summary(fit2a)
##
## Call:
## lm(formula = Soma ~ WT9 + ST9 + HT9 + LG9 + Sex, data = BGSall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9929 -0.6606 0.0397 0.4927 3.5740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.594740 3.411741 2.226 0.02773 *
## WT9 0.119604 0.043535 2.747 0.00686 **
## ST9 -0.022957 0.007376 -3.112 0.00228 **
## HT9 -0.056858 0.023985 -2.371 0.01923 *
## LG9 0.037143 0.095460 0.389 0.69784
## Sex 1.438481 0.187718 7.663 3.7e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.016 on 130 degrees of freedom
## Multiple R-squared: 0.5169, Adjusted R-squared: 0.4983
## F-statistic: 27.82 on 5 and 130 DF, p-value: < 2.2e-16
plot(fit2a)
# Part b
fit2b=lm(Soma~WT18+ST18+HT18+LG18+Sex,d=BGSall)
summary(fit2b)
##
## Call:
## lm(formula = Soma ~ WT18 + ST18 + HT18 + LG18 + Sex, data = BGSall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.13147 -0.43226 0.05707 0.35977 2.66561
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.038160 2.411753 4.577 1.09e-05 ***
## WT18 0.103177 0.014236 7.248 3.36e-11 ***
## ST18 -0.015060 0.003061 -4.919 2.58e-06 ***
## HT18 -0.069241 0.012424 -5.573 1.38e-07 ***
## LG18 0.010741 0.048736 0.220 0.8259
## Sex 0.601937 0.303077 1.986 0.0491 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7797 on 130 degrees of freedom
## Multiple R-squared: 0.7158, Adjusted R-squared: 0.7048
## F-statistic: 65.47 on 5 and 130 DF, p-value: < 2.2e-16
plot(fit2b)