library(MASS)
library(Rcmdr)
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Loading required package: sandwich
## The Commander GUI is launched only in interactive sessions
data(Boston)
RegModel.1 <-
lm(medv~age+black+chas+crim+dis+indus+lstat+nox+ptratio+rad+rm+tax+zn,
data=Boston)
summary(RegModel.1)
##
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus +
## lstat + nox + ptratio + rad + rm + tax + zn, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.595 -2.730 -0.518 1.777 26.199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.646e+01 5.103e+00 7.144 3.28e-12 ***
## age 6.922e-04 1.321e-02 0.052 0.958229
## black 9.312e-03 2.686e-03 3.467 0.000573 ***
## chas 2.687e+00 8.616e-01 3.118 0.001925 **
## crim -1.080e-01 3.286e-02 -3.287 0.001087 **
## dis -1.476e+00 1.995e-01 -7.398 6.01e-13 ***
## indus 2.056e-02 6.150e-02 0.334 0.738288
## lstat -5.248e-01 5.072e-02 -10.347 < 2e-16 ***
## nox -1.777e+01 3.820e+00 -4.651 4.25e-06 ***
## ptratio -9.527e-01 1.308e-01 -7.283 1.31e-12 ***
## rad 3.060e-01 6.635e-02 4.613 5.07e-06 ***
## rm 3.810e+00 4.179e-01 9.116 < 2e-16 ***
## tax -1.233e-02 3.760e-03 -3.280 0.001112 **
## zn 4.642e-02 1.373e-02 3.382 0.000778 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared: 0.7406, Adjusted R-squared: 0.7338
## F-statistic: 108.1 on 13 and 492 DF, p-value: < 2.2e-16
vif(RegModel.1)
## age black chas crim dis indus lstat nox
## 3.100826 1.348521 1.073995 1.792192 3.955945 3.991596 2.941491 4.393720
## ptratio rad rm tax zn
## 1.799084 7.484496 1.933744 9.008554 2.298758
library(zoo, pos=15)
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lmtest, pos=15)
bptest(medv ~ age + black + chas + crim + dis + indus + lstat + nox +
ptratio + rad + rm + tax + zn, varformula = ~ fitted.values(RegModel.1),
studentize=FALSE, data=Boston)
##
## Breusch-Pagan test
##
## data: medv ~ age + black + chas + crim + dis + indus + lstat + nox + ptratio + rad + rm + tax + zn
## BP = 15.244, df = 1, p-value = 9.447e-05
library(gvlma)
#install.packages("gvlma")
summary(gvlma(RegModel.1))
##
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus +
## lstat + nox + ptratio + rad + rm + tax + zn, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.595 -2.730 -0.518 1.777 26.199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.646e+01 5.103e+00 7.144 3.28e-12 ***
## age 6.922e-04 1.321e-02 0.052 0.958229
## black 9.312e-03 2.686e-03 3.467 0.000573 ***
## chas 2.687e+00 8.616e-01 3.118 0.001925 **
## crim -1.080e-01 3.286e-02 -3.287 0.001087 **
## dis -1.476e+00 1.995e-01 -7.398 6.01e-13 ***
## indus 2.056e-02 6.150e-02 0.334 0.738288
## lstat -5.248e-01 5.072e-02 -10.347 < 2e-16 ***
## nox -1.777e+01 3.820e+00 -4.651 4.25e-06 ***
## ptratio -9.527e-01 1.308e-01 -7.283 1.31e-12 ***
## rad 3.060e-01 6.635e-02 4.613 5.07e-06 ***
## rm 3.810e+00 4.179e-01 9.116 < 2e-16 ***
## tax -1.233e-02 3.760e-03 -3.280 0.001112 **
## zn 4.642e-02 1.373e-02 3.382 0.000778 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared: 0.7406, Adjusted R-squared: 0.7338
## F-statistic: 108.1 on 13 and 492 DF, p-value: < 2.2e-16
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = RegModel.1)
##
## Value p-value Decision
## Global Stat 943.66 0.000e+00 Assumptions NOT satisfied!
## Skewness 195.03 0.000e+00 Assumptions NOT satisfied!
## Kurtosis 588.10 0.000e+00 Assumptions NOT satisfied!
## Link Function 141.61 0.000e+00 Assumptions NOT satisfied!
## Heteroscedasticity 18.91 1.367e-05 Assumptions NOT satisfied!
RegModel.2 <- lm(medv~age+black+chas+crim+dis+indus+lstat+ptratio+rm+zn,
data=Boston)
summary(RegModel.2)
##
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus +
## lstat + ptratio + rm + zn, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3233 -2.8459 -0.7174 1.6422 28.2254
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.351652 4.281246 4.520 7.74e-06 ***
## age -0.017920 0.013163 -1.361 0.173984
## black 0.009665 0.002721 3.552 0.000419 ***
## chas 2.970374 0.884334 3.359 0.000843 ***
## crim -0.072189 0.030764 -2.347 0.019342 *
## dis -1.247889 0.196981 -6.335 5.34e-10 ***
## indus -0.133684 0.052179 -2.562 0.010701 *
## lstat -0.533297 0.052245 -10.208 < 2e-16 ***
## ptratio -0.702994 0.120322 -5.843 9.32e-09 ***
## rm 4.217442 0.425040 9.922 < 2e-16 ***
## zn 0.040453 0.013750 2.942 0.003412 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.91 on 495 degrees of freedom
## Multiple R-squared: 0.7206, Adjusted R-squared: 0.7149
## F-statistic: 127.7 on 10 and 495 DF, p-value: < 2.2e-16
outlierTest(RegModel.2)
## rstudent unadjusted p-value Bonferonni p
## 369 6.126223 1.8393e-09 9.3068e-07
## 372 5.604489 3.4766e-08 1.7592e-05
## 373 5.233596 2.4623e-07 1.2459e-04
## 413 3.968581 8.3025e-05 4.2010e-02
dataset2 <- Boston[-c(369,372,373,413),]
RegModel.3 <- lm(medv~age+black+chas+crim+dis+indus+lstat+ptratio+rm+zn,
data=dataset2)
summary(RegModel.3)
##
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus +
## lstat + ptratio + rm + zn, data = dataset2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3525 -2.5824 -0.5903 1.8874 20.3374
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.106196 3.833366 3.680 0.000259 ***
## age -0.029124 0.011734 -2.482 0.013398 *
## black 0.011163 0.002434 4.587 5.71e-06 ***
## chas 2.393837 0.794729 3.012 0.002728 **
## crim -0.082100 0.027286 -3.009 0.002757 **
## dis -1.093885 0.175099 -6.247 9.06e-10 ***
## indus -0.132761 0.046346 -2.865 0.004355 **
## lstat -0.430224 0.047769 -9.006 < 2e-16 ***
## ptratio -0.779062 0.106923 -7.286 1.28e-12 ***
## rm 5.010656 0.384544 13.030 < 2e-16 ***
## zn 0.028784 0.012229 2.354 0.018978 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.353 on 491 degrees of freedom
## Multiple R-squared: 0.7699, Adjusted R-squared: 0.7652
## F-statistic: 164.3 on 10 and 491 DF, p-value: < 2.2e-16
vif(RegModel.3)
## age black chas crim dis indus lstat ptratio
## 2.883320 1.281921 1.056760 1.457071 3.582965 2.672614 3.031501 1.420990
## rm zn
## 1.909531 2.164326
outlierTest(RegModel.3)
## rstudent unadjusted p-value Bonferonni p
## 370 4.900261 1.3029e-06 0.00065406
## 371 4.335649 1.7649e-05 0.00886000
## 368 4.313051 1.9480e-05 0.00977920
## 366 4.289940 2.1540e-05 0.01081300
## 365 -4.202831 3.1333e-05 0.01572900
dataset2 <- within(dataset2, {
chas <- as.factor(chas)
})
RegModel.4 <- lm(medv~age+black+crim+dis+indus+lstat+ptratio+rm+zn,
data=dataset2)
summary(RegModel.4)
##
## Call:
## lm(formula = medv ~ age + black + crim + dis + indus + lstat +
## ptratio + rm + zn, data = dataset2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.375 -2.585 -0.594 1.853 22.390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.38589 3.86355 3.723 0.000219 ***
## age -0.02715 0.01181 -2.299 0.021937 *
## black 0.01153 0.00245 4.707 3.27e-06 ***
## crim -0.08575 0.02748 -3.120 0.001913 **
## dis -1.10604 0.17648 -6.267 8.04e-10 ***
## indus -0.12188 0.04658 -2.617 0.009157 **
## lstat -0.43665 0.04811 -9.076 < 2e-16 ***
## ptratio -0.81793 0.10701 -7.644 1.12e-13 ***
## rm 5.06764 0.38722 13.087 < 2e-16 ***
## zn 0.02825 0.01233 2.292 0.022343 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.388 on 492 degrees of freedom
## Multiple R-squared: 0.7656, Adjusted R-squared: 0.7613
## F-statistic: 178.6 on 9 and 492 DF, p-value: < 2.2e-16