library(MASS)
library(Rcmdr)
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Loading required package: sandwich
## The Commander GUI is launched only in interactive sessions
data(Boston)
RegModel.1 <- 
  lm(medv~age+black+chas+crim+dis+indus+lstat+nox+ptratio+rad+rm+tax+zn, 
     data=Boston)
summary(RegModel.1)
## 
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus + 
##     lstat + nox + ptratio + rad + rm + tax + zn, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.595  -2.730  -0.518   1.777  26.199 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.646e+01  5.103e+00   7.144 3.28e-12 ***
## age          6.922e-04  1.321e-02   0.052 0.958229    
## black        9.312e-03  2.686e-03   3.467 0.000573 ***
## chas         2.687e+00  8.616e-01   3.118 0.001925 ** 
## crim        -1.080e-01  3.286e-02  -3.287 0.001087 ** 
## dis         -1.476e+00  1.995e-01  -7.398 6.01e-13 ***
## indus        2.056e-02  6.150e-02   0.334 0.738288    
## lstat       -5.248e-01  5.072e-02 -10.347  < 2e-16 ***
## nox         -1.777e+01  3.820e+00  -4.651 4.25e-06 ***
## ptratio     -9.527e-01  1.308e-01  -7.283 1.31e-12 ***
## rad          3.060e-01  6.635e-02   4.613 5.07e-06 ***
## rm           3.810e+00  4.179e-01   9.116  < 2e-16 ***
## tax         -1.233e-02  3.760e-03  -3.280 0.001112 ** 
## zn           4.642e-02  1.373e-02   3.382 0.000778 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7338 
## F-statistic: 108.1 on 13 and 492 DF,  p-value: < 2.2e-16
vif(RegModel.1)
##      age    black     chas     crim      dis    indus    lstat      nox 
## 3.100826 1.348521 1.073995 1.792192 3.955945 3.991596 2.941491 4.393720 
##  ptratio      rad       rm      tax       zn 
## 1.799084 7.484496 1.933744 9.008554 2.298758
library(zoo, pos=15)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lmtest, pos=15)
bptest(medv ~ age + black + chas + crim + dis + indus + lstat + nox + 
         ptratio + rad + rm + tax + zn, varformula = ~ fitted.values(RegModel.1), 
       studentize=FALSE, data=Boston)
## 
##  Breusch-Pagan test
## 
## data:  medv ~ age + black + chas + crim + dis + indus + lstat + nox +     ptratio + rad + rm + tax + zn
## BP = 15.244, df = 1, p-value = 9.447e-05
library(gvlma)
#install.packages("gvlma")
summary(gvlma(RegModel.1))
## 
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus + 
##     lstat + nox + ptratio + rad + rm + tax + zn, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.595  -2.730  -0.518   1.777  26.199 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.646e+01  5.103e+00   7.144 3.28e-12 ***
## age          6.922e-04  1.321e-02   0.052 0.958229    
## black        9.312e-03  2.686e-03   3.467 0.000573 ***
## chas         2.687e+00  8.616e-01   3.118 0.001925 ** 
## crim        -1.080e-01  3.286e-02  -3.287 0.001087 ** 
## dis         -1.476e+00  1.995e-01  -7.398 6.01e-13 ***
## indus        2.056e-02  6.150e-02   0.334 0.738288    
## lstat       -5.248e-01  5.072e-02 -10.347  < 2e-16 ***
## nox         -1.777e+01  3.820e+00  -4.651 4.25e-06 ***
## ptratio     -9.527e-01  1.308e-01  -7.283 1.31e-12 ***
## rad          3.060e-01  6.635e-02   4.613 5.07e-06 ***
## rm           3.810e+00  4.179e-01   9.116  < 2e-16 ***
## tax         -1.233e-02  3.760e-03  -3.280 0.001112 ** 
## zn           4.642e-02  1.373e-02   3.382 0.000778 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7338 
## F-statistic: 108.1 on 13 and 492 DF,  p-value: < 2.2e-16
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = RegModel.1) 
## 
##                     Value   p-value                   Decision
## Global Stat        943.66 0.000e+00 Assumptions NOT satisfied!
## Skewness           195.03 0.000e+00 Assumptions NOT satisfied!
## Kurtosis           588.10 0.000e+00 Assumptions NOT satisfied!
## Link Function      141.61 0.000e+00 Assumptions NOT satisfied!
## Heteroscedasticity  18.91 1.367e-05 Assumptions NOT satisfied!
RegModel.2 <- lm(medv~age+black+chas+crim+dis+indus+lstat+ptratio+rm+zn, 
                 data=Boston)
summary(RegModel.2)
## 
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus + 
##     lstat + ptratio + rm + zn, data = Boston)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.3233  -2.8459  -0.7174   1.6422  28.2254 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.351652   4.281246   4.520 7.74e-06 ***
## age         -0.017920   0.013163  -1.361 0.173984    
## black        0.009665   0.002721   3.552 0.000419 ***
## chas         2.970374   0.884334   3.359 0.000843 ***
## crim        -0.072189   0.030764  -2.347 0.019342 *  
## dis         -1.247889   0.196981  -6.335 5.34e-10 ***
## indus       -0.133684   0.052179  -2.562 0.010701 *  
## lstat       -0.533297   0.052245 -10.208  < 2e-16 ***
## ptratio     -0.702994   0.120322  -5.843 9.32e-09 ***
## rm           4.217442   0.425040   9.922  < 2e-16 ***
## zn           0.040453   0.013750   2.942 0.003412 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.91 on 495 degrees of freedom
## Multiple R-squared:  0.7206, Adjusted R-squared:  0.7149 
## F-statistic: 127.7 on 10 and 495 DF,  p-value: < 2.2e-16
outlierTest(RegModel.2)
##     rstudent unadjusted p-value Bonferonni p
## 369 6.126223         1.8393e-09   9.3068e-07
## 372 5.604489         3.4766e-08   1.7592e-05
## 373 5.233596         2.4623e-07   1.2459e-04
## 413 3.968581         8.3025e-05   4.2010e-02
dataset2 <- Boston[-c(369,372,373,413),]
RegModel.3 <- lm(medv~age+black+chas+crim+dis+indus+lstat+ptratio+rm+zn, 
                 data=dataset2)
summary(RegModel.3)
## 
## Call:
## lm(formula = medv ~ age + black + chas + crim + dis + indus + 
##     lstat + ptratio + rm + zn, data = dataset2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.3525  -2.5824  -0.5903   1.8874  20.3374 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.106196   3.833366   3.680 0.000259 ***
## age         -0.029124   0.011734  -2.482 0.013398 *  
## black        0.011163   0.002434   4.587 5.71e-06 ***
## chas         2.393837   0.794729   3.012 0.002728 ** 
## crim        -0.082100   0.027286  -3.009 0.002757 ** 
## dis         -1.093885   0.175099  -6.247 9.06e-10 ***
## indus       -0.132761   0.046346  -2.865 0.004355 ** 
## lstat       -0.430224   0.047769  -9.006  < 2e-16 ***
## ptratio     -0.779062   0.106923  -7.286 1.28e-12 ***
## rm           5.010656   0.384544  13.030  < 2e-16 ***
## zn           0.028784   0.012229   2.354 0.018978 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.353 on 491 degrees of freedom
## Multiple R-squared:  0.7699, Adjusted R-squared:  0.7652 
## F-statistic: 164.3 on 10 and 491 DF,  p-value: < 2.2e-16
vif(RegModel.3)
##      age    black     chas     crim      dis    indus    lstat  ptratio 
## 2.883320 1.281921 1.056760 1.457071 3.582965 2.672614 3.031501 1.420990 
##       rm       zn 
## 1.909531 2.164326
outlierTest(RegModel.3)
##      rstudent unadjusted p-value Bonferonni p
## 370  4.900261         1.3029e-06   0.00065406
## 371  4.335649         1.7649e-05   0.00886000
## 368  4.313051         1.9480e-05   0.00977920
## 366  4.289940         2.1540e-05   0.01081300
## 365 -4.202831         3.1333e-05   0.01572900
dataset2 <- within(dataset2, {
  chas <- as.factor(chas)
})
RegModel.4 <- lm(medv~age+black+crim+dis+indus+lstat+ptratio+rm+zn, 
                 data=dataset2)
summary(RegModel.4)
## 
## Call:
## lm(formula = medv ~ age + black + crim + dis + indus + lstat + 
##     ptratio + rm + zn, data = dataset2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.375  -2.585  -0.594   1.853  22.390 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.38589    3.86355   3.723 0.000219 ***
## age         -0.02715    0.01181  -2.299 0.021937 *  
## black        0.01153    0.00245   4.707 3.27e-06 ***
## crim        -0.08575    0.02748  -3.120 0.001913 ** 
## dis         -1.10604    0.17648  -6.267 8.04e-10 ***
## indus       -0.12188    0.04658  -2.617 0.009157 ** 
## lstat       -0.43665    0.04811  -9.076  < 2e-16 ***
## ptratio     -0.81793    0.10701  -7.644 1.12e-13 ***
## rm           5.06764    0.38722  13.087  < 2e-16 ***
## zn           0.02825    0.01233   2.292 0.022343 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.388 on 492 degrees of freedom
## Multiple R-squared:  0.7656, Adjusted R-squared:  0.7613 
## F-statistic: 178.6 on 9 and 492 DF,  p-value: < 2.2e-16