\(Answer 1\)

setwd("C:/Users/Sulochan/Copy/Fall 2015/Advance Geo/Assignment 9")
require(maptools)
require(spdep)
require(spgwr)
require(mgcv)
require(maps)
#install.packages("classInt",repos="http://cran.us.r-project.org")
library(classInt)
#install.packages("RColorBrewer",repos="http://cran.us.r-project.org")
library(RColorBrewer)
require(spatstat)
def.par <- par(no.readonly = TRUE) # save default, for resetting...

bos = readShapeSpatial("boston.shp")

layout(matrix(c(1,2), 1, 2, byrow = TRUE))

map('county', 'massachusetts', fill = FALSE, col = "black",add=F)
title(main = "Study Locations \n Boston, Massachusetts",line=1)
points(bos@data$LON,bos@data$LAT,col="gray",bg="blue",pch=21)
box()

plot(bos@data$LON,bos@data$LAT,col="gray",bg="blue",pch=21,
     xlab = "Longitude", ylab = "Latitude",
     main = "Study Locations")
axis(1)
axis(2)
box()

\(Preliminary Investigation of Variables\)

bos.data = bos@data
head(bos.data)

##   ID TOWN TOWNNO TRACT      LON     LAT      x       y MEDV CMEDV    CRIM
## 0  1    0      0  2011 -70.9550 42.2550 338.73 4679.73 24.0  24.0 0.00632
## 1  2    0      1  2021 -70.9500 42.2875 339.23 4683.33 21.6  21.6 0.02731
## 2  3    0      1  2022 -70.9360 42.2830 340.37 4682.80 34.7  34.7 0.02729
## 3  4    0      2  2031 -70.9280 42.2930 341.05 4683.89 33.4  33.4 0.03237
## 4  5    0      2  2032 -70.9220 42.2980 341.56 4684.44 36.2  36.2 0.06905
## 5  6    0      2  2033 -70.9165 42.3040 342.03 4685.09 28.7  28.7 0.02985
##   ZN INDUS CHAS   NOX    RM  AGE    DIS RAD TAX PTRATIO      B LSTAT
## 0 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 1  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 2  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 3  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 4  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 5  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21

long_ = bos.data$LON
lat_ = bos.data$LAT

bos.header = colnames(bos.data)

par(def.par)

I developed a function which would help to plot the histogram, boxplot, qq-plot and spatial distribution of each variable.

plotvar = function(data_,var){
  layout(matrix(c(1,2,3,4,4,5), 2, 3, byrow = TRUE))
 
  hist(data_, main = paste("Histogram of ",var,sep=""))
  boxplot(data_, main = paste("Boxplot of ",var,sep=""))
  
  qqnorm(data_, main = paste("Normal Q-Q of ",var,sep=""))
  qqline(data_,col=2)  
  
  nclr = 9
  plotvar = data_
  class = classIntervals(plotvar, nclr, style = "pretty")
  plotclr = brewer.pal(nclr, "YlOrRd")
  colcode = findColours(class, plotclr, digits = 3)
  plot(bos@data$LON,bos@data$LAT,bg=colcode,col="gray",xlim = c(-71.3,-70.75),pch=21,cex=1
       ,xlab = "Longitude", ylab = "Latitude")
  title(main = var,line=2)
  legend("topright", legend = names(attr(colcode,"table")),
         fill = attr(colcode, "palette"),ncol=1,cex=0.85)
  axis(1)
  axis(2)
  box()
  plot(1, type="n", axes=F, xlab="", ylab="")
  par(def.par)
  return()
}

Selected variables based on what is NOT constant per tract. Since some variables have been specified as being constant for the tract and we need to model house prices per tract, we will only select the following variables.

vars_selected = c("CMEDV",
                  "CRIM",
                  "CHAS",
                  "NOX",
                  "RM",
                  "AGE",
                  "DIS",
                  "B",
                  "LSTAT")

CMEDV - Corrected median values of owner-occupied housing in USD

plotvar(bos.data$CMEDV, "CMEDV")

The raw variable is skewed but does not have zero values, so it will be log-transformed.After log transformation, the variable is normally distributed.
This is the response variable which we need to model.

log_CMEDV = log(bos.data$CMEDV)
plotvar(log_CMEDV,"log of CMEDV")

1 CRIM - Per capita crime rate

plotvar(bos.data$CRIM,"CRIME RATE")

This variable is also highly skewed. So a log transform will be applied.

log_CRIM = log(bos.data$CRIM)
plotvar(log_CRIM,"Log of Crime Rate")

Log transforms the variable into normal distribution. This variable will be taken in the model.

2 CHAS - a factor with levels 1 if tract borders Charles River; 0 otherwise

Since this variable is binary, we convert it into factor.

bin_chas = as.factor(bos.data$CHAS)

3 NOX - Nitric oxides concentration (parts per 10 million) per town

plotvar((bos.data$NOX),"Conc of Nitric Oxide")

Since this variable is slightly skewed and log or other transformation did not help much, I kept the variable as it is without transformation.

4 RM - Average numbers of rooms per dwelling

plotvar((bos.data$RM),"Avg No of Rooms \n per Dwelling")

This variable is also normally distributed and does not need any transformation.

5 AGE - Proportion of owner-occupied units built prior to 1940

plotvar((bos.data$AGE),"Prop of units built \n before 1940 (AGE)")

This variable is the proportion of houses. So, we can convert AGE into value between 0 and 1. Since dividing by max value (100) resulted in 1, which prevented the use of logit transformation, we divided the age by 99 and performed a logit transform. This transformed the data into normal distribution.

AGE = bos.data$AGE/101
logit_AGE = log(AGE/(1-AGE))
plotvar(logit_AGE,"Logit \n Transformed AGE")

6 DIS Weighted distance to five Boston employment centres

plotvar((bos.data$DIS),"Weighted Distance \n to Emp Ceter (DIS)")

Since this variable is slightly positive skewed we can use a log transformation.

DIS = bos.data$DIS
log_DIS = log(DIS)
plotvar(log_DIS,"Weighted Distance \n to Emp Ceter (DIS)")

7 B A numeric vector with some formula indicating the proportion of blacks

B = bos.data$B
B1 = sqrt(B/1000)+0.63
plotvar(B,"Vector indicating \n prop of Blacks (B)")

This variable was notoriously skewed and no transformation applied helped. I applied transfomations based on ladder of powers but not helped. I finally decided to use untransformed variable.

log_B = log(B)
plotvar((log_B),"Log of B")

8 LSTAT Percentage values of lower status population

LSTAT = bos.data$LSTAT
plotvar(LSTAT,"Lower Stat Popn \n Percentage (LSTAT)")

This variable is normally distributed so no transformation was applied.

MAKING THE DATASET FOR MODEL

Boston = data.frame(logT_CMEDV = log_CMEDV,
                    logT_CRIME = log_CRIM,
                    CHAS = bin_chas,
                    NOX = bos.data$NOX,
                    RM = bos.data$RM,
                    lgtT_AGE = logit_AGE,
                    logT_DIS = log_DIS,
                    B = bos.data$B,
                    LSTAT = bos.data$LSTAT)
head(Boston)

##   logT_CMEDV logT_CRIME CHAS   NOX    RM   lgtT_AGE logT_DIS      B LSTAT
## 1   3.178054  -5.064036    0 0.538 6.575  0.5995116 1.408545 396.90  4.98
## 2   3.072693  -3.600502    0 0.469 6.421  1.2726036 1.602836 396.90  9.14
## 3   3.546740  -3.601235    0 0.469 7.185  0.4261355 1.602836 392.83  4.03
## 4   3.508556  -3.430523    0 0.458 6.998 -0.1866789 1.802073 394.63  2.94
## 5   3.589059  -2.672924    0 0.458 7.147  0.1467977 1.802073 396.90  5.33
## 6   3.356897  -3.511570    0 0.458 6.430  0.3276526 1.802073 394.12  5.21

lm.r =  lm(logT_CMEDV ~ . , data=Boston)
summary(lm.r)

## 
## Call:
## lm(formula = logT_CMEDV ~ ., data = Boston)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94267 -0.11721 -0.01221  0.11287  0.94355 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.9644789  0.1892410  15.665  < 2e-16 ***
## logT_CRIME  -0.0283606  0.0080228  -3.535 0.000446 ***
## CHAS1        0.1593378  0.0377297   4.223 2.87e-05 ***
## NOX         -0.5599486  0.1680337  -3.332 0.000925 ***
## RM           0.1309884  0.0174136   7.522 2.54e-13 ***
## lgtT_AGE    -0.0030757  0.0091277  -0.337 0.736285    
## logT_DIS    -0.1938902  0.0358721  -5.405 1.01e-07 ***
## B            0.0004853  0.0001201   4.042 6.15e-05 ***
## LSTAT       -0.0327693  0.0022567 -14.521  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2113 on 497 degrees of freedom
## Multiple R-squared:  0.7363, Adjusted R-squared:  0.7321 
## F-statistic: 173.5 on 8 and 497 DF,  p-value: < 2.2e-16

This summary statistics shows that almost all of the variables (except Age) is significant. With higher Crimes, Nitrous Oxide, distance to employment center and low status housing, there seems to be a decrease in house prices while houses closer to river and with more rooms show increases in house pricing. The slope of black propotion is very less as well as it is very skewed so, I do not have high confidence in its interpretation.
R-squared is also about 0.7 which shows that about 70% variability is explained by these variables and with a high F-statistics and low p-value, we can say that at least some of the variables have significant effect on the response.

  nclr = 6
  plotvar = lm.r$residuals
  class = classIntervals(plotvar, nclr, style = "pretty")
  plotclr = brewer.pal(nclr, "YlOrRd")
  colcode = findColours(class, plotclr, digits = 3)
  plot(bos@data$LON,bos@data$LAT,col=colcode,xlim = c(-71.3,-70.75),pch=16,cex=1
       ,xlab = "Longitude", ylab = "Latitude")
  title(main = "Residuals of OLS",line=2)
  legend("topright", legend = names(attr(colcode,"table")),
         fill = attr(colcode, "palette"),ncol=1,cex=0.85)
  axis(1)
  axis(2)
  box()

The residual plots of OLS shows a strong spatial pattern, which shows that the residuals are not independent.
We now test the spatial autocorrelation of the residuals of this model. We will use Moran’s I with correction for model residuals since standard Moran’s I test with model residuals is biased.
We also need a spatial weight matrix for moran’s I test. So, I used k-nearest neighbor method to make spatial weight matrix.

Forming spatial weight matrix

  coords = coordinates(bos)
  bos.knn = knearneigh(coords,k=4)
  bos.nb = knn2nb(bos.knn)
  bos.listw = nb2listw(bos.nb)
  
  plot(bos.nb,coords)
  box()
  title(main="Spatial weights using Kth Nearest Neighbour")

Moran’s I with correction for model residuals

  lm.morantest(lm.r, bos.listw)

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = logT_CMEDV ~ ., data = Boston)
## weights: bos.listw
## 
## Moran I statistic standard deviate = 18.2345, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##       0.5112692201      -0.0114941262       0.0008219087

This results show that the residuals are significantly spatially autocorrelated. So, we can see that there is high spatial autocorrelation in the model as given by Moran’s I, which needs to be removed and modeled.

Selection of spatial regression model
Lagrange multiplier test
This test can checkif the autocorrelation is in dependent variables or in its errors which helps to choose which spatial regression model to use.
We first use non-robust version of the test.

  summary(lm.LMtests(lm.r, bos.listw, test=c("LMerr","LMlag")))

##  Lagrange multiplier diagnostics for spatial dependence
## data:  
## model: lm(formula = logT_CMEDV ~ ., data = Boston)
## weights: bos.listw
##  
##       statistic parameter   p.value    
## LMerr    304.21         1 < 2.2e-16 ***
## LMlag    233.43         1 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The results indicate that both are significant. So, we perform a robust version of LMtest.

  summary(lm.LMtests(lm.r, bos.listw, test=c("RLMerr","RLMlag")))

##  Lagrange multiplier diagnostics for spatial dependence
## data:  
## model: lm(formula = logT_CMEDV ~ ., data = Boston)
## weights: bos.listw
##  
##        statistic parameter   p.value    
## RLMerr    93.836         1 < 2.2e-16 ***
## RLMlag    23.051         1 1.578e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Still, the results indicate both are significant.So, we test both the models.
Spatial Lag Model

bos.fit.sp_lag = lagsarlm (logT_CMEDV ~ . , data=Boston, bos.listw)
  summary(bos.fit.sp_lag)

## 
## Call:lagsarlm(formula = logT_CMEDV ~ ., data = Boston, listw = bos.listw)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.5686389 -0.0890477 -0.0099302  0.0781989  0.6935512 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  9.7265e-01  1.6770e-01  5.8000 6.631e-09
## logT_CRIME  -1.0360e-02  6.1097e-03 -1.6957 0.0899491
## CHAS1        4.4130e-02  2.8383e-02  1.5548 0.1199910
## NOX         -2.3429e-01  1.2750e-01 -1.8376 0.0661260
## RM           1.1957e-01  1.3295e-02  8.9940 < 2.2e-16
## lgtT_AGE    -4.5087e-03  6.8814e-03 -0.6552 0.5123350
## logT_DIS    -1.3099e-01  2.7512e-02 -4.7612 1.925e-06
## B            3.4445e-04  9.0987e-05  3.7857 0.0001533
## LSTAT       -1.7995e-02  1.8004e-03 -9.9949 < 2.2e-16
## 
## Rho: 0.56272, LR test value: 243.16, p-value: < 2.22e-16
## Asymptotic standard error: 0.027934
##     z-value: 20.145, p-value: < 2.22e-16
## Wald statistic: 405.82, p-value: < 2.22e-16
## 
## Log likelihood: 194.6631 for lag model
## ML residual variance (sigma squared): 0.025173, (sigma: 0.15866)
## Number of observations: 506 
## Number of parameters estimated: 11 
## AIC: -367.33, (AIC for lm: -126.16)
## LM test for residual autocorrelation
## test value: 43.952, p-value: 3.3647e-11

The summary of this spatial lag model gives estimates of coefficients which make sense, with significant rho coefficient, athough its value is only about 0.6. AIC shows decrease from -126.16 to -367.33 which shows more information is acquired from the spatial model than the linear model. But a LM test on residuals to look for autocorrelation shows significant autocorrelation still present on residuals. Thus, this model is not satisfactory and so I will test spatial error model.
Spatial Error Model

bos.fit.sp_err = errorsarlm (logT_CMEDV ~ . , data=Boston, bos.listw)
  summary(bos.fit.sp_err)

## 
## Call:
## errorsarlm(formula = logT_CMEDV ~ ., data = Boston, listw = bos.listw)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.768782 -0.066184 -0.008979  0.067556  0.677227 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  2.53773768  0.19839288 12.7915 < 2.2e-16
## logT_CRIME  -0.02928207  0.00929479 -3.1504 0.0016306
## CHAS1       -0.05021243  0.03105756 -1.6168 0.1059314
## NOX         -0.57575446  0.21644376 -2.6601 0.0078126
## RM           0.15890069  0.01365644 11.6356 < 2.2e-16
## lgtT_AGE    -0.02588580  0.00725576 -3.5676 0.0003602
## logT_DIS    -0.15388526  0.05693148 -2.7030 0.0068719
## B            0.00066576  0.00011884  5.6022 2.117e-08
## LSTAT       -0.01716453  0.00190448 -9.0127 < 2.2e-16
## 
## Lambda: 0.78221, LR test value: 310.01, p-value: < 2.22e-16
## Asymptotic standard error: 0.026187
##     z-value: 29.87, p-value: < 2.22e-16
## Wald statistic: 892.2, p-value: < 2.22e-16
## 
## Log likelihood: 228.0878 for error model
## ML residual variance (sigma squared): 0.019934, (sigma: 0.14119)
## Number of observations: 506 
## Number of parameters estimated: 11 
## AIC: -434.18, (AIC for lm: -126.16)

The summary of spatial error model shows residuals which are more-or-less even distributed with median at about 0. The coefficients have changed a bit than the linear model. We get lamda which represents the strength of autocorrelation of residuals. The significance test of this strength coefficient indicates that autocorrelation is still a problem in the model.

In terms of AIC, this model performs better than lag model. But autocorrelation is still a problem in the model.
Since both of these models still contain autocorrelation, I tried the Spatial Durbin Lag model to check if the source of spatial dependency is between dependent variable and neighboring values of independent variable.
Since AIC score was higher in the spatial error model (as well as lower p-value in LM test), I chose to use error version of this model.

Spatial Durbin error Model

bos.fit.sp_Dur_err = errorsarlm (logT_CMEDV ~ . , data=Boston, bos.listw,
                                 etype="emixed")
  summary(bos.fit.sp_Dur_err)

## 
## Call:
## errorsarlm(formula = logT_CMEDV ~ ., data = Boston, listw = bos.listw, 
##     etype = "emixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.743066 -0.067142 -0.010586  0.066979  0.721496 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                   Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)     2.9290e+00  4.0735e-01  7.1903 6.466e-13
## logT_CRIME     -2.6674e-02  9.0657e-03 -2.9423  0.003258
## CHAS1          -3.1836e-02  3.1239e-02 -1.0191  0.308145
## NOX            -5.7297e-01  2.1720e-01 -2.6380  0.008339
## RM              1.5720e-01  1.3997e-02 11.2309 < 2.2e-16
## lgtT_AGE       -2.1734e-02  7.5776e-03 -2.8683  0.004127
## logT_DIS       -1.1292e-01  8.7777e-02 -1.2864  0.198302
## B               6.3774e-04  1.1636e-04  5.4806 4.238e-08
## LSTAT          -1.8939e-02  1.9279e-03 -9.8237 < 2.2e-16
## lag.logT_CRIME  8.5854e-03  1.6689e-02  0.5144  0.606944
## lag.CHAS1       1.8552e-01  6.8581e-02  2.7052  0.006827
## lag.NOX        -1.6190e-01  3.5306e-01 -0.4586  0.646557
## lag.RM          7.3191e-03  3.1514e-02  0.2322  0.816347
## lag.lgtT_AGE    1.2767e-02  1.6263e-02  0.7850  0.432438
## lag.logT_DIS   -1.2865e-01  1.0603e-01 -1.2134  0.224990
## lag.B          -7.1351e-05  2.0819e-04 -0.3427  0.731804
## lag.LSTAT      -1.6535e-02  4.1109e-03 -4.0221 5.768e-05
## 
## Lambda: 0.72605, LR test value: 253.48, p-value: < 2.22e-16
## Asymptotic standard error: 0.031023
##     z-value: 23.403, p-value: < 2.22e-16
## Wald statistic: 547.72, p-value: < 2.22e-16
## 
## Log likelihood: 244.0995 for error model
## ML residual variance (sigma squared): 0.019349, (sigma: 0.1391)
## Number of observations: 506 
## Number of parameters estimated: 19 
## AIC: -450.2, (AIC for lm: -198.72)

This model also presents serious concerns about remaining autocorrelation with high p-value and large Lambda.
So, I tried it with the spatial durbin lag model

Spatial Durbin lag Model

bos.fit.sp_Dur_lag = lagsarlm (logT_CMEDV ~ . , data=Boston, bos.listw,
                                 type="mixed")
  summary(bos.fit.sp_Dur_lag)

## 
## Call:lagsarlm(formula = logT_CMEDV ~ ., data = Boston, listw = bos.listw, 
##     type = "mixed")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.7849999 -0.0672047 -0.0079859  0.0647773  0.6994246 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                   Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)     1.09750141  0.19858543  5.5266 3.265e-08
## logT_CRIME     -0.02821967  0.00954836 -2.9554 0.0031222
## CHAS1          -0.07190558  0.03116426 -2.3073 0.0210376
## NOX            -0.60031113  0.22821101 -2.6305 0.0085257
## RM              0.15629353  0.01355370 11.5314 < 2.2e-16
## lgtT_AGE       -0.02424664  0.00721619 -3.3600 0.0007793
## logT_DIS       -0.10433710  0.09711629 -1.0744 0.2826649
## B               0.00066389  0.00011913  5.5728 2.507e-08
## LSTAT          -0.01657141  0.00188738 -8.7801 < 2.2e-16
## lag.logT_CRIME  0.02647739  0.01116206  2.3721 0.0176878
## lag.CHAS1       0.18146131  0.04745781  3.8236 0.0001315
## lag.NOX         0.36270682  0.24809713  1.4620 0.1437536
## lag.RM         -0.13261601  0.02143032 -6.1882 6.084e-10
## lag.lgtT_AGE    0.03880521  0.01021899  3.7974 0.0001462
## lag.logT_DIS    0.05682840  0.10053528  0.5653 0.5718981
## lag.B          -0.00056525  0.00014854 -3.8054 0.0001416
## lag.LSTAT       0.00021675  0.00294979  0.0735 0.9414236
## 
## Rho: 0.69992, LR test value: 262.17, p-value: < 2.22e-16
## Asymptotic standard error: 0.032167
##     z-value: 21.759, p-value: < 2.22e-16
## Wald statistic: 473.46, p-value: < 2.22e-16
## 
## Log likelihood: 248.4432 for mixed model
## ML residual variance (sigma squared): 0.019276, (sigma: 0.13884)
## Number of observations: 506 
## Number of parameters estimated: 19 
## AIC: -458.89, (AIC for lm: -198.72)
## LM test for residual autocorrelation
## test value: 12.648, p-value: 0.00037589

This model has a lower test value for residual autocorrelation but it is still significant since p-value of LM test is below 0.05. The strength of autoregressive coefficeint has increased upto 0.7 and is still significant. There is still spatial autocorrelation in the model.

One of the reasons that the model might not be performing well could be some non-normality present in some independent variable (Black Proportion specifically) and binary data (Closeness to river).
So, I tried General Additive Model as my next model with the assumption that relationship between response and predictor maybe non-linear.
General Additive Model

library(mgcv)
  x = coords[,1]
  y = coords[,2]
  bos.gam = gam(logT_CMEDV ~ logT_CRIME + CHAS + NOX +RM +lgtT_AGE +
                  logT_DIS +B +LSTAT + te(x,y),
                data=Boston,
                family = gaussian(link="identity"))
  summary(bos.gam)

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## logT_CMEDV ~ logT_CRIME + CHAS + NOX + RM + lgtT_AGE + logT_DIS + 
##     B + LSTAT + te(x, y)
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.3880947  0.1980455  17.108  < 2e-16 ***
## logT_CRIME  -0.0497809  0.0086016  -5.787 1.29e-08 ***
## CHAS1        0.0402700  0.0371893   1.083 0.279422    
## NOX         -0.5428747  0.1635924  -3.318 0.000974 ***
## RM           0.0938099  0.0165357   5.673 2.42e-08 ***
## lgtT_AGE    -0.0095223  0.0089984  -1.058 0.290487    
## logT_DIS    -0.4142499  0.0866213  -4.782 2.31e-06 ***
## B            0.0006128  0.0001124   5.450 8.04e-08 ***
## LSTAT       -0.0314747  0.0020730 -15.183  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##           edf Ref.df     F  p-value    
## te(x,y) 15.61  18.01 6.915 4.35e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.784   Deviance explained = 79.4%
## GCV = 0.037828  Scale est. = 0.035989  n = 506

nclr = 9
plotvar = residuals(bos.gam)
class = classIntervals(plotvar, nclr, style = "pretty")
plotclr = brewer.pal(nclr, "PRGn")
colcode = findColours(class, plotclr, digits = 3)
plot(bos@data$LON,bos@data$LAT,col=colcode,xlim = c(-71.3,-70.75),pch=16,cex=1
       ,xlab = "Longitude", ylab = "Latitude")
title(main = "Boston House Prices (GAM residuals)")
legend("topright", legend = names(attr(colcode,"table")),
fill = attr(colcode, "palette"))

  moran.test(residuals(bos.gam), bos.listw, alternative="two.sided")

## 
##  Moran's I test under randomisation
## 
## data:  residuals(bos.gam)  
## weights: bos.listw  
## 
## Moran I statistic standard deviate = 14.9622, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.432752484      -0.001980198       0.000844214

Based on the Moran’s I test and plot of residuals, we can still see much of autocorrelation not being accounted by the model.
So, I try again with Spatial filtering approach which uses eigen-analysis of spatial weight matrix.

Spatial Filtering Approach
STEP 1: BUild OLS (Present as lm.r)
STEP 2: Build Spatial Filter

#sf.err = SpatialFiltering(lm.r, nb= bos.nb, style = "C",
 #                         alpha=0.25, ExactEV=TRUE, data=Boston)
#sf.err

SpatialFiltering took a very very long time (more than 1 hour) to finish running with the full model, so I tried to make the model smaller with only two main dependent variables (CRIME and RM). Trying many approaches, it still did not work.

So, Finally, I tried the Geographically Weighted Regression with adaptive window selection since the points are concentrated at some areas and sparse at others.

library(spgwr)
bos.bw = gwr.sel(log(CMEDV) ~ CRIM + CHAS + NOX + RM + AGE + DIS + B + LSTAT, data=bos, adapt = TRUE, method = "cv",
                 verbose=TRUE)

## Adaptive q: 0.381966 CV score: 19.66855 
## Adaptive q: 0.618034 CV score: 20.70719 
## Adaptive q: 0.236068 CV score: 18.61773 
## Adaptive q: 0.145898 CV score: 17.4014 
## Adaptive q: 0.09016994 CV score: 15.68635 
## Adaptive q: 0.05572809 CV score: 13.65032 
## Adaptive q: 0.03444185 CV score: 12.24728 
## Adaptive q: 0.02128624 CV score: 11.56029 
## Adaptive q: 0.01315562 CV score: 11.6776 
## Adaptive q: 0.01952496 CV score: 11.5851 
## Adaptive q: 0.02631123 CV score: 11.63487 
## Adaptive q: 0.02205797 CV score: 11.52204 
## Adaptive q: 0.02368257 CV score: 11.44834 
## Adaptive q: 0.02468663 CV score: 11.52885 
## Adaptive q: 0.02334519 CV score: 11.45972 
## Adaptive q: 0.02406609 CV score: 11.47917 
## Adaptive q: 0.02362045 CV score: 11.45029 
## Adaptive q: 0.02382906 CV score: 11.45815 
## Adaptive q: 0.02372326 CV score: 11.44809 
## Adaptive q: 0.02376395 CV score: 11.45202 
## Adaptive q: 0.02372326 CV score: 11.44809

bos.bw

## [1] 0.02372326

print(dim(bos)[1] * bos.bw)

## [1] 12.00397

Each window takes about 12 points

bos.gwr<-gwr(log(CMEDV) ~ CRIM + CHAS + NOX + RM + AGE + DIS + B + LSTAT,
data= bos, adapt = bos.bw, gweight = gwr.Gauss,
hatmatrix = TRUE)
print(bos.gwr)

## Call:
## gwr(formula = log(CMEDV) ~ CRIM + CHAS + NOX + RM + AGE + DIS + 
##     B + LSTAT, data = bos, gweight = gwr.Gauss, adapt = bos.bw, 
##     hatmatrix = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.02372326 (about 12 of 506 data points)
## Summary of GWR coefficient estimates at data points:
##                    Min.    1st Qu.     Median    3rd Qu.       Max.
## X.Intercept.  0.4694000  1.7910000  2.6460000  3.5190000  7.4460000
## CRIM         -0.3612000 -0.0120700 -0.0097810 -0.0081560  0.1181000
## CHAS         -0.1971000  0.0238100  0.1265000  0.4011000  2.4320000
## NOX          -3.0050000 -0.8222000 -0.5863000 -0.1750000  2.0910000
## RM           -0.2093000  0.0962600  0.1768000  0.2538000  0.3495000
## AGE          -0.0156800 -0.0038710 -0.0021420 -0.0008005  0.0131600
## DIS          -1.1040000 -0.1257000 -0.0540600 -0.0290500  0.2967000
## B            -0.0011080  0.0003139  0.0005608  0.0008303  0.0021630
## LSTAT        -0.0461300 -0.0332800 -0.0203600 -0.0100400  0.0013710
##               Global
## X.Intercept.  2.9244
## CRIM         -0.0094
## CHAS          0.1445
## NOX          -0.5353
## RM            0.1352
## AGE          -0.0005
## DIS          -0.0396
## B             0.0004
## LSTAT        -0.0301
## Number of data points: 506 
## Effective number of parameters (residual: 2traceS - traceS'S): 136.6154 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 369.3846 
## Sigma (residual: 2traceS - traceS'S): 0.1284498 
## Effective number of parameters (model: traceS): 100.8238 
## Effective degrees of freedom (model: traceS): 405.1762 
## Sigma (model: traceS): 0.1226453 
## Sigma (ML): 0.1097483 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): -544.5301 
## AIC (GWR p. 96, eq. 4.22): -699.2911 
## Residual sum of squares: 6.09461 
## Quasi-global R2: 0.9275982

Multiple local models are formed. A range of values for coefficients are produced which shows chanigng relationship in space. Some relationships (e.g. NOX) changes form -3.0 to 2.0 based on the location.
AIC value is -699 which is higher than any of the any model built previously.
The overall R-squared is shown to be 0.93

nclr <- 9
plotvar <- bos.gwr$SDF$localR2
class <- classIntervals(plotvar, nclr, style = "pretty")
plotclr <- brewer.pal(nclr, "Blues")
colcode <- findColours(class, plotclr, digits = 3)
plot(bos@data$LON,bos@data$LAT,col=colcode,xlim = c(-71.3,-70.75),pch=16,cex=1
       ,xlab = "Longitude", ylab = "Latitude")
title(main = "Local R2 from GWR")
legend("topright", legend = names(attr(colcode,"table")),
fill = attr(colcode, "palette"), ncol=1)

The R-squared value plotted shows that R-squared value changes from 0.6 to 0.95 at some places.

layout(matrix(c(1:2), 1, 2, byrow = TRUE))
for (i in 4:11){
nclr <- 9
plotvar_ <- bos.gwr$SDF[,i]
plotvar = plotvar_@data[,1]
class <- classIntervals(plotvar, nclr, style = "pretty")
plotclr <- brewer.pal(nclr, "YlOrRd")
colcode <- findColours(class, plotclr, digits = 3)
plot(bos@data$LON,bos@data$LAT,col=colcode,xlim = c(-71.3,-70.6),pch=16,cex=1
       ,xlab = "Longitude", ylab = "Latitude")
title(main = paste ("Coeff for ",vars_selected[i-2],sep=""))
legend("topright", legend = names(attr(colcode,"table")),
fill = attr(colcode, "palette"), ncol=1,cex=0.6)
}

par(def.par)

Looking at the coeff figures, we can see which local area have most effect on house values from the dependent variable. e.g there is a small area on the top right where crime rates have an enormous effect on House values. In this small region, a small increase in crime rate would decrease house values drastically.
So, this model has local model and has included the spatial correlation into something meaningful.
I would believe the direction (positive or negative) of coefficients from any spatial lag or error model and use this GWR to see where the model has higher chances of performing well.