#set working directory
setwd("~/Documents/geog6000/lab12")
#Packages
library(RColorBrewer)
library(sf)
## Warning: package 'sf' was built under R version 3.6.2
## Registered S3 methods overwritten by 'tibble':
##   method     from  
##   format.tbl pillar
##   print.tbl  pillar
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(spdep)
## Warning: package 'spdep' was built under R version 3.6.2
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.6.2
## Loading required package: spData
## Warning: package 'spData' was built under R version 3.6.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 3.6.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.6.2
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
##     as_dsTMatrix_listw, as.spam.listw, can.be.simmed, cheb_setup,
##     create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
##     griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, sacsarlm,
##     SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
##     set.ClusterOption, set.coresOption, set.mcOption,
##     set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
##     spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
##     spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(tmaptools)
## Warning: package 'tmaptools' was built under R version 3.6.2
## Registered S3 methods overwritten by 'stars':
##   method             from
##   st_bbox.SpatRaster sf  
##   st_crs.SpatRaster  sf
#read in data
usedcars.shp <- st_read("../datafiles/usedcars/usa48_usedcars.shp", quiet = TRUE)
plot(st_geometry(usedcars.shp))

Build a neighborhood function linking the 48 states:

usedcars.geometry <- st_geometry(usedcars.shp)
usedcars.coords <- st_centroid(usedcars.geometry)
queensnbcars <- poly2nb(usedcars.shp)
plot(usedcars.geometry, reset = FALSE)
queensplot <- plot(queensnbcars, usedcars.coords, ad = TRUE, col = 5, lwd = 1.5)

I chose to use a queens case neighborhood because it makes the most sense given the data we are using. Including both bounaries and corners that touch is important, specifically for places like Utah and New Mexico.

Build a spatial weight matrix:

swm_queensnbcars <- nb2listw(queensnbcars, style = 'B')
summary(swm_queensnbcars)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 48 
## Number of nonzero links: 214 
## Percentage nonzero weights: 9.288194 
## Average number of links: 4.458333 
## Link number distribution:
## 
##  1  2  3  4  5  6  7  8 
##  1  4  9 11 10  9  2  2 
## 1 least connected region:
## 17 with 1 link
## 2 most connected regions:
## 23 40 with 8 links
## 
## Weights style: B 
## Weights constants summary:
##    n   nn  S0  S1   S2
## B 48 2304 214 428 4296

There is an average of 4.458 links per state.

Moran Test

moran.test(usedcars.shp$price_1960, 
           listw = nb2listw(queensnbcars),
 alternative = "two.sided", 
           randomisation = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  usedcars.shp$price_1960  
## weights: nb2listw(queensnbcars)    
## 
## Moran I statistic standard deviate = 8.1752, p-value = 2.954e-16
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.783561543      -0.021276596       0.009692214

Report:
-Moran I: 0.78356
-z-score: 8.1752
-The p-value is below 0.05 so we can reject the null hypothesis that there is no spatial autocorrelation. (There is spatial autocorrelation)

Simple Linear Model

taxpricelm <- lm(price_1960 ~ tax_charge, data = usedcars.shp)
summary(taxpricelm)
## 
## Call:
## lm(formula = price_1960 ~ tax_charge, data = usedcars.shp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -116.701  -45.053   -1.461   43.400  107.807 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1435.7506    27.5796  52.058  < 2e-16 ***
## tax_charge     0.6872     0.1754   3.918 0.000294 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 57.01 on 46 degrees of freedom
## Multiple R-squared:  0.2503, Adjusted R-squared:  0.234 
## F-statistic: 15.35 on 1 and 46 DF,  p-value: 0.0002939

Report:
-Coefficients: 1435.75 and 0.6872
-R-squared: 0.2503

moran.mc(residuals(taxpricelm), listw = nb2listw(queensnbcars),
         nsim = 999, 
         alternative = 'greater')
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  residuals(taxpricelm) 
## weights: nb2listw(queensnbcars)  
## number of simulations + 1: 1000 
## 
## statistic = 0.57482, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater

With a p-value of 0.001, autocorrelation is present. Once again, we reject the null that there is no autocorrelation.

Legrange Multiplier test:

lmt <- lm.LMtests(taxpricelm, nb2listw(queensnbcars), test = c("LMerr","LMlag"))
summary(lmt)
##  Lagrange multiplier diagnostics for spatial dependence
## data:  
## model: lm(formula = price_1960 ~ tax_charge, data = usedcars.shp)
## weights: nb2listw(queensnbcars)
##  
##       statistic parameter   p.value    
## LMerr    31.793         1 1.715e-08 ***
## LMlag    40.664         1 1.808e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lmt_robust <- lm.LMtests(taxpricelm, nb2listw(queensnbcars), test = c("RLMerr","RLMlag"))
summary(lmt_robust)
##  Lagrange multiplier diagnostics for spatial dependence
## data:  
## model: lm(formula = price_1960 ~ tax_charge, data = usedcars.shp)
## weights: nb2listw(queensnbcars)
##  
##        statistic parameter  p.value   
## RLMerr  0.051751         1 0.820044   
## RLMlag  8.922943         1 0.002816 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

After running the version of the Legrange Multiplier test, it is clear that the lag model is a better choice. The lag model p-value is significant at 0.002, while the error model is not with a p-value of 0.82.

Spatial Lag Model

cars.lag <- lagsarlm(price_1960 ~ tax_charge, 
                     data = usedcars.shp, 
                     nb2listw(queensnbcars))
summary(cars.lag)
## 
## Call:lagsarlm(formula = price_1960 ~ tax_charge, data = usedcars.shp, 
##     listw = nb2listw(queensnbcars))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -77.6781 -16.9504   4.2497  19.5484  58.9811 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) 309.42546  123.03167  2.5150   0.0119
## tax_charge    0.16711    0.10212  1.6364   0.1018
## 
## Rho: 0.78302, LR test value: 42.681, p-value: 6.4426e-11
## Asymptotic standard error: 0.081636
##     z-value: 9.5916, p-value: < 2.22e-16
## Wald statistic: 91.998, p-value: < 2.22e-16
## 
## Log likelihood: -239.8252 for lag model
## ML residual variance (sigma squared): 1036.7, (sigma: 32.197)
## Number of observations: 48 
## Number of parameters estimated: 4 
## AIC: 487.65, (AIC for lm: 528.33)
## LM test for residual autocorrelation
## test value: 2.1141, p-value: 0.14594

Report:
-Intercept: 309.42, p-value 0.0119
-tax_charge: 0.16711, p-value 0.1018
-Rho: 0.78302
-AIC: 487.65
-AIC lm: 528.33

Moran test

moran.test(residuals(cars.lag), 
           listw = nb2listw(queensnbcars),
 alternative = "two.sided", 
           randomisation = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  residuals(cars.lag)  
## weights: nb2listw(queensnbcars)    
## 
## Moran I statistic standard deviate = -0.58145, p-value = 0.5609
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      -0.077924289      -0.021276596       0.009491449

The result of the Moran I test indicates that the lag model adequately accounted for spatial autocorrelation with a p-value of 0.5609. We can finally accept the null hypothesis: there is no spatial autocorrelation.

The coefficient that relates tax and delivery to car price does not reach the significance threshold. One reason for this is that some states may have had a limited number of cars available to buy within the state, forcing them to buy from a different state.