#set working directory
setwd("~/Documents/geog6000/lab12")
#Packages
library(RColorBrewer)
library(sf)
## Warning: package 'sf' was built under R version 3.6.2
## Registered S3 methods overwritten by 'tibble':
## method from
## format.tbl pillar
## print.tbl pillar
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(spdep)
## Warning: package 'spdep' was built under R version 3.6.2
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.6.2
## Loading required package: spData
## Warning: package 'spData' was built under R version 3.6.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 3.6.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.6.2
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
## as_dsTMatrix_listw, as.spam.listw, can.be.simmed, cheb_setup,
## create_WX, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, GMargminImage, GMerrorsar,
## griffith_sone, gstsls, Hausman.test, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, sacsarlm,
## SE_classic_setup, SE_interp_setup, SE_whichMin_setup,
## set.ClusterOption, set.coresOption, set.mcOption,
## set.VerboseOption, set.ZeroPolicyOption, similar.listw, spam_setup,
## spam_update_setup, SpatialFiltering, spautolm, spBreg_err,
## spBreg_lag, spBreg_sac, stsls, subgraph_eigenw, trW
library(tmaptools)
## Warning: package 'tmaptools' was built under R version 3.6.2
## Registered S3 methods overwritten by 'stars':
## method from
## st_bbox.SpatRaster sf
## st_crs.SpatRaster sf
#read in data
usedcars.shp <- st_read("../datafiles/usedcars/usa48_usedcars.shp", quiet = TRUE)
plot(st_geometry(usedcars.shp))
Build a neighborhood function linking the 48 states:
usedcars.geometry <- st_geometry(usedcars.shp)
usedcars.coords <- st_centroid(usedcars.geometry)
queensnbcars <- poly2nb(usedcars.shp)
plot(usedcars.geometry, reset = FALSE)
queensplot <- plot(queensnbcars, usedcars.coords, ad = TRUE, col = 5, lwd = 1.5)
I chose to use a queens case neighborhood because it makes the most sense given the data we are using. Including both bounaries and corners that touch is important, specifically for places like Utah and New Mexico.
Build a spatial weight matrix:
swm_queensnbcars <- nb2listw(queensnbcars, style = 'B')
summary(swm_queensnbcars)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 48
## Number of nonzero links: 214
## Percentage nonzero weights: 9.288194
## Average number of links: 4.458333
## Link number distribution:
##
## 1 2 3 4 5 6 7 8
## 1 4 9 11 10 9 2 2
## 1 least connected region:
## 17 with 1 link
## 2 most connected regions:
## 23 40 with 8 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 48 2304 214 428 4296
There is an average of 4.458 links per state.
Moran Test
moran.test(usedcars.shp$price_1960,
listw = nb2listw(queensnbcars),
alternative = "two.sided",
randomisation = TRUE)
##
## Moran I test under randomisation
##
## data: usedcars.shp$price_1960
## weights: nb2listw(queensnbcars)
##
## Moran I statistic standard deviate = 8.1752, p-value = 2.954e-16
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## 0.783561543 -0.021276596 0.009692214
Report:
-Moran I: 0.78356
-z-score: 8.1752
-The p-value is below 0.05 so we can reject the null hypothesis that there is no spatial autocorrelation. (There is spatial autocorrelation)
Simple Linear Model
taxpricelm <- lm(price_1960 ~ tax_charge, data = usedcars.shp)
summary(taxpricelm)
##
## Call:
## lm(formula = price_1960 ~ tax_charge, data = usedcars.shp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.701 -45.053 -1.461 43.400 107.807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1435.7506 27.5796 52.058 < 2e-16 ***
## tax_charge 0.6872 0.1754 3.918 0.000294 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.01 on 46 degrees of freedom
## Multiple R-squared: 0.2503, Adjusted R-squared: 0.234
## F-statistic: 15.35 on 1 and 46 DF, p-value: 0.0002939
Report:
-Coefficients: 1435.75 and 0.6872
-R-squared: 0.2503
moran.mc(residuals(taxpricelm), listw = nb2listw(queensnbcars),
nsim = 999,
alternative = 'greater')
##
## Monte-Carlo simulation of Moran I
##
## data: residuals(taxpricelm)
## weights: nb2listw(queensnbcars)
## number of simulations + 1: 1000
##
## statistic = 0.57482, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
With a p-value of 0.001, autocorrelation is present. Once again, we reject the null that there is no autocorrelation.
Legrange Multiplier test:
lmt <- lm.LMtests(taxpricelm, nb2listw(queensnbcars), test = c("LMerr","LMlag"))
summary(lmt)
## Lagrange multiplier diagnostics for spatial dependence
## data:
## model: lm(formula = price_1960 ~ tax_charge, data = usedcars.shp)
## weights: nb2listw(queensnbcars)
##
## statistic parameter p.value
## LMerr 31.793 1 1.715e-08 ***
## LMlag 40.664 1 1.808e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lmt_robust <- lm.LMtests(taxpricelm, nb2listw(queensnbcars), test = c("RLMerr","RLMlag"))
summary(lmt_robust)
## Lagrange multiplier diagnostics for spatial dependence
## data:
## model: lm(formula = price_1960 ~ tax_charge, data = usedcars.shp)
## weights: nb2listw(queensnbcars)
##
## statistic parameter p.value
## RLMerr 0.051751 1 0.820044
## RLMlag 8.922943 1 0.002816 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
After running the version of the Legrange Multiplier test, it is clear that the lag model is a better choice. The lag model p-value is significant at 0.002, while the error model is not with a p-value of 0.82.
Spatial Lag Model
cars.lag <- lagsarlm(price_1960 ~ tax_charge,
data = usedcars.shp,
nb2listw(queensnbcars))
summary(cars.lag)
##
## Call:lagsarlm(formula = price_1960 ~ tax_charge, data = usedcars.shp,
## listw = nb2listw(queensnbcars))
##
## Residuals:
## Min 1Q Median 3Q Max
## -77.6781 -16.9504 4.2497 19.5484 58.9811
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 309.42546 123.03167 2.5150 0.0119
## tax_charge 0.16711 0.10212 1.6364 0.1018
##
## Rho: 0.78302, LR test value: 42.681, p-value: 6.4426e-11
## Asymptotic standard error: 0.081636
## z-value: 9.5916, p-value: < 2.22e-16
## Wald statistic: 91.998, p-value: < 2.22e-16
##
## Log likelihood: -239.8252 for lag model
## ML residual variance (sigma squared): 1036.7, (sigma: 32.197)
## Number of observations: 48
## Number of parameters estimated: 4
## AIC: 487.65, (AIC for lm: 528.33)
## LM test for residual autocorrelation
## test value: 2.1141, p-value: 0.14594
Report:
-Intercept: 309.42, p-value 0.0119
-tax_charge: 0.16711, p-value 0.1018
-Rho: 0.78302
-AIC: 487.65
-AIC lm: 528.33
Moran test
moran.test(residuals(cars.lag),
listw = nb2listw(queensnbcars),
alternative = "two.sided",
randomisation = TRUE)
##
## Moran I test under randomisation
##
## data: residuals(cars.lag)
## weights: nb2listw(queensnbcars)
##
## Moran I statistic standard deviate = -0.58145, p-value = 0.5609
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.077924289 -0.021276596 0.009491449
The result of the Moran I test indicates that the lag model adequately accounted for spatial autocorrelation with a p-value of 0.5609. We can finally accept the null hypothesis: there is no spatial autocorrelation.
The coefficient that relates tax and delivery to car price does not reach the significance threshold. One reason for this is that some states may have had a limited number of cars available to buy within the state, forcing them to buy from a different state.