library(sp)
library(spdep)
## Загрузка требуемого пакета: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Загрузка требуемого пакета: sf
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(rgdal)
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-4, (SVN revision 1196)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.5.2, released 2022/09/02
## Path to GDAL shared files: C:/Users/Kolya/AppData/Local/R/win-library/4.2/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: C:/Users/Kolya/AppData/Local/R/win-library/4.2/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.6-0
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
MyPath1 <-"C:/Users/Kolya/Documents/GeoAnalisys/R"
setwd(MyPath1) # working directory
###—— read in a shapefile ——-
#regions<- readOGR(file.choose()) #NCVACO.shp
regions <- readOGR("NCVACO.shp")
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\Kolya\Documents\GeoAnalisys\R\NCVACO.shp", layer: "NCVACO"
## with 234 features
## It has 49 fields
## Integer64 fields read as strings: FIPS qtystores PCI COUNTMXBV DC GA KY MD SC TN WV VA COUNTBKGR TOTALPOP POP18OV LABFORCE HHOLDS POP25OV POP16OV
# Integer64 fields read as strings: ...
View(regions@data)
View(regions@data$COUNTY)
View(regions@data$FIPS2)
is.numeric(regions$PCI)
## [1] FALSE
regions$PCI <- as.numeric(regions$PCI)
summary(regions)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -84.32187 -75.24227
## y 33.84452 39.46601
## Is projected: FALSE
## proj4string : [+proj=longlat +datum=NAD83 +no_defs]
## Data attributes:
## GEO_ID STATE COUNTY NAME
## Length:234 Length:234 Length:234 Length:234
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## LSAD CENSUSAREA FIPS2 Lon
## Length:234 Min. : 1.999 Length:234 Min. :-83.94
## Class :character 1st Qu.:234.037 Class :character 1st Qu.:-80.32
## Mode :character Median :381.377 Mode :character Median :-78.55
## Mean :376.530 Mean :-78.94
## 3rd Qu.:514.512 3rd Qu.:-77.37
## Max. :968.941 Max. :-75.63
## Lat FIPS qtystores SALESPC
## Min. :34.16 Length:234 Length:234 Min. : 0.00
## 1st Qu.:35.82 Class :character Class :character 1st Qu.: 38.00
## Median :36.75 Mode :character Mode :character Median : 58.80
## Mean :36.74 Mean : 64.98
## 3rd Qu.:37.53 3rd Qu.: 78.74
## Max. :39.17 Max. :297.70
## PCI COMM15OVP COLLENRP SOMECOLLP
## Min. :12808 Min. : 5.49 Min. : 0.890 Min. :16.55
## 1st Qu.:16154 1st Qu.:25.89 1st Qu.: 2.663 1st Qu.:23.38
## Median :18073 Median :30.42 Median : 3.180 Median :27.15
## Mean :19003 Mean :30.52 Mean : 4.576 Mean :28.81
## 3rd Qu.:20505 3rd Qu.:34.96 3rd Qu.: 4.180 3rd Qu.:32.39
## Max. :41014 Max. :48.79 Max. :41.300 Max. :59.92
## ARMEDP NONWHITEP UNEMPP ENTRECP
## Min. : 0.0000 Min. : 0.670 Min. : 1.710 Min. : 2.840
## 1st Qu.: 0.0200 1st Qu.: 8.928 1st Qu.: 3.632 1st Qu.: 4.902
## Median : 0.0800 Median :22.170 Median : 4.670 Median : 5.770
## Mean : 0.6656 Mean :24.763 Mean : 5.274 Mean : 6.441
## 3rd Qu.: 0.2400 3rd Qu.:37.795 3rd Qu.: 6.452 3rd Qu.: 7.425
## Max. :21.5300 Max. :81.590 Max. :41.410 Max. :22.540
## PUBASSTP POVPOPP URBANP FOREIGNBP
## Min. :0.480 Min. : 2.75 Min. : 0.00 Min. : 0.000
## 1st Qu.:2.005 1st Qu.: 9.17 1st Qu.: 5.94 1st Qu.: 0.500
## Median :2.760 Median :12.57 Median : 33.84 Median : 0.995
## Mean :3.099 Mean :13.06 Mean : 41.16 Mean : 1.854
## 3rd Qu.:3.890 3rd Qu.:16.71 3rd Qu.: 70.97 3rd Qu.: 2.243
## Max. :8.650 Max. :31.35 Max. :100.00 Max. :16.120
## BAPTISTSP ADHERENTSP BKGRTOMIX COUNTMXBV
## Min. : 0.000 Min. : 13.28 Min. : 0.2625 Length:234
## 1st Qu.: 9.193 1st Qu.: 35.17 1st Qu.: 1.9611 Class :character
## Median :14.110 Median : 43.14 Median : 3.6519 Mode :character
## Mean :15.725 Mean : 45.19 Mean : 5.3687
## 3rd Qu.:21.250 3rd Qu.: 50.70 3rd Qu.: 6.8552
## Max. :60.130 Max. :164.53 Max. :38.2724
## MXBVSQM BKGRTOABC MXBVPPOP18 DUI1802
## Min. :0.00000 Min. : 0.5492 Min. : 0.000 Min. : 0.2521
## 1st Qu.:0.00325 1st Qu.: 2.8035 1st Qu.: 1.082 1st Qu.: 4.4306
## Median :0.02245 Median : 4.4334 Median : 3.213 Median : 6.9195
## Mean :0.41420 Mean : 4.9031 Mean : 4.574 Mean : 7.7809
## 3rd Qu.:0.12915 3rd Qu.: 5.9744 3rd Qu.: 6.500 3rd Qu.: 9.9235
## Max. :8.69690 Max. :21.5013 Max. :39.782 Max. :32.7806
## FVPTHH02 DC GA KY
## Min. : 0.00000 Length:234 Length:234 Length:234
## 1st Qu.: 0.07405 Class :character Class :character Class :character
## Median : 0.38154 Mode :character Mode :character Mode :character
## Mean : 1.64369
## 3rd Qu.: 2.14565
## Max. :20.45836
## MD SC TN WV
## Length:234 Length:234 Length:234 Length:234
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## VA AREALANDSQ COUNTBKGR TOTALPOP
## Length:234 Min. : 1.986 Length:234 Length:234
## Class :character 1st Qu.:235.451 Class :character Class :character
## Mode :character Median :381.783 Mode :character Mode :character
## Mean :377.358
## 3rd Qu.:518.925
## Max. :970.760
## POP18OV LABFORCE HHOLDS POP25OV
## Length:234 Length:234 Length:234 Length:234
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## POP16OV
## Length:234
## Class :character
## Mode :character
##
##
##
proj4string(regions) <- CRS("+init=epsg:4326") #assign to an object with an existing CRS, epsg4326 is WGS84
## Warning in `proj4string<-`(`*tmp*`, value = new("CRS", projargs = "+proj=longlat +datum=WGS84 +no_defs")): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=NAD83 +no_defs
## without reprojecting.
## For reprojection, use function spTransform
is.projected(regions) #false
## [1] FALSE
class(regions) #must be "SpatialPolygonsDataFrame", package "sp"
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
plot(regions)

regions.prj <- spTransform(regions, CRS("+init=epsg:5070")) #reprojection
is.projected(regions.prj) #true
## [1] TRUE
summary(regions.prj)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x 1054152 1833387
## y 1345257 1966514
## Is projected: TRUE
## proj4string :
## [+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0
## +datum=NAD83 +units=m +no_defs]
## Data attributes:
## GEO_ID STATE COUNTY NAME
## Length:234 Length:234 Length:234 Length:234
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## LSAD CENSUSAREA FIPS2 Lon
## Length:234 Min. : 1.999 Length:234 Min. :-83.94
## Class :character 1st Qu.:234.037 Class :character 1st Qu.:-80.32
## Mode :character Median :381.377 Mode :character Median :-78.55
## Mean :376.530 Mean :-78.94
## 3rd Qu.:514.512 3rd Qu.:-77.37
## Max. :968.941 Max. :-75.63
## Lat FIPS qtystores SALESPC
## Min. :34.16 Length:234 Length:234 Min. : 0.00
## 1st Qu.:35.82 Class :character Class :character 1st Qu.: 38.00
## Median :36.75 Mode :character Mode :character Median : 58.80
## Mean :36.74 Mean : 64.98
## 3rd Qu.:37.53 3rd Qu.: 78.74
## Max. :39.17 Max. :297.70
## PCI COMM15OVP COLLENRP SOMECOLLP
## Min. :12808 Min. : 5.49 Min. : 0.890 Min. :16.55
## 1st Qu.:16154 1st Qu.:25.89 1st Qu.: 2.663 1st Qu.:23.38
## Median :18073 Median :30.42 Median : 3.180 Median :27.15
## Mean :19003 Mean :30.52 Mean : 4.576 Mean :28.81
## 3rd Qu.:20505 3rd Qu.:34.96 3rd Qu.: 4.180 3rd Qu.:32.39
## Max. :41014 Max. :48.79 Max. :41.300 Max. :59.92
## ARMEDP NONWHITEP UNEMPP ENTRECP
## Min. : 0.0000 Min. : 0.670 Min. : 1.710 Min. : 2.840
## 1st Qu.: 0.0200 1st Qu.: 8.928 1st Qu.: 3.632 1st Qu.: 4.902
## Median : 0.0800 Median :22.170 Median : 4.670 Median : 5.770
## Mean : 0.6656 Mean :24.763 Mean : 5.274 Mean : 6.441
## 3rd Qu.: 0.2400 3rd Qu.:37.795 3rd Qu.: 6.452 3rd Qu.: 7.425
## Max. :21.5300 Max. :81.590 Max. :41.410 Max. :22.540
## PUBASSTP POVPOPP URBANP FOREIGNBP
## Min. :0.480 Min. : 2.75 Min. : 0.00 Min. : 0.000
## 1st Qu.:2.005 1st Qu.: 9.17 1st Qu.: 5.94 1st Qu.: 0.500
## Median :2.760 Median :12.57 Median : 33.84 Median : 0.995
## Mean :3.099 Mean :13.06 Mean : 41.16 Mean : 1.854
## 3rd Qu.:3.890 3rd Qu.:16.71 3rd Qu.: 70.97 3rd Qu.: 2.243
## Max. :8.650 Max. :31.35 Max. :100.00 Max. :16.120
## BAPTISTSP ADHERENTSP BKGRTOMIX COUNTMXBV
## Min. : 0.000 Min. : 13.28 Min. : 0.2625 Length:234
## 1st Qu.: 9.193 1st Qu.: 35.17 1st Qu.: 1.9611 Class :character
## Median :14.110 Median : 43.14 Median : 3.6519 Mode :character
## Mean :15.725 Mean : 45.19 Mean : 5.3687
## 3rd Qu.:21.250 3rd Qu.: 50.70 3rd Qu.: 6.8552
## Max. :60.130 Max. :164.53 Max. :38.2724
## MXBVSQM BKGRTOABC MXBVPPOP18 DUI1802
## Min. :0.00000 Min. : 0.5492 Min. : 0.000 Min. : 0.2521
## 1st Qu.:0.00325 1st Qu.: 2.8035 1st Qu.: 1.082 1st Qu.: 4.4306
## Median :0.02245 Median : 4.4334 Median : 3.213 Median : 6.9195
## Mean :0.41420 Mean : 4.9031 Mean : 4.574 Mean : 7.7809
## 3rd Qu.:0.12915 3rd Qu.: 5.9744 3rd Qu.: 6.500 3rd Qu.: 9.9235
## Max. :8.69690 Max. :21.5013 Max. :39.782 Max. :32.7806
## FVPTHH02 DC GA KY
## Min. : 0.00000 Length:234 Length:234 Length:234
## 1st Qu.: 0.07405 Class :character Class :character Class :character
## Median : 0.38154 Mode :character Mode :character Mode :character
## Mean : 1.64369
## 3rd Qu.: 2.14565
## Max. :20.45836
## MD SC TN WV
## Length:234 Length:234 Length:234 Length:234
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## VA AREALANDSQ COUNTBKGR TOTALPOP
## Length:234 Min. : 1.986 Length:234 Length:234
## Class :character 1st Qu.:235.451 Class :character Class :character
## Mode :character Median :381.783 Mode :character Mode :character
## Mean :377.358
## 3rd Qu.:518.925
## Max. :970.760
## POP18OV LABFORCE HHOLDS POP25OV
## Length:234 Length:234 Length:234 Length:234
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## POP16OV
## Length:234
## Class :character
## Mode :character
##
##
##
plot(regions.prj)

# CREATE LIST OF NEIGHBORS: queen/rook contiguity
queen.nb<-poly2nb(regions) #create neighbors list (queen contiguity) data from polygon list
summary(queen.nb)
## Neighbour list object:
## Number of regions: 234
## Number of nonzero links: 1132
## Percentage nonzero weights: 2.067353
## Average number of links: 4.837607
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 11
## 16 26 22 32 36 49 37 9 6 1
## 16 least connected regions:
## 45 46 48 51 53 54 57 179 206 207 209 211 216 226 229 231 with 1 link
## 1 most connected region:
## 146 with 11 links
rook.nb<-poly2nb(regions,queen=FALSE)
summary(rook.nb)
## Neighbour list object:
## Number of regions: 234
## Number of nonzero links: 1104
## Percentage nonzero weights: 2.016217
## Average number of links: 4.717949
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10
## 16 26 24 33 41 54 24 10 5 1
## 16 least connected regions:
## 45 46 48 51 53 54 57 179 206 207 209 211 216 226 229 231 with 1 link
## 1 most connected region:
## 146 with 10 links
isTRUE(all.equal(queen.nb,rook.nb,check.attributes=FALSE))
## [1] FALSE
# CREATE SPATIAL WEIGHTS. Compute Global Moran's I
queen.weights <- nb2listw(queen.nb) #create spatial weights from neighbors list
moran(regions$PCI, queen.weights, length(regions$PCI), Szero(queen.weights))
## $I
## [1] 0.6585677
##
## $K
## [1] 8.406088
moran.test(regions$PCI, queen.weights)
##
## Moran I test under randomisation
##
## data: regions$PCI
## weights: queen.weights
##
## Moran I statistic standard deviate = 14.682, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.658567737 -0.004291845 0.002038240
moran.plot(regions$PCI, queen.weights)

moran(regions$SALESPC, queen.weights, length(regions$SALESPC), Szero(queen.weights))
## $I
## [1] -0.01394866
##
## $K
## [1] 9.058721
moran.test(regions$SALESPC, queen.weights)
##
## Moran I test under randomisation
##
## data: regions$SALESPC
## weights: queen.weights
##
## Moran I statistic standard deviate = -0.21421, p-value = 0.5848
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.013948657 -0.004291845 0.002032305
moran.plot(regions$SALESPC, queen.weights)

rook.weights <- nb2listw(rook.nb) #create spatial weights from neighbors list
moran(regions$PCI, rook.weights, length(regions$PCI), Szero(rook.weights))
## $I
## [1] 0.6566088
##
## $K
## [1] 8.406088
moran.test(regions$PCI, rook.weights)
##
## Moran I test under randomisation
##
## data: regions$PCI
## weights: rook.weights
##
## Moran I statistic standard deviate = 14.522, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.656608753 -0.004291845 0.002071169
moran.plot(regions$PCI, rook.weights)

moran(regions$SALESPC, rook.weights, length(regions$SALESPC), Szero(rook.weights))
## $I
## [1] -0.01300157
##
## $K
## [1] 9.058721
moran.test(regions$SALESPC, rook.weights)
##
## Moran I test under randomisation
##
## data: regions$SALESPC
## weights: rook.weights
##
## Moran I statistic standard deviate = -0.19166, p-value = 0.576
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.013001570 -0.004291845 0.002065138
moran.plot(regions$SALESPC, rook.weights)

moran(regions$SALESPC,nb2listw(queen.nb), length(regions$SALESPC), Szero(nb2listw(queen.nb)))
## $I
## [1] -0.01394866
##
## $K
## [1] 9.058721
moran.test(regions$SALESPC,nb2listw(queen.nb))
##
## Moran I test under randomisation
##
## data: regions$SALESPC
## weights: nb2listw(queen.nb)
##
## Moran I statistic standard deviate = -0.21421, p-value = 0.5848
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.013948657 -0.004291845 0.002032305
# CREATE LIST OF NEIGHBORS: k nearest neighbors
library(rgeos)
## rgeos version: 0.6-1, (SVN revision 692)
## GEOS runtime version: 3.9.3-CAPI-1.14.3
## Please note that rgeos will be retired during 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
## GEOS using OverlayNG
## Linking to sp version: 1.6-0
## Polygon checking: TRUE
regions.centroids<-gCentroid(regions,byid=TRUE)
#regions.centroids.prj<-gCentroid(regions.prj,byid=TRUE)
head(regions.centroids)
## SpatialPoints:
## x y
## 0 -80.53085 34.98821
## 1 -78.40829 36.36479
## 2 -78.00390 35.36370
## 3 -79.39878 36.04361
## 4 -81.17707 35.92073
## 5 -81.12768 36.49121
## Coordinate Reference System (CRS) arguments: +proj=longlat +datum=WGS84
## +no_defs
class(regions.centroids)
## [1] "SpatialPoints"
## attr(,"package")
## [1] "sp"
regions.6nn.nb<-knn2nb(knearneigh(regions.centroids,k=6,longlat = TRUE),row.names=regions$FIPS)
#regions.6nn.nb<-knn2nb(knearneigh(regions.centroids.prj,k=6,longlat = FALSE),row.names=regions$FIPS)
summary(regions.6nn.nb)
## Neighbour list object:
## Number of regions: 234
## Number of nonzero links: 1404
## Percentage nonzero weights: 2.564103
## Average number of links: 6
## Non-symmetric neighbours list
## Link number distribution:
##
## 6
## 234
## 234 least connected regions:
## 37179 37181 37191 37001 37003 37005 37017 37025 37033 37043 37055 37057 37063 37073 37081 37091 37101 37109 37123 37127 37135 37145 37151 37155 37167 51003 51013 51025 51035 51043 51049 51063 51075 51081 51097 51109 51121 51127 51135 51145 51159 51169 51181 51187 51197 51515 51530 51570 51595 51610 51640 51660 51683 51690 51720 51740 51770 51820 51830 37007 37009 37011 37013 37015 37019 37021 37023 37027 37029 37031 37035 37037 37039 37041 37045 37047 37049 37051 37053 37185 37187 37189 37193 37195 37197 37059 37061 37065 37067 37069 37071 37075 37077 37079 37083 37085 37087 37089 37199 37093 37095 37097 37099 37103 37105 37107 37111 37113 37115 37117 37119 37121 37125 37129 37131 37133 37137 37139 37141 37143 37147 37149 37153 37157 37159 37161 37163 37165 37169 37171 37173 37175 37177 51009 51011 51015 51017 51019 51021 51023 51027 51029 51031 51033 51036 51037 51041 51045 51047 51051 51053 51057 51059 51061 51065 51067 51069 51071 51073 51077 51079 51083 51085 51087 51089 51091 51093 51095 51099 51101 51103 51105 51107 51111 51113 51115 51117 51119 51125 51131 51133 51137 51139 51141 51143 51147 51149 51153 51155 51157 51161 51163 51165 51167 51171 51173 51175 51177 51179 51183 51185 51191 51193 51195 51199 51510 51520 51540 51550 51580 51590 51600 51620 51630 51650 51670 51678 51680 51685 51700 51710 51730 51735 51750 51760 51775 51790 51800 51810 51840 37183 51001 51005 51007 with 6 links
## 234 most connected regions:
## 37179 37181 37191 37001 37003 37005 37017 37025 37033 37043 37055 37057 37063 37073 37081 37091 37101 37109 37123 37127 37135 37145 37151 37155 37167 51003 51013 51025 51035 51043 51049 51063 51075 51081 51097 51109 51121 51127 51135 51145 51159 51169 51181 51187 51197 51515 51530 51570 51595 51610 51640 51660 51683 51690 51720 51740 51770 51820 51830 37007 37009 37011 37013 37015 37019 37021 37023 37027 37029 37031 37035 37037 37039 37041 37045 37047 37049 37051 37053 37185 37187 37189 37193 37195 37197 37059 37061 37065 37067 37069 37071 37075 37077 37079 37083 37085 37087 37089 37199 37093 37095 37097 37099 37103 37105 37107 37111 37113 37115 37117 37119 37121 37125 37129 37131 37133 37137 37139 37141 37143 37147 37149 37153 37157 37159 37161 37163 37165 37169 37171 37173 37175 37177 51009 51011 51015 51017 51019 51021 51023 51027 51029 51031 51033 51036 51037 51041 51045 51047 51051 51053 51057 51059 51061 51065 51067 51069 51071 51073 51077 51079 51083 51085 51087 51089 51091 51093 51095 51099 51101 51103 51105 51107 51111 51113 51115 51117 51119 51125 51131 51133 51137 51139 51141 51143 51147 51149 51153 51155 51157 51161 51163 51165 51167 51171 51173 51175 51177 51179 51183 51185 51191 51193 51195 51199 51510 51520 51540 51550 51580 51590 51600 51620 51630 51650 51670 51678 51680 51685 51700 51710 51730 51735 51750 51760 51775 51790 51800 51810 51840 37183 51001 51005 51007 with 6 links
head(regions.6nn.nb)
## [[1]]
## [1] 8 19 25 60 111 125
##
## [[2]]
## [1] 13 22 80 90 93 177
##
## [[3]]
## [1] 17 84 87 94 106 127
##
## [[4]]
## [1] 9 13 15 21 72 124
##
## [[5]]
## [1] 18 67 68 71 83 102
##
## [[6]]
## [1] 45 51 61 83 130 160
class(regions.6nn.nb)
## [1] "nb"
# Compute Global Moran's I with k nearest neighbors weight matrix
#...
regions.6nn.weights <- nb2listw(regions.6nn.nb) #create spatial weights from neighbors list
moran(regions$PCI, regions.6nn.weights, length(regions$PCI), Szero(regions.6nn.weights))
## $I
## [1] 0.5295223
##
## $K
## [1] 8.406088
moran.test(regions$PCI, regions.6nn.weights)
##
## Moran I test under randomisation
##
## data: regions$PCI
## weights: regions.6nn.weights
##
## Moran I statistic standard deviate = 15.131, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.529522297 -0.004291845 0.001244615
moran.plot(regions$PCI, regions.6nn.weights)

# HOW TO DRAW A BEAUTIFUL MAP
# https://r-spatial.org/r/2018/10/25/ggplot2-sf.html
library(sf)
library(ggplot2)
NCVACO_sf <- st_read("NCVACO.shp")
## Reading layer `NCVACO' from data source
## `C:\Users\Kolya\Documents\GeoAnalisys\R\NCVACO.shp' using driver `ESRI Shapefile'
## Simple feature collection with 234 features and 49 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.32187 ymin: 33.84452 xmax: -75.24227 ymax: 39.46601
## Geodetic CRS: NAD83
class(NCVACO_sf)
## [1] "sf" "data.frame"
ggplot(data = NCVACO_sf) +
geom_sf() +
xlab("Longitude") + ylab("Latitude") +
ggtitle("Map of counties", subtitle = paste0("(", length(unique(NCVACO_sf$COUNTY)), " counties)"))

ggplot(data = NCVACO_sf) +
geom_sf(aes(fill = PCI)) +
scale_fill_viridis_c(option = "plasma", trans = "log") #plasma, magma

#"viridis" colorblind-friendly palette for the color gradient was used
# Global Moran's test for residuals
attach(regions@data)
#DUI = Driving Under Influence
eq1 <- DUI1802~SALESPC+SOMECOLLP+NONWHITEP+BAPTISTSP+BKGRTOABC
reg1 <- lm(eq1)
summary(reg1)
##
## Call:
## lm(formula = eq1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0061 -3.2837 -0.7178 1.8488 25.0742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.799980 2.233946 2.596 0.0100 *
## SALESPC 0.018651 0.007891 2.364 0.0189 *
## SOMECOLLP -0.001760 0.050161 -0.035 0.9720
## NONWHITEP -0.022973 0.020605 -1.115 0.2661
## BAPTISTSP 0.062103 0.035830 1.733 0.0844 .
## BKGRTOABC 0.084020 0.136196 0.617 0.5379
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5 on 228 degrees of freedom
## Multiple R-squared: 0.0468, Adjusted R-squared: 0.0259
## F-statistic: 2.239 on 5 and 228 DF, p-value: 0.05138
lm.morantest(reg1, queen.weights)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = eq1)
## weights: queen.weights
##
## Moran I statistic standard deviate = 5.2806, p-value = 6.438e-08
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.225620905 -0.012356925 0.002030974
moran.test(reg1$residuals, queen.weights)
##
## Moran I test under randomisation
##
## data: reg1$residuals
## weights: queen.weights
##
## Moran I statistic standard deviate = 5.1214, p-value = 1.516e-07
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.225620905 -0.004291845 0.002015349
moran.plot(reg1$residuals, queen.weights)

# NEW
# plot residuals as a map ======================================================
res1 <- reg1$residuals
summary(res1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -10.0061 -3.2837 -0.7178 0.0000 1.8488 25.0742
plot(coordinates(regions),pch=21, bg="green", cex=0.1*(res1-min(res1)))

# Lagrange multiplier diagnostics for spatial dependence
lm.LMtests(reg1, listw=queen.weights, test="all", zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: queen.weights
##
## LMerr = 23.908, df = 1, p-value = 1.011e-06
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: queen.weights
##
## LMlag = 28.04, df = 1, p-value = 1.188e-07
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: queen.weights
##
## RLMerr = 5.1425, df = 1, p-value = 0.02335
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: queen.weights
##
## RLMlag = 9.2745, df = 1, p-value = 0.002324
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: queen.weights
##
## SARMA = 33.182, df = 2, p-value = 6.231e-08
#RLMlag = 9.2745, df = 1, p-value = 0.002324 => SAR model: y = X beta + rho W1 y + u
# SPATIAL REGRESSION MODELS
# SEM
library(spatialreg)
## Загрузка требуемого пакета: Matrix
##
## Присоединяю пакет: 'spatialreg'
## Следующие объекты скрыты от 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
#SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
SEM1 <- errorsarlm(eq1, listw=queen.weights, zero.policy=TRUE)
summary(SEM1)
##
## Call:errorsarlm(formula = eq1, listw = queen.weights, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.18952 -2.75581 -0.77255 1.53411 24.82251
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.0025545 2.4533119 2.0391 0.04144
## SALESPC 0.0131756 0.0071518 1.8423 0.06544
## SOMECOLLP 0.0680844 0.0559367 1.2172 0.22354
## NONWHITEP -0.0323080 0.0252645 -1.2788 0.20097
## BAPTISTSP 0.0259486 0.0392675 0.6608 0.50873
## BKGRTOABC 0.1161419 0.1313365 0.8843 0.37653
##
## Lambda: 0.4335, LR test value: 25.379, p-value: 4.7105e-07
## Asymptotic standard error: 0.073471
## z-value: 5.9003, p-value: 3.628e-09
## Wald statistic: 34.814, p-value: 3.628e-09
##
## Log likelihood: -692.9221 for error model
## ML residual variance (sigma squared): 20.888, (sigma: 4.5703)
## Number of observations: 234
## Number of parameters estimated: 8
## AIC: 1401.8, (AIC for lm: 1425.2)
# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u
SAR1 <- lagsarlm(eq1, listw=queen.weights, zero.policy=TRUE)
summary(SAR1)
##
## Call:lagsarlm(formula = eq1, listw = queen.weights, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4170 -2.7800 -0.8867 1.5935 25.1949
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.6335696 2.1177519 1.2436 0.21366
## SALESPC 0.0172949 0.0072243 2.3940 0.01667
## SOMECOLLP 0.0186026 0.0459369 0.4050 0.68551
## NONWHITEP -0.0233846 0.0189456 -1.2343 0.21709
## BAPTISTSP 0.0321759 0.0328674 0.9790 0.32760
## BKGRTOABC 0.1005083 0.1246988 0.8060 0.42024
##
## Rho: 0.41197, LR test value: 25.742, p-value: 3.9024e-07
## Asymptotic standard error: 0.074232
## z-value: 5.5498, p-value: 2.8601e-08
## Wald statistic: 30.8, p-value: 2.8601e-08
##
## Log likelihood: -692.7405 for lag model
## ML residual variance (sigma squared): 20.955, (sigma: 4.5777)
## Number of observations: 234
## Number of parameters estimated: 8
## AIC: 1401.5, (AIC for lm: 1425.2)
## LM test for residual autocorrelation
## test value: 8.0126, p-value: 0.0046454
# to interpret spatial models with Wy, compute impacts
impacts(SAR1, listw=queen.weights)
## Impact measures (lag, exact):
## Direct Indirect Total
## SALESPC 0.01805828 0.01135352 0.02941180
## SOMECOLLP 0.01942370 0.01221198 0.03163568
## NONWHITEP -0.02441681 -0.01535123 -0.03976804
## BAPTISTSP 0.03359611 0.02112239 0.05471850
## BKGRTOABC 0.10494469 0.06598036 0.17092505
# SDM
# y = x beta + rho W y + theta W X + u
SDM1 <- lagsarlm(eq1, listw=queen.weights, zero.policy=TRUE, type="mixed")
summary(SDM1)
##
## Call:lagsarlm(formula = eq1, listw = queen.weights, type = "mixed",
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.28672 -2.76676 -0.71718 1.65724 23.81805
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.089160 3.387600 2.0927 0.0363778
## SALESPC 0.020151 0.007223 2.7898 0.0052740
## SOMECOLLP 0.102488 0.060145 1.7040 0.0883772
## NONWHITEP -0.026847 0.030890 -0.8691 0.3847873
## BAPTISTSP 0.026982 0.041298 0.6534 0.5135205
## BKGRTOABC 0.112418 0.128053 0.8779 0.3799952
## lag.SALESPC 0.050723 0.018219 2.7841 0.0053682
## lag.SOMECOLLP -0.274877 0.082277 -3.3409 0.0008351
## lag.NONWHITEP -0.021329 0.038990 -0.5470 0.5843554
## lag.BAPTISTSP 0.024346 0.055829 0.4361 0.6627785
## lag.BKGRTOABC -0.240221 0.258061 -0.9309 0.3519221
##
## Rho: 0.34748, LR test value: 17.222, p-value: 3.3251e-05
## Asymptotic standard error: 0.077433
## z-value: 4.4875, p-value: 7.2063e-06
## Wald statistic: 20.138, p-value: 7.2063e-06
##
## Log likelihood: -682.6046 for mixed model
## ML residual variance (sigma squared): 19.457, (sigma: 4.411)
## Number of observations: 234
## Number of parameters estimated: 13
## AIC: 1391.2, (AIC for lm: 1406.4)
## LM test for residual autocorrelation
## test value: 1.1421, p-value: 0.28522
impacts(SDM1, listw=queen.weights)
## Impact measures (mixed, exact):
## Direct Indirect Total
## SALESPC 0.02512984 0.08348549 0.10861533
## SOMECOLLP 0.08184998 -0.34604022 -0.26419024
## NONWHITEP -0.02949147 -0.04433926 -0.07383073
## BAPTISTSP 0.02989121 0.04877077 0.07866199
## BKGRTOABC 0.09506713 -0.29092806 -0.19586093
# goodness of fit
AIC(SEM1)
## [1] 1401.844
AIC(SAR1)
## [1] 1401.481
AIC(SDM1)
## [1] 1391.209
# Extract log-likelihood
logLik(SEM1)
## 'log Lik.' -692.9221 (df=8)
logLik(SAR1)
## 'log Lik.' -692.7405 (df=8)
logLik(SDM1)
## 'log Lik.' -682.6046 (df=13)
#R2 not appropriate!
cor(DUI1802, fitted(reg1))^2
## [1] 0.04680096
cor(DUI1802, fitted(SEM1))^2
## This method assumes the response is known - see manual page
## [1] 0.1973918
cor(DUI1802, fitted(SAR1))^2
## This method assumes the response is known - see manual page
## [1] 0.1922327
cor(DUI1802, fitted(SDM1))^2
## This method assumes the response is known - see manual page
## [1] 0.2413917
# Assignment ===================================================================
# SAR, SEM based on rook.weights
# LM-tests for rook.weight
# SAR, SEM based on rook.weights -----------------------------------------------
# SPATIAL REGRESSION MODELS
# SEM
library(spatialreg)
#SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
SEM2 <- errorsarlm(eq1, listw=rook.weights, zero.policy=TRUE)
summary(SEM2)
##
## Call:errorsarlm(formula = eq1, listw = rook.weights, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.17958 -2.79622 -0.78683 1.52166 24.81822
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.0753830 2.4535480 2.0686 0.03858
## SALESPC 0.0131657 0.0071382 1.8444 0.06513
## SOMECOLLP 0.0662494 0.0559146 1.1848 0.23608
## NONWHITEP -0.0329825 0.0253206 -1.3026 0.19271
## BAPTISTSP 0.0267924 0.0392660 0.6823 0.49503
## BKGRTOABC 0.1130596 0.1309863 0.8631 0.38806
##
## Lambda: 0.43462, LR test value: 25.8, p-value: 3.7867e-07
## Asymptotic standard error: 0.072964
## z-value: 5.9566, p-value: 2.5757e-09
## Wald statistic: 35.481, p-value: 2.5757e-09
##
## Log likelihood: -692.7115 for error model
## ML residual variance (sigma squared): 20.829, (sigma: 4.5639)
## Number of observations: 234
## Number of parameters estimated: 8
## AIC: 1401.4, (AIC for lm: 1425.2)
# SAR
# y = X beta + rho W1 y + u
SAR2 <- lagsarlm(eq1, listw=rook.weights, zero.policy=TRUE)
summary(SAR2)
##
## Call:lagsarlm(formula = eq1, listw = rook.weights, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.40138 -2.81597 -0.86044 1.63072 25.19594
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.6424757 2.1141527 1.2499 0.21134
## SALESPC 0.0173232 0.0072136 2.4015 0.01633
## SOMECOLLP 0.0181287 0.0458684 0.3952 0.69267
## NONWHITEP -0.0234914 0.0189203 -1.2416 0.21439
## BAPTISTSP 0.0320012 0.0328196 0.9751 0.32953
## BKGRTOABC 0.0997135 0.1245141 0.8008 0.42324
##
## Rho: 0.41383, LR test value: 26.176, p-value: 3.116e-07
## Asymptotic standard error: 0.073673
## z-value: 5.6171, p-value: 1.942e-08
## Wald statistic: 31.552, p-value: 1.942e-08
##
## Log likelihood: -692.5233 for lag model
## ML residual variance (sigma squared): 20.893, (sigma: 4.5709)
## Number of observations: 234
## Number of parameters estimated: 8
## AIC: 1401, (AIC for lm: 1425.2)
## LM test for residual autocorrelation
## test value: 6.9895, p-value: 0.0081991
# to interpret spatial models with Wy, compute impacts
impacts(SAR2, listw=rook.weights)
## Impact measures (lag, exact):
## Direct Indirect Total
## SALESPC 0.01810842 0.01144478 0.02955320
## SOMECOLLP 0.01895042 0.01197694 0.03092736
## NONWHITEP -0.02455615 -0.01551984 -0.04007598
## BAPTISTSP 0.03345170 0.02114196 0.05459366
## BKGRTOABC 0.10423320 0.06587688 0.17011008
# goodness of fit
AIC(SEM2)
## [1] 1401.423
AIC(SAR2)
## [1] 1401.047
# Extract log-likelihood
logLik(SEM2)
## 'log Lik.' -692.7115 (df=8)
logLik(SAR2)
## 'log Lik.' -692.5233 (df=8)
# LM-tests for rook.weight -----------------------------------------------------
# Lagrange multiplier diagnostics for spatial dependence
lm.LMtests(reg1, listw=rook.weights, test="all", zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: rook.weights
##
## LMerr = 24.187, df = 1, p-value = 8.743e-07
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: rook.weights
##
## LMlag = 28.246, df = 1, p-value = 1.068e-07
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: rook.weights
##
## RLMerr = 4.9515, df = 1, p-value = 0.02607
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: rook.weights
##
## RLMlag = 9.0107, df = 1, p-value = 0.002684
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = eq1)
## weights: rook.weights
##
## SARMA = 33.198, df = 2, p-value = 6.184e-08
# Analitics
# LMerr = 24.187, df = 1, p-value = 8.743e-07 => significant
# LMlag = 28.246, df = 1, p-value = 1.068e-07 => significant
# conclusion => moving to Robust LM Diagnostics
# RLMerr = 4.9515, df = 1, p-value = 0.02607
# RLMlag = 9.0107, df = 1, p-value = 0.002684
# p-value: RLMlag < RLMerr => RLMlag is chosen (Spatial lag model)
# SAR: y = X beta + rho W1 y + u