library(sp)
library(spdep)
## Загрузка требуемого пакета: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Загрузка требуемого пакета: sf
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(rgdal)
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-4, (SVN revision 1196)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.5.2, released 2022/09/02
## Path to GDAL shared files: C:/Users/Kolya/AppData/Local/R/win-library/4.2/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: C:/Users/Kolya/AppData/Local/R/win-library/4.2/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.6-0
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
MyPath1 <-"C:/Users/Kolya/Documents/GeoAnalisys/R"
setwd(MyPath1) # working directory

###—— read in a shapefile ——-
#regions<- readOGR(file.choose()) #NCVACO.shp
regions <- readOGR("NCVACO.shp")
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\GeoAnalisys\R\NCVACO.shp", layer: "NCVACO"
## with 234 features
## It has 49 fields
## Integer64 fields read as strings:  FIPS qtystores PCI COUNTMXBV DC GA KY MD SC TN WV VA COUNTBKGR TOTALPOP POP18OV LABFORCE HHOLDS POP25OV POP16OV
# Integer64 fields read as strings: ...
View(regions@data)
View(regions@data$COUNTY)
View(regions@data$FIPS2)
is.numeric(regions$PCI)
## [1] FALSE
regions$PCI <- as.numeric(regions$PCI)
summary(regions)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x -84.32187 -75.24227
## y  33.84452  39.46601
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=NAD83 +no_defs]
## Data attributes:
##     GEO_ID             STATE              COUNTY              NAME          
##  Length:234         Length:234         Length:234         Length:234        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      LSAD             CENSUSAREA         FIPS2                Lon        
##  Length:234         Min.   :  1.999   Length:234         Min.   :-83.94  
##  Class :character   1st Qu.:234.037   Class :character   1st Qu.:-80.32  
##  Mode  :character   Median :381.377   Mode  :character   Median :-78.55  
##                     Mean   :376.530                      Mean   :-78.94  
##                     3rd Qu.:514.512                      3rd Qu.:-77.37  
##                     Max.   :968.941                      Max.   :-75.63  
##       Lat            FIPS            qtystores            SALESPC      
##  Min.   :34.16   Length:234         Length:234         Min.   :  0.00  
##  1st Qu.:35.82   Class :character   Class :character   1st Qu.: 38.00  
##  Median :36.75   Mode  :character   Mode  :character   Median : 58.80  
##  Mean   :36.74                                         Mean   : 64.98  
##  3rd Qu.:37.53                                         3rd Qu.: 78.74  
##  Max.   :39.17                                         Max.   :297.70  
##       PCI          COMM15OVP        COLLENRP        SOMECOLLP    
##  Min.   :12808   Min.   : 5.49   Min.   : 0.890   Min.   :16.55  
##  1st Qu.:16154   1st Qu.:25.89   1st Qu.: 2.663   1st Qu.:23.38  
##  Median :18073   Median :30.42   Median : 3.180   Median :27.15  
##  Mean   :19003   Mean   :30.52   Mean   : 4.576   Mean   :28.81  
##  3rd Qu.:20505   3rd Qu.:34.96   3rd Qu.: 4.180   3rd Qu.:32.39  
##  Max.   :41014   Max.   :48.79   Max.   :41.300   Max.   :59.92  
##      ARMEDP          NONWHITEP          UNEMPP          ENTRECP      
##  Min.   : 0.0000   Min.   : 0.670   Min.   : 1.710   Min.   : 2.840  
##  1st Qu.: 0.0200   1st Qu.: 8.928   1st Qu.: 3.632   1st Qu.: 4.902  
##  Median : 0.0800   Median :22.170   Median : 4.670   Median : 5.770  
##  Mean   : 0.6656   Mean   :24.763   Mean   : 5.274   Mean   : 6.441  
##  3rd Qu.: 0.2400   3rd Qu.:37.795   3rd Qu.: 6.452   3rd Qu.: 7.425  
##  Max.   :21.5300   Max.   :81.590   Max.   :41.410   Max.   :22.540  
##     PUBASSTP        POVPOPP          URBANP         FOREIGNBP     
##  Min.   :0.480   Min.   : 2.75   Min.   :  0.00   Min.   : 0.000  
##  1st Qu.:2.005   1st Qu.: 9.17   1st Qu.:  5.94   1st Qu.: 0.500  
##  Median :2.760   Median :12.57   Median : 33.84   Median : 0.995  
##  Mean   :3.099   Mean   :13.06   Mean   : 41.16   Mean   : 1.854  
##  3rd Qu.:3.890   3rd Qu.:16.71   3rd Qu.: 70.97   3rd Qu.: 2.243  
##  Max.   :8.650   Max.   :31.35   Max.   :100.00   Max.   :16.120  
##    BAPTISTSP        ADHERENTSP       BKGRTOMIX        COUNTMXBV        
##  Min.   : 0.000   Min.   : 13.28   Min.   : 0.2625   Length:234        
##  1st Qu.: 9.193   1st Qu.: 35.17   1st Qu.: 1.9611   Class :character  
##  Median :14.110   Median : 43.14   Median : 3.6519   Mode  :character  
##  Mean   :15.725   Mean   : 45.19   Mean   : 5.3687                     
##  3rd Qu.:21.250   3rd Qu.: 50.70   3rd Qu.: 6.8552                     
##  Max.   :60.130   Max.   :164.53   Max.   :38.2724                     
##     MXBVSQM          BKGRTOABC         MXBVPPOP18        DUI1802       
##  Min.   :0.00000   Min.   : 0.5492   Min.   : 0.000   Min.   : 0.2521  
##  1st Qu.:0.00325   1st Qu.: 2.8035   1st Qu.: 1.082   1st Qu.: 4.4306  
##  Median :0.02245   Median : 4.4334   Median : 3.213   Median : 6.9195  
##  Mean   :0.41420   Mean   : 4.9031   Mean   : 4.574   Mean   : 7.7809  
##  3rd Qu.:0.12915   3rd Qu.: 5.9744   3rd Qu.: 6.500   3rd Qu.: 9.9235  
##  Max.   :8.69690   Max.   :21.5013   Max.   :39.782   Max.   :32.7806  
##     FVPTHH02             DC                 GA                 KY           
##  Min.   : 0.00000   Length:234         Length:234         Length:234        
##  1st Qu.: 0.07405   Class :character   Class :character   Class :character  
##  Median : 0.38154   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 1.64369                                                           
##  3rd Qu.: 2.14565                                                           
##  Max.   :20.45836                                                           
##       MD                 SC                 TN                 WV           
##  Length:234         Length:234         Length:234         Length:234        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       VA              AREALANDSQ       COUNTBKGR           TOTALPOP        
##  Length:234         Min.   :  1.986   Length:234         Length:234        
##  Class :character   1st Qu.:235.451   Class :character   Class :character  
##  Mode  :character   Median :381.783   Mode  :character   Mode  :character  
##                     Mean   :377.358                                        
##                     3rd Qu.:518.925                                        
##                     Max.   :970.760                                        
##    POP18OV            LABFORCE            HHOLDS            POP25OV         
##  Length:234         Length:234         Length:234         Length:234        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    POP16OV         
##  Length:234        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
proj4string(regions) <- CRS("+init=epsg:4326") #assign to an object with an existing CRS, epsg4326 is WGS84
## Warning in `proj4string<-`(`*tmp*`, value = new("CRS", projargs = "+proj=longlat +datum=WGS84 +no_defs")): A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=NAD83 +no_defs
## without reprojecting.
## For reprojection, use function spTransform
is.projected(regions) #false
## [1] FALSE
class(regions) #must be "SpatialPolygonsDataFrame", package "sp"
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
plot(regions)

regions.prj <- spTransform(regions, CRS("+init=epsg:5070")) #reprojection
is.projected(regions.prj) #true
## [1] TRUE
summary(regions.prj)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##       min     max
## x 1054152 1833387
## y 1345257 1966514
## Is projected: TRUE 
## proj4string :
## [+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0
## +datum=NAD83 +units=m +no_defs]
## Data attributes:
##     GEO_ID             STATE              COUNTY              NAME          
##  Length:234         Length:234         Length:234         Length:234        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      LSAD             CENSUSAREA         FIPS2                Lon        
##  Length:234         Min.   :  1.999   Length:234         Min.   :-83.94  
##  Class :character   1st Qu.:234.037   Class :character   1st Qu.:-80.32  
##  Mode  :character   Median :381.377   Mode  :character   Median :-78.55  
##                     Mean   :376.530                      Mean   :-78.94  
##                     3rd Qu.:514.512                      3rd Qu.:-77.37  
##                     Max.   :968.941                      Max.   :-75.63  
##       Lat            FIPS            qtystores            SALESPC      
##  Min.   :34.16   Length:234         Length:234         Min.   :  0.00  
##  1st Qu.:35.82   Class :character   Class :character   1st Qu.: 38.00  
##  Median :36.75   Mode  :character   Mode  :character   Median : 58.80  
##  Mean   :36.74                                         Mean   : 64.98  
##  3rd Qu.:37.53                                         3rd Qu.: 78.74  
##  Max.   :39.17                                         Max.   :297.70  
##       PCI          COMM15OVP        COLLENRP        SOMECOLLP    
##  Min.   :12808   Min.   : 5.49   Min.   : 0.890   Min.   :16.55  
##  1st Qu.:16154   1st Qu.:25.89   1st Qu.: 2.663   1st Qu.:23.38  
##  Median :18073   Median :30.42   Median : 3.180   Median :27.15  
##  Mean   :19003   Mean   :30.52   Mean   : 4.576   Mean   :28.81  
##  3rd Qu.:20505   3rd Qu.:34.96   3rd Qu.: 4.180   3rd Qu.:32.39  
##  Max.   :41014   Max.   :48.79   Max.   :41.300   Max.   :59.92  
##      ARMEDP          NONWHITEP          UNEMPP          ENTRECP      
##  Min.   : 0.0000   Min.   : 0.670   Min.   : 1.710   Min.   : 2.840  
##  1st Qu.: 0.0200   1st Qu.: 8.928   1st Qu.: 3.632   1st Qu.: 4.902  
##  Median : 0.0800   Median :22.170   Median : 4.670   Median : 5.770  
##  Mean   : 0.6656   Mean   :24.763   Mean   : 5.274   Mean   : 6.441  
##  3rd Qu.: 0.2400   3rd Qu.:37.795   3rd Qu.: 6.452   3rd Qu.: 7.425  
##  Max.   :21.5300   Max.   :81.590   Max.   :41.410   Max.   :22.540  
##     PUBASSTP        POVPOPP          URBANP         FOREIGNBP     
##  Min.   :0.480   Min.   : 2.75   Min.   :  0.00   Min.   : 0.000  
##  1st Qu.:2.005   1st Qu.: 9.17   1st Qu.:  5.94   1st Qu.: 0.500  
##  Median :2.760   Median :12.57   Median : 33.84   Median : 0.995  
##  Mean   :3.099   Mean   :13.06   Mean   : 41.16   Mean   : 1.854  
##  3rd Qu.:3.890   3rd Qu.:16.71   3rd Qu.: 70.97   3rd Qu.: 2.243  
##  Max.   :8.650   Max.   :31.35   Max.   :100.00   Max.   :16.120  
##    BAPTISTSP        ADHERENTSP       BKGRTOMIX        COUNTMXBV        
##  Min.   : 0.000   Min.   : 13.28   Min.   : 0.2625   Length:234        
##  1st Qu.: 9.193   1st Qu.: 35.17   1st Qu.: 1.9611   Class :character  
##  Median :14.110   Median : 43.14   Median : 3.6519   Mode  :character  
##  Mean   :15.725   Mean   : 45.19   Mean   : 5.3687                     
##  3rd Qu.:21.250   3rd Qu.: 50.70   3rd Qu.: 6.8552                     
##  Max.   :60.130   Max.   :164.53   Max.   :38.2724                     
##     MXBVSQM          BKGRTOABC         MXBVPPOP18        DUI1802       
##  Min.   :0.00000   Min.   : 0.5492   Min.   : 0.000   Min.   : 0.2521  
##  1st Qu.:0.00325   1st Qu.: 2.8035   1st Qu.: 1.082   1st Qu.: 4.4306  
##  Median :0.02245   Median : 4.4334   Median : 3.213   Median : 6.9195  
##  Mean   :0.41420   Mean   : 4.9031   Mean   : 4.574   Mean   : 7.7809  
##  3rd Qu.:0.12915   3rd Qu.: 5.9744   3rd Qu.: 6.500   3rd Qu.: 9.9235  
##  Max.   :8.69690   Max.   :21.5013   Max.   :39.782   Max.   :32.7806  
##     FVPTHH02             DC                 GA                 KY           
##  Min.   : 0.00000   Length:234         Length:234         Length:234        
##  1st Qu.: 0.07405   Class :character   Class :character   Class :character  
##  Median : 0.38154   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 1.64369                                                           
##  3rd Qu.: 2.14565                                                           
##  Max.   :20.45836                                                           
##       MD                 SC                 TN                 WV           
##  Length:234         Length:234         Length:234         Length:234        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       VA              AREALANDSQ       COUNTBKGR           TOTALPOP        
##  Length:234         Min.   :  1.986   Length:234         Length:234        
##  Class :character   1st Qu.:235.451   Class :character   Class :character  
##  Mode  :character   Median :381.783   Mode  :character   Mode  :character  
##                     Mean   :377.358                                        
##                     3rd Qu.:518.925                                        
##                     Max.   :970.760                                        
##    POP18OV            LABFORCE            HHOLDS            POP25OV         
##  Length:234         Length:234         Length:234         Length:234        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    POP16OV         
##  Length:234        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
plot(regions.prj)

# CREATE LIST OF NEIGHBORS: queen/rook contiguity
queen.nb<-poly2nb(regions) #create neighbors list (queen contiguity) data from polygon list
summary(queen.nb)
## Neighbour list object:
## Number of regions: 234 
## Number of nonzero links: 1132 
## Percentage nonzero weights: 2.067353 
## Average number of links: 4.837607 
## Link number distribution:
## 
##  1  2  3  4  5  6  7  8  9 11 
## 16 26 22 32 36 49 37  9  6  1 
## 16 least connected regions:
## 45 46 48 51 53 54 57 179 206 207 209 211 216 226 229 231 with 1 link
## 1 most connected region:
## 146 with 11 links
rook.nb<-poly2nb(regions,queen=FALSE)
summary(rook.nb)
## Neighbour list object:
## Number of regions: 234 
## Number of nonzero links: 1104 
## Percentage nonzero weights: 2.016217 
## Average number of links: 4.717949 
## Link number distribution:
## 
##  1  2  3  4  5  6  7  8  9 10 
## 16 26 24 33 41 54 24 10  5  1 
## 16 least connected regions:
## 45 46 48 51 53 54 57 179 206 207 209 211 216 226 229 231 with 1 link
## 1 most connected region:
## 146 with 10 links
isTRUE(all.equal(queen.nb,rook.nb,check.attributes=FALSE))
## [1] FALSE
# CREATE SPATIAL WEIGHTS. Compute Global Moran's I
queen.weights <- nb2listw(queen.nb) #create spatial weights from neighbors list

moran(regions$PCI, queen.weights, length(regions$PCI), Szero(queen.weights))
## $I
## [1] 0.6585677
## 
## $K
## [1] 8.406088
moran.test(regions$PCI, queen.weights)
## 
##  Moran I test under randomisation
## 
## data:  regions$PCI  
## weights: queen.weights    
## 
## Moran I statistic standard deviate = 14.682, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.658567737      -0.004291845       0.002038240
moran.plot(regions$PCI, queen.weights)

moran(regions$SALESPC, queen.weights, length(regions$SALESPC), Szero(queen.weights))
## $I
## [1] -0.01394866
## 
## $K
## [1] 9.058721
moran.test(regions$SALESPC, queen.weights)
## 
##  Moran I test under randomisation
## 
## data:  regions$SALESPC  
## weights: queen.weights    
## 
## Moran I statistic standard deviate = -0.21421, p-value = 0.5848
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      -0.013948657      -0.004291845       0.002032305
moran.plot(regions$SALESPC, queen.weights)

rook.weights <- nb2listw(rook.nb) #create spatial weights from neighbors list

moran(regions$PCI, rook.weights, length(regions$PCI), Szero(rook.weights))
## $I
## [1] 0.6566088
## 
## $K
## [1] 8.406088
moran.test(regions$PCI, rook.weights)
## 
##  Moran I test under randomisation
## 
## data:  regions$PCI  
## weights: rook.weights    
## 
## Moran I statistic standard deviate = 14.522, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.656608753      -0.004291845       0.002071169
moran.plot(regions$PCI, rook.weights)

moran(regions$SALESPC, rook.weights, length(regions$SALESPC), Szero(rook.weights))
## $I
## [1] -0.01300157
## 
## $K
## [1] 9.058721
moran.test(regions$SALESPC, rook.weights)
## 
##  Moran I test under randomisation
## 
## data:  regions$SALESPC  
## weights: rook.weights    
## 
## Moran I statistic standard deviate = -0.19166, p-value = 0.576
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      -0.013001570      -0.004291845       0.002065138
moran.plot(regions$SALESPC, rook.weights)

moran(regions$SALESPC,nb2listw(queen.nb), length(regions$SALESPC), Szero(nb2listw(queen.nb)))
## $I
## [1] -0.01394866
## 
## $K
## [1] 9.058721
moran.test(regions$SALESPC,nb2listw(queen.nb))
## 
##  Moran I test under randomisation
## 
## data:  regions$SALESPC  
## weights: nb2listw(queen.nb)    
## 
## Moran I statistic standard deviate = -0.21421, p-value = 0.5848
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      -0.013948657      -0.004291845       0.002032305
# CREATE LIST OF NEIGHBORS: k nearest neighbors
library(rgeos)
## rgeos version: 0.6-1, (SVN revision 692)
##  GEOS runtime version: 3.9.3-CAPI-1.14.3 
##  Please note that rgeos will be retired during 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
##  GEOS using OverlayNG
##  Linking to sp version: 1.6-0 
##  Polygon checking: TRUE
regions.centroids<-gCentroid(regions,byid=TRUE)
#regions.centroids.prj<-gCentroid(regions.prj,byid=TRUE)
head(regions.centroids)
## SpatialPoints:
##           x        y
## 0 -80.53085 34.98821
## 1 -78.40829 36.36479
## 2 -78.00390 35.36370
## 3 -79.39878 36.04361
## 4 -81.17707 35.92073
## 5 -81.12768 36.49121
## Coordinate Reference System (CRS) arguments: +proj=longlat +datum=WGS84
## +no_defs
class(regions.centroids)
## [1] "SpatialPoints"
## attr(,"package")
## [1] "sp"
regions.6nn.nb<-knn2nb(knearneigh(regions.centroids,k=6,longlat = TRUE),row.names=regions$FIPS)
#regions.6nn.nb<-knn2nb(knearneigh(regions.centroids.prj,k=6,longlat = FALSE),row.names=regions$FIPS)
summary(regions.6nn.nb)
## Neighbour list object:
## Number of regions: 234 
## Number of nonzero links: 1404 
## Percentage nonzero weights: 2.564103 
## Average number of links: 6 
## Non-symmetric neighbours list
## Link number distribution:
## 
##   6 
## 234 
## 234 least connected regions:
## 37179 37181 37191 37001 37003 37005 37017 37025 37033 37043 37055 37057 37063 37073 37081 37091 37101 37109 37123 37127 37135 37145 37151 37155 37167 51003 51013 51025 51035 51043 51049 51063 51075 51081 51097 51109 51121 51127 51135 51145 51159 51169 51181 51187 51197 51515 51530 51570 51595 51610 51640 51660 51683 51690 51720 51740 51770 51820 51830 37007 37009 37011 37013 37015 37019 37021 37023 37027 37029 37031 37035 37037 37039 37041 37045 37047 37049 37051 37053 37185 37187 37189 37193 37195 37197 37059 37061 37065 37067 37069 37071 37075 37077 37079 37083 37085 37087 37089 37199 37093 37095 37097 37099 37103 37105 37107 37111 37113 37115 37117 37119 37121 37125 37129 37131 37133 37137 37139 37141 37143 37147 37149 37153 37157 37159 37161 37163 37165 37169 37171 37173 37175 37177 51009 51011 51015 51017 51019 51021 51023 51027 51029 51031 51033 51036 51037 51041 51045 51047 51051 51053 51057 51059 51061 51065 51067 51069 51071 51073 51077 51079 51083 51085 51087 51089 51091 51093 51095 51099 51101 51103 51105 51107 51111 51113 51115 51117 51119 51125 51131 51133 51137 51139 51141 51143 51147 51149 51153 51155 51157 51161 51163 51165 51167 51171 51173 51175 51177 51179 51183 51185 51191 51193 51195 51199 51510 51520 51540 51550 51580 51590 51600 51620 51630 51650 51670 51678 51680 51685 51700 51710 51730 51735 51750 51760 51775 51790 51800 51810 51840 37183 51001 51005 51007 with 6 links
## 234 most connected regions:
## 37179 37181 37191 37001 37003 37005 37017 37025 37033 37043 37055 37057 37063 37073 37081 37091 37101 37109 37123 37127 37135 37145 37151 37155 37167 51003 51013 51025 51035 51043 51049 51063 51075 51081 51097 51109 51121 51127 51135 51145 51159 51169 51181 51187 51197 51515 51530 51570 51595 51610 51640 51660 51683 51690 51720 51740 51770 51820 51830 37007 37009 37011 37013 37015 37019 37021 37023 37027 37029 37031 37035 37037 37039 37041 37045 37047 37049 37051 37053 37185 37187 37189 37193 37195 37197 37059 37061 37065 37067 37069 37071 37075 37077 37079 37083 37085 37087 37089 37199 37093 37095 37097 37099 37103 37105 37107 37111 37113 37115 37117 37119 37121 37125 37129 37131 37133 37137 37139 37141 37143 37147 37149 37153 37157 37159 37161 37163 37165 37169 37171 37173 37175 37177 51009 51011 51015 51017 51019 51021 51023 51027 51029 51031 51033 51036 51037 51041 51045 51047 51051 51053 51057 51059 51061 51065 51067 51069 51071 51073 51077 51079 51083 51085 51087 51089 51091 51093 51095 51099 51101 51103 51105 51107 51111 51113 51115 51117 51119 51125 51131 51133 51137 51139 51141 51143 51147 51149 51153 51155 51157 51161 51163 51165 51167 51171 51173 51175 51177 51179 51183 51185 51191 51193 51195 51199 51510 51520 51540 51550 51580 51590 51600 51620 51630 51650 51670 51678 51680 51685 51700 51710 51730 51735 51750 51760 51775 51790 51800 51810 51840 37183 51001 51005 51007 with 6 links
head(regions.6nn.nb)
## [[1]]
## [1]   8  19  25  60 111 125
## 
## [[2]]
## [1]  13  22  80  90  93 177
## 
## [[3]]
## [1]  17  84  87  94 106 127
## 
## [[4]]
## [1]   9  13  15  21  72 124
## 
## [[5]]
## [1]  18  67  68  71  83 102
## 
## [[6]]
## [1]  45  51  61  83 130 160
class(regions.6nn.nb)
## [1] "nb"
# Compute Global Moran's I with k nearest neighbors weight matrix
#...
regions.6nn.weights <- nb2listw(regions.6nn.nb) #create spatial weights from neighbors list

moran(regions$PCI, regions.6nn.weights, length(regions$PCI), Szero(regions.6nn.weights))
## $I
## [1] 0.5295223
## 
## $K
## [1] 8.406088
moran.test(regions$PCI, regions.6nn.weights)
## 
##  Moran I test under randomisation
## 
## data:  regions$PCI  
## weights: regions.6nn.weights    
## 
## Moran I statistic standard deviate = 15.131, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.529522297      -0.004291845       0.001244615
moran.plot(regions$PCI, regions.6nn.weights)

# HOW TO DRAW A BEAUTIFUL MAP
# https://r-spatial.org/r/2018/10/25/ggplot2-sf.html
library(sf)
library(ggplot2)
NCVACO_sf <- st_read("NCVACO.shp")
## Reading layer `NCVACO' from data source 
##   `C:\Users\Kolya\Documents\GeoAnalisys\R\NCVACO.shp' using driver `ESRI Shapefile'
## Simple feature collection with 234 features and 49 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.32187 ymin: 33.84452 xmax: -75.24227 ymax: 39.46601
## Geodetic CRS:  NAD83
class(NCVACO_sf)
## [1] "sf"         "data.frame"
ggplot(data = NCVACO_sf) +
  geom_sf() +
  xlab("Longitude") + ylab("Latitude") +
  ggtitle("Map of counties", subtitle = paste0("(", length(unique(NCVACO_sf$COUNTY)), " counties)"))

ggplot(data = NCVACO_sf) +
  geom_sf(aes(fill = PCI)) +
  scale_fill_viridis_c(option = "plasma", trans = "log") #plasma, magma

#"viridis" colorblind-friendly palette for the color gradient was used






# Global Moran's test for residuals
attach(regions@data)
#DUI = Driving Under Influence
eq1 <- DUI1802~SALESPC+SOMECOLLP+NONWHITEP+BAPTISTSP+BKGRTOABC
reg1 <- lm(eq1)
summary(reg1)
## 
## Call:
## lm(formula = eq1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0061  -3.2837  -0.7178   1.8488  25.0742 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  5.799980   2.233946   2.596   0.0100 *
## SALESPC      0.018651   0.007891   2.364   0.0189 *
## SOMECOLLP   -0.001760   0.050161  -0.035   0.9720  
## NONWHITEP   -0.022973   0.020605  -1.115   0.2661  
## BAPTISTSP    0.062103   0.035830   1.733   0.0844 .
## BKGRTOABC    0.084020   0.136196   0.617   0.5379  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5 on 228 degrees of freedom
## Multiple R-squared:  0.0468, Adjusted R-squared:  0.0259 
## F-statistic: 2.239 on 5 and 228 DF,  p-value: 0.05138
lm.morantest(reg1, queen.weights)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = eq1)
## weights: queen.weights
## 
## Moran I statistic standard deviate = 5.2806, p-value = 6.438e-08
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.225620905     -0.012356925      0.002030974
moran.test(reg1$residuals, queen.weights)
## 
##  Moran I test under randomisation
## 
## data:  reg1$residuals  
## weights: queen.weights    
## 
## Moran I statistic standard deviate = 5.1214, p-value = 1.516e-07
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.225620905      -0.004291845       0.002015349
moran.plot(reg1$residuals, queen.weights)

# NEW
# plot residuals as a map ======================================================
res1 <- reg1$residuals
summary(res1)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -10.0061  -3.2837  -0.7178   0.0000   1.8488  25.0742
plot(coordinates(regions),pch=21, bg="green", cex=0.1*(res1-min(res1)))

# Lagrange multiplier diagnostics for spatial dependence
lm.LMtests(reg1, listw=queen.weights, test="all", zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: queen.weights
## 
## LMerr = 23.908, df = 1, p-value = 1.011e-06
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: queen.weights
## 
## LMlag = 28.04, df = 1, p-value = 1.188e-07
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: queen.weights
## 
## RLMerr = 5.1425, df = 1, p-value = 0.02335
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: queen.weights
## 
## RLMlag = 9.2745, df = 1, p-value = 0.002324
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: queen.weights
## 
## SARMA = 33.182, df = 2, p-value = 6.231e-08
#RLMlag = 9.2745, df = 1, p-value = 0.002324 => SAR model: y = X beta + rho W1 y + u

# SPATIAL REGRESSION MODELS
# SEM
library(spatialreg)
## Загрузка требуемого пакета: Matrix
## 
## Присоединяю пакет: 'spatialreg'
## Следующие объекты скрыты от 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
#SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
SEM1 <- errorsarlm(eq1, listw=queen.weights, zero.policy=TRUE)
summary(SEM1)
## 
## Call:errorsarlm(formula = eq1, listw = queen.weights, zero.policy = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9.18952 -2.75581 -0.77255  1.53411 24.82251 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  5.0025545  2.4533119  2.0391  0.04144
## SALESPC      0.0131756  0.0071518  1.8423  0.06544
## SOMECOLLP    0.0680844  0.0559367  1.2172  0.22354
## NONWHITEP   -0.0323080  0.0252645 -1.2788  0.20097
## BAPTISTSP    0.0259486  0.0392675  0.6608  0.50873
## BKGRTOABC    0.1161419  0.1313365  0.8843  0.37653
## 
## Lambda: 0.4335, LR test value: 25.379, p-value: 4.7105e-07
## Asymptotic standard error: 0.073471
##     z-value: 5.9003, p-value: 3.628e-09
## Wald statistic: 34.814, p-value: 3.628e-09
## 
## Log likelihood: -692.9221 for error model
## ML residual variance (sigma squared): 20.888, (sigma: 4.5703)
## Number of observations: 234 
## Number of parameters estimated: 8 
## AIC: 1401.8, (AIC for lm: 1425.2)
# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u
SAR1 <- lagsarlm(eq1, listw=queen.weights, zero.policy=TRUE)
summary(SAR1)
## 
## Call:lagsarlm(formula = eq1, listw = queen.weights, zero.policy = TRUE)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4170 -2.7800 -0.8867  1.5935 25.1949 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  2.6335696  2.1177519  1.2436  0.21366
## SALESPC      0.0172949  0.0072243  2.3940  0.01667
## SOMECOLLP    0.0186026  0.0459369  0.4050  0.68551
## NONWHITEP   -0.0233846  0.0189456 -1.2343  0.21709
## BAPTISTSP    0.0321759  0.0328674  0.9790  0.32760
## BKGRTOABC    0.1005083  0.1246988  0.8060  0.42024
## 
## Rho: 0.41197, LR test value: 25.742, p-value: 3.9024e-07
## Asymptotic standard error: 0.074232
##     z-value: 5.5498, p-value: 2.8601e-08
## Wald statistic: 30.8, p-value: 2.8601e-08
## 
## Log likelihood: -692.7405 for lag model
## ML residual variance (sigma squared): 20.955, (sigma: 4.5777)
## Number of observations: 234 
## Number of parameters estimated: 8 
## AIC: 1401.5, (AIC for lm: 1425.2)
## LM test for residual autocorrelation
## test value: 8.0126, p-value: 0.0046454
# to interpret spatial models with Wy, compute impacts
impacts(SAR1, listw=queen.weights)
## Impact measures (lag, exact):
##                Direct    Indirect       Total
## SALESPC    0.01805828  0.01135352  0.02941180
## SOMECOLLP  0.01942370  0.01221198  0.03163568
## NONWHITEP -0.02441681 -0.01535123 -0.03976804
## BAPTISTSP  0.03359611  0.02112239  0.05471850
## BKGRTOABC  0.10494469  0.06598036  0.17092505
# SDM
# y = x beta + rho W y + theta W X + u
SDM1 <- lagsarlm(eq1, listw=queen.weights, zero.policy=TRUE, type="mixed")
summary(SDM1)
## 
## Call:lagsarlm(formula = eq1, listw = queen.weights, type = "mixed", 
##     zero.policy = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.28672 -2.76676 -0.71718  1.65724 23.81805 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)    7.089160   3.387600  2.0927 0.0363778
## SALESPC        0.020151   0.007223  2.7898 0.0052740
## SOMECOLLP      0.102488   0.060145  1.7040 0.0883772
## NONWHITEP     -0.026847   0.030890 -0.8691 0.3847873
## BAPTISTSP      0.026982   0.041298  0.6534 0.5135205
## BKGRTOABC      0.112418   0.128053  0.8779 0.3799952
## lag.SALESPC    0.050723   0.018219  2.7841 0.0053682
## lag.SOMECOLLP -0.274877   0.082277 -3.3409 0.0008351
## lag.NONWHITEP -0.021329   0.038990 -0.5470 0.5843554
## lag.BAPTISTSP  0.024346   0.055829  0.4361 0.6627785
## lag.BKGRTOABC -0.240221   0.258061 -0.9309 0.3519221
## 
## Rho: 0.34748, LR test value: 17.222, p-value: 3.3251e-05
## Asymptotic standard error: 0.077433
##     z-value: 4.4875, p-value: 7.2063e-06
## Wald statistic: 20.138, p-value: 7.2063e-06
## 
## Log likelihood: -682.6046 for mixed model
## ML residual variance (sigma squared): 19.457, (sigma: 4.411)
## Number of observations: 234 
## Number of parameters estimated: 13 
## AIC: 1391.2, (AIC for lm: 1406.4)
## LM test for residual autocorrelation
## test value: 1.1421, p-value: 0.28522
impacts(SDM1, listw=queen.weights)
## Impact measures (mixed, exact):
##                Direct    Indirect       Total
## SALESPC    0.02512984  0.08348549  0.10861533
## SOMECOLLP  0.08184998 -0.34604022 -0.26419024
## NONWHITEP -0.02949147 -0.04433926 -0.07383073
## BAPTISTSP  0.02989121  0.04877077  0.07866199
## BKGRTOABC  0.09506713 -0.29092806 -0.19586093
# goodness of fit

AIC(SEM1)
## [1] 1401.844
AIC(SAR1)
## [1] 1401.481
AIC(SDM1)
## [1] 1391.209
# Extract log-likelihood
logLik(SEM1)
## 'log Lik.' -692.9221 (df=8)
logLik(SAR1)
## 'log Lik.' -692.7405 (df=8)
logLik(SDM1)
## 'log Lik.' -682.6046 (df=13)
#R2 not appropriate!
cor(DUI1802, fitted(reg1))^2
## [1] 0.04680096
cor(DUI1802, fitted(SEM1))^2
## This method assumes the response is known - see manual page
## [1] 0.1973918
cor(DUI1802, fitted(SAR1))^2
## This method assumes the response is known - see manual page
## [1] 0.1922327
cor(DUI1802, fitted(SDM1))^2
## This method assumes the response is known - see manual page
## [1] 0.2413917
# Assignment ===================================================================
# SAR, SEM based on rook.weights
# LM-tests for rook.weight

# SAR, SEM based on rook.weights -----------------------------------------------
# SPATIAL REGRESSION MODELS
# SEM
library(spatialreg)
#SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
SEM2 <- errorsarlm(eq1, listw=rook.weights, zero.policy=TRUE)
summary(SEM2)
## 
## Call:errorsarlm(formula = eq1, listw = rook.weights, zero.policy = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9.17958 -2.79622 -0.78683  1.52166 24.81822 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  5.0753830  2.4535480  2.0686  0.03858
## SALESPC      0.0131657  0.0071382  1.8444  0.06513
## SOMECOLLP    0.0662494  0.0559146  1.1848  0.23608
## NONWHITEP   -0.0329825  0.0253206 -1.3026  0.19271
## BAPTISTSP    0.0267924  0.0392660  0.6823  0.49503
## BKGRTOABC    0.1130596  0.1309863  0.8631  0.38806
## 
## Lambda: 0.43462, LR test value: 25.8, p-value: 3.7867e-07
## Asymptotic standard error: 0.072964
##     z-value: 5.9566, p-value: 2.5757e-09
## Wald statistic: 35.481, p-value: 2.5757e-09
## 
## Log likelihood: -692.7115 for error model
## ML residual variance (sigma squared): 20.829, (sigma: 4.5639)
## Number of observations: 234 
## Number of parameters estimated: 8 
## AIC: 1401.4, (AIC for lm: 1425.2)
# SAR
# y = X beta + rho W1 y + u
SAR2 <- lagsarlm(eq1, listw=rook.weights, zero.policy=TRUE)
summary(SAR2)
## 
## Call:lagsarlm(formula = eq1, listw = rook.weights, zero.policy = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -8.40138 -2.81597 -0.86044  1.63072 25.19594 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  2.6424757  2.1141527  1.2499  0.21134
## SALESPC      0.0173232  0.0072136  2.4015  0.01633
## SOMECOLLP    0.0181287  0.0458684  0.3952  0.69267
## NONWHITEP   -0.0234914  0.0189203 -1.2416  0.21439
## BAPTISTSP    0.0320012  0.0328196  0.9751  0.32953
## BKGRTOABC    0.0997135  0.1245141  0.8008  0.42324
## 
## Rho: 0.41383, LR test value: 26.176, p-value: 3.116e-07
## Asymptotic standard error: 0.073673
##     z-value: 5.6171, p-value: 1.942e-08
## Wald statistic: 31.552, p-value: 1.942e-08
## 
## Log likelihood: -692.5233 for lag model
## ML residual variance (sigma squared): 20.893, (sigma: 4.5709)
## Number of observations: 234 
## Number of parameters estimated: 8 
## AIC: 1401, (AIC for lm: 1425.2)
## LM test for residual autocorrelation
## test value: 6.9895, p-value: 0.0081991
# to interpret spatial models with Wy, compute impacts
impacts(SAR2, listw=rook.weights)
## Impact measures (lag, exact):
##                Direct    Indirect       Total
## SALESPC    0.01810842  0.01144478  0.02955320
## SOMECOLLP  0.01895042  0.01197694  0.03092736
## NONWHITEP -0.02455615 -0.01551984 -0.04007598
## BAPTISTSP  0.03345170  0.02114196  0.05459366
## BKGRTOABC  0.10423320  0.06587688  0.17011008
# goodness of fit

AIC(SEM2)
## [1] 1401.423
AIC(SAR2)
## [1] 1401.047
# Extract log-likelihood
logLik(SEM2)
## 'log Lik.' -692.7115 (df=8)
logLik(SAR2)
## 'log Lik.' -692.5233 (df=8)
# LM-tests for rook.weight -----------------------------------------------------
# Lagrange multiplier diagnostics for spatial dependence
lm.LMtests(reg1, listw=rook.weights, test="all", zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: rook.weights
## 
## LMerr = 24.187, df = 1, p-value = 8.743e-07
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: rook.weights
## 
## LMlag = 28.246, df = 1, p-value = 1.068e-07
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: rook.weights
## 
## RLMerr = 4.9515, df = 1, p-value = 0.02607
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: rook.weights
## 
## RLMlag = 9.0107, df = 1, p-value = 0.002684
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq1)
## weights: rook.weights
## 
## SARMA = 33.198, df = 2, p-value = 6.184e-08
# Analitics
# LMerr = 24.187, df = 1, p-value = 8.743e-07 => significant
# LMlag = 28.246, df = 1, p-value = 1.068e-07 => significant
# conclusion => moving to Robust LM Diagnostics

# RLMerr = 4.9515, df = 1, p-value = 0.02607
# RLMlag = 9.0107, df = 1, p-value = 0.002684
# p-value: RLMlag < RLMerr => RLMlag is chosen (Spatial lag model)

# SAR: y = X beta + rho W1 y + u