library(gstat)
library(sp)
library(spatstat)
## Loading required package: nlme
##
## spatstat 1.45-0 (nickname: 'One Lakh')
## For an introduction to spatstat, type 'beginner'
##
## Attaching package: 'spatstat'
## The following object is masked from 'package:gstat':
##
## idw
library(ncf)
library(spdep)
## Loading required package: Matrix
library(maptools)
## Checking rgeos availability: FALSE
## Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
## which has a restricted licence. It is disabled by default;
## to enable gpclib, type gpclibPermit()
library(raster)
##
## Attaching package: 'raster'
## The following objects are masked from 'package:spatstat':
##
## area, rotate, shift
## The following object is masked from 'package:nlme':
##
## getData
library(ggplot2)
library(gstat)
load("D:/timeseries/wUSPrec_Bnd.Rdata")
summary(wUSBoundary)#Washington state boundary
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -341782.5 427134.1
## y -444538.4 566947.7
## Is projected: TRUE
## proj4string :
## [+proj=aea +lat_1=41 +lat_2=47 +lat_0=44 +lon_0=-120 +x_0=0 +y_0=0
## +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0]
## Data attributes:
## OBJECTID ID_0 ISO NAME_0
## Min. : 6.00 Min. :244 Length:6 Length:6
## 1st Qu.:17.50 1st Qu.:244 Class :character Class :character
## Median :29.00 Median :244 Mode :character Mode :character
## Mean :27.67 Mean :244
## 3rd Qu.:36.75 3rd Qu.:244
## Max. :49.00 Max. :244
##
## ID_1 NAME_1 HASC_1 CCN_1
## Min. : 5.00 Length:6 Length:6 Min. : NA
## 1st Qu.:16.50 Class :character Class :character 1st Qu.: NA
## Median :28.00 Mode :character Mode :character Median : NA
## Mean :26.67 Mean :NaN
## 3rd Qu.:35.75 3rd Qu.: NA
## Max. :48.00 Max. : NA
## NA's :6
## CCA_1 TYPE_1 ENGTYPE_1
## Length:6 Length:6 Length:6
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## NL_NAME_1 VARNAME_1
## Length:6 Length:6
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
summary(wUSPrec)#precip data
## Object of class SpatialPointsDataFrame
## Coordinates:
## min max
## long -302251.8 381330.4
## lat -422252.6 556846.7
## Is projected: TRUE
## proj4string :
## [+proj=aea +lat_1=41 +lat_2=47 +lat_0=44 +lon_0=-120 +x_0=0 +y_0=0
## +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0]
## Number of points: 113
## Data attributes:
## prec
## Min. : 23.69
## 1st Qu.: 40.88
## Median : 60.92
## Mean : 75.00
## 3rd Qu.: 97.62
## Max. :272.76
sp_map <- fortify(wUSBoundary)
## Regions defined for each Polygons
wUSPrec.df <- data.frame(wUSPrec)
gg <- ggplot()
gg <- gg + geom_map(data=sp_map,map=sp_map,
aes(x=long, y=lat, map_id=id),
fill= "white",color="black")
gg <- gg + geom_point(data=wUSPrec.df, aes(x=long, y=lat, size=prec),
color="orange")
gg <- gg + scale_size(breaks=seq(0,300,by=50))
gg <- gg + labs(x=NULL, y=NULL)
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg
This map shows greater precipitation amounts along the Cascades Mountain Range and the Idaho panhandle. Other areas of increased rainfall amounts are in the area of the Salish Sea and along the outer coast near the mouth of the Colombia River.
hscat(prec~1,data=wUSPrec,breaks = seq(0,2e5,by=2.5e4))
Distances were set to 250 kilometers (25,000m). The lagged scatterplots show significant autocorrelations up to distances of 500 km. After 500 km, autocorrelations fall below r values of 0.5.
cadVarCloud <- variogram(prec~1, wUSPrec, cloud = TRUE)
plot(cadVarCloud,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)",main = "Precipitation (mm)")
cadVar <- variogram(prec~1, wUSPrec, cloud = FALSE)
plot(cadVar,pch=20,cex=1.5,col="black",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
Again, the plot shows high autocorrelation at closer distances. These hit a sill somewhere around 2.5e05, which is consistent with the lagged correlogram plot.
leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="10000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="15000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="20000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
The width (distance) we set for the lagged scatterplots looks like the best option here.
leadVar<- variogram((prec)~1, wUSPrec, cutoff="1e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
leadVar<- variogram((prec)~1, wUSPrec, cutoff="2e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
leadVar<- variogram((prec)~1, wUSPrec, cutoff="3e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
leadVar<- variogram((prec)~1, wUSPrec, cutoff="4e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")
Again, the best cutoff is ~2e05, like the lagged scatterplots. It’s interesting to see that the longer cutoff of 4e05 really shows the sill at 2.5e05 and a decline after that point. So far, the Variograms and lagged scatterplots are in agreement.Let’s see if Moran’s I supports these numbers as well…
proj4string(wUSPrec) <- CRS("+init=epsg:28992")
## Warning in `proj4string<-`(`*tmp*`, value = <S4 object of class structure("CRS", package = "sp")>): A new CRS was assigned to an object with an existing CRS:
## +proj=aea +lat_1=41 +lat_2=47 +lat_0=44 +lon_0=-120 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform
w <- 1/as.matrix(dist(coordinates(wUSPrec)))
diag(w) <- 0
moran.test(wUSPrec$prec,mat2listw(w))
##
## Moran I test under randomisation
##
## data: wUSPrec$prec
## weights: mat2listw(w)
##
## Moran I statistic standard deviate = 11.886, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1657032288 -0.0089285714 0.0002158469
This shows that precipitation data is positively (weakly) autocorrelated. So,points where precipitation data was collected are similar to those nearby (the entire data set is compared here).
Let’s look at just the 8 nearest neighbors and see how it affects the Moran I statistic…
w <- knn2nb(knearneigh(wUSPrec,k=8))
moran.test(wUSPrec$prec,nb2listw(w))
##
## Moran I test under randomisation
##
## data: wUSPrec$prec
## weights: nb2listw(w)
##
## Moran I statistic standard deviate = 10.687, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.444845197 -0.008928571 0.001802824
Now the Moran’s I statistic is at 0.44, much more significant than the entire data set. What if we went to even fewer nearest neighbors?
n <- 6
res <- data.frame(k=2^(1:n),I=rep(NA,n))
for(i in 1:n){
w <- knn2nb(knearneigh(wUSPrec,k=2^i))
res$I[i] <- moran.test(wUSPrec$prec,nb2listw(w))$estimate[1]
}
plot(res,type="b",main="Moran's I by number of neighbors",pch=20,cex=1.5)
w <- knn2nb(knearneigh(wUSPrec,k=3))
moran.test(wUSPrec$prec,nb2listw(w))
##
## Moran I test under randomisation
##
## data: wUSPrec$prec
## weights: nb2listw(w)
##
## Moran I statistic standard deviate = 9.1411, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.637562282 -0.008928571 0.005001847
n <- 3
res <- data.frame(k=2^(1:n),I=rep(NA,n))
for(i in 1:n){
w <- knn2nb(knearneigh(wUSPrec,k=2^i))
res$I[i] <- moran.test(wUSPrec$prec,nb2listw(w))$estimate[1]
}
plot(res,type="b",main="Moran's I by number of neighbors",pch=20,cex=1.5)
This plot shows how the value of nearest affects the value of Moran’s I. The smaller the number of nearest neighbors, the greater the Moran’s I value. Just to explore this concept, I played around with the nearest neighbor number and it’s affect on Moran’s I. As I went with fewer neighbors, the I statistic increased. The above plot shows the value of Moran’s I when three neighbors were used–and an improved I statistic (0.64).
Precip <- spline.correlog(x=coordinates(wUSPrec)[,1], y=coordinates(wUSPrec)[,2],
z=wUSPrec$prec, resamp=100, quiet=TRUE)
plot(Precip)
The correlogram shows positive spatial autocorrelation at distances up to ~200,000 meters (200km). From 200,000 to 600,000 meters, there is negligible autocorrelation between sample points, and at distances greater than 600,000 meters, a trend in negative spatial autocorrelation begins.
These three approaches all come to the same conclusion–that there is spatial autocorrelation in precipitation out to distances of ~200,000 meters, or 200 km. What is missing is the point pattern component to this analysis. That is, what are the spatial attributes that most influence this autocorrelation?
I’m really unclear of how to perform these…?