library(gstat)
library(sp)
library(spatstat)
## Loading required package: nlme
## 
## spatstat 1.45-0       (nickname: 'One Lakh') 
## For an introduction to spatstat, type 'beginner'
## 
## Attaching package: 'spatstat'
## The following object is masked from 'package:gstat':
## 
##     idw
library(ncf)
library(spdep)
## Loading required package: Matrix
library(maptools)
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
library(raster)
## 
## Attaching package: 'raster'
## The following objects are masked from 'package:spatstat':
## 
##     area, rotate, shift
## The following object is masked from 'package:nlme':
## 
##     getData
library(ggplot2)
library(gstat)
load("D:/timeseries/wUSPrec_Bnd.Rdata")
summary(wUSBoundary)#Washington state boundary
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min      max
## x -341782.5 427134.1
## y -444538.4 566947.7
## Is projected: TRUE 
## proj4string :
## [+proj=aea +lat_1=41 +lat_2=47 +lat_0=44 +lon_0=-120 +x_0=0 +y_0=0
## +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0]
## Data attributes:
##     OBJECTID          ID_0         ISO               NAME_0         
##  Min.   : 6.00   Min.   :244   Length:6           Length:6          
##  1st Qu.:17.50   1st Qu.:244   Class :character   Class :character  
##  Median :29.00   Median :244   Mode  :character   Mode  :character  
##  Mean   :27.67   Mean   :244                                        
##  3rd Qu.:36.75   3rd Qu.:244                                        
##  Max.   :49.00   Max.   :244                                        
##                                                                     
##       ID_1          NAME_1             HASC_1              CCN_1    
##  Min.   : 5.00   Length:6           Length:6           Min.   : NA  
##  1st Qu.:16.50   Class :character   Class :character   1st Qu.: NA  
##  Median :28.00   Mode  :character   Mode  :character   Median : NA  
##  Mean   :26.67                                         Mean   :NaN  
##  3rd Qu.:35.75                                         3rd Qu.: NA  
##  Max.   :48.00                                         Max.   : NA  
##                                                        NA's   :6    
##     CCA_1              TYPE_1           ENGTYPE_1        
##  Length:6           Length:6           Length:6          
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##   NL_NAME_1          VARNAME_1        
##  Length:6           Length:6          
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
summary(wUSPrec)#precip data
## Object of class SpatialPointsDataFrame
## Coordinates:
##            min      max
## long -302251.8 381330.4
## lat  -422252.6 556846.7
## Is projected: TRUE 
## proj4string :
## [+proj=aea +lat_1=41 +lat_2=47 +lat_0=44 +lon_0=-120 +x_0=0 +y_0=0
## +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0]
## Number of points: 113
## Data attributes:
##       prec       
##  Min.   : 23.69  
##  1st Qu.: 40.88  
##  Median : 60.92  
##  Mean   : 75.00  
##  3rd Qu.: 97.62  
##  Max.   :272.76
sp_map <- fortify(wUSBoundary)
## Regions defined for each Polygons
wUSPrec.df <- data.frame(wUSPrec)
gg <- ggplot()
gg <- gg + geom_map(data=sp_map,map=sp_map,
                    aes(x=long, y=lat, map_id=id),
                    fill= "white",color="black")
gg <- gg + geom_point(data=wUSPrec.df, aes(x=long, y=lat, size=prec), 
                      color="orange")
gg <- gg + scale_size(breaks=seq(0,300,by=50))
gg <- gg + labs(x=NULL, y=NULL)
gg <- gg + theme(panel.grid=element_blank(), panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank(), axis.text=element_blank())
gg <- gg + theme(legend.position="top")
gg

This map shows greater precipitation amounts along the Cascades Mountain Range and the Idaho panhandle. Other areas of increased rainfall amounts are in the area of the Salish Sea and along the outer coast near the mouth of the Colombia River.

Lagged Scatterplots show significant autocorrelation at closer distances

hscat(prec~1,data=wUSPrec,breaks = seq(0,2e5,by=2.5e4))

Distances were set to 250 kilometers (25,000m). The lagged scatterplots show significant autocorrelations up to distances of 500 km. After 500 km, autocorrelations fall below r values of 0.5.

Variogram

cadVarCloud <- variogram(prec~1, wUSPrec, cloud = TRUE)
plot(cadVarCloud,pch=20,cex=1.5,col="black",
     ylab=expression("Semivariance ("*gamma*")"),
     xlab="Distance (m)",main = "Precipitation (mm)")

 cadVar <- variogram(prec~1, wUSPrec, cloud = FALSE)
  plot(cadVar,pch=20,cex=1.5,col="black",
     ylab=expression("Semivariance ("*gamma*")"),
     xlab="Distance (m)", main = "Precipitation (mm)")

Again, the plot shows high autocorrelation at closer distances. These hit a sill somewhere around 2.5e05, which is consistent with the lagged correlogram plot.

Adjusting the width…

leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="10000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="15000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="20000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

leadVar<- variogram((prec)~1, wUSPrec, cutoff="2.5e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

The width (distance) we set for the lagged scatterplots looks like the best option here.

Adjusting the cutoff…

leadVar<- variogram((prec)~1, wUSPrec, cutoff="1e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

leadVar<- variogram((prec)~1, wUSPrec, cutoff="2e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

leadVar<- variogram((prec)~1, wUSPrec, cutoff="3e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

leadVar<- variogram((prec)~1, wUSPrec, cutoff="4e05",width="25000", cloud = FALSE)
plot(leadVar,pch=20,cex=.9,col="gray36",
ylab=expression("Semivariance ("*gamma*")"),
xlab="Distance (m)", main = "Precipitation (mm)")

Again, the best cutoff is ~2e05, like the lagged scatterplots. It’s interesting to see that the longer cutoff of 4e05 really shows the sill at 2.5e05 and a decline after that point. So far, the Variograms and lagged scatterplots are in agreement.Let’s see if Moran’s I supports these numbers as well…

Moran’s I

proj4string(wUSPrec) <- CRS("+init=epsg:28992")
## Warning in `proj4string<-`(`*tmp*`, value = <S4 object of class structure("CRS", package = "sp")>): A new CRS was assigned to an object with an existing CRS:
## +proj=aea +lat_1=41 +lat_2=47 +lat_0=44 +lon_0=-120 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform
w <- 1/as.matrix(dist(coordinates(wUSPrec)))
diag(w) <- 0
moran.test(wUSPrec$prec,mat2listw(w))
## 
##  Moran I test under randomisation
## 
## data:  wUSPrec$prec  
## weights: mat2listw(w)  
## 
## Moran I statistic standard deviate = 11.886, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.1657032288     -0.0089285714      0.0002158469

This shows that precipitation data is positively (weakly) autocorrelated. So,points where precipitation data was collected are similar to those nearby (the entire data set is compared here).

Let’s look at just the 8 nearest neighbors and see how it affects the Moran I statistic…

w <- knn2nb(knearneigh(wUSPrec,k=8))
moran.test(wUSPrec$prec,nb2listw(w))
## 
##  Moran I test under randomisation
## 
## data:  wUSPrec$prec  
## weights: nb2listw(w)  
## 
## Moran I statistic standard deviate = 10.687, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.444845197      -0.008928571       0.001802824

Now the Moran’s I statistic is at 0.44, much more significant than the entire data set. What if we went to even fewer nearest neighbors?

n <- 6
res <- data.frame(k=2^(1:n),I=rep(NA,n))
for(i in 1:n){
  w <- knn2nb(knearneigh(wUSPrec,k=2^i))
  res$I[i] <- moran.test(wUSPrec$prec,nb2listw(w))$estimate[1]
}
plot(res,type="b",main="Moran's I by number of neighbors",pch=20,cex=1.5)

w <- knn2nb(knearneigh(wUSPrec,k=3))
moran.test(wUSPrec$prec,nb2listw(w))
## 
##  Moran I test under randomisation
## 
## data:  wUSPrec$prec  
## weights: nb2listw(w)  
## 
## Moran I statistic standard deviate = 9.1411, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.637562282      -0.008928571       0.005001847
n <- 3
res <- data.frame(k=2^(1:n),I=rep(NA,n))
for(i in 1:n){
  w <- knn2nb(knearneigh(wUSPrec,k=2^i))
  res$I[i] <- moran.test(wUSPrec$prec,nb2listw(w))$estimate[1]
}
plot(res,type="b",main="Moran's I by number of neighbors",pch=20,cex=1.5)

This plot shows how the value of nearest affects the value of Moran’s I. The smaller the number of nearest neighbors, the greater the Moran’s I value. Just to explore this concept, I played around with the nearest neighbor number and it’s affect on Moran’s I. As I went with fewer neighbors, the I statistic increased. The above plot shows the value of Moran’s I when three neighbors were used–and an improved I statistic (0.64).

Let’s look at a correlogram of Moran’s I

Precip <- spline.correlog(x=coordinates(wUSPrec)[,1], y=coordinates(wUSPrec)[,2],
z=wUSPrec$prec, resamp=100, quiet=TRUE)
plot(Precip)

The correlogram shows positive spatial autocorrelation at distances up to ~200,000 meters (200km). From 200,000 to 600,000 meters, there is negligible autocorrelation between sample points, and at distances greater than 600,000 meters, a trend in negative spatial autocorrelation begins.

These three approaches all come to the same conclusion–that there is spatial autocorrelation in precipitation out to distances of ~200,000 meters, or 200 km. What is missing is the point pattern component to this analysis. That is, what are the spatial attributes that most influence this autocorrelation?

Directional Variograms

I’m really unclear of how to perform these…?