library(sf)
library(gstat)
library(tidyverse)
library(terra)
library(tidyterra)
library(PNWColors)

sf_2_rast <-function(sfObject,variableIndex = 1){
  # coerce sf to a data.frame
  dfObject <- data.frame(st_coordinates(sfObject),
                         z=as.data.frame(sfObject)[,variableIndex])
  # coerce data.frame to SpatRaster
  rastObject <- rast(dfObject,crs=crs(sfObject))
  
  names(rastObject) <- names(sfObject)[variableIndex]
  
  return(rastObject)
}

Data

Annual precipitation data for California.

# Precipitation data
prcpCA <- readRDS("data/prcpCA.rds")
# CA Grid coordinates
gridCA <- readRDS("data/gridCA.rds")

# make into sf
prcpCA_sf <- prcpCA |>
  st_as_sf(coords = c("X", "Y")) |>
  st_set_crs(3310)

gridCA_sf <- gridCA |>
  st_as_sf(coords = c("X", "Y")) |>
  st_set_crs(3310)

prcpCA_sf$logANNUAL <- log(prcpCA_sf$ANNUAL)

pal1 <- pnw_palette("Lake", n = nrow(prcpCA))


ggplot() +
  geom_sf(data = prcpCA_sf, aes(fill = ANNUAL, size = ANNUAL),
          color = "white", shape = 21, alpha = 0.8) +
  labs(title = "Total Annual Precipitation") +
  scale_size(guide = "none") +
  scale_fill_gradientn(colors = (pal1),
                       name = "Annual Precip (mm)",
                       na.value = "transparent")

Probabilisitic Interpolation with Kriging

Observational Variogram

# create observational variogram of annual precipitation
prcpVar <- variogram(ANNUAL~1, prcpCA_sf)

plot(prcpVar, pch = 20, cex = 1.5, col = "black",
     ylab = expression("Semivariance ("*gamma*")"),
     xlab = "Distance (m)", main = "Annual Precipitation (mm)")

The observational variogram appears mainly isotropic, with the range around 10000 but a slope within the variation of semivariance points at the sill. There is likely a slight gradient in precipitation across the extent, in addition to spatial autocorrelation at distances below 100 km.

Estimates:

Range: 100,000
Sill: 120,000 to 170,000
Nugget: 25,000

Directional Observational Variograms

# use alpha = c() argument to assess impact of directionality
prcpVarD <- variogram(ANNUAL~1, data = prcpCA_sf, 
                      alpha = c(0, 45, 90, 135))
plot(prcpVarD, pch = 20, cex = 1.5, col = "black",
     ylab = expression(Semivariance~(gamma)),
     xlab = "Distance (m)",
     main = "Annual Precipitation (mm)")

There appears to be a slight gradient in the data but primarily spatial structure at distances below 100km.

Variogram Model

Spherical Model

# initial estimates for sill, range, and nugget
# psill = sill - nugget
sph.model <- vgm(psill = 125000, model = "Sph", range = 150000, nugget = 25000)
  # fit model to leadVar
sph.fit <- fit.variogram(object = prcpVar, model = sph.model)
sph.fit

##   model      psill    range
## 1   Nug   6131.906      0.0
## 2   Sph 130703.393 156692.1

# plot fitted variogram
plot(prcpVar, model = sph.fit, pch = 20, cex = 1.5, col = "black",
     ylab = expression("Semivariance ("*gamma*")"),
     xlab = "Distance (m)", main = "Annual Precipitation (mm)",
     sub = "Points: Empirical, Line: Spherical Model")

The spherical model does not fit the empirical variogram very well due to the anisotropy.

Exponential Model

# initial estimates for sill, range, and nugget
sph.model <- vgm(psill = 120000, model = "Exp", range = 100000, nugget = 25000)

  # fit model to leadVar
sph.fit <- fit.variogram(object = prcpVar, model = sph.model)

# plot fitted variogram
plot(prcpVar, model = sph.fit, pch = 20, cex = 1.5, col = "black",
     ylab = expression("Semivariance ("*gamma*")"),
     xlab = "Distance (m)", main = "Annual Precipitation (mm)",
     sub = "Points: Empirical, Line: Spherical Model")

The exponential model fits the spatial structure better both at short distances and as variance increases past the range.

Cross Validation

Moving forward with an exponential model, we need to test the model with k-fold cross validation. I used the “standard” of k = 10.

# observational variogram
prcpVar <- variogram(ANNUAL~1, prcpCA_sf)
# Variogram model
prcpmodel <- vgm(psill = 120000, model = "Exp", range = 100000, nugget = 25000)
# fit variogram model
prcpFit <- fit.variogram(object = prcpVar, model = prcpmodel)

# k-fold cross validation
prcpKrige_kfold_sf <- krige.cv(formula = ANNUAL ~ 1,
                               locations = prcpCA_sf,
                               model = prcpFit,
                               nfold = 10,
                               verbose = FALSE)
prcpKrige_kfold_sf

## Simple feature collection with 432 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -356891.8 ymin: -602543 xmax: 537273.8 ymax: 442589.1
## Projected CRS: NAD83 / California Albers
## First 10 features:
##    var1.pred  var1.var observed    residual      zscore fold
## 1  289.97950 130946.00     52.1 -237.879502 -0.65737155    6
## 2   79.89086  23129.29     52.2  -27.690861 -0.18207710    8
## 3   62.36798  16104.83     67.2    4.832021  0.03807594    7
## 4   73.02305  18206.15     60.1  -12.923048 -0.09577582    2
## 5   86.97485  60779.90     78.0   -8.974848 -0.03640383    3
## 6  103.79246  30589.64     37.2  -66.592464 -0.38074822    2
## 7   62.10231  14464.60     68.0    5.897694  0.04903758    6
## 8   71.84233  15688.63     76.6    4.757675  0.03798413    4
## 9  553.03921  34783.87    400.8 -152.239214 -0.81627702    6
## 10  89.96645  11661.63     83.0   -6.966450 -0.06451071    5
##                      geometry
## 1  POINT (280058.5 -167265.7)
## 2  POINT (355394.7 -480020.1)
## 3    POINT (416370.6 -551681)
## 4  POINT (415173.2 -566152.8)
## 5  POINT (418431.9 -516087.4)
## 6  POINT (405858.5 -567692.2)
## 7  POINT (416472.7 -573853.2)
## 8  POINT (345640.5 -468202.5)
## 9  POINT (15619.86 -398445.4)
## 10 POINT (343035.1 -472743.5)

rsq <- cor(prcpKrige_kfold_sf$observed, prcpKrige_kfold_sf$var1.pred)^2
rsq

## [1] 0.8998837

The k-fold cross validation \(R^2\) is 0.8995 from the observed and model-predicted values, with k = 10 folds.

Full Model

prcpVar <- variogram(ANNUAL~1, prcpCA_sf)
prcpmodel <- vgm(psill = 120000, model = "Exp", range = 100000, nugget = 25000)
prcpFit <- fit.variogram(object = prcpVar, model = prcpmodel)
prcpGstat <- gstat(formula = ANNUAL~1, locations = prcpCA_sf, 
                   model = prcpFit)
prcpKrige_sf <- predict(prcpGstat, newdata = gridCA_sf)

## [using ordinary kriging]

prcpKrige_sf

## Simple feature collection with 4053 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -366692.4 ymin: -594520.9 xmax: 533307.6 ymax: 445479.1
## Projected CRS: NAD83 / California Albers
## First 10 features:
##    var1.pred var1.var                   geometry
## 1   1869.180 33672.40 POINT (-346692.4 445479.1)
## 2   2002.170 31798.42 POINT (-336692.4 445479.1)
## 3   2050.668 34570.37 POINT (-326692.4 445479.1)
## 4   1954.922 44847.63 POINT (-316692.4 445479.1)
## 5   1801.061 53202.87 POINT (-306692.4 445479.1)
## 6   1635.615 55173.83 POINT (-296692.4 445479.1)
## 7   1470.799 53518.59 POINT (-286692.4 445479.1)
## 8   1309.879 54493.56 POINT (-276692.4 445479.1)
## 9   1155.632 60042.47 POINT (-266692.4 445479.1)
## 10  1012.609 65616.67 POINT (-256692.4 445479.1)

# convert to raster
prcpKrige_rast <- sf_2_rast(prcpKrige_sf)
prcpKrige_rast

## class       : SpatRaster 
## dimensions  : 105, 91, 1  (nrow, ncol, nlyr)
## resolution  : 10000, 10000  (x, y)
## extent      : -371692.4, 538307.6, -599520.9, 450479.1  (xmin, xmax, ymin, ymax)
## coord. ref. : NAD83 / California Albers (EPSG:3310) 
## source(s)   : memory
## name        :  var1.pred 
## min value   :   54.23203 
## max value   : 2263.84950

ggplot() +
  geom_spatraster(data = prcpKrige_rast, aes(fill = var1.pred), 
                    alpha = 0.9) +
  labs(title = "Total Annual Precipitation",
       x = "Longitude", y = "Latitude") +
  scale_fill_gradientn(colors = (pal1),
                       name = "Annual Precip (mm)",
                       na.value = "transparent") +
  theme_minimal()

Annual precipitation is highest in California along the northwestern coast and along the Western mountains above 36\(^\circ\)N. The Kriging predictions (probabilistic interpolation) appear smoother and less spotty than those created with IDW (deterministic interpolation). The small gradient seen in the directional variograms is likely due to the North-South spread of average precipitation across the extent.

ggplot() +
  geom_spatraster(data = prcpKrige_rast, aes(fill = var1.pred), 
                    alpha = 0.9) +
  labs(title = "Total Annual Precipitation",
       subtitle = "Probabilistic (Exponential) Interpolation with Kriging",
       x = "Longitude", y = "Latitude",
       caption = "Points show empirical data.") +
  scale_fill_gradientn(colors = (pal1),
                       name = "Annual Precip (mm)",
                       na.value = "transparent") +
  geom_sf(data = prcpCA_sf, aes(fill = ANNUAL),
          color = "white", shape = 21, alpha = 0.6) +
  annotate(geom = "text", x = 300000, y = 400000, 
           label = expression(paste(R^{2}, " = 0.8995"))) +
    theme_minimal()

Geostats Kriging

Luke Ghallahorne

2024-05-17