library(sf)
library(gstat)
library(tidyverse)
library(terra)
library(tidyterra)
library(PNWColors)
sf_2_rast <-function(sfObject,variableIndex = 1){
# coerce sf to a data.frame
dfObject <- data.frame(st_coordinates(sfObject),
z=as.data.frame(sfObject)[,variableIndex])
# coerce data.frame to SpatRaster
rastObject <- rast(dfObject,crs=crs(sfObject))
names(rastObject) <- names(sfObject)[variableIndex]
return(rastObject)
}
Annual precipitation data for California.
# Precipitation data
prcpCA <- readRDS("data/prcpCA.rds")
# CA Grid coordinates
gridCA <- readRDS("data/gridCA.rds")
# make into sf
prcpCA_sf <- prcpCA |>
st_as_sf(coords = c("X", "Y")) |>
st_set_crs(3310)
gridCA_sf <- gridCA |>
st_as_sf(coords = c("X", "Y")) |>
st_set_crs(3310)
prcpCA_sf$logANNUAL <- log(prcpCA_sf$ANNUAL)
pal1 <- pnw_palette("Lake", n = nrow(prcpCA))
ggplot() +
geom_sf(data = prcpCA_sf, aes(fill = ANNUAL, size = ANNUAL),
color = "white", shape = 21, alpha = 0.8) +
labs(title = "Total Annual Precipitation") +
scale_size(guide = "none") +
scale_fill_gradientn(colors = (pal1),
name = "Annual Precip (mm)",
na.value = "transparent")
# create observational variogram of annual precipitation
prcpVar <- variogram(ANNUAL~1, prcpCA_sf)
plot(prcpVar, pch = 20, cex = 1.5, col = "black",
ylab = expression("Semivariance ("*gamma*")"),
xlab = "Distance (m)", main = "Annual Precipitation (mm)")
The observational variogram appears mainly isotropic, with the range around 10000 but a slope within the variation of semivariance points at the sill. There is likely a slight gradient in precipitation across the extent, in addition to spatial autocorrelation at distances below 100 km.
Estimates:
# use alpha = c() argument to assess impact of directionality
prcpVarD <- variogram(ANNUAL~1, data = prcpCA_sf,
alpha = c(0, 45, 90, 135))
plot(prcpVarD, pch = 20, cex = 1.5, col = "black",
ylab = expression(Semivariance~(gamma)),
xlab = "Distance (m)",
main = "Annual Precipitation (mm)")
There appears to be a slight gradient in the data but primarily spatial structure at distances below 100km.
# initial estimates for sill, range, and nugget
# psill = sill - nugget
sph.model <- vgm(psill = 125000, model = "Sph", range = 150000, nugget = 25000)
# fit model to leadVar
sph.fit <- fit.variogram(object = prcpVar, model = sph.model)
sph.fit
## model psill range
## 1 Nug 6131.906 0.0
## 2 Sph 130703.393 156692.1
# plot fitted variogram
plot(prcpVar, model = sph.fit, pch = 20, cex = 1.5, col = "black",
ylab = expression("Semivariance ("*gamma*")"),
xlab = "Distance (m)", main = "Annual Precipitation (mm)",
sub = "Points: Empirical, Line: Spherical Model")
The spherical model does not fit the empirical variogram very well due to the anisotropy.
# initial estimates for sill, range, and nugget
sph.model <- vgm(psill = 120000, model = "Exp", range = 100000, nugget = 25000)
# fit model to leadVar
sph.fit <- fit.variogram(object = prcpVar, model = sph.model)
# plot fitted variogram
plot(prcpVar, model = sph.fit, pch = 20, cex = 1.5, col = "black",
ylab = expression("Semivariance ("*gamma*")"),
xlab = "Distance (m)", main = "Annual Precipitation (mm)",
sub = "Points: Empirical, Line: Spherical Model")
The exponential model fits the spatial structure better both at short distances and as variance increases past the range.
Moving forward with an exponential model, we need to test the model with k-fold cross validation. I used the “standard” of k = 10.
# observational variogram
prcpVar <- variogram(ANNUAL~1, prcpCA_sf)
# Variogram model
prcpmodel <- vgm(psill = 120000, model = "Exp", range = 100000, nugget = 25000)
# fit variogram model
prcpFit <- fit.variogram(object = prcpVar, model = prcpmodel)
# k-fold cross validation
prcpKrige_kfold_sf <- krige.cv(formula = ANNUAL ~ 1,
locations = prcpCA_sf,
model = prcpFit,
nfold = 10,
verbose = FALSE)
prcpKrige_kfold_sf
## Simple feature collection with 432 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -356891.8 ymin: -602543 xmax: 537273.8 ymax: 442589.1
## Projected CRS: NAD83 / California Albers
## First 10 features:
## var1.pred var1.var observed residual zscore fold
## 1 289.97950 130946.00 52.1 -237.879502 -0.65737155 6
## 2 79.89086 23129.29 52.2 -27.690861 -0.18207710 8
## 3 62.36798 16104.83 67.2 4.832021 0.03807594 7
## 4 73.02305 18206.15 60.1 -12.923048 -0.09577582 2
## 5 86.97485 60779.90 78.0 -8.974848 -0.03640383 3
## 6 103.79246 30589.64 37.2 -66.592464 -0.38074822 2
## 7 62.10231 14464.60 68.0 5.897694 0.04903758 6
## 8 71.84233 15688.63 76.6 4.757675 0.03798413 4
## 9 553.03921 34783.87 400.8 -152.239214 -0.81627702 6
## 10 89.96645 11661.63 83.0 -6.966450 -0.06451071 5
## geometry
## 1 POINT (280058.5 -167265.7)
## 2 POINT (355394.7 -480020.1)
## 3 POINT (416370.6 -551681)
## 4 POINT (415173.2 -566152.8)
## 5 POINT (418431.9 -516087.4)
## 6 POINT (405858.5 -567692.2)
## 7 POINT (416472.7 -573853.2)
## 8 POINT (345640.5 -468202.5)
## 9 POINT (15619.86 -398445.4)
## 10 POINT (343035.1 -472743.5)
rsq <- cor(prcpKrige_kfold_sf$observed, prcpKrige_kfold_sf$var1.pred)^2
rsq
## [1] 0.8998837
The k-fold cross validation \(R^2\) is 0.8995 from the observed and model-predicted values, with k = 10 folds.
prcpVar <- variogram(ANNUAL~1, prcpCA_sf)
prcpmodel <- vgm(psill = 120000, model = "Exp", range = 100000, nugget = 25000)
prcpFit <- fit.variogram(object = prcpVar, model = prcpmodel)
prcpGstat <- gstat(formula = ANNUAL~1, locations = prcpCA_sf,
model = prcpFit)
prcpKrige_sf <- predict(prcpGstat, newdata = gridCA_sf)
## [using ordinary kriging]
prcpKrige_sf
## Simple feature collection with 4053 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -366692.4 ymin: -594520.9 xmax: 533307.6 ymax: 445479.1
## Projected CRS: NAD83 / California Albers
## First 10 features:
## var1.pred var1.var geometry
## 1 1869.180 33672.40 POINT (-346692.4 445479.1)
## 2 2002.170 31798.42 POINT (-336692.4 445479.1)
## 3 2050.668 34570.37 POINT (-326692.4 445479.1)
## 4 1954.922 44847.63 POINT (-316692.4 445479.1)
## 5 1801.061 53202.87 POINT (-306692.4 445479.1)
## 6 1635.615 55173.83 POINT (-296692.4 445479.1)
## 7 1470.799 53518.59 POINT (-286692.4 445479.1)
## 8 1309.879 54493.56 POINT (-276692.4 445479.1)
## 9 1155.632 60042.47 POINT (-266692.4 445479.1)
## 10 1012.609 65616.67 POINT (-256692.4 445479.1)
# convert to raster
prcpKrige_rast <- sf_2_rast(prcpKrige_sf)
prcpKrige_rast
## class : SpatRaster
## dimensions : 105, 91, 1 (nrow, ncol, nlyr)
## resolution : 10000, 10000 (x, y)
## extent : -371692.4, 538307.6, -599520.9, 450479.1 (xmin, xmax, ymin, ymax)
## coord. ref. : NAD83 / California Albers (EPSG:3310)
## source(s) : memory
## name : var1.pred
## min value : 54.23203
## max value : 2263.84950
ggplot() +
geom_spatraster(data = prcpKrige_rast, aes(fill = var1.pred),
alpha = 0.9) +
labs(title = "Total Annual Precipitation",
x = "Longitude", y = "Latitude") +
scale_fill_gradientn(colors = (pal1),
name = "Annual Precip (mm)",
na.value = "transparent") +
theme_minimal()
Annual precipitation is highest in California along the northwestern coast and along the Western mountains above 36\(^\circ\)N. The Kriging predictions (probabilistic interpolation) appear smoother and less spotty than those created with IDW (deterministic interpolation). The small gradient seen in the directional variograms is likely due to the North-South spread of average precipitation across the extent.
ggplot() +
geom_spatraster(data = prcpKrige_rast, aes(fill = var1.pred),
alpha = 0.9) +
labs(title = "Total Annual Precipitation",
subtitle = "Probabilistic (Exponential) Interpolation with Kriging",
x = "Longitude", y = "Latitude",
caption = "Points show empirical data.") +
scale_fill_gradientn(colors = (pal1),
name = "Annual Precip (mm)",
na.value = "transparent") +
geom_sf(data = prcpCA_sf, aes(fill = ANNUAL),
color = "white", shape = 21, alpha = 0.6) +
annotate(geom = "text", x = 300000, y = 400000,
label = expression(paste(R^{2}, " = 0.8995"))) +
theme_minimal()