library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gstat)
library(stars)
## Loading required package: abind
## Registered S3 methods overwritten by 'stars':
##   method             from
##   st_bbox.SpatRaster sf  
##   st_crs.SpatRaster  sf
library(viridis)
## Loading required package: viridisLite

Instructions: The compressed file oregon.zip contains data on average annual temperatures for Oregon from a set of climate stations in the shapefile oregontann.shp in a variable called ‘tann’, and station elevation in a variable called elevation. A second file orgrid.shp contains a set of gridded elevations for the state at 10 minute resolution. Code is given below to read in these data and convert to sf and stars objects for geostatistical analysis. Using the gstat library, carry out the following analyses:

or_shp <- st_read("../Lab 12/Oregon/orotl.shp", quiet = TRUE)
or_temp <- st_read("../Lab 12/Oregon/oregontann.shp", quiet = TRUE)
or_grid <- st_read("../Lab 12/Oregon/orgrid.shp", quiet = TRUE) 

st_crs(or_shp) <- 4326
st_crs(or_temp) <- 4326
st_crs(or_grid) <- 4326

# Checking distribution
par(mfrow = c(1, 2))
hist(or_temp$tann)
hist(log(or_temp$tann))

# Opting not to log-transform the data because of the left skew.

or.var <- variogram(tann ~ 1, data = or_temp)
plot(or.var, plot.numbers = TRUE, pch = 15, main = "Sample Variogram")

modNugget <- 0.5
modRange <- 150
modSill <- 3.5

or.vgm1 <- vgm(psill = modSill,
                model = "Cir",
                range = modRange,
                nugget = modNugget)

modNuggetG <- 0.5
modRangeG <- 125
modSillG <- 4

or.vgmG <- vgm(psill = modSillG,
                model = "Gau",
                range = modRangeG,
                nugget = modNuggetG)

a <- plot(or.var, or.vgm1, main = "Oregon Annual Temperature\n Variogram: Circular Model")
b <- plot(or.var, or.vgmG, main = "Oregon Annual Temperature\n Variogram: Gaussian Model")
print(a, position = c(0, 0, 0.5, 1), more = TRUE)
print(b, position = c(0.5, 0, 1, 1))

Answer: After a lot of testing, I chose the circular model with a nugget of 0.5, a sill of 3.5, and a range of 150. I don’t like that the model appears convex where the data has a concave shape. It doesn’t really capture the trend, although it seems like the spatial dependency does taper off at the asymptote at 150.

I wanted to avoid using a Gaussian model, but I did take a look at the results. It does follow the concavity of the points within the first 150 units. Where the spatial dependency appears to end at ~150 in the points, I had to select a range of 125 in the Gaussian mode to fit the model to the data.

or.vgm2 <- fit.variogram(or.var, or.vgm1)
plot(or.var, or.vgm2, main = "Final Variogram: Oregon Annual Temperature Data")

/n

Kriging

pred.or <- krige(tann ~ 1,
                locations = or_temp,
                newdata = or_grid,
                model = or.vgm2)
## [using ordinary kriging]
plot(pred.or["var1.pred"], pch = 15, cex = 2, main = "Interpolated Temperature Values (OK)")

plot(pred.or["var1.var"], pch = 15, cex = 2, main = "Interpolated Temperature Prediction Error (OK)")

- Use the krige.cv() function with 5-fold cross-validation to report the root mean squared error and R2

or.cv.ok <- krige.cv(tann ~ 1,
                     locations = or_temp,
                     model = or.vgm2,
                     nmax = 40,
                     nfold = 5)

sqrt(mean(or.cv.ok$residual^2))
## [1] 1.231104
cor(or.cv.ok$observed, or.cv.ok$var1.pred)^2
## [1] 0.5657937
sp::bubble(as_Spatial(or.cv.ok)[,"residual"], pch = 16, main = "Residual Pattern")

Answer: Using five-fold cross-validation, the root mean squared error is 1.3191, and the R-squared is 0.5086. We would expect an average error of 1.31 when making predictions. Our model captures about 51 percent of the variance in the dataset.

The residuals are relatively un-patterned, with slightly larger residuals in the southwest, where (I believe) the Cascades Range narrows and the Willamette Valley begins. The temperature variation is likely more extreme here than the rest of the state.