##Set working directory
setwd("~/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/lab12")

##Load required libraries
library(ggplot2)
library(gstat)
library(RColorBrewer)
library(sf)
library(stars)
library(viridis)

Reading the Data

swiss <- read.csv("../datafiles/swiss_ppt/swiss_ppt.csv")
head(swiss)
##    id       x       y  ppt elev
## 1 287 2688674 1292361 18.4  754
## 2 292 2692432 1289049 12.1  516
## 3 259 2678227 1288974 13.8  561
## 4 319 2701430 1285778 12.6  412
## 5 257 2676752 1283409 15.6  419
## 6 302 2696966 1282408 10.0  423

Swiss Precipitation

##Set as simple feature. Use EPSG code 2056 for a Lambert projection coordinate reference system.

swiss.sf <- st_as_sf(swiss, 
                     coords = c("x", "y"),
                     crs = 2056)

##Load the DEM for Switzerland as a raster object

swiss.dem <- read_stars("../datafiles/swiss_ppt//swiss_dem.asc")

##Set to Lambert projection
st_crs(swiss.dem) <- 2056

##Visualize
plot(swiss.dem)

##Load the countries shapefile
countries <- st_read("../datafiles/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp")
## Reading layer `ne_50m_admin_0_countries' from data source 
##   `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 241 features and 94 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS:  WGS 84
##Extract Switzerland's Border
swiss.bord <- subset(countries, NAME == "Switzerland")

##Transform to the Lambert Projection
swiss.bord <- st_transform(swiss.bord, 2056)


##Visualize Precipitation
plot(swiss.sf["ppt"], 
     reset = FALSE, 
     pch = 16)

plot(st_geometry(swiss.bord), add = TRUE)

ggplot() + 
  geom_sf(data = swiss.bord) +
  geom_sf(data = swiss.sf, aes(col = ppt), size = 1.5, alpha = 0.6) +
  scale_color_gradient(low = "lightblue", high = "darkblue") +
  theme_bw()

Meuse Soil Sample Dataset

##Read in Meuse Point Data
meuse <- st_read("../datafiles/meuse/meuse.shp", quiet = TRUE)

##Read in Mease River Shape
meuseriv <- st_read("../datafiles/meuse/meuseriv.shp", quiet = TRUE)

Variogram Analysis

##Plot the sf object using the variable zinc. Add the river geometry as well
plot(meuse["zinc"], 
     pch = 16, 
     cex = 1,
     reset = FALSE)

plot(meuseriv, 
     add = TRUE, 
     col = NA)

hist(meuse$zinc,
     xlab = "Zinc")

##Higher concentrations closer to the river.
##It is skewed left. Log transform the zinc values to make them more normally distributed.
##Log Transform
meuse$lzinc <- log(meuse$zinc)

hist(meuse$lzinc)

##Visualize
plot(meuse["lzinc"], 
     pch = 16, 
     cex = 1.25, 
     reset = FALSE)

plot(meuseriv, 
     add = TRUE, 
     col = NA)

Sample Variogram

From Simon’s Lab:

We will now use the variogram() function to build a sample variogram for the log-transformed data. We start here by loading the gstat library, then build the sample variogram. This uses the usual R model syntax, which we will later use to include covariates. Here we simply use the formula lzinc ~ 1, which indicates that we are assuming the mean log zinc value does not vary across our region. Finally we plot the variogram, adding an argument to show the number of pairs of points used to calculate each point:

##Build the Variogram
mzinc.var <- variogram(lzinc ~ 1, data = meuse)


##Plot the Variogram. Display the number of pairs by using the syntax plot.numbers = TRUE
plot(mzinc.var, plot.numbers = TRUE, pch = '+')

##Build variogram 2
##Cutoff = the maximum distance over which we will consider pairwise differences between points.
##Width = the size of each lag (bin width)

mzinc.var2 <- variogram(lzinc ~ 1, 
                        data = meuse, 
                        cutoff = 1500, 
                        width = 100)

##Plot the variogram
plot(mzinc.var2, plot.numbers = TRUE, pch = '+')

##Build the variogram
ppt.var <- variogram(ppt ~ 1,
                     swiss.sf,
                     cutoff = 100000,
                     width = 8000)


##Plot it
plot(ppt.var, plot.numbers = TRUE)

Variogram Modeling

##Look at the set of standard parametric models that can be used:

vgm()
##    short                                      long
## 1    Nug                              Nug (nugget)
## 2    Exp                         Exp (exponential)
## 3    Sph                           Sph (spherical)
## 4    Gau                            Gau (gaussian)
## 5    Exc        Exclass (Exponential class/stable)
## 6    Mat                              Mat (Matern)
## 7    Ste Mat (Matern, M. Stein's parameterization)
## 8    Cir                            Cir (circular)
## 9    Lin                              Lin (linear)
## 10   Bes                              Bes (bessel)
## 11   Pen                      Pen (pentaspherical)
## 12   Per                            Per (periodic)
## 13   Wav                                Wav (wave)
## 14   Hol                                Hol (hole)
## 15   Log                         Log (logarithmic)
## 16   Pow                               Pow (power)
## 17   Spl                              Spl (spline)
## 18   Leg                            Leg (Legendre)
## 19   Err                   Err (Measurement error)
## 20   Int                           Int (Intercept)

From Simon’s Lab:

To fit a model, it is necessary to create a first model by hand, then use the fit.variogram() function, which uses a weighted least squares method to fit this to the sample variogram. The first model requires you to specify:

  • the model form
  • the value of the nugget (the intercept with the Y axis)
  • the model range (the distance at which the sample variogram becomes flat)
  • the sill, the semivariance value (y-axis) of the range

Here we specify these as separate variables, then use the vgm() function to build the initial model. As in the previous section, we will start by doing this for the Meuse dataset. Some suggested values are given in the code below, but it is worth replotting the original variogram, to see how these values compare to the sample variable.

##Create variables for the nugget, range, and sill
modNugget <- 0.05
modRange <- 1100
modSill <- 0.65

##Build the Model using a circular model approach
mzinc.vgm1 <- vgm(psill = modSill, 
                  model = "Cir", 
                  range = modRange, 
                  nugget = modNugget)

##Visualize the Model
plot(mzinc.var, mzinc.vgm1, main = "Meuse zinc variogram")

##The model only fits approximately to the sample variogram, so we can now use an iterative weighted OLS method (fit.variogram()) to fit the model variogram to the sample variogram.


mzinc.vgm2 <- fit.variogram(mzinc.var, mzinc.vgm1)

plot(mzinc.var, mzinc.vgm2, main = "Meuse zinc variogram")

mzinc.vgm2
##   model      psill    range
## 1   Nug 0.05605164   0.0000
## 2   Cir 0.57944767 779.3348
  • Nearly 0.6 (partial sill) is explained by the spatial error of the data set.

  • The range or plateau is the distance of the variogram plateau.

##Set the values
modnug = 10
modrange2 = 75000
modsill2 = 140

##Build the model
ppt.vgm1 = vgm(psill = modsill2,
               model = "Sph",
               range = modrange2,
               nugget = modnug)

##Fit the model to the data
ppt.vgm2 = fit.variogram(ppt.var, ppt.vgm1)


##Visualize it
plot(ppt.var, 
     ppt.vgm2,
     main = "Swiss Precipitation Variogram")

ppt.vgm2
##   model     psill    range
## 1   Nug   2.02863     0.00
## 2   Sph 151.02202 91760.34

Spatial Prediction

Ordinary Kriging

Use the krige() function to perform spatial prediction (interpolation) using ordinary kriging as a default. The following inputs are needed:

  • A model formula specifying the variable to be predicted. This can be expanded to include covariates.

  • The spatial object with the observed values

  • A spatial object with the coordinates to be used for prediction

  • The fitted variogram model

  • An optional parameter that limits the number of points to be used in predicting any given location

##Kriging 
ppt.pred.ok = krige(ppt ~ 1,
                    locations = swiss.sf,
                    newdata = swiss.dem,
                    model = ppt.vgm2,
                    nmax = 40)
## [using ordinary kriging]
names(ppt.pred.ok)
## [1] "var1.pred" "var1.var"
##Predictions are in var1.pred
##Prediction Errors are in var1.var
##Set a color palette

my.pal = brewer.pal(n = 11, "Blues")
## Warning in brewer.pal(n = 11, "Blues"): n too large, allowed maximum for palette Blues is 9
## Returning the palette you asked for with that many colors
plot(ppt.pred.ok,
     main = "Interpolated Values",
     col = my.pal)

plot(ppt.pred.ok["var1.var"],
     main = "Interpolated Value Error")

##Errors are typically smaller near known points

Assessing Model Quality

From Simon’s Lab:

To assess the performance of our kriging model, we use a n-fold cross-validation (also called k-fold). This splits the data into n subsets, then iteratively predicts each subset from the other n−1 sets. The krige.cv() function performs the cross-validation: this takes the same arguments as the krige() function, but we leave out the object with coordinates for new predictions, and specify nfold, the number of subsets to be used. You will see some warnings about the projection. You can safely ignore these.

ppt.cv.ok = krige.cv(ppt ~ 1,
                     locations = swiss.sf,
                     model = ppt.vgm2,
                     nmax = 40,
                     nfold = 5)

head(ppt.cv.ok)
## Simple feature collection with 6 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2676752 ymin: 1282408 xmax: 2701430 ymax: 1292361
## Projected CRS: CH1903+ / LV95
##   var1.pred var1.var observed   residual     zscore fold
## 1  12.88014 21.70097     18.4  5.5198568  1.1849177    2
## 2  14.92293 16.67446     12.1 -2.8229343 -0.6913132    3
## 3  16.91949 23.44264     13.8 -3.1194886 -0.6442881    5
## 4  11.97504 21.40840     12.6  0.6249609  0.1350706    1
## 5  13.37531 13.88293     15.6  2.2246932  0.5970759    3
## 6  11.60296 17.37992     10.0 -1.6029563 -0.3845013    5
##                  geometry
## 1 POINT (2688674 1292361)
## 2 POINT (2692432 1289049)
## 3 POINT (2678227 1288974)
## 4 POINT (2701430 1285778)
## 5 POINT (2676752 1283409)
## 6 POINT (2696966 1282408)

The output of this function is a spatial object with the following variables:

  • var1.pred: the cross-validated prediction at the site (when it is in the test set)
  • var1.var: the cross-validated prediction error at the site
  • observed: the observed value at the site
  • residual: the difference between the predicted and observed value
  • z-score: a z-score calculated as the residual divided by the error
  • fold: the ‘fold’ or iteration when the site was in the test set
##RMSEP
sqrt(mean(ppt.cv.ok$residual^2))
## [1] 4.823913
#Prediction r^2
cor(ppt.cv.ok$observed, ppt.cv.ok$var1.pred)^2
## [1] 0.8158796

Note:

  • The first of these (RMSEP) gives the average error that might be expected when making a prediction, the second (R2P) gives the amount of variance in the test dataset predicted by the model.
sp::bubble(as_Spatial(ppt.cv.ok)[,"residual"], pch = 16)

##The map shows little pattern, which is good. Any systematic under or over estimation would suggest that there is a trend or other structural component which is not being captured by the model.
##Residual Plot
plot(ppt.cv.ok$var1.pred, ppt.cv.ok$residual, 
     xlab = 'PPT Predicted Values', 
     ylab = 'PPT Residuals')

abline(h = 0, lty = 2)

Exercise

# Oregon boundaries
orotl <- st_read("../datafiles/oregon/orotl.shp", quiet = TRUE)
st_crs(orotl) <- 4326

# Oregon temperatures
ortann <- st_read("../datafiles/oregon/oregontann.shp", quiet = TRUE)
st_crs(ortann) <- 4326

# Oregon DEM file
orgrid <- st_read("../datafiles/oregon/orgrid.shp", quiet = TRUE) 
st_crs(orgrid) <- 4326

##Rasterize the point data of the orgrid file into a raster format (interpolated)
orgrid.dem <- st_rasterize(orgrid, dx = 0.1667, dy = 0.1667)

##plot
plot(orgrid.dem, reset = FALSE)

##Add the temperature data
plot(ortann["tann"], 
     add = TRUE, 
     pch = 16, 
     cex = 1.5)

1.1 Annual Temperature Sample Variogram

##Build the Variogram
temp.var = variogram(tann ~ 1, data = ortann)


##Plot the Variogram. Display the number of pairs by using the syntax plot.numbers = TRUE
plot(temp.var, plot.numbers = TRUE, pch = '+')

##Cutoff = distance cutoff
##Width = binwidth

temp.var2 <- variogram(tann ~ 1, 
                        data = ortann, 
                        cutoff = 225, 
                        width = 20)

##Plot the variogram
plot(temp.var2, plot.numbers = TRUE, pch = '+')

1.2 Annual Temperature Variogram Model

##Build the Model using a circular model approach
temp.vgm1 <- vgm(psill = 4, 
                  model = "Cir", 
                  range = 180, 
                  nugget = 0.25)

##Visualize the Model
plot(temp.var2, temp.vgm1, main = "Oregon Temperature Variogram")

Note:

  • The following values were selected for this model:
    • Nugget: 0.25
    • Sill: 4
    • Range: 180

1.3 Annual Temperature Variogram Model Fit

##Variogram Fit using sample variogram 2 and the initial model
temp.vgm2 = fit.variogram(temp.var, temp.vgm1)

##Plot it
plot(temp.var, temp.vgm2, main = "Final Temperature Variogram")

##Note: The original sample variogram (temp.var) fits a bit better than temp.var2.

1.4 Annual Temperature Interpolation (Kriging)

##Kriging 
temp.pred.or = krige(tann ~ 1,
                    locations = ortann,
                    newdata = orgrid.dem,
                    model = temp.vgm2,
                    nmax = 40)
## [using ordinary kriging]
##Visualize
##Color scheme
my.pal2 = brewer.pal(n = 9, "YlOrRd")

##Oregon Geometry
orotl.geom = st_geometry(orotl)

plot(temp.pred.or,
     main = "Interpolated Oregon Temperature Values",
     col = my.pal2,
     reset = FALSE)

plot(orotl.geom,
     add = TRUE)

##Use var1.var to plot the residuals
plot(temp.pred.or["var1.var"],
     main = "Temperature Prediction Error",
     reset = FALSE)
plot(orotl.geom,
     add = TRUE)

1.5 Cross-validation

temp.cv.or = krige.cv(tann ~ 1,
                     locations = ortann,
                     model = temp.vgm2,
                     nmax = 40,
                     nfold = 5)

head(temp.cv.or)
## Simple feature collection with 6 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -123.883 ymin: 42.217 xmax: -117.817 ymax: 46.15
## Geodetic CRS:  WGS 84
##   var1.pred var1.var observed   residual     zscore fold
## 1  8.285806 1.038094      9.6  1.3141938  1.2898555    1
## 2 10.836866 1.217973     12.5  1.6631339  1.5069831    5
## 3 10.712722 1.175961     11.1  0.3872775  0.3571295    5
## 4 10.733468 1.035825     10.3 -0.4334676 -0.4259056    1
## 5  9.527037 1.312671      7.6 -1.9270372 -1.6819466    1
## 6  9.668535 1.372288      8.4 -1.2685353 -1.0828789    1
##                  geometry
## 1 POINT (-120.717 44.917)
## 2   POINT (-120.2 45.717)
## 3 POINT (-122.717 42.217)
## 4  POINT (-123.883 46.15)
## 5 POINT (-117.817 44.833)
## 6 POINT (-117.833 44.783)
##RMSEP
temp.rmse = sqrt(mean(temp.cv.or$residual^2))

temp.rmse
## [1] 1.322844
#Prediction r^2
temp.rsquared = cor(temp.cv.or$observed, temp.cv.or$var1.pred)^2

temp.rsquared
## [1] 0.5124818

Note:

The cross-validation RMSE is 1.3228443 and the \(r^{2}\) is 0.5124818.