##Set working directory
setwd("~/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/lab12")
##Load required libraries
library(ggplot2)
library(gstat)
library(RColorBrewer)
library(sf)
library(stars)
library(viridis)
swiss <- read.csv("../datafiles/swiss_ppt/swiss_ppt.csv")
head(swiss)
## id x y ppt elev
## 1 287 2688674 1292361 18.4 754
## 2 292 2692432 1289049 12.1 516
## 3 259 2678227 1288974 13.8 561
## 4 319 2701430 1285778 12.6 412
## 5 257 2676752 1283409 15.6 419
## 6 302 2696966 1282408 10.0 423
##Set as simple feature. Use EPSG code 2056 for a Lambert projection coordinate reference system.
swiss.sf <- st_as_sf(swiss,
coords = c("x", "y"),
crs = 2056)
##Load the DEM for Switzerland as a raster object
swiss.dem <- read_stars("../datafiles/swiss_ppt//swiss_dem.asc")
##Set to Lambert projection
st_crs(swiss.dem) <- 2056
##Visualize
plot(swiss.dem)
##Load the countries shapefile
countries <- st_read("../datafiles/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp")
## Reading layer `ne_50m_admin_0_countries' from data source
## `/Users/davidleydet/Desktop/University of Utah PhD /Course Work/Fall 2022 Semester/GEOG6000_Data Analysis/datafiles/ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 241 features and 94 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## Geodetic CRS: WGS 84
##Extract Switzerland's Border
swiss.bord <- subset(countries, NAME == "Switzerland")
##Transform to the Lambert Projection
swiss.bord <- st_transform(swiss.bord, 2056)
##Visualize Precipitation
plot(swiss.sf["ppt"],
reset = FALSE,
pch = 16)
plot(st_geometry(swiss.bord), add = TRUE)
ggplot() +
geom_sf(data = swiss.bord) +
geom_sf(data = swiss.sf, aes(col = ppt), size = 1.5, alpha = 0.6) +
scale_color_gradient(low = "lightblue", high = "darkblue") +
theme_bw()
##Read in Meuse Point Data
meuse <- st_read("../datafiles/meuse/meuse.shp", quiet = TRUE)
##Read in Mease River Shape
meuseriv <- st_read("../datafiles/meuse/meuseriv.shp", quiet = TRUE)
##Plot the sf object using the variable zinc. Add the river geometry as well
plot(meuse["zinc"],
pch = 16,
cex = 1,
reset = FALSE)
plot(meuseriv,
add = TRUE,
col = NA)
hist(meuse$zinc,
xlab = "Zinc")
##Higher concentrations closer to the river.
##It is skewed left. Log transform the zinc values to make them more normally distributed.
##Log Transform
meuse$lzinc <- log(meuse$zinc)
hist(meuse$lzinc)
##Visualize
plot(meuse["lzinc"],
pch = 16,
cex = 1.25,
reset = FALSE)
plot(meuseriv,
add = TRUE,
col = NA)
From Simon’s Lab:
We will now use the variogram() function to build a sample variogram for the log-transformed data. We start here by loading the gstat library, then build the sample variogram. This uses the usual R model syntax, which we will later use to include covariates. Here we simply use the formula lzinc ~ 1, which indicates that we are assuming the mean log zinc value does not vary across our region. Finally we plot the variogram, adding an argument to show the number of pairs of points used to calculate each point:
##Build the Variogram
mzinc.var <- variogram(lzinc ~ 1, data = meuse)
##Plot the Variogram. Display the number of pairs by using the syntax plot.numbers = TRUE
plot(mzinc.var, plot.numbers = TRUE, pch = '+')
##Build variogram 2
##Cutoff = the maximum distance over which we will consider pairwise differences between points.
##Width = the size of each lag (bin width)
mzinc.var2 <- variogram(lzinc ~ 1,
data = meuse,
cutoff = 1500,
width = 100)
##Plot the variogram
plot(mzinc.var2, plot.numbers = TRUE, pch = '+')
##Build the variogram
ppt.var <- variogram(ppt ~ 1,
swiss.sf,
cutoff = 100000,
width = 8000)
##Plot it
plot(ppt.var, plot.numbers = TRUE)
##Look at the set of standard parametric models that can be used:
vgm()
## short long
## 1 Nug Nug (nugget)
## 2 Exp Exp (exponential)
## 3 Sph Sph (spherical)
## 4 Gau Gau (gaussian)
## 5 Exc Exclass (Exponential class/stable)
## 6 Mat Mat (Matern)
## 7 Ste Mat (Matern, M. Stein's parameterization)
## 8 Cir Cir (circular)
## 9 Lin Lin (linear)
## 10 Bes Bes (bessel)
## 11 Pen Pen (pentaspherical)
## 12 Per Per (periodic)
## 13 Wav Wav (wave)
## 14 Hol Hol (hole)
## 15 Log Log (logarithmic)
## 16 Pow Pow (power)
## 17 Spl Spl (spline)
## 18 Leg Leg (Legendre)
## 19 Err Err (Measurement error)
## 20 Int Int (Intercept)
From Simon’s Lab:
To fit a model, it is necessary to create a first model by hand, then use the fit.variogram() function, which uses a weighted least squares method to fit this to the sample variogram. The first model requires you to specify:
Here we specify these as separate variables, then use the vgm() function to build the initial model. As in the previous section, we will start by doing this for the Meuse dataset. Some suggested values are given in the code below, but it is worth replotting the original variogram, to see how these values compare to the sample variable.
##Create variables for the nugget, range, and sill
modNugget <- 0.05
modRange <- 1100
modSill <- 0.65
##Build the Model using a circular model approach
mzinc.vgm1 <- vgm(psill = modSill,
model = "Cir",
range = modRange,
nugget = modNugget)
##Visualize the Model
plot(mzinc.var, mzinc.vgm1, main = "Meuse zinc variogram")
##The model only fits approximately to the sample variogram, so we can now use an iterative weighted OLS method (fit.variogram()) to fit the model variogram to the sample variogram.
mzinc.vgm2 <- fit.variogram(mzinc.var, mzinc.vgm1)
plot(mzinc.var, mzinc.vgm2, main = "Meuse zinc variogram")
mzinc.vgm2
## model psill range
## 1 Nug 0.05605164 0.0000
## 2 Cir 0.57944767 779.3348
Nearly 0.6 (partial sill) is explained by the spatial error of the data set.
The range or plateau is the distance of the variogram plateau.
##Set the values
modnug = 10
modrange2 = 75000
modsill2 = 140
##Build the model
ppt.vgm1 = vgm(psill = modsill2,
model = "Sph",
range = modrange2,
nugget = modnug)
##Fit the model to the data
ppt.vgm2 = fit.variogram(ppt.var, ppt.vgm1)
##Visualize it
plot(ppt.var,
ppt.vgm2,
main = "Swiss Precipitation Variogram")
ppt.vgm2
## model psill range
## 1 Nug 2.02863 0.00
## 2 Sph 151.02202 91760.34
Use the krige() function to perform spatial prediction (interpolation) using ordinary kriging as a default. The following inputs are needed:
A model formula specifying the variable to be predicted. This can be expanded to include covariates.
The spatial object with the observed values
A spatial object with the coordinates to be used for prediction
The fitted variogram model
An optional parameter that limits the number of points to be used in predicting any given location
##Kriging
ppt.pred.ok = krige(ppt ~ 1,
locations = swiss.sf,
newdata = swiss.dem,
model = ppt.vgm2,
nmax = 40)
## [using ordinary kriging]
names(ppt.pred.ok)
## [1] "var1.pred" "var1.var"
##Predictions are in var1.pred
##Prediction Errors are in var1.var
##Set a color palette
my.pal = brewer.pal(n = 11, "Blues")
## Warning in brewer.pal(n = 11, "Blues"): n too large, allowed maximum for palette Blues is 9
## Returning the palette you asked for with that many colors
plot(ppt.pred.ok,
main = "Interpolated Values",
col = my.pal)
plot(ppt.pred.ok["var1.var"],
main = "Interpolated Value Error")
##Errors are typically smaller near known points
From Simon’s Lab:
To assess the performance of our kriging model, we use a n-fold cross-validation (also called k-fold). This splits the data into n subsets, then iteratively predicts each subset from the other n−1 sets. The krige.cv() function performs the cross-validation: this takes the same arguments as the krige() function, but we leave out the object with coordinates for new predictions, and specify nfold, the number of subsets to be used. You will see some warnings about the projection. You can safely ignore these.
ppt.cv.ok = krige.cv(ppt ~ 1,
locations = swiss.sf,
model = ppt.vgm2,
nmax = 40,
nfold = 5)
head(ppt.cv.ok)
## Simple feature collection with 6 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2676752 ymin: 1282408 xmax: 2701430 ymax: 1292361
## Projected CRS: CH1903+ / LV95
## var1.pred var1.var observed residual zscore fold
## 1 12.88014 21.70097 18.4 5.5198568 1.1849177 2
## 2 14.92293 16.67446 12.1 -2.8229343 -0.6913132 3
## 3 16.91949 23.44264 13.8 -3.1194886 -0.6442881 5
## 4 11.97504 21.40840 12.6 0.6249609 0.1350706 1
## 5 13.37531 13.88293 15.6 2.2246932 0.5970759 3
## 6 11.60296 17.37992 10.0 -1.6029563 -0.3845013 5
## geometry
## 1 POINT (2688674 1292361)
## 2 POINT (2692432 1289049)
## 3 POINT (2678227 1288974)
## 4 POINT (2701430 1285778)
## 5 POINT (2676752 1283409)
## 6 POINT (2696966 1282408)
The output of this function is a spatial object with the following variables:
##RMSEP
sqrt(mean(ppt.cv.ok$residual^2))
## [1] 4.823913
#Prediction r^2
cor(ppt.cv.ok$observed, ppt.cv.ok$var1.pred)^2
## [1] 0.8158796
Note:
sp::bubble(as_Spatial(ppt.cv.ok)[,"residual"], pch = 16)
##The map shows little pattern, which is good. Any systematic under or over estimation would suggest that there is a trend or other structural component which is not being captured by the model.
##Residual Plot
plot(ppt.cv.ok$var1.pred, ppt.cv.ok$residual,
xlab = 'PPT Predicted Values',
ylab = 'PPT Residuals')
abline(h = 0, lty = 2)
# Oregon boundaries
orotl <- st_read("../datafiles/oregon/orotl.shp", quiet = TRUE)
st_crs(orotl) <- 4326
# Oregon temperatures
ortann <- st_read("../datafiles/oregon/oregontann.shp", quiet = TRUE)
st_crs(ortann) <- 4326
# Oregon DEM file
orgrid <- st_read("../datafiles/oregon/orgrid.shp", quiet = TRUE)
st_crs(orgrid) <- 4326
##Rasterize the point data of the orgrid file into a raster format (interpolated)
orgrid.dem <- st_rasterize(orgrid, dx = 0.1667, dy = 0.1667)
##plot
plot(orgrid.dem, reset = FALSE)
##Add the temperature data
plot(ortann["tann"],
add = TRUE,
pch = 16,
cex = 1.5)
##Build the Variogram
temp.var = variogram(tann ~ 1, data = ortann)
##Plot the Variogram. Display the number of pairs by using the syntax plot.numbers = TRUE
plot(temp.var, plot.numbers = TRUE, pch = '+')
##Cutoff = distance cutoff
##Width = binwidth
temp.var2 <- variogram(tann ~ 1,
data = ortann,
cutoff = 225,
width = 20)
##Plot the variogram
plot(temp.var2, plot.numbers = TRUE, pch = '+')
##Build the Model using a circular model approach
temp.vgm1 <- vgm(psill = 4,
model = "Cir",
range = 180,
nugget = 0.25)
##Visualize the Model
plot(temp.var2, temp.vgm1, main = "Oregon Temperature Variogram")
Note:
##Variogram Fit using sample variogram 2 and the initial model
temp.vgm2 = fit.variogram(temp.var, temp.vgm1)
##Plot it
plot(temp.var, temp.vgm2, main = "Final Temperature Variogram")
##Note: The original sample variogram (temp.var) fits a bit better than temp.var2.
##Kriging
temp.pred.or = krige(tann ~ 1,
locations = ortann,
newdata = orgrid.dem,
model = temp.vgm2,
nmax = 40)
## [using ordinary kriging]
##Visualize
##Color scheme
my.pal2 = brewer.pal(n = 9, "YlOrRd")
##Oregon Geometry
orotl.geom = st_geometry(orotl)
plot(temp.pred.or,
main = "Interpolated Oregon Temperature Values",
col = my.pal2,
reset = FALSE)
plot(orotl.geom,
add = TRUE)
##Use var1.var to plot the residuals
plot(temp.pred.or["var1.var"],
main = "Temperature Prediction Error",
reset = FALSE)
plot(orotl.geom,
add = TRUE)
temp.cv.or = krige.cv(tann ~ 1,
locations = ortann,
model = temp.vgm2,
nmax = 40,
nfold = 5)
head(temp.cv.or)
## Simple feature collection with 6 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -123.883 ymin: 42.217 xmax: -117.817 ymax: 46.15
## Geodetic CRS: WGS 84
## var1.pred var1.var observed residual zscore fold
## 1 8.285806 1.038094 9.6 1.3141938 1.2898555 1
## 2 10.836866 1.217973 12.5 1.6631339 1.5069831 5
## 3 10.712722 1.175961 11.1 0.3872775 0.3571295 5
## 4 10.733468 1.035825 10.3 -0.4334676 -0.4259056 1
## 5 9.527037 1.312671 7.6 -1.9270372 -1.6819466 1
## 6 9.668535 1.372288 8.4 -1.2685353 -1.0828789 1
## geometry
## 1 POINT (-120.717 44.917)
## 2 POINT (-120.2 45.717)
## 3 POINT (-122.717 42.217)
## 4 POINT (-123.883 46.15)
## 5 POINT (-117.817 44.833)
## 6 POINT (-117.833 44.783)
##RMSEP
temp.rmse = sqrt(mean(temp.cv.or$residual^2))
temp.rmse
## [1] 1.322844
#Prediction r^2
temp.rsquared = cor(temp.cv.or$observed, temp.cv.or$var1.pred)^2
temp.rsquared
## [1] 0.5124818
Note:
The cross-validation RMSE is 1.3228443 and the \(r^{2}\) is 0.5124818.