Spatial data frames
library(sp)
library(gstat)
data(meuse)
names(meuse)
## [1] "x" "y" "cadmium" "copper" "lead" "zinc" "elev"
## [8] "dist" "om" "ffreq" "soil" "lime" "landuse" "dist.m"
class(meuse)
## [1] "data.frame"
Function coordinates promotes the data.frame meuse into a SpatialPointsDataFrame:
coordinates(meuse) = ~x + y
class(meuse)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
summary(meuse)
## Object of class SpatialPointsDataFrame
## Coordinates:
## min max
## x 178605 181390
## y 329714 333611
## Is projected: NA
## proj4string : [NA]
## Number of points: 155
## Data attributes:
## cadmium copper lead zinc
## Min. : 0.20 Min. : 14.0 Min. : 37.0 Min. : 113
## 1st Qu.: 0.80 1st Qu.: 23.0 1st Qu.: 72.5 1st Qu.: 198
## Median : 2.10 Median : 31.0 Median :123.0 Median : 326
## Mean : 3.25 Mean : 40.3 Mean :153.4 Mean : 470
## 3rd Qu.: 3.85 3rd Qu.: 49.5 3rd Qu.:207.0 3rd Qu.: 674
## Max. :18.10 Max. :128.0 Max. :654.0 Max. :1839
##
## elev dist om ffreq soil lime
## Min. : 5.18 Min. :0.0000 Min. : 1.00 1:84 1:97 0:111
## 1st Qu.: 7.55 1st Qu.:0.0757 1st Qu.: 5.30 2:48 2:46 1: 44
## Median : 8.18 Median :0.2118 Median : 6.90 3:23 3:12
## Mean : 8.16 Mean :0.2400 Mean : 7.48
## 3rd Qu.: 8.96 3rd Qu.:0.3641 3rd Qu.: 9.00
## Max. :10.52 Max. :0.8804 Max. :17.00
## NA's :2
## landuse dist.m
## W :50 Min. : 10
## Ah :39 1st Qu.: 80
## Am :22 Median : 270
## Fw :10 Mean : 290
## Ab : 8 3rd Qu.: 450
## (Other):25 Max. :1000
## NA's : 1
coordinates, when not assigned, retrieves the spatial coordinates from a SpatialPointsDataFrame.
coordinates(meuse)[1:5, ]
## x y
## 1 181072 333611
## 2 181025 333558
## 3 181165 333537
## 4 181298 333484
## 5 181307 333330
bubble(meuse, "zinc", col = c("#00ff0088", "#00ff0088"), main = "zinc concentrations (ppm)")
Spatial data on a regular grid
data(meuse.grid)
summary(meuse.grid)
## x y part.a part.b
## Min. :178460 Min. :329620 Min. :0.000 Min. :0.000
## 1st Qu.:179420 1st Qu.:330460 1st Qu.:0.000 1st Qu.:0.000
## Median :179980 Median :331220 Median :0.000 Median :1.000
## Mean :179985 Mean :331348 Mean :0.399 Mean :0.601
## 3rd Qu.:180580 3rd Qu.:332140 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :181540 Max. :333740 Max. :1.000 Max. :1.000
## dist soil ffreq
## Min. :0.000 1:1665 1: 779
## 1st Qu.:0.119 2:1084 2:1335
## Median :0.272 3: 354 3: 989
## Mean :0.297
## 3rd Qu.:0.440
## Max. :0.993
class(meuse.grid)
## [1] "data.frame"
coordinates(meuse.grid) = ~x + y
class(meuse.grid)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
gridded(meuse.grid) = TRUE
class(meuse.grid)
## [1] "SpatialPixelsDataFrame"
## attr(,"package")
## [1] "sp"
image(meuse.grid["dist"])
# title('distance to river (red = 0)')
If you compare the bubble plot of zinc measurements with the map with distances to the river, it becomes evident that the larger concentrations are measured at locations close to the river.
zinc.idw = krige(zinc ~ 1, meuse, meuse.grid)
## [inverse distance weighted interpolation]
class(zinc.idw)
## [1] "SpatialPixelsDataFrame"
## attr(,"package")
## [1] "sp"
spplot(zinc.idw["var1.pred"], main = "zinc inverse distance weighted interpolations")
Relationship can be linearized by log-transforming the zinc concentrations, and taking the square root of distance to the river:
plot(log(zinc) ~ sqrt(dist), meuse)
abline(lm(log(zinc) ~ sqrt(dist), meuse))
Variograms
Function variogram takes a formula as its first argument: log(zinc)~1 means that we assume a constant trend for the variable log(zinc):
lzn.vgm = variogram(log(zinc) ~ 1, meuse)
lzn.vgm
## np dist gamma dir.hor dir.ver id
## 1 57 79.29 0.1234 0 0 var1
## 2 299 163.97 0.2162 0 0 var1
## 3 419 267.36 0.3028 0 0 var1
## 4 457 372.74 0.4121 0 0 var1
## 5 547 478.48 0.4634 0 0 var1
## 6 533 585.34 0.5647 0 0 var1
## 7 574 693.15 0.5690 0 0 var1
## 8 564 796.18 0.6187 0 0 var1
## 9 589 903.15 0.6471 0 0 var1
## 10 543 1011.29 0.6916 0 0 var1
## 11 500 1117.86 0.7034 0 0 var1
## 12 477 1221.33 0.6039 0 0 var1
## 13 452 1329.16 0.6517 0 0 var1
## 14 457 1437.26 0.5665 0 0 var1
## 15 415 1543.20 0.5748 0 0 var1
lzn.fit = fit.variogram(lzn.vgm, model = vgm(1, "Sph", 900, 1))
lzn.fit
## model psill range
## 1 Nug 0.05066 0
## 2 Sph 0.59061 897
plot(lzn.vgm, lzn.fit)
Instead of the constant mean, denoted by ~1, we can specify a mean function, e.g. using ~sqrt(dist) as a predictor variable:
lznr.vgm = variogram(log(zinc) ~ sqrt(dist), meuse)
lznr.fit = fit.variogram(lznr.vgm, model = vgm(1, "Exp", 300, 1))
lznr.fit
## model psill range
## 1 Nug 0.05712 0.0
## 2 Exp 0.17642 340.3
In this case, the variogram of residuals with respect to a fitted mean function are shown. Residuals were calculated using ordinary least squares.
plot(lznr.vgm, lznr.fit)
Kriging
lzn.kriged = krige(log(zinc) ~ 1, meuse, meuse.grid, model = lzn.fit)
## [using ordinary kriging]
spplot(lzn.kriged["var1.pred"])
Conditional simulation
lzn.condsim = krige(log(zinc) ~ 1, meuse, meuse.grid, model = lzn.fit, nmax = 30,
nsim = 4)
## drawing 4 GLS realisations of beta...
## [using conditional Gaussian simulation]
spplot(lzn.condsim, main = "three conditional simulations")
For UK/residuals:
lzn.condsim2 = krige(log(zinc) ~ sqrt(dist), meuse, meuse.grid, model = lznr.fit,
nmax = 30, nsim = 4)
## drawing 4 GLS realisations of beta...
## [using conditional Gaussian simulation]
spplot(lzn.condsim2, main = "three UK conditional simulations")
Directional variograms
Calculate a directional sample variogram, where directions are binned by direction angle alone. For two point pairs, Z(s) and Z(s+h), the separation vector is h, and it has a direction. Here, we will classify directions into four direction intervals:
lzn.dir = variogram(log(zinc) ~ 1, meuse, alpha = c(0, 45, 90, 135))
lzndir.fit = vgm(0.59, "Sph", 1200, 0.05, anis = c(45, 0.4))
plot(lzn.dir, lzndir.fit, as.table = TRUE)
#
lznr.dir = variogram(log(zinc) ~ sqrt(dist), meuse, alpha = c(0, 45, 90, 135))
plot(lznr.dir, lznr.fit, as.table = TRUE)
Variogram maps
vgm.map = variogram(log(zinc) ~ sqrt(dist), meuse, cutoff = 1500, width = 100,
map = TRUE)
plot(vgm.map, threshold = 5)
The threshold assures that only semivariogram map values based on at least 5 point pairs are shown, removing too noisy estimation.
Cross variography
Fitting a linear model of coregionalization:
g = gstat(NULL, "log(zn)", log(zinc) ~ sqrt(dist), meuse)
g = gstat(g, "log(cd)", log(cadmium) ~ sqrt(dist), meuse)
g = gstat(g, "log(pb)", log(lead) ~ sqrt(dist), meuse)
g = gstat(g, "log(cu)", log(copper) ~ sqrt(dist), meuse)
v = variogram(g)
g = gstat(g, model = vgm(1, "Exp", 300, 1), fill.all = TRUE)
g.fit = fit.lmc(v, g)
g.fit
## data:
## log(zn) : formula = log(zinc)`~`sqrt(dist) ; data dim = 155 x 12
## log(cd) : formula = log(cadmium)`~`sqrt(dist) ; data dim = 155 x 12
## log(pb) : formula = log(lead)`~`sqrt(dist) ; data dim = 155 x 12
## log(cu) : formula = log(copper)`~`sqrt(dist) ; data dim = 155 x 12
## variograms:
## model psill range
## log(zn)[1] Nug 0.05142 0
## log(zn)[2] Exp 0.17556 300
## log(cd)[1] Nug 0.39997 0
## log(cd)[2] Exp 0.47894 300
## log(pb)[1] Nug 0.04771 0
## log(pb)[2] Exp 0.21323 300
## log(cu)[1] Nug 0.04578 0
## log(cu)[2] Exp 0.07827 300
## log(zn).log(cd)[1] Nug 0.09191 0
## log(zn).log(cd)[2] Exp 0.24542 300
## log(zn).log(pb)[1] Nug 0.04528 0
## log(zn).log(pb)[2] Exp 0.18407 300
## log(cd).log(pb)[1] Nug 0.06425 0
## log(cd).log(pb)[2] Exp 0.25525 300
## log(zn).log(cu)[1] Nug 0.02913 0
## log(zn).log(cu)[2] Exp 0.10439 300
## log(cd).log(cu)[1] Nug 0.09442 0
## log(cd).log(cu)[2] Exp 0.13074 300
## log(pb).log(cu)[1] Nug 0.02370 0
## log(pb).log(cu)[2] Exp 0.10268 300
plot(v, g.fit)
vgm.map = variogram(g, cutoff = 1500, width = 100, map = TRUE)
plot(vgm.map, threshold = 5, col.regions = bpy.colors(), xlab = "", ylab = "")
# library(sp,gstat) coordinates(meuse) = ~x+y coordinates(meuse.grid) = ~x+y
gridded(meuse.grid) = TRUE
cadmium.idw = krige(cadmium ~ 1, meuse, meuse.grid)
## [inverse distance weighted interpolation]
spplot(cadmium.idw["var1.pred"], main = "cadmium inverse distance weighted interpolations")
krige.c in package gstat) and show the histogram of the cross validation residuals. Explain how these residuals were computed. Carry out the same analysis, but this time for log(cadmium); give a histogram and explain the differences found to the first histogram.m <- vgm(0.59, "Sph", 874, 0.04)
cd.kcv1 <- krige.cv(cadmium ~ 1, meuse, vgm(0.59, "Sph", 874, 0.04), nmax = 40,
nfold = 5)
bubble(cd.kcv1, "residual", main = "Cadmium: 5-fold CV residuals")
hist(cd.kcv1$residual, main = paste("Cadmium Histogram: 5-fold CV residuals"))
cd.kcv2 <- krige.cv(log(cadmium) ~ 1, meuse, m, nmax = 40, nfold = 5)
bubble(cd.kcv2, "residual", main = "Log-Cadmium: 5-fold CV residuals")
hist(cd.kcv2$residual, main = paste("Log-Cadmium Histogram: 5-fold CV residuals"))
The residuals are the prediction errors calculated by the difference between observed and predicted cadmium values.