AOSD - Exercise # 05

1) Read vignette gstat in package gstat. Copy and past the commands from the tutorial and report whether you succeed in doing this.

Spatial data frames

library(sp)
library(gstat)
data(meuse)
names(meuse)
##  [1] "x"       "y"       "cadmium" "copper"  "lead"    "zinc"    "elev"   
##  [8] "dist"    "om"      "ffreq"   "soil"    "lime"    "landuse" "dist.m"
class(meuse)
## [1] "data.frame"

Function coordinates promotes the data.frame meuse into a SpatialPointsDataFrame:

coordinates(meuse) = ~x + y
class(meuse)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
summary(meuse)
## Object of class SpatialPointsDataFrame
## Coordinates:
##      min    max
## x 178605 181390
## y 329714 333611
## Is projected: NA 
## proj4string : [NA]
## Number of points: 155
## Data attributes:
##     cadmium          copper           lead            zinc     
##  Min.   : 0.20   Min.   : 14.0   Min.   : 37.0   Min.   : 113  
##  1st Qu.: 0.80   1st Qu.: 23.0   1st Qu.: 72.5   1st Qu.: 198  
##  Median : 2.10   Median : 31.0   Median :123.0   Median : 326  
##  Mean   : 3.25   Mean   : 40.3   Mean   :153.4   Mean   : 470  
##  3rd Qu.: 3.85   3rd Qu.: 49.5   3rd Qu.:207.0   3rd Qu.: 674  
##  Max.   :18.10   Max.   :128.0   Max.   :654.0   Max.   :1839  
##                                                                
##       elev            dist              om        ffreq  soil   lime   
##  Min.   : 5.18   Min.   :0.0000   Min.   : 1.00   1:84   1:97   0:111  
##  1st Qu.: 7.55   1st Qu.:0.0757   1st Qu.: 5.30   2:48   2:46   1: 44  
##  Median : 8.18   Median :0.2118   Median : 6.90   3:23   3:12          
##  Mean   : 8.16   Mean   :0.2400   Mean   : 7.48                        
##  3rd Qu.: 8.96   3rd Qu.:0.3641   3rd Qu.: 9.00                        
##  Max.   :10.52   Max.   :0.8804   Max.   :17.00                        
##                                   NA's   :2                            
##     landuse       dist.m    
##  W      :50   Min.   :  10  
##  Ah     :39   1st Qu.:  80  
##  Am     :22   Median : 270  
##  Fw     :10   Mean   : 290  
##  Ab     : 8   3rd Qu.: 450  
##  (Other):25   Max.   :1000  
##  NA's   : 1

coordinates, when not assigned, retrieves the spatial coordinates from a SpatialPointsDataFrame.

coordinates(meuse)[1:5, ]
##        x      y
## 1 181072 333611
## 2 181025 333558
## 3 181165 333537
## 4 181298 333484
## 5 181307 333330
bubble(meuse, "zinc", col = c("#00ff0088", "#00ff0088"), main = "zinc concentrations (ppm)")

plot of chunk unnamed-chunk-3

Spatial data on a regular grid

data(meuse.grid)
summary(meuse.grid)
##        x                y              part.a          part.b     
##  Min.   :178460   Min.   :329620   Min.   :0.000   Min.   :0.000  
##  1st Qu.:179420   1st Qu.:330460   1st Qu.:0.000   1st Qu.:0.000  
##  Median :179980   Median :331220   Median :0.000   Median :1.000  
##  Mean   :179985   Mean   :331348   Mean   :0.399   Mean   :0.601  
##  3rd Qu.:180580   3rd Qu.:332140   3rd Qu.:1.000   3rd Qu.:1.000  
##  Max.   :181540   Max.   :333740   Max.   :1.000   Max.   :1.000  
##       dist       soil     ffreq   
##  Min.   :0.000   1:1665   1: 779  
##  1st Qu.:0.119   2:1084   2:1335  
##  Median :0.272   3: 354   3: 989  
##  Mean   :0.297                    
##  3rd Qu.:0.440                    
##  Max.   :0.993
class(meuse.grid)
## [1] "data.frame"
coordinates(meuse.grid) = ~x + y
class(meuse.grid)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
gridded(meuse.grid) = TRUE
class(meuse.grid)
## [1] "SpatialPixelsDataFrame"
## attr(,"package")
## [1] "sp"
image(meuse.grid["dist"])

plot of chunk unnamed-chunk-4

# title('distance to river (red = 0)')

If you compare the bubble plot of zinc measurements with the map with distances to the river, it becomes evident that the larger concentrations are measured at locations close to the river.

zinc.idw = krige(zinc ~ 1, meuse, meuse.grid)
## [inverse distance weighted interpolation]
class(zinc.idw)
## [1] "SpatialPixelsDataFrame"
## attr(,"package")
## [1] "sp"
spplot(zinc.idw["var1.pred"], main = "zinc inverse distance weighted interpolations")

plot of chunk unnamed-chunk-5

Relationship can be linearized by log-transforming the zinc concentrations, and taking the square root of distance to the river:

plot(log(zinc) ~ sqrt(dist), meuse)
abline(lm(log(zinc) ~ sqrt(dist), meuse))

plot of chunk unnamed-chunk-6

Variograms

Function variogram takes a formula as its first argument: log(zinc)~1 means that we assume a constant trend for the variable log(zinc):

lzn.vgm = variogram(log(zinc) ~ 1, meuse)
lzn.vgm
##     np    dist  gamma dir.hor dir.ver   id
## 1   57   79.29 0.1234       0       0 var1
## 2  299  163.97 0.2162       0       0 var1
## 3  419  267.36 0.3028       0       0 var1
## 4  457  372.74 0.4121       0       0 var1
## 5  547  478.48 0.4634       0       0 var1
## 6  533  585.34 0.5647       0       0 var1
## 7  574  693.15 0.5690       0       0 var1
## 8  564  796.18 0.6187       0       0 var1
## 9  589  903.15 0.6471       0       0 var1
## 10 543 1011.29 0.6916       0       0 var1
## 11 500 1117.86 0.7034       0       0 var1
## 12 477 1221.33 0.6039       0       0 var1
## 13 452 1329.16 0.6517       0       0 var1
## 14 457 1437.26 0.5665       0       0 var1
## 15 415 1543.20 0.5748       0       0 var1
lzn.fit = fit.variogram(lzn.vgm, model = vgm(1, "Sph", 900, 1))
lzn.fit
##   model   psill range
## 1   Nug 0.05066     0
## 2   Sph 0.59061   897
plot(lzn.vgm, lzn.fit)

plot of chunk unnamed-chunk-7

Instead of the constant mean, denoted by ~1, we can specify a mean function, e.g. using ~sqrt(dist) as a predictor variable:

lznr.vgm = variogram(log(zinc) ~ sqrt(dist), meuse)
lznr.fit = fit.variogram(lznr.vgm, model = vgm(1, "Exp", 300, 1))
lznr.fit
##   model   psill range
## 1   Nug 0.05712   0.0
## 2   Exp 0.17642 340.3

In this case, the variogram of residuals with respect to a fitted mean function are shown. Residuals were calculated using ordinary least squares.

plot(lznr.vgm, lznr.fit)

plot of chunk unnamed-chunk-9

Kriging

lzn.kriged = krige(log(zinc) ~ 1, meuse, meuse.grid, model = lzn.fit)
## [using ordinary kriging]
spplot(lzn.kriged["var1.pred"])

plot of chunk unnamed-chunk-10

Conditional simulation

lzn.condsim = krige(log(zinc) ~ 1, meuse, meuse.grid, model = lzn.fit, nmax = 30, 
    nsim = 4)
## drawing 4 GLS realisations of beta...
## [using conditional Gaussian simulation]
spplot(lzn.condsim, main = "three conditional simulations")

plot of chunk unnamed-chunk-11

For UK/residuals:

lzn.condsim2 = krige(log(zinc) ~ sqrt(dist), meuse, meuse.grid, model = lznr.fit, 
    nmax = 30, nsim = 4)
## drawing 4 GLS realisations of beta...
## [using conditional Gaussian simulation]
spplot(lzn.condsim2, main = "three UK conditional simulations")

plot of chunk unnamed-chunk-12

Directional variograms

Calculate a directional sample variogram, where directions are binned by direction angle alone. For two point pairs, Z(s) and Z(s+h), the separation vector is h, and it has a direction. Here, we will classify directions into four direction intervals:

lzn.dir = variogram(log(zinc) ~ 1, meuse, alpha = c(0, 45, 90, 135))
lzndir.fit = vgm(0.59, "Sph", 1200, 0.05, anis = c(45, 0.4))
plot(lzn.dir, lzndir.fit, as.table = TRUE)

plot of chunk unnamed-chunk-13

#

lznr.dir = variogram(log(zinc) ~ sqrt(dist), meuse, alpha = c(0, 45, 90, 135))
plot(lznr.dir, lznr.fit, as.table = TRUE)

plot of chunk unnamed-chunk-14

Variogram maps

vgm.map = variogram(log(zinc) ~ sqrt(dist), meuse, cutoff = 1500, width = 100, 
    map = TRUE)
plot(vgm.map, threshold = 5)

plot of chunk unnamed-chunk-15

The threshold assures that only semivariogram map values based on at least 5 point pairs are shown, removing too noisy estimation.

Cross variography

Fitting a linear model of coregionalization:

g = gstat(NULL, "log(zn)", log(zinc) ~ sqrt(dist), meuse)
g = gstat(g, "log(cd)", log(cadmium) ~ sqrt(dist), meuse)
g = gstat(g, "log(pb)", log(lead) ~ sqrt(dist), meuse)
g = gstat(g, "log(cu)", log(copper) ~ sqrt(dist), meuse)
v = variogram(g)
g = gstat(g, model = vgm(1, "Exp", 300, 1), fill.all = TRUE)
g.fit = fit.lmc(v, g)
g.fit
## data:
## log(zn) : formula = log(zinc)`~`sqrt(dist) ; data dim = 155 x 12
## log(cd) : formula = log(cadmium)`~`sqrt(dist) ; data dim = 155 x 12
## log(pb) : formula = log(lead)`~`sqrt(dist) ; data dim = 155 x 12
## log(cu) : formula = log(copper)`~`sqrt(dist) ; data dim = 155 x 12
## variograms:
##                    model   psill range
## log(zn)[1]           Nug 0.05142     0
## log(zn)[2]           Exp 0.17556   300
## log(cd)[1]           Nug 0.39997     0
## log(cd)[2]           Exp 0.47894   300
## log(pb)[1]           Nug 0.04771     0
## log(pb)[2]           Exp 0.21323   300
## log(cu)[1]           Nug 0.04578     0
## log(cu)[2]           Exp 0.07827   300
## log(zn).log(cd)[1]   Nug 0.09191     0
## log(zn).log(cd)[2]   Exp 0.24542   300
## log(zn).log(pb)[1]   Nug 0.04528     0
## log(zn).log(pb)[2]   Exp 0.18407   300
## log(cd).log(pb)[1]   Nug 0.06425     0
## log(cd).log(pb)[2]   Exp 0.25525   300
## log(zn).log(cu)[1]   Nug 0.02913     0
## log(zn).log(cu)[2]   Exp 0.10439   300
## log(cd).log(cu)[1]   Nug 0.09442     0
## log(cd).log(cu)[2]   Exp 0.13074   300
## log(pb).log(cu)[1]   Nug 0.02370     0
## log(pb).log(cu)[2]   Exp 0.10268   300
plot(v, g.fit)

plot of chunk unnamed-chunk-16

vgm.map = variogram(g, cutoff = 1500, width = 100, map = TRUE)
plot(vgm.map, threshold = 5, col.regions = bpy.colors(), xlab = "", ylab = "")

plot of chunk unnamed-chunk-16

2) Carry out an inverse distance weighted interpolation on the cadmium variable of the meuse dataset in package sp, to the grid points of meuse.grid in the same package. Report the R commands you used, and show the map of the interpolated cadmium values.

# library(sp,gstat) coordinates(meuse) = ~x+y coordinates(meuse.grid) = ~x+y
gridded(meuse.grid) = TRUE
cadmium.idw = krige(cadmium ~ 1, meuse, meuse.grid)
## [inverse distance weighted interpolation]
spplot(cadmium.idw["var1.pred"], main = "cadmium inverse distance weighted interpolations")

plot of chunk unnamed-chunk-17

3) Carry out a cross validation of cadmium in the meuse data set, (using function krige.c in package gstat) and show the histogram of the cross validation residuals. Explain how these residuals were computed. Carry out the same analysis, but this time for log(cadmium); give a histogram and explain the differences found to the first histogram.

m <- vgm(0.59, "Sph", 874, 0.04)
cd.kcv1 <- krige.cv(cadmium ~ 1, meuse, vgm(0.59, "Sph", 874, 0.04), nmax = 40, 
    nfold = 5)
bubble(cd.kcv1, "residual", main = "Cadmium: 5-fold CV residuals")

plot of chunk unnamed-chunk-18

hist(cd.kcv1$residual, main = paste("Cadmium Histogram: 5-fold CV residuals"))

plot of chunk unnamed-chunk-18


cd.kcv2 <- krige.cv(log(cadmium) ~ 1, meuse, m, nmax = 40, nfold = 5)
bubble(cd.kcv2, "residual", main = "Log-Cadmium: 5-fold CV residuals")

plot of chunk unnamed-chunk-18

hist(cd.kcv2$residual, main = paste("Log-Cadmium Histogram: 5-fold CV residuals"))

plot of chunk unnamed-chunk-18

The residuals are the prediction errors calculated by the difference between observed and predicted cadmium values.