Analysis of Spatio-Temporal Data - Exercise 05

Christopher Stephan

1. Read vignette gstat in package gstat. Try to understand what is going on this tutorial.

# library(gstat) vignette('gstat')

Copy and paste the commands in this vignette from and run them in your R session. Report whether you succeeded in doing this.

# edit(vignette('gstat'))
### R code from vignette source
### '/home/christopher/R/i686-pc-linux-gnu-library/3.0/gstat/doc/gstat.Rnw'

################################################### code chunk number 1: gstat.Rnw:71-81
library(sp)
data(meuse)
class(meuse)
## [1] "data.frame"
names(meuse)
##  [1] "x"       "y"       "cadmium" "copper"  "lead"    "zinc"    "elev"   
##  [8] "dist"    "om"      "ffreq"   "soil"    "lime"    "landuse" "dist.m"
coordinates(meuse) = ~x + y
class(meuse)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
summary(meuse)
## Object of class SpatialPointsDataFrame
## Coordinates:
##      min    max
## x 178605 181390
## y 329714 333611
## Is projected: NA 
## proj4string : [NA]
## Number of points: 155
## Data attributes:
##     cadmium          copper           lead            zinc     
##  Min.   : 0.20   Min.   : 14.0   Min.   : 37.0   Min.   : 113  
##  1st Qu.: 0.80   1st Qu.: 23.0   1st Qu.: 72.5   1st Qu.: 198  
##  Median : 2.10   Median : 31.0   Median :123.0   Median : 326  
##  Mean   : 3.25   Mean   : 40.3   Mean   :153.4   Mean   : 470  
##  3rd Qu.: 3.85   3rd Qu.: 49.5   3rd Qu.:207.0   3rd Qu.: 674  
##  Max.   :18.10   Max.   :128.0   Max.   :654.0   Max.   :1839  
##                                                                
##       elev            dist              om        ffreq  soil   lime   
##  Min.   : 5.18   Min.   :0.0000   Min.   : 1.00   1:84   1:97   0:111  
##  1st Qu.: 7.55   1st Qu.:0.0757   1st Qu.: 5.30   2:48   2:46   1: 44  
##  Median : 8.18   Median :0.2118   Median : 6.90   3:23   3:12          
##  Mean   : 8.16   Mean   :0.2400   Mean   : 7.48                        
##  3rd Qu.: 8.96   3rd Qu.:0.3641   3rd Qu.: 9.00                        
##  Max.   :10.52   Max.   :0.8804   Max.   :17.00                        
##                                   NA's   :2                            
##     landuse       dist.m    
##  W      :50   Min.   :  10  
##  Ah     :39   1st Qu.:  80  
##  Am     :22   Median : 270  
##  Fw     :10   Mean   : 290  
##  Ab     : 8   3rd Qu.: 450  
##  (Other):25   Max.   :1000  
##  NA's   : 1
coordinates(meuse)[1:5, ]
##        x      y
## 1 181072 333611
## 2 181025 333558
## 3 181165 333537
## 4 181298 333484
## 5 181307 333330
bubble(meuse, "zinc", col = c("#00ff0088", "#00ff0088"), main = "zinc concentrations (ppm)")

plot of chunk unnamed-chunk-3



################################################### code chunk number 2: gstat.Rnw:86-87
print(bubble(meuse, "zinc", main = "zinc concentrations (ppm)"))

plot of chunk unnamed-chunk-3



################################################### code chunk number 3: gstat.Rnw:108-121
data(meuse.grid)
summary(meuse.grid)
##        x                y              part.a          part.b     
##  Min.   :178460   Min.   :329620   Min.   :0.000   Min.   :0.000  
##  1st Qu.:179420   1st Qu.:330460   1st Qu.:0.000   1st Qu.:0.000  
##  Median :179980   Median :331220   Median :0.000   Median :1.000  
##  Mean   :179985   Mean   :331348   Mean   :0.399   Mean   :0.601  
##  3rd Qu.:180580   3rd Qu.:332140   3rd Qu.:1.000   3rd Qu.:1.000  
##  Max.   :181540   Max.   :333740   Max.   :1.000   Max.   :1.000  
##       dist       soil     ffreq   
##  Min.   :0.000   1:1665   1: 779  
##  1st Qu.:0.119   2:1084   2:1335  
##  Median :0.272   3: 354   3: 989  
##  Mean   :0.297                    
##  3rd Qu.:0.440                    
##  Max.   :0.993
class(meuse.grid)
## [1] "data.frame"
coordinates(meuse.grid) = ~x + y
class(meuse.grid)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
gridded(meuse.grid) = TRUE
class(meuse.grid)
## [1] "SpatialPixelsDataFrame"
## attr(,"package")
## [1] "sp"
image(meuse.grid["dist"])
title("distance to river (red = 0)")

plot of chunk unnamed-chunk-3

library(gstat)
zinc.idw = krige(zinc ~ 1, meuse, meuse.grid)
## [inverse distance weighted interpolation]
class(zinc.idw)
## [1] "SpatialPixelsDataFrame"
## attr(,"package")
## [1] "sp"
spplot(zinc.idw["var1.pred"], main = "zinc inverse distance weighted interpolations")

plot of chunk unnamed-chunk-3



################################################### code chunk number 4: gstat.Rnw:124-125
print(spplot(zinc.idw["var1.pred"], main = "zinc inverse distance weighted interpolations"))

plot of chunk unnamed-chunk-3



################################################### code chunk number 5: gstat.Rnw:134-136
plot(log(zinc) ~ sqrt(dist), meuse)
abline(lm(log(zinc) ~ sqrt(dist), meuse))

plot of chunk unnamed-chunk-3



################################################### code chunk number 6: gstat.Rnw:145-150
lzn.vgm = variogram(log(zinc) ~ 1, meuse)
lzn.vgm
##     np    dist  gamma dir.hor dir.ver   id
## 1   57   79.29 0.1234       0       0 var1
## 2  299  163.97 0.2162       0       0 var1
## 3  419  267.36 0.3028       0       0 var1
## 4  457  372.74 0.4121       0       0 var1
## 5  547  478.48 0.4634       0       0 var1
## 6  533  585.34 0.5647       0       0 var1
## 7  574  693.15 0.5690       0       0 var1
## 8  564  796.18 0.6187       0       0 var1
## 9  589  903.15 0.6471       0       0 var1
## 10 543 1011.29 0.6916       0       0 var1
## 11 500 1117.86 0.7034       0       0 var1
## 12 477 1221.33 0.6039       0       0 var1
## 13 452 1329.16 0.6517       0       0 var1
## 14 457 1437.26 0.5665       0       0 var1
## 15 415 1543.20 0.5748       0       0 var1
lzn.fit = fit.variogram(lzn.vgm, model = vgm(1, "Sph", 900, 1))
lzn.fit
##   model   psill range
## 1   Nug 0.05066     0
## 2   Sph 0.59061   897
plot(lzn.vgm, lzn.fit)

plot of chunk unnamed-chunk-3



################################################### code chunk number 7: gstat.Rnw:153-154
print(plot(lzn.vgm, lzn.fit))

plot of chunk unnamed-chunk-3



################################################### code chunk number 8: gstat.Rnw:160-164
lznr.vgm = variogram(log(zinc) ~ sqrt(dist), meuse)
lznr.fit = fit.variogram(lznr.vgm, model = vgm(1, "Exp", 300, 1))
lznr.fit
##   model   psill range
## 1   Nug 0.05712   0.0
## 2   Exp 0.17642 340.3
plot(lznr.vgm, lznr.fit)

plot of chunk unnamed-chunk-3



################################################### code chunk number 9: gstat.Rnw:167-168
print(plot(lznr.vgm, lznr.fit))

plot of chunk unnamed-chunk-3



################################################### code chunk number 10: gstat.Rnw:177-179
lzn.kriged = krige(log(zinc) ~ 1, meuse, meuse.grid, model = lzn.fit)
## [using ordinary kriging]
spplot(lzn.kriged["var1.pred"])

plot of chunk unnamed-chunk-3



################################################### code chunk number 11: gstat.Rnw:182-183
print(spplot(lzn.kriged["var1.pred"]))

plot of chunk unnamed-chunk-3



################################################### code chunk number 12: gstat.Rnw:187-190
lzn.condsim = krige(log(zinc) ~ 1, meuse, meuse.grid, model = lzn.fit, nmax = 30, 
    nsim = 4)
## drawing 4 GLS realisations of beta...
## [using conditional Gaussian simulation]
spplot(lzn.condsim, main = "three conditional simulations")

plot of chunk unnamed-chunk-3



################################################### code chunk number 13: gstat.Rnw:193-194
print(spplot(lzn.condsim, main = "three conditional simulations"))

plot of chunk unnamed-chunk-3



################################################### code chunk number 14: gstat.Rnw:199-202
lzn.condsim2 = krige(log(zinc) ~ sqrt(dist), meuse, meuse.grid, model = lznr.fit, 
    nmax = 30, nsim = 4)
## drawing 4 GLS realisations of beta...
## [using conditional Gaussian simulation]
spplot(lzn.condsim2, main = "three UK conditional simulations")

plot of chunk unnamed-chunk-3



################################################### code chunk number 15: gstat.Rnw:205-206
print(spplot(lzn.condsim2, main = "three UK conditional simulations"))

plot of chunk unnamed-chunk-3



################################################### code chunk number 16: gstat.Rnw:215-218
lzn.dir = variogram(log(zinc) ~ 1, meuse, alpha = c(0, 45, 90, 135))
lzndir.fit = vgm(0.59, "Sph", 1200, 0.05, anis = c(45, 0.4))
plot(lzn.dir, lzndir.fit, as.table = TRUE)

plot of chunk unnamed-chunk-3



################################################### code chunk number 17: gstat.Rnw:221-222
print(plot(lzn.dir, lzndir.fit, as.table = TRUE))

plot of chunk unnamed-chunk-3



################################################### code chunk number 18: gstat.Rnw:255-257
lznr.dir = variogram(log(zinc) ~ sqrt(dist), meuse, alpha = c(0, 45, 90, 135))
plot(lznr.dir, lznr.fit, as.table = TRUE)

plot of chunk unnamed-chunk-3



################################################### code chunk number 19: gstat.Rnw:260-261
print(plot(lznr.dir, lznr.fit, as.table = TRUE))

plot of chunk unnamed-chunk-3



################################################### code chunk number 20: gstat.Rnw:278-281
vgm.map = variogram(log(zinc) ~ sqrt(dist), meuse, cutoff = 1500, width = 100, 
    map = TRUE)
plot(vgm.map, threshold = 5)

plot of chunk unnamed-chunk-3



################################################### code chunk number 21: gstat.Rnw:284-285
print(plot(vgm.map, threshold = 5))

plot of chunk unnamed-chunk-3



################################################### code chunk number 22: gstat.Rnw:298-309
g = gstat(NULL, "log(zn)", log(zinc) ~ sqrt(dist), meuse)
g = gstat(g, "log(cd)", log(cadmium) ~ sqrt(dist), meuse)
g = gstat(g, "log(pb)", log(lead) ~ sqrt(dist), meuse)
g = gstat(g, "log(cu)", log(copper) ~ sqrt(dist), meuse)
v = variogram(g)
g = gstat(g, model = vgm(1, "Exp", 300, 1), fill.all = TRUE)
g.fit = fit.lmc(v, g)
g.fit
## data:
## log(zn) : formula = log(zinc)`~`sqrt(dist) ; data dim = 155 x 12
## log(cd) : formula = log(cadmium)`~`sqrt(dist) ; data dim = 155 x 12
## log(pb) : formula = log(lead)`~`sqrt(dist) ; data dim = 155 x 12
## log(cu) : formula = log(copper)`~`sqrt(dist) ; data dim = 155 x 12
## variograms:
##                    model   psill range
## log(zn)[1]           Nug 0.05142     0
## log(zn)[2]           Exp 0.17556   300
## log(cd)[1]           Nug 0.39997     0
## log(cd)[2]           Exp 0.47894   300
## log(pb)[1]           Nug 0.04771     0
## log(pb)[2]           Exp 0.21323   300
## log(cu)[1]           Nug 0.04578     0
## log(cu)[2]           Exp 0.07827   300
## log(zn).log(cd)[1]   Nug 0.09191     0
## log(zn).log(cd)[2]   Exp 0.24542   300
## log(zn).log(pb)[1]   Nug 0.04528     0
## log(zn).log(pb)[2]   Exp 0.18407   300
## log(cd).log(pb)[1]   Nug 0.06425     0
## log(cd).log(pb)[2]   Exp 0.25525   300
## log(zn).log(cu)[1]   Nug 0.02913     0
## log(zn).log(cu)[2]   Exp 0.10439   300
## log(cd).log(cu)[1]   Nug 0.09442     0
## log(cd).log(cu)[2]   Exp 0.13074   300
## log(pb).log(cu)[1]   Nug 0.02370     0
## log(pb).log(cu)[2]   Exp 0.10268   300
plot(v, g.fit)

plot of chunk unnamed-chunk-3

vgm.map = variogram(g, cutoff = 1500, width = 100, map = TRUE)
plot(vgm.map, threshold = 5, col.regions = bpy.colors(), xlab = "", ylab = "")

plot of chunk unnamed-chunk-3



################################################### code chunk number 23: gstat.Rnw:312-313
print(plot(v, g.fit))

plot of chunk unnamed-chunk-3



################################################### code chunk number 24: gstat.Rnw:316-317
print(plot(vgm.map, threshold = 5, col.regions = bpy.colors(), ylab = "", xlab = ""))

plot of chunk unnamed-chunk-3

I had no issues by running the script inside R studio.

2 Carry out an inverse distance weighted interpolation on the cadmium variable of the meuse dataset in package sp, to the grid points of meuse.grid in the same package. Report the R commands you used, and show the map of the interpolated cadmium values.

cadmium.idw = krige(cadmium ~ 1, meuse, meuse.grid)
## [inverse distance weighted interpolation]
spplot(cadmium.idw["var1.pred"], main = "inverse distance weighted interpolations for cadmium", 
    col.regions = bpy.colors())

plot of chunk unnamed-chunk-4

3 Carry out a cross validation of cadmium in the meuse data set, (using function krige.cv in package gstat) and show the histogram of the cross validation residuals.

x <- krige.cv(cadmium ~ 1, meuse, vgm(0.59, "Sph", 874, 0.04), nmax = 40, nfold = 5)
hist(x$residual, main = paste("Histogram of Cadmium, CV residuals"))

plot of chunk unnamed-chunk-5

Explain how these residuals were computed.

x[1:10, ]
##         coordinates var1.pred var1.var observed residual   zscore fold
## 1  (181072, 333611)     7.908   0.1693     11.7  3.79223  9.21620    4
## 2  (181025, 333558)     9.912   0.1639      8.6 -1.31176 -3.24012    5
## 3  (181165, 333537)     6.583   0.1724      6.5 -0.08280 -0.19939    3
## 4  (181298, 333484)     4.980   0.2205      2.6 -2.37964 -5.06782    5
## 5  (181307, 333330)     2.624   0.1734      2.8  0.17623  0.42325    2
## 6  (181390, 333260)     2.527   0.2238      3.0  0.47350  1.00096    3
## 7  (181165, 333370)     3.181   0.1777      3.2  0.01924  0.04563    2
## 8  (181027, 333363)     6.215   0.1718      2.8 -3.41463 -8.23814    4
## 9  (181060, 333231)     2.111   0.1181      2.4  0.28917  0.84149    3
## 10 (181232, 333168)     1.673   0.1480      1.6 -0.07287 -0.18943    5
resisduals <- x$observed - x$var1.pred

Carry out the same analysis, but this time for log(cadmium); give a histogram and explain the differences found to the first histogram.

y <- krige.cv(log(cadmium) ~ 1, meuse, vgm(0.59, "Sph", 874, 0.04), nmax = 40, 
    nfold = 5)
hist(y$residual, main = paste("Histogram of Log-Cadmium CV residuals"))

plot of chunk unnamed-chunk-7

The difference in the histogram is the range of the residuals depicted on the x-axis. On the log scale it is smaller than on the one without log transformation.