date()
## [1] "Wed Apr 24 13:54:05 2013"
Krige the Parana rainfall.
require(geoR)
## Loading required package: geoR
## Warning: package 'geoR' was built under R version 2.15.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 2.15.2
## Loading required package: MASS
## -------------------------------------------------------------- Analysis of
## geostatistical data For an Introduction to geoR go to
## http://www.leg.ufpr.br/geoR geoR version 1.7-4 (built on 2012-06-29) is
## now loaded --------------------------------------------------------------
data(parana)
str(parana, lev = 2)
## List of 4
## $ east , north : num [1:143, 1:2] 403 502 556 573 702 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:2] "east" "north"
## $ data : num [1:143] 306 201 167 163 164 ...
## $ borders : num [1:369, 1:2] 670 664 656 650 643 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:2] "east" "north"
## $ loci.paper : num [1:4, 1:2] 300 648 362 410 484 ...
## - attr(*, "class")= chr "geodata"
summary(parana)
## $n
## [1] 143
##
## $coords.summary
## east north
## min 150.1 70.36
## max 768.5 461.97
##
## $distances.summary
## min max
## 1.0 619.5
##
## $data.summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 163 234 270 274 318 414
##
## $borders.summary
## east north
## min 138.0 46.77
## max 798.6 507.93
##
## $others
## [1] "loci.paper"
##
## attr(,"class")
## [1] "summary.geodata"
dup.coords(parana) #No duplicate points
## NULL
plot(parana)
plot(parana, trend = "1st")
plot(parana, trend = "2nd")
There are 143 recording stations with data. The spatial bounding box is located in the coordinates summary. The minimum distance between locations is 1.0 degrees and the maximum distance is 619.5 degrees. A summary of the data values represent average May-June rainfall amounts.
The data shows a clear trend. With the 1st trend removed, there still seems to be a linear trend in the X Coord vs residuals graph. There is also a bit of clustering in both residual graphs. Thus, I removed the 2nd trend as well (which still shows clustering but lessened the linear trend).
Max distance is about 619.5 degrees, so ½ this value (309.75) should be used to plot the variogram.
plot(variog(parana, trend = "2nd", max.dist = 309.75))
## variog: computing omnidirectional variogram
grid()
plot(variog4(parana, trend = "2nd", max.dist = 309.75), omni = TRUE, legend = TRUE) #omnidirectional variogram
## variog: computing variogram for direction = 0 degrees (0 radians)
## tolerance angle = 22.5 degrees (0.393 radians)
## variog: computing variogram for direction = 45 degrees (0.785 radians)
## tolerance angle = 22.5 degrees (0.393 radians)
## variog: computing variogram for direction = 90 degrees (1.571 radians)
## tolerance angle = 22.5 degrees (0.393 radians)
## variog: computing variogram for direction = 135 degrees (2.356 radians)
## tolerance angle = 22.5 degrees (0.393 radians)
## variog: computing omnidirectional variogram
grid()
Set initial values for the sill and range at 900 and 200, respectively.
iv = c(900, 200)
summary(likfit(parana, ini = iv, cov.model = "exp", fix.nug = TRUE, message = FALSE))$likelihood$AIC
## [1] 1394
summary(likfit(parana, ini = iv, cov.model = "exp", trend = "1st", fix.nug = TRUE,
message = FALSE))$likelihood$AIC
## [1] 1365
summary(likfit(parana, ini = iv, cov.model = "exp", trend = "2nd", fix.nug = TRUE,
message = FALSE))$likelihood$AIC
## [1] 1347
summary(likfit(parana, ini = iv, cov.model = "exp", trend = "2nd", fix.nug = FALSE,
message = FALSE))$likelihood$AIC
## [1] 1338
summary(likfit(parana, ini = iv, cov.model = "sph", trend = "2nd", fix.nug = TRUE,
message = FALSE))$likelihood$AIC
## [1] 1350
summary(likfit(parana, ini = iv, cov.model = "sph", trend = "2nd", fix.nug = FALSE,
message = FALSE))$likelihood$AIC
## [1] 1338
A better fit corresponds to a larger log-likelihood and a smaller AIC. From these results, the best variogram model would be on the residuals of a 2nd order trend using a spherical function without a fixed nugget.
An exponential function without a fixed nugget is also a reasonable model.
Obtain the model parameters.
summary(likfit(parana, ini = iv, cov.model = "sph", trend = "2nd", fix.nug = FALSE))
## kappa not used for the spherical correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta0 beta1 beta2 beta3 beta4 beta5
## 435.493 0.048 -0.701 0.000 0.000 0.001
##
## Parameters of the spatial component:
## correlation function: spherical
## (estimated) variance parameter sigmasq (partial sill) = 361
## (estimated) cor. fct. parameter phi (range parameter) = 215
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 420
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 214.5
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-660" "9" "1338" "1364"
##
## non spatial model:
## log.L n.params AIC BIC
## "-671" "7" "1356" "1376"
##
## Call:
## likfit(geodata = parana, trend = "2nd", ini.cov.pars = iv, fix.nugget = FALSE,
## cov.model = "sph")
summary(likfit(parana, ini = iv, cov.model = "exp", trend = "2nd", fix.nug = FALSE))
## kappa not used for the exponential correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta0 beta1 beta2 beta3 beta4 beta5
## 423.928 0.062 -0.636 0.000 0.000 0.001
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 373
## (estimated) cor. fct. parameter phi (range parameter) = 77.5
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 381
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 232.3
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-660" "9" "1338" "1365"
##
## non spatial model:
## log.L n.params AIC BIC
## "-671" "7" "1356" "1376"
##
## Call:
## likfit(geodata = parana, trend = "2nd", ini.cov.pars = iv, fix.nugget = FALSE,
## cov.model = "exp")
Plot the two models on the empirical variogram.
plot(variog(parana, trend = "2nd", uvec = seq(0, 309.75, l = 29)))
## variog: computing omnidirectional variogram
lines.variomodel(cov.model = "sph", cov.pars = c(360.8, 214.5), nug = 419.9,
max.dist = 309.75, col = "red")
lines.variomodel(cov.model = "exp", cov.pars = c(372.6, 77.54), nug = 381.2,
max.dist = 309.75, col = "green")
Here the exponential model seems to be a slightly better fit.
Save the models.
r.modelS = likfit(parana, ini = iv, cov.model = "sph", trend = "2nd", fix.nug = FALSE,
messages = TRUE)
## kappa not used for the spherical correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
r.modelE = likfit(parana, ini = iv, cov.model = "exp", trend = "2nd", fix.nug = FALSE,
messages = TRUE)
## kappa not used for the exponential correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
r.grd = expand.grid(seq(0, 800, l = 200), seq(0, 600, l = 200))
kcS = krige.conv(parana, loc = r.grd, krige = krige.control(trend.d = "2nd",
trend.l = "2nd", obj.m = r.modelS))
## krige.conv: results will be returned only for prediction locations inside the borders
## krige.conv: model with mean given by a 2nd order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
kcE = krige.conv(parana, loc = r.grd, krige = krige.control(trend.d = "2nd",
trend.l = "2nd", obj.m = r.modelE))
## krige.conv: results will be returned only for prediction locations inside the borders
## krige.conv: model with mean given by a 2nd order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
Plot the surface.
image(kcS, col = rev(rainbow(20)), xlab = "Longitude", ylab = "Latitude")
title(main = "Spherical Model w/ Non-zero Nugget")
contour(kcS, add = TRUE, nlevel = 20)
image(kcE, col = rev(rainbow(20)), xlab = "Longitude", ylab = "Latitude")
title(main = "Exponential Model w/ Non-zero Nugget")
contour(kcE, add = TRUE, nlevel = 20)
The spherical model is smoother, due to greater the nugget (relative to the sill) further smoothing the interpolation. Repeat using a spherical model and fixed nugget equal to 1.
r.modelS2 = likfit(parana, ini = iv, cov.model = "sph", trend = "2nd", fix.nug = TRUE,
nugget = 1, messages = TRUE)
## kappa not used for the spherical correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
kcS2 = krige.conv(parana, loc = r.grd, krige = krige.control(trend.d = "2nd",
trend.l = "2nd", obj.m = r.modelS2))
## krige.conv: results will be returned only for prediction locations inside the borders
## krige.conv: model with mean given by a 2nd order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
image(kcS2, col = rev(rainbow(20)), xlab = "Longitude", ylab = "Latitude")
title(main = "Spherical Model w/ Nugget = 1")
contour(kcS2, add = TRUE, nlevel = 20)
Increasing nugget, increases the smoothing.
xv = xvalid(parana, model = r.modelS)
## xvalid: number of data locations = 143
## xvalid: number of validation locations = 143
## xvalid: performing cross-validation at location ... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
## xvalid: end of cross-validation
plot(xv$data, xv$predicted, xlab = "Observed Rainfall (in)", ylab = "Predicted Rainfall (in) at Observed Location",
asp = 1, col = "red", pch = 20)
abline(0, 1)
abline(lm(xv$predicted ~ xv$data), col = "red")
mean(xv$error^2)
## [1] 536.6
mean(abs(xv$error))
## [1] 17.87
Thus, on average, we can expect to make about a 18 inch error in the prediction of the mean rainfall at an arbitrary location in the domain (based on the mean absolute error).