Spatial structure with a variogram

Data generated as part of a long-term research study on an experimental forest in central Oregon. This dataset holds the coordinates for all trees in the experimental forest. The typical stem measurements are recorded for each tree. Crown radius was measured at the cardinal directions for a subset of trees. Mean crown radius was calculated for all trees using a simple relationship between DBH, Height, and observed crown dimension. Contain 2422 rows and 15 columns.

library(spBayes)
data(WEF.dat)
head(WEF.dat)
##   Tree_ID Species DBH_cm Live_dead Stump North_m East_m  ELEV_m Tree_height_m
## 1       5      SF   63.3         D         79.03 221.85 1112.15            NA
## 2      18      SF   38.0         D        100.70 225.05 1113.03            NA
## 3      20      SF   16.4         D         95.41 223.93 1112.76            NA
## 4      26      WH   12.6         D        108.01 231.90 1113.46            NA
## 5      30      SF   21.5         D         98.43 233.42 1112.80            NA
## 6      39      SF   42.4         D Stump   88.15 239.38 1113.20            NA
##   Crown_depth_m Mean_crown_radius_m_from_center
## 1            NA                              NA
## 2            NA                              NA
## 3            NA                              NA
## 4            NA                              NA
## 5            NA                              NA
## 6            NA                              NA
##   Crown_radius_north_m_from_center Crown_radius_east_m_from_center
## 1                               NA                              NA
## 2                               NA                              NA
## 3                               NA                              NA
## 4                               NA                              NA
## 5                               NA                              NA
## 6                               NA                              NA
##   Crown_radius_west_m_from_center Crown_radius_south_m_from_center
## 1                              NA                               NA
## 2                              NA                               NA
## 3                              NA                               NA
## 4                              NA                               NA
## 5                              NA                               NA
## 6                              NA                               NA
str(WEF.dat)
## 'data.frame':    2422 obs. of  15 variables:
##  $ Tree_ID                         : int  5 18 20 26 30 39 97 138 534 539 ...
##  $ Species                         : Factor w/ 6 levels "DF","GF","NF",..: 4 4 4 6 4 4 4 4 1 4 ...
##  $ DBH_cm                          : num  63.3 38 16.4 12.6 21.5 42.4 10.3 46 96.9 27.7 ...
##  $ Live_dead                       : Factor w/ 2 levels "D","L": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Stump                           : Factor w/ 2 levels "","Stump": 1 1 1 1 1 2 1 1 1 1 ...
##  $ North_m                         : num  79 100.7 95.4 108 98.4 ...
##  $ East_m                          : num  222 225 224 232 233 ...
##  $ ELEV_m                          : num  1112 1113 1113 1113 1113 ...
##  $ Tree_height_m                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Crown_depth_m                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Mean_crown_radius_m_from_center : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Crown_radius_north_m_from_center: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Crown_radius_east_m_from_center : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Crown_radius_west_m_from_center : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Crown_radius_south_m_from_center: num  NA NA NA NA NA NA NA NA NA NA ...

Plot tree height data

WEF.dat <- WEF.dat[!apply(WEF.dat[, c("East_m", "North_m", "DBH_cm", "Tree_height_m", "ELEV_m")], 1, function(x) any(is.na(x))),]
DBH <- WEF.dat$DBH_cm
HT <- WEF.dat$Tree_height_m


coords <- as.matrix(WEF.dat[, c("East_m", "North_m")])
plot(coords, pch = 1, cex = sqrt(DBH)/10, col = "darkgreen", xlab = "Easting (m)", ylab = "Northing (m)")
leg.vals <- round(quantile(DBH), 0)
legend("topleft", pch = 1, legend = leg.vals, col = "darkgreen", pt.cex = sqrt(leg.vals)/10, bty = "n", title = "DBH (cm)")

data_sp<-sp::SpatialPointsDataFrame(WEF.dat[, c("East_m", "North_m")],WEF.dat[, c("Tree_height_m", "ELEV_m")])

make a variogram to see if there is a spatial structure.

library(gstat)
lzn.vgm <- variogram(Tree_height_m~1, data = data_sp) # calculates sample variogram values 
plot(lzn.vgm)

lzn.fit <- fit.variogram(lzn.vgm, model=vgm(1, "Sph", 60, 1)) # fit model
plot(lzn.vgm, lzn.fit)

Ordinary kriging

The meuse data set provided by package sp is a data set comprising of four heavy metals measured in the top soil in a flood plain along the river Meuse, along with a handful of covariates. The process governing heavy metal distribu- tion seems that polluted sediment is carried by the river, and mostly deposited close to the river bank, and areas with low elevation. This document shows a geostatistical analysis of this data set. The data set was introduced by Burrough and McDonnell, 1998. Link to the CRAN vignette

library(sp)
library(gstat)
library(dplyr) # for "glimpse"
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales) # for "comma"
library(magrittr)
data(meuse)
meuse %>% as.data.frame %>% 
  ggplot(aes(x, y)) + geom_point(aes(size=zinc), color="blue", alpha=3/4) + 
  ggtitle("Zinc Concentration (ppm)") + coord_equal() + theme_bw()

change object class to have an “sp” format

class(meuse)
## [1] "data.frame"
coordinates(meuse) <- ~ x + y
class(meuse)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"

create and plot exprerimental variogram, then create the varigram model.

exp.vgm <- variogram(log(zinc)~1, meuse) # calculates sample variogram values 
plot(exp.vgm)

vgm.fit <- fit.variogram(exp.vgm, model=vgm(1, "Sph", 900, 1)) # fit model
plot(exp.vgm, vgm.fit) 

look at the dataset and grid to be kriged

# load spatial domain to interpolate over
data("meuse.grid")

# to compare, recall the bubble plot above; those points were what there were values for. this is much more sparse
plot1 <- meuse %>% as.data.frame %>%
  ggplot(aes(x, y)) + geom_point(size=1) + coord_equal() + 
  ggtitle("Points with measurements")

# this is clearly gridded over the region of interest
plot2 <- meuse.grid %>% as.data.frame %>%
  ggplot(aes(x, y)) + geom_point(size=1) + coord_equal() + 
  ggtitle("Points at which to estimate")

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(plot1, plot2, ncol = 2)

krige with the gstat::krige function

coordinates(meuse.grid) <- ~ x + y 
lzn.kriged <- krige(log(zinc) ~ 1, meuse, meuse.grid, model=vgm.fit)
## [using ordinary kriging]

plot it

lzn.kriged %>% as.data.frame %>%
  ggplot(aes(x=x, y=y)) + geom_tile(aes(fill=var1.pred)) + coord_equal() +
  scale_fill_gradient(low = "yellow", high="red") +
  ggtitle("kriged values of [zinc]") +
  scale_x_continuous(labels=comma) + scale_y_continuous(labels=comma) +
  theme_bw()

lzn.kriged %>% as.data.frame %>%
  ggplot(aes(x=x, y=y)) + geom_tile(aes(fill=var1.var)) + coord_equal() +
  scale_fill_gradient(low = "yellow", high="red") +
  ggtitle("Variance of the prediction") +
  scale_x_continuous(labels=comma) + scale_y_continuous(labels=comma) +
  theme_bw()