This Vignette continues with spatial structures within INLA and inlabru and aims to summarize some of the information available on spatial covariate modelling.
I will use the gorillas data set as an example
(tutorial available here https://inlabru-org.github.io/inlabru/articles/web/2d_lgcp_covars.html)
and then I will continue with the Dragonflies case study.
The general idea is to fit a LGCP that includes (1) a spatial
covariate (elevation in this case) and (2) an SPDE to account for
unexplained spatial variation. Spatial covariates can be available in
different formats. In the gorillas example, the
elevation variable is available as ‘SpatialGridDataFrame’ spatial-class
object. However, in many data sets spatial covariates are read as raster
data. Thus, I will illustrate the work around with this type of data as
well.
First, we read and visualize the data.
library(INLA)
library(inlabru)
library(raster)
library(stars)
library(ggplot2)
data(gorillas, package = "inlabru")
# get the boundary
boundary <- gorillas$boundary
# obtain covariates
gcov <- gorillas$gcov
# scale the elevation covariate
elev <- gcov$elevation
elev$elevation <- round((elev$elevation - mean(elev$elevation, na.rm = TRUE))/sd(elev$elevation,na.rm = TRUE),digits = 1)
# read the raster data and then scale it as well
ele_stars<- read_stars('elevRaster.tif')
ele_stars$elevRaster.tif<- round((ele_stars$elevRaster.tif - mean(ele_stars$elevRaster.tif, na.rm = TRUE))/sd(ele_stars$elevRaster.tif,na.rm = TRUE),digits = 1)
# visualize it
p1<- ggplot() +
gg(elev) +
gg(boundary, alpha = 0) +
coord_fixed()
p2<- ggplot() +
geom_stars(data = ele_stars) +
gg(boundary, alpha = 0) +
coord_fixed()
gridExtra::grid.arrange(p1,p2,ncol=2)Now, I follow the steps described in the tutorial https://inlabru-org.github.io/inlabru/articles/web/2d_lgcp_covars.html
to fit a LGCP with a spatial covariate in a raster format (see the
tutorial for an example using a SpatialPixelsDataFrame). They key part
in here is the function f.elev() that enables to
extract the covariate values at any location within the survey region. I
couldn’t find how the lgcp() call internally the
function within the model structure. Nevertheless, there are two key
components in the definition of the f.elev()
function that I will cover next.
# load the mesh and nests locations
mesh <- gorillas$mesh
nests <- gorillas$nests
# specify priors for the variance-covariance matrix
matern <- inla.spde2.pcmatern(mesh,
prior.sigma = c(0.1, 0.01),
prior.range = c(5, 0.01)
)
# function to extract covariate values at arbitrary points in the survey region
f.elev <- function(x, y) {
# turn coordinates into SpatialPoints object:
# with the appropriate coordinate reference system (CRS)
spp <- SpatialPoints(data.frame(x = x, y = y), proj4string = fm_sp_get_crs(elev))
proj4string(spp) <- fm_sp_get_crs(elev)
# Extract elevation values at spp coords, from our elev SpatialGridDataFrame
v <- over(spp, elev)
if (any(is.na(v$elevation))) {
v$elevation <- inlabru:::bru_fill_missing(elev, spp, v$elevation)
}
return(v$elevation)
}
# model structure
ecomp <- coordinates ~ ele_stars(f.elev(x, y), model = "linear") +
mySmooth(coordinates, model = matern) + Intercept(1)
# fit the LGCP
efit <- lgcp(ecomp, nests, samplers = boundary, domain = list(coordinates = mesh))
# Compare the effects of elevation and the SPDE
e.lp <- predict(
efit, pixels(mesh, mask = boundary),
~ list(
smooth_elev = mySmooth + ele_stars,
elev = ele_stars,
smooth = mySmooth
)
)
# set the color scale
library(RColorBrewer)
lprange <- range(e.lp$smooth_elev$mean, e.lp$elev$mean, e.lp$smooth$mean)
csc <- scale_fill_gradientn(colours = brewer.pal(9, "YlOrRd"), limits = lprange)
plot.e.lp <- ggplot() +
gg(e.lp$smooth_elev, mask = boundary) +
theme(legend.position = "bottom") +
gg(boundary, alpha = 0) +
ggtitle("SPDE + elevation") +
coord_equal()+csc
#> Regions defined for each Polygons
plot.e.lp.spde <- ggplot() +
gg(e.lp$smooth, mask = boundary) +
theme(legend.position = "bottom") +
gg(boundary, alpha = 0) +
ggtitle("SPDE") +
coord_equal()+csc
#> Regions defined for each Polygons
plot.e.lp.elev <- ggplot() +
gg(e.lp$elev, mask = boundary) +
theme(legend.position = "bottom") +
gg(boundary, alpha = 0) +
ggtitle("elevation") +
coord_equal()+csc
#> Regions defined for each Polygons
gridExtra::grid.arrange(plot.e.lp,plot.e.lp.spde,plot.e.lp.elev,ncol=3)the lgcp() function call internally the
f.elev() function to retrieve the spatial
covariate values at specific locations within the mesh. In here there
are two imporant components within the function The first one, (1)
converts any pair of coordinates to a spatial location while assigning
the appropriate CRS associated with the corresponding
raster/SpatialPixelsDataFrame-class object and (2) extract the covariate
values at the specific coordinate. Lets try now this functionality using
both the sp and the more updated
sf approaches to achieve this.
#I will randomly draw a location within the raster using the st_sample function()
set.seed(1234)
st_sample(st_as_sf(ele_stars), 1)
#> Geometry set for 1 feature
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 580.8503 ymin: 677.0875 xmax: 580.8503 ymax: 677.0875
#> CRS: unknown
#> POINT (580.8503 677.0875)
# take the coordinates
x <- 583.8595
y <- 677.0931
### Approach using sp environment
# convert location to a spatial points-class object
spp <- SpatialPoints(data.frame(x = x, y = y))
slot(spp, "proj4string") <- crs(elev)
# the fm_sp_get_crs can be used to suppress warnings for the unused information in old pro4strings format
# Extract elevation values at spp coords, from the elev SpatialGridDataFrame
v <- over(spp, elev)
v$elevation
#> [1] 1.1
### Approach using sf environment
# Unfortunately this option might cause some issues when working with the new sf or star spatial-class objects
# e.g. spp <- SpatialPoints(data.frame(x = x, y = y),proj4string = st_crs(ele_stars))
# So it is better to define this function de novo to work around with sf and stars object too.
# first we create an simple feature object for the desire locations and assign the proper CRS
locations_sf<- st_as_sf(x = data.frame(x = x, y = y),
coords = c("x", "y"),
crs = st_crs(ele_stars))
# extract raster values at points locations
v_sf <- st_extract(ele_stars, locations_sf)
v_sf$elevRaster.tif
#> [1] 1.1Now, it may occur that the information of the spatial covariate at
the target location is not available. In such cases,
inlabru has the
bru_fill_missing() to impute missing values with
the nearest-available-value or feature. This is the second important
argument we define in the f.elev() and I will
illustrate how this works and how it can be extended for handling
sf-star class objects by defining buffers at of \(k\) size at each location.
To make this example mor trackable, I will focus on a smaller extent of all the region by croping the original raster file and removing some of the elevation values at certain cells:
# this give us the spatial extent of the raster
st_bbox(ele_stars)
#> xmin ymin xmax ymax
#> 580.1599 673.8742 586.2320 679.0378
# then by using the st_crop function I'll sellect a smaller window for our study
ele_crop <- ele_stars %>%
st_crop(st_bbox(c(
xmin = 581,
xmax = 581.5,
ymin = 675,
ymax = 675.5
), crs = st_crs(ele_stars)))
# Now I will fill in some missing values within the grid
ele_crop$elevRaster.tif[4,10] <- ele_crop$elevRaster.tif[15,5] <- ele_crop$elevRaster.tif[9,11] <- ele_crop$elevRaster.tif[9,12] <-ele_crop$elevRaster.tif[10,11] <- NA
plot(ele_crop, text_values = TRUE, key.pos = NULL)Now I will select a random number of locations an extract the elevation values at those locations:
set.seed(12345)
pts <- st_sample(st_as_sfc(st_bbox(ele_crop)), 10)
# extract raster values at points locations
pts_values <- st_extract(ele_crop, pts)
pts_values$elevRaster.tif
#> [1] 1.3 1.1 NA 1.1 NA NA 1.3 NA 1.3 0.1We could tell just by looking at this plot that the first point on
the right is closer to a grid cell with an elevation value of 1.3 and
thus, this would be the value we will use to impute that cell. At this
stage, we could use the bru_fill_missing()
function to impute the missing values. To do so we first need to convert
the sf class object into an sp spatial-class object and then use the
bru_fill_missing() to replace each missing value
with the nearest-available-value or feature. The function receives the
sp-spatial object (in this case we need also to convert the crop-spatial
raster to a sp spatial class), the spatial points object, and the
elevation values at those locations.
# using inlabru function to interpolate raster values with closest neighbours
pts_sp <- as_Spatial(pts_values)
imputation <- inlabru:::bru_fill_missing(as(ele_crop, "Spatial"),pts_sp , pts_sp$elevRaster.tif)
imputation
#> [1] 1.3 1.1 0.4 1.1 1.1 1.3 1.3 1.1 1.3 0.1We can see that the missing value we saw earlier has now been imputed with its closets neighbour which had an elevation of 1.1. Now we will try a slighly different approach to do this imputation by using the advantages of the sf and tidyverse environments. For this approach I will define a series of buffers centred at each of the locations where elevation values are missing.
library(tibble)
library(tidyr)
#>
#> Attaching package: 'tidyr'
#> The following object is masked from 'package:raster':
#>
#> extract
#> The following objects are masked from 'package:Matrix':
#>
#> expand, pack, unpack
# Step 1 - Identify observation with NAs
pts_values_na <- pts_values[which(is.na(pts_values$elevRaster.tif)),]
pts_values_na$point_id = 1:nrow(pts_values_na) # create an index column
# Step 2 - Create buffer around each point (change the dist for a different buffer size)
buffer <- st_buffer(pts_values_na, dist = 0.03)
# Step 3 - Convert points and raster to sf and add column ID
poly_sf <- buffer %>%
st_as_sf() %>%
rowid_to_column(var = "poly_id")
ele_sf <- st_as_sf(ele_crop)
We can find which cells lie within each buffer to determine the
neighbourhood at each location.
# Step 4 - Select only those cells within the buffer
df_sf <- st_join(ele_sf, poly_sf) %>%
drop_na(poly_id)
ggplot() + geom_sf(data = (df_sf),aes(fill=elevRaster.tif.x))+
geom_sf(data = (poly_sf),fill='transparent',col=2)+
geom_sf(data = pts_values_na, color = "red", size = 2) +
geom_sf(data=st_centroid(df_sf))
#> Warning in st_centroid.sf(df_sf): st_centroid assumes attributes are constant
#> over geometries of x
Finally, we can compute the distance from the centroid of each cell in
the neighborhood to each point and select the value within the minimum
distance to the point.We can alos use other imputation apporach such as
the mean, median, mode (useful when dealing categorical
variables),maximum or minimum values within the neighborhood of each
point by using the aggregate function.
#compute distance from each point to the centroid of each cell
centroids<- st_centroid(df_sf) # define centroids
imputed.values <- pts_values
for(i in seq_len(nrow(pts_values_na))){
imputed.values[is.na(pts_values$elevRaster.tif),]$elevRaster.tif[i]<-df_sf[which.min(st_distance(centroids, pts_values_na[i,])),]$elevRaster.tif.x
}
#See we get the same result as with inlabru::bru_fill_missing
imputed.values$elevRaster.tif
#> [1] 1.3 1.1 0.4 1.1 1.1 1.3 1.3 1.1 1.3 0.1
# we can aggregate the poylgons and compute the average, min, max, mode , etc. to do the imputation as well
x <- aggregate(df_sf, poly_sf, FUN = mean)
imputed.values[is.na(pts_values$elevRaster.tif),]$elevRaster.tif <- x$elevRaster.tif.x
round(imputed.values$elevRaster.tif,digits=2)
#> [1] 1.30 1.10 0.42 1.10 1.09 1.30 1.30 0.97 1.30 0.10