Spatial Data Structures with INLA and inlabru part 2

Vignette Info

This Vignette continues with spatial structures within INLA and inlabru and aims to summarize some of the information available on spatial covariate modelling.

Continuous covariates - imputing missing values

I will use the gorillas data set as an example (tutorial available here https://inlabru-org.github.io/inlabru/articles/web/2d_lgcp_covars.html) and then I will continue with the Dragonflies case study.

The general idea is to fit a LGCP that includes (1) a spatial covariate (elevation in this case) and (2) an SPDE to account for unexplained spatial variation. Spatial covariates can be available in different formats. In the gorillas example, the elevation variable is available as ‘SpatialGridDataFrame’ spatial-class object. However, in many data sets spatial covariates are read as raster data. Thus, I will illustrate the work around with this type of data as well.

First, we read and visualize the data.

library(INLA)
library(inlabru)
library(raster)
library(stars)
library(ggplot2)

data(gorillas, package = "inlabru")

# get the boundary
boundary <- gorillas$boundary

# obtain covariates
gcov <- gorillas$gcov

# scale the elevation covariate
elev <- gcov$elevation
elev$elevation <- round((elev$elevation - mean(elev$elevation, na.rm = TRUE))/sd(elev$elevation,na.rm = TRUE),digits = 1)


# read the raster data and then scale it as well
ele_stars<- read_stars('elevRaster.tif')
ele_stars$elevRaster.tif<- round((ele_stars$elevRaster.tif - mean(ele_stars$elevRaster.tif, na.rm = TRUE))/sd(ele_stars$elevRaster.tif,na.rm = TRUE),digits = 1) 
  
# visualize it

p1<- ggplot() +
  gg(elev) +
  gg(boundary, alpha = 0) +
  coord_fixed()

p2<- ggplot() +
  geom_stars(data = ele_stars) +
  gg(boundary, alpha = 0) +
  coord_fixed()

gridExtra::grid.arrange(p1,p2,ncol=2)

Now, I follow the steps described in the tutorial https://inlabru-org.github.io/inlabru/articles/web/2d_lgcp_covars.html to fit a LGCP with a spatial covariate in a raster format (see the tutorial for an example using a SpatialPixelsDataFrame). They key part in here is the function f.elev() that enables to extract the covariate values at any location within the survey region. I couldn’t find how the lgcp() call internally the function within the model structure. Nevertheless, there are two key components in the definition of the f.elev() function that I will cover next.

# load the mesh and nests locations

mesh <- gorillas$mesh

nests <- gorillas$nests
# specify priors for the variance-covariance matrix

matern <- inla.spde2.pcmatern(mesh,
  prior.sigma = c(0.1, 0.01),
  prior.range = c(5, 0.01)
)




# function to extract covariate values at arbitrary points in the survey region

f.elev <- function(x, y) {
  # turn coordinates into SpatialPoints object:
  # with the appropriate coordinate reference system (CRS)
  spp <- SpatialPoints(data.frame(x = x, y = y), proj4string = fm_sp_get_crs(elev))
  proj4string(spp) <- fm_sp_get_crs(elev)
  # Extract elevation values at spp coords, from our elev SpatialGridDataFrame
  v <- over(spp, elev)
  if (any(is.na(v$elevation))) {
    v$elevation <- inlabru:::bru_fill_missing(elev, spp, v$elevation)
  }
  return(v$elevation)
}


# model structure 
ecomp <- coordinates ~ ele_stars(f.elev(x, y), model = "linear") +
  mySmooth(coordinates, model = matern) + Intercept(1)

# fit the LGCP

efit <- lgcp(ecomp, nests, samplers = boundary, domain = list(coordinates = mesh))

# Compare the effects of elevation and the SPDE

e.lp <- predict(
  efit, pixels(mesh, mask = boundary),
  ~ list(
    smooth_elev = mySmooth + ele_stars,
    elev = ele_stars,
    smooth = mySmooth
  )
)

# set the color scale
library(RColorBrewer)
lprange <- range(e.lp$smooth_elev$mean, e.lp$elev$mean, e.lp$smooth$mean)
csc <- scale_fill_gradientn(colours = brewer.pal(9, "YlOrRd"), limits = lprange)

plot.e.lp <- ggplot() +
  gg(e.lp$smooth_elev, mask = boundary) +
  theme(legend.position = "bottom") +
  gg(boundary, alpha = 0) +
  ggtitle("SPDE + elevation") +
  coord_equal()+csc
#> Regions defined for each Polygons

plot.e.lp.spde <- ggplot() +
  gg(e.lp$smooth, mask = boundary) +
  theme(legend.position = "bottom") +
  gg(boundary, alpha = 0) +
  ggtitle("SPDE") +
  coord_equal()+csc
#> Regions defined for each Polygons

plot.e.lp.elev <- ggplot() +
  gg(e.lp$elev, mask = boundary) +
  theme(legend.position = "bottom") +
  gg(boundary, alpha = 0) +
  ggtitle("elevation") +
  coord_equal()+csc
#> Regions defined for each Polygons

gridExtra::grid.arrange(plot.e.lp,plot.e.lp.spde,plot.e.lp.elev,ncol=3)

the lgcp() function call internally the f.elev() function to retrieve the spatial covariate values at specific locations within the mesh. In here there are two imporant components within the function The first one, (1) converts any pair of coordinates to a spatial location while assigning the appropriate CRS associated with the corresponding raster/SpatialPixelsDataFrame-class object and (2) extract the covariate values at the specific coordinate. Lets try now this functionality using both the sp and the more updated sf approaches to achieve this.


#I will randomly draw a location within the raster using the st_sample function()
set.seed(1234)
st_sample(st_as_sf(ele_stars), 1)
#> Geometry set for 1 feature 
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 580.8503 ymin: 677.0875 xmax: 580.8503 ymax: 677.0875
#> CRS:           unknown
#> POINT (580.8503 677.0875)

# take the coordinates 
x <- 583.8595  
y <- 677.0931 

### Approach using sp environment

# convert location to a spatial points-class object  
spp <- SpatialPoints(data.frame(x = x, y = y))
slot(spp, "proj4string") <- crs(elev)

# the fm_sp_get_crs can be used to suppress warnings for the unused information in old pro4strings format
# Extract elevation values at spp coords, from the elev SpatialGridDataFrame
v <- over(spp, elev)
v$elevation
#> [1] 1.1

### Approach using sf environment

# Unfortunately this option might cause some issues when working with the new sf or star spatial-class objects
# e.g. spp <- SpatialPoints(data.frame(x = x, y = y),proj4string = st_crs(ele_stars))

# So it is better to define this function de novo to work around with sf and stars object too.

# first we create an simple feature object for the desire locations  and assign the proper CRS

locations_sf<- st_as_sf(x = data.frame(x = x, y = y),                         
                       coords = c("x", "y"),
                       crs = st_crs(ele_stars))

# extract raster values at points locations 
v_sf <- st_extract(ele_stars, locations_sf)
v_sf$elevRaster.tif
#> [1] 1.1

Now, it may occur that the information of the spatial covariate at the target location is not available. In such cases, inlabru has the bru_fill_missing() to impute missing values with the nearest-available-value or feature. This is the second important argument we define in the f.elev() and I will illustrate how this works and how it can be extended for handling sf-star class objects by defining buffers at of \(k\) size at each location.

To make this example mor trackable, I will focus on a smaller extent of all the region by croping the original raster file and removing some of the elevation values at certain cells:


# this give us the spatial extent of the raster
st_bbox(ele_stars)
#>     xmin     ymin     xmax     ymax 
#> 580.1599 673.8742 586.2320 679.0378

# then by using the st_crop function I'll sellect a smaller window for our study
ele_crop <- ele_stars %>%
  st_crop(st_bbox(c(
    xmin = 581,
    xmax = 581.5,
    ymin = 675,
    ymax = 675.5
  ), crs = st_crs(ele_stars)))


# Now I will fill in some missing values within the grid

ele_crop$elevRaster.tif[4,10] <- ele_crop$elevRaster.tif[15,5] <- ele_crop$elevRaster.tif[9,11] <- ele_crop$elevRaster.tif[9,12] <-ele_crop$elevRaster.tif[10,11] <- NA

plot(ele_crop, text_values = TRUE, key.pos = NULL)

Now I will select a random number of locations an extract the elevation values at those locations:

set.seed(12345)
pts <- st_sample(st_as_sfc(st_bbox(ele_crop)), 10)
# extract raster values at points locations
pts_values <- st_extract(ele_crop, pts)
pts_values$elevRaster.tif
#>  [1] 1.3 1.1  NA 1.1  NA  NA 1.3  NA 1.3 0.1

We could tell just by looking at this plot that the first point on the right is closer to a grid cell with an elevation value of 1.3 and thus, this would be the value we will use to impute that cell. At this stage, we could use the bru_fill_missing() function to impute the missing values. To do so we first need to convert the sf class object into an sp spatial-class object and then use the bru_fill_missing() to replace each missing value with the nearest-available-value or feature. The function receives the sp-spatial object (in this case we need also to convert the crop-spatial raster to a sp spatial class), the spatial points object, and the elevation values at those locations.

# using inlabru function to interpolate raster values with closest neighbours
pts_sp <- as_Spatial(pts_values)
imputation <- inlabru:::bru_fill_missing(as(ele_crop, "Spatial"),pts_sp , pts_sp$elevRaster.tif)
imputation
#>  [1] 1.3 1.1 0.4 1.1 1.1 1.3 1.3 1.1 1.3 0.1

We can see that the missing value we saw earlier has now been imputed with its closets neighbour which had an elevation of 1.1. Now we will try a slighly different approach to do this imputation by using the advantages of the sf and tidyverse environments. For this approach I will define a series of buffers centred at each of the locations where elevation values are missing.

library(tibble)
library(tidyr)
#> 
#> Attaching package: 'tidyr'
#> The following object is masked from 'package:raster':
#> 
#>     extract
#> The following objects are masked from 'package:Matrix':
#> 
#>     expand, pack, unpack
# Step 1 - Identify observation with NAs
pts_values_na <- pts_values[which(is.na(pts_values$elevRaster.tif)),]
pts_values_na$point_id = 1:nrow(pts_values_na) # create an index column

# Step 2 - Create buffer around each point (change the dist for a different buffer size)
buffer <- st_buffer(pts_values_na, dist = 0.03)

# Step 3 - Convert points and raster to sf and add column ID
poly_sf <- buffer %>% 
  st_as_sf() %>% 
  rowid_to_column(var = "poly_id")

ele_sf <- st_as_sf(ele_crop)

We can find which cells lie within each buffer to determine the neighbourhood at each location.


# Step 4 - Select only those cells within the buffer

df_sf <- st_join(ele_sf, poly_sf) %>% 
  drop_na(poly_id)


ggplot() +  geom_sf(data = (df_sf),aes(fill=elevRaster.tif.x))+
   geom_sf(data = (poly_sf),fill='transparent',col=2)+
  geom_sf(data = pts_values_na, color = "red", size = 2) +
  geom_sf(data=st_centroid(df_sf))
#> Warning in st_centroid.sf(df_sf): st_centroid assumes attributes are constant
#> over geometries of x

Finally, we can compute the distance from the centroid of each cell in the neighborhood to each point and select the value within the minimum distance to the point.We can alos use other imputation apporach such as the mean, median, mode (useful when dealing categorical variables),maximum or minimum values within the neighborhood of each point by using the aggregate function.

#compute distance from each point to the centroid of each cell

centroids<- st_centroid(df_sf) # define centroids

imputed.values <- pts_values

for(i in seq_len(nrow(pts_values_na))){
  imputed.values[is.na(pts_values$elevRaster.tif),]$elevRaster.tif[i]<-df_sf[which.min(st_distance(centroids, pts_values_na[i,])),]$elevRaster.tif.x
}

#See we get the same result as with inlabru::bru_fill_missing
imputed.values$elevRaster.tif
#>  [1] 1.3 1.1 0.4 1.1 1.1 1.3 1.3 1.1 1.3 0.1

# we can aggregate the poylgons and compute the average, min, max, mode , etc. to do the imputation as well
x <- aggregate(df_sf, poly_sf, FUN = mean)
imputed.values[is.na(pts_values$elevRaster.tif),]$elevRaster.tif <- x$elevRaster.tif.x

round(imputed.values$elevRaster.tif,digits=2)
#>  [1] 1.30 1.10 0.42 1.10 1.09 1.30 1.30 0.97 1.30 0.10

Spatial Data Structures with INLA and inlabru part 2

Jafet Belmont

2022-03-28

Vignette Info

Continuous covariates - imputing missing values