This document is a quick and dirty illustration of how spatially correlated random effect can be fit with INLA. It is based on the question and the data posted on the R-Sig-mixedmodels mailing list: https://stat.ethz.ch/pipermail/r-sig-mixed-models/2016q3/024938.html The source code of this document is available at https://github.com/inbo/Rpubs. Suggestions to improve it are welcome.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(scales)
library(INLA)

## Loading required package: sp

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:tidyr':
## 
##     expand

## Loading required package: splines

Data import and cleaning

dataset <- readRDS("data.Rd")
summary(dataset)

##   prec_nov_apr   t_min_nov_apr     srad_nov_apr         age       
##  Min.   : 47.3   Min.   :0.9167   Min.   : 869.2   Min.   : 1.00  
##  1st Qu.:118.5   1st Qu.:3.2500   1st Qu.:1232.0   1st Qu.: 9.00  
##  Median :137.7   Median :3.9917   Median :1386.2   Median :16.00  
##  Mean   :131.6   Mean   :3.9667   Mean   :1359.5   Mean   :17.73  
##  3rd Qu.:146.1   3rd Qu.:4.7833   3rd Qu.:1448.4   3rd Qu.:22.00  
##  Max.   :152.2   Max.   :6.9167   Max.   :1769.0   Max.   :48.00  
##    date     evaluation hail         rlat              rlon       
##  2009:494   1:1050     0:929   Min.   :4704304   Min.   :466044  
##  2010:330   2: 120     1:241   1st Qu.:4770128   1st Qu.:507622  
##  2011:143                      Median :4781687   Median :528596  
##  2012: 97                      Mean   :4777992   Mean   :532296  
##  NA's:106                      3rd Qu.:4789497   3rd Qu.:553595  
##                                Max.   :4810808   Max.   :602423

dataset <- dataset %>%
  filter(!is.na(date)) %>%
  mutate(
    hail = as.integer(hail == "1"),
    rx = rlat * 1e-3,
    ry = rlon * 1e-3,
    srad = srad_nov_apr * 1e-3 - 1,
    prec = prec_nov_apr * 1e-2 - 1,
    temp = t_min_nov_apr - 4
  )
summary(dataset)

##   prec_nov_apr   t_min_nov_apr     srad_nov_apr         age       
##  Min.   : 47.3   Min.   :0.9167   Min.   : 869.2   Min.   : 1.00  
##  1st Qu.:117.2   1st Qu.:3.2333   1st Qu.:1225.2   1st Qu.:10.00  
##  Median :136.3   Median :3.9833   Median :1388.0   Median :16.00  
##  Mean   :130.7   Mean   :3.9575   Mean   :1359.4   Mean   :17.68  
##  3rd Qu.:145.8   3rd Qu.:4.7833   3rd Qu.:1448.5   3rd Qu.:22.00  
##  Max.   :152.2   Max.   :6.9167   Max.   :1769.0   Max.   :48.00  
##    date     evaluation      hail             rlat              rlon       
##  2009:494   1:980      Min.   :0.0000   Min.   :4705059   Min.   :466044  
##  2010:330   2: 84      1st Qu.:0.0000   1st Qu.:4769384   1st Qu.:505848  
##  2011:143              Median :0.0000   Median :4781435   Median :528114  
##  2012: 97              Mean   :0.2246   Mean   :4777523   Mean   :531519  
##                        3rd Qu.:0.0000   3rd Qu.:4789367   3rd Qu.:553620  
##                        Max.   :1.0000   Max.   :4810808   Max.   :602402  
##        rx             ry             srad              prec        
##  Min.   :4705   Min.   :466.0   Min.   :-0.1308   Min.   :-0.5270  
##  1st Qu.:4769   1st Qu.:505.8   1st Qu.: 0.2252   1st Qu.: 0.1725  
##  Median :4781   Median :528.1   Median : 0.3880   Median : 0.3628  
##  Mean   :4778   Mean   :531.5   Mean   : 0.3594   Mean   : 0.3072  
##  3rd Qu.:4789   3rd Qu.:553.6   3rd Qu.: 0.4485   3rd Qu.: 0.4583  
##  Max.   :4811   Max.   :602.4   Max.   : 0.7690   Max.   : 0.5220  
##       temp         
##  Min.   :-3.08333  
##  1st Qu.:-0.76667  
##  Median :-0.01667  
##  Mean   :-0.04245  
##  3rd Qu.: 0.78333  
##  Max.   : 2.91667

EDA

ggplot(dataset, aes(x = age)) + geom_histogram(binwidth = 1)

ggplot(dataset, aes(x = srad)) + geom_density()

ggplot(dataset, aes(x = temp)) + geom_density()

ggplot(dataset, aes(x = age, y = hail)) +
  geom_point() +
  geom_smooth(
    method = "gam", 
    formula = y ~ s(x, bs = "cs", k = 4),
    method.args = list(family = binomial)
  )

ggplot(dataset, aes(x = srad, y = hail)) +
  geom_point() +
  geom_smooth(
    method = "gam", 
    formula = y ~ s(x, bs = "cs", k = 4),
    method.args = list(family = binomial)
  )

ggplot(dataset, aes(x = temp, y = hail)) +
  geom_point() +
  geom_smooth(
    method = "gam", 
    formula = y ~ s(x, bs = "cs", k = 4),
    method.args = list(family = binomial)
  )

ggplot(dataset, aes(x = prec, y = hail)) +
  geom_point() +
  geom_smooth(
    method = "gam", 
    formula = y ~ s(x, bs = "cs", k = 4),
    method.args = list(family = binomial)
  )

ggplot(dataset, aes(x = prec, colour = date)) + geom_density()

ggplot(dataset, aes(x = temp, colour = date)) + geom_density()

ggplot(dataset, aes(x = rx, y = ry, colour = factor(hail))) +
  geom_point() +
  coord_fixed() +
  facet_wrap(~date)

ggplot(dataset, aes(x = rx, y = ry, colour = temp)) +
  geom_point() +
  coord_fixed() +
  scale_colour_gradientn(colors = rainbow(5)) +
  facet_wrap(~date)

ggplot(dataset, aes(x = rx, y = ry, colour = prec)) +
  geom_point() +
  coord_fixed() +
  scale_colour_gradientn(colors = rainbow(5)) +
  facet_wrap(~date)

ggplot(dataset, aes(x = rx, y = ry, colour = srad)) +
  geom_point() +
  coord_fixed() +
  scale_colour_gradientn(colors = rainbow(5)) +
  facet_wrap(~date)

Model without spatial correlation

m1 <- inla(
  hail ~ prec + t_min_nov_apr + srad + age + date,
  family = "binomial",
  data = dataset
)
summary(m1)

## 
## Call:
## c("inla(formula = hail ~ prec + t_min_nov_apr + srad + age + date, ",  "    family = \"binomial\", data = dataset)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.1112          0.1986          0.0343          0.3441 
## 
## Fixed effects:
##                  mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept)   -4.4698 0.5367    -5.5477  -4.4615    -3.4384 -4.4448   0
## prec           8.5218 0.8238     6.9474   8.5068    10.1833  8.4764   0
## t_min_nov_apr  0.1756 0.0979    -0.0156   0.1752     0.3687  0.1744   0
## srad           0.2582 0.5462    -0.8140   0.2581     1.3299  0.2579   0
## age            0.0000 0.0079    -0.0157   0.0001     0.0155  0.0002   0
## date2010      -1.2200 0.2280    -1.6755  -1.2173    -0.7797 -1.2119   0
## date2011      -1.2438 0.2524    -1.7483  -1.2408    -0.7563 -1.2347   0
## date2012      -2.9748 0.4230    -3.8627  -2.9547    -2.1987 -2.9132   0
## 
## The model has no random effects
## 
## The model has no hyperparameters
## 
## Expected number of effective parameters(std dev): 8.001(0.00)
## Number of equivalent replicates : 132.98 
## 
## Marginal log-Likelihood:  -467.48

Model with spatial random intercept

coordinates <- dataset %>%
  select(rx, ry) %>%
  as.matrix()
boundary <- inla.nonconvex.hull(coordinates)

## Loading required package: splancs

## 
## Spatial Point Pattern Analysis Code in S-Plus
##  
##  Version 2 - Spatial and Space-Time analysis

mesh <- inla.mesh.2d(
  loc = coordinates, 
  boundary = boundary,
  max.edge = 20,
  cutoff = 5
)
plot(mesh)
points(coordinates, col = "red")

spde <- inla.spde2.matern(mesh = mesh)
A <- inla.spde.make.A(mesh = mesh, loc = coordinates)
s.index <- inla.spde.make.index(name = "spatial.field", n.spde = spde$n.spde)
stack <- inla.stack(
  data = dataset %>%
    select(hail) %>%
    as.list(),
  A = list(A, 1),
  effects = list(
    c(
      s.index,
      list(intercept = rep(1, spde$n.spde))
    ),
    dataset %>%
      select(temp, prec, srad, age) %>%
      as.list()
  )
)
m2 <- inla(
  hail ~ 0 + intercept + temp + prec + srad + age + 
    f(spatial.field, model = spde),
  data = inla.stack.data(stack),
  family = "binomial",
  control.predictor = list(
    A = inla.stack.A(stack),
    compute = TRUE
  )
)

## Note: method with signature 'TsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
##  target signature 'dgTMatrix#ngCMatrix#missing#numeric'.
##  "Matrix#nsparseMatrix#missing#replValue" would also be valid

summary(m2)

## 
## Call:
## c("inla(formula = hail ~ 0 + intercept + temp + prec + srad + age + ",  "    f(spatial.field, model = spde), family = \"binomial\", data = inla.stack.data(stack), ",  "    control.predictor = list(A = inla.stack.A(stack), compute = TRUE))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3629          7.4674          0.2646          8.0948 
## 
## Fixed effects:
##              mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## intercept -2.4264 0.8388    -4.1599  -2.3997    -0.8449 -2.3491   0
## temp       1.0562 0.2385     0.6040   1.0504     1.5418  1.0385   0
## prec       0.5118 1.8519    -3.2316   0.5417     4.0859  0.5982   0
## srad      -1.8995 0.8337    -3.5636  -1.8903    -0.2877 -1.8719   0
## age       -0.0134 0.0109    -0.0352  -0.0132     0.0078 -0.0129   0
## 
## Random effects:
## Name   Model
##  spatial.field   SPDE2 model 
## 
## Model hyperparameters:
##                             mean     sd 0.025quant 0.5quant 0.975quant
## Theta1 for spatial.field -2.1022 0.5592     -3.297  -2.0586    -1.1165
## Theta2 for spatial.field -0.7455 0.3444     -1.364  -0.7686    -0.0181
##                             mode
## Theta1 for spatial.field -1.9191
## Theta2 for spatial.field -0.8415
## 
## Expected number of effective parameters(std dev): 75.05(3.627)
## Number of equivalent replicates : 14.18 
## 
## Marginal log-Likelihood:  -365.15 
## Posterior marginals for linear predictor and fitted values computed

Predict values on a grid

n.grid <- 50
dx <- diff(pretty(dataset$rx, n.grid)[1:2])
dy <- diff(pretty(dataset$ry, n.grid)[1:2])
delta <- max(dx, dy)
grid <- expand.grid(
  rx = seq(
    floor(min(dataset$rx) / delta) * delta,
    max(dataset$rx) + delta,
    by = delta
  ),
  ry = seq(
    floor(min(dataset$ry) / delta) * delta,
    max(dataset$ry) + delta,
    by = delta
  )
)
A.grid <- inla.spde.make.A(mesh = mesh, loc = as.matrix(grid))
stack.grid <- inla.stack(
  data = list(hail = NA),
  A = list(A.grid),
  effects = list(
    c(
      s.index,
      list(intercept = rep(1, spde$n.spde))
    )
  ),
  tag = "grid"
)
stack.join <- inla.stack(stack, stack.grid)
m3 <- inla(
  hail ~ 0 + intercept + temp + prec + srad + age + f(spatial.field, model = spde),
  data = inla.stack.data(stack.join),
  family = "binomial",
  control.predictor = list(
    A = inla.stack.A(stack.join),
    compute = TRUE
  )
)
summary(m3)

## 
## Call:
## c("inla(formula = hail ~ 0 + intercept + temp + prec + srad + age + ",  "    f(spatial.field, model = spde), family = \"binomial\", data = inla.stack.data(stack.join), ",  "    control.predictor = list(A = inla.stack.A(stack.join), compute = TRUE))" )
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          0.3001         17.1765          0.5508         18.0274 
## 
## Fixed effects:
##              mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## intercept -2.4264 0.8388    -4.1598  -2.3997    -0.8450 -2.3491   0
## temp       1.0562 0.2385     0.6040   1.0504     1.5418  1.0385   0
## prec       0.5120 1.8517    -3.2311   0.5417     4.0859  0.5982   0
## srad      -1.8995 0.8336    -3.5636  -1.8903    -0.2877 -1.8719   0
## age       -0.0134 0.0109    -0.0352  -0.0132     0.0078 -0.0129   0
## 
## Random effects:
## Name   Model
##  spatial.field   SPDE2 model 
## 
## Model hyperparameters:
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Theta1 for spatial.field -2.103 0.5588     -3.297  -2.0592    -1.1182
## Theta2 for spatial.field -0.745 0.3441     -1.362  -0.7683    -0.0179
##                             mode
## Theta1 for spatial.field -1.9192
## Theta2 for spatial.field -0.8414
## 
## Expected number of effective parameters(std dev): 75.05(3.626)
## Number of equivalent replicates : 14.18 
## 
## Marginal log-Likelihood:  -365.15 
## Posterior marginals for linear predictor and fitted values computed

grid.index <- inla.stack.index(stack.join, tag = "grid")$data
grid$mean <- m3$summary.fitted.values[grid.index, "mean"]
ggplot(grid, aes(x = rx, y = ry, fill = mean)) + 
  geom_tile() + 
  scale_fill_gradient2() +
  coord_fixed()

grid$lcl <- m3$summary.fitted.values[grid.index, "0.025quant"]
ggplot(grid, aes(x = rx, y = ry, fill = lcl)) + 
  geom_tile() + 
  scale_fill_gradient2() +
  coord_fixed()

grid$ucl <- m3$summary.fitted.values[grid.index, "0.975quant"]
ggplot(grid, aes(x = rx, y = ry, fill = ucl)) + 
  geom_tile() + 
  scale_fill_gradient2() +
  coord_fixed()

grid %>%
  gather(key = "type", value = "estimate", mean:ucl) %>%
  mutate(estimate = plogis(estimate)) %>%
  ggplot(aes(x = rx, y = ry, fill = estimate)) +
  geom_tile() +
  scale_fill_gradient2(
    "Probabily of hail\nat reference values", 
    midpoint = 0.5, 
    limits = 0:1, 
    label = percent
  ) +
  coord_fixed() +
  facet_wrap(~type, nrow = 1)

Spatially correlated random effects with INLA

Data import and cleaning

EDA

Model without spatial correlation

Model with spatial random intercept

Predict values on a grid