This document is a quick and dirty illustration of how spatially correlated random effect can be fit with INLA. It is based on the question and the data posted on the R-Sig-mixedmodels mailing list: https://stat.ethz.ch/pipermail/r-sig-mixed-models/2016q3/024938.html The source code of this document is available at https://github.com/inbo/Rpubs. Suggestions to improve it are welcome.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(scales)
library(INLA)
## Loading required package: sp
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: splines
dataset <- readRDS("data.Rd")
summary(dataset)
## prec_nov_apr t_min_nov_apr srad_nov_apr age
## Min. : 47.3 Min. :0.9167 Min. : 869.2 Min. : 1.00
## 1st Qu.:118.5 1st Qu.:3.2500 1st Qu.:1232.0 1st Qu.: 9.00
## Median :137.7 Median :3.9917 Median :1386.2 Median :16.00
## Mean :131.6 Mean :3.9667 Mean :1359.5 Mean :17.73
## 3rd Qu.:146.1 3rd Qu.:4.7833 3rd Qu.:1448.4 3rd Qu.:22.00
## Max. :152.2 Max. :6.9167 Max. :1769.0 Max. :48.00
## date evaluation hail rlat rlon
## 2009:494 1:1050 0:929 Min. :4704304 Min. :466044
## 2010:330 2: 120 1:241 1st Qu.:4770128 1st Qu.:507622
## 2011:143 Median :4781687 Median :528596
## 2012: 97 Mean :4777992 Mean :532296
## NA's:106 3rd Qu.:4789497 3rd Qu.:553595
## Max. :4810808 Max. :602423
dataset <- dataset %>%
filter(!is.na(date)) %>%
mutate(
hail = as.integer(hail == "1"),
rx = rlat * 1e-3,
ry = rlon * 1e-3,
srad = srad_nov_apr * 1e-3 - 1,
prec = prec_nov_apr * 1e-2 - 1,
temp = t_min_nov_apr - 4
)
summary(dataset)
## prec_nov_apr t_min_nov_apr srad_nov_apr age
## Min. : 47.3 Min. :0.9167 Min. : 869.2 Min. : 1.00
## 1st Qu.:117.2 1st Qu.:3.2333 1st Qu.:1225.2 1st Qu.:10.00
## Median :136.3 Median :3.9833 Median :1388.0 Median :16.00
## Mean :130.7 Mean :3.9575 Mean :1359.4 Mean :17.68
## 3rd Qu.:145.8 3rd Qu.:4.7833 3rd Qu.:1448.5 3rd Qu.:22.00
## Max. :152.2 Max. :6.9167 Max. :1769.0 Max. :48.00
## date evaluation hail rlat rlon
## 2009:494 1:980 Min. :0.0000 Min. :4705059 Min. :466044
## 2010:330 2: 84 1st Qu.:0.0000 1st Qu.:4769384 1st Qu.:505848
## 2011:143 Median :0.0000 Median :4781435 Median :528114
## 2012: 97 Mean :0.2246 Mean :4777523 Mean :531519
## 3rd Qu.:0.0000 3rd Qu.:4789367 3rd Qu.:553620
## Max. :1.0000 Max. :4810808 Max. :602402
## rx ry srad prec
## Min. :4705 Min. :466.0 Min. :-0.1308 Min. :-0.5270
## 1st Qu.:4769 1st Qu.:505.8 1st Qu.: 0.2252 1st Qu.: 0.1725
## Median :4781 Median :528.1 Median : 0.3880 Median : 0.3628
## Mean :4778 Mean :531.5 Mean : 0.3594 Mean : 0.3072
## 3rd Qu.:4789 3rd Qu.:553.6 3rd Qu.: 0.4485 3rd Qu.: 0.4583
## Max. :4811 Max. :602.4 Max. : 0.7690 Max. : 0.5220
## temp
## Min. :-3.08333
## 1st Qu.:-0.76667
## Median :-0.01667
## Mean :-0.04245
## 3rd Qu.: 0.78333
## Max. : 2.91667
ggplot(dataset, aes(x = age)) + geom_histogram(binwidth = 1)
ggplot(dataset, aes(x = srad)) + geom_density()
ggplot(dataset, aes(x = temp)) + geom_density()
ggplot(dataset, aes(x = age, y = hail)) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ s(x, bs = "cs", k = 4),
method.args = list(family = binomial)
)
ggplot(dataset, aes(x = srad, y = hail)) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ s(x, bs = "cs", k = 4),
method.args = list(family = binomial)
)
ggplot(dataset, aes(x = temp, y = hail)) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ s(x, bs = "cs", k = 4),
method.args = list(family = binomial)
)
ggplot(dataset, aes(x = prec, y = hail)) +
geom_point() +
geom_smooth(
method = "gam",
formula = y ~ s(x, bs = "cs", k = 4),
method.args = list(family = binomial)
)
ggplot(dataset, aes(x = prec, colour = date)) + geom_density()
ggplot(dataset, aes(x = temp, colour = date)) + geom_density()
ggplot(dataset, aes(x = rx, y = ry, colour = factor(hail))) +
geom_point() +
coord_fixed() +
facet_wrap(~date)
ggplot(dataset, aes(x = rx, y = ry, colour = temp)) +
geom_point() +
coord_fixed() +
scale_colour_gradientn(colors = rainbow(5)) +
facet_wrap(~date)
ggplot(dataset, aes(x = rx, y = ry, colour = prec)) +
geom_point() +
coord_fixed() +
scale_colour_gradientn(colors = rainbow(5)) +
facet_wrap(~date)
ggplot(dataset, aes(x = rx, y = ry, colour = srad)) +
geom_point() +
coord_fixed() +
scale_colour_gradientn(colors = rainbow(5)) +
facet_wrap(~date)
m1 <- inla(
hail ~ prec + t_min_nov_apr + srad + age + date,
family = "binomial",
data = dataset
)
summary(m1)
##
## Call:
## c("inla(formula = hail ~ prec + t_min_nov_apr + srad + age + date, ", " family = \"binomial\", data = dataset)")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1112 0.1986 0.0343 0.3441
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -4.4698 0.5367 -5.5477 -4.4615 -3.4384 -4.4448 0
## prec 8.5218 0.8238 6.9474 8.5068 10.1833 8.4764 0
## t_min_nov_apr 0.1756 0.0979 -0.0156 0.1752 0.3687 0.1744 0
## srad 0.2582 0.5462 -0.8140 0.2581 1.3299 0.2579 0
## age 0.0000 0.0079 -0.0157 0.0001 0.0155 0.0002 0
## date2010 -1.2200 0.2280 -1.6755 -1.2173 -0.7797 -1.2119 0
## date2011 -1.2438 0.2524 -1.7483 -1.2408 -0.7563 -1.2347 0
## date2012 -2.9748 0.4230 -3.8627 -2.9547 -2.1987 -2.9132 0
##
## The model has no random effects
##
## The model has no hyperparameters
##
## Expected number of effective parameters(std dev): 8.001(0.00)
## Number of equivalent replicates : 132.98
##
## Marginal log-Likelihood: -467.48
coordinates <- dataset %>%
select(rx, ry) %>%
as.matrix()
boundary <- inla.nonconvex.hull(coordinates)
## Loading required package: splancs
##
## Spatial Point Pattern Analysis Code in S-Plus
##
## Version 2 - Spatial and Space-Time analysis
mesh <- inla.mesh.2d(
loc = coordinates,
boundary = boundary,
max.edge = 20,
cutoff = 5
)
plot(mesh)
points(coordinates, col = "red")
spde <- inla.spde2.matern(mesh = mesh)
A <- inla.spde.make.A(mesh = mesh, loc = coordinates)
s.index <- inla.spde.make.index(name = "spatial.field", n.spde = spde$n.spde)
stack <- inla.stack(
data = dataset %>%
select(hail) %>%
as.list(),
A = list(A, 1),
effects = list(
c(
s.index,
list(intercept = rep(1, spde$n.spde))
),
dataset %>%
select(temp, prec, srad, age) %>%
as.list()
)
)
m2 <- inla(
hail ~ 0 + intercept + temp + prec + srad + age +
f(spatial.field, model = spde),
data = inla.stack.data(stack),
family = "binomial",
control.predictor = list(
A = inla.stack.A(stack),
compute = TRUE
)
)
## Note: method with signature 'TsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
## target signature 'dgTMatrix#ngCMatrix#missing#numeric'.
## "Matrix#nsparseMatrix#missing#replValue" would also be valid
summary(m2)
##
## Call:
## c("inla(formula = hail ~ 0 + intercept + temp + prec + srad + age + ", " f(spatial.field, model = spde), family = \"binomial\", data = inla.stack.data(stack), ", " control.predictor = list(A = inla.stack.A(stack), compute = TRUE))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.3629 7.4674 0.2646 8.0948
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## intercept -2.4264 0.8388 -4.1599 -2.3997 -0.8449 -2.3491 0
## temp 1.0562 0.2385 0.6040 1.0504 1.5418 1.0385 0
## prec 0.5118 1.8519 -3.2316 0.5417 4.0859 0.5982 0
## srad -1.8995 0.8337 -3.5636 -1.8903 -0.2877 -1.8719 0
## age -0.0134 0.0109 -0.0352 -0.0132 0.0078 -0.0129 0
##
## Random effects:
## Name Model
## spatial.field SPDE2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant
## Theta1 for spatial.field -2.1022 0.5592 -3.297 -2.0586 -1.1165
## Theta2 for spatial.field -0.7455 0.3444 -1.364 -0.7686 -0.0181
## mode
## Theta1 for spatial.field -1.9191
## Theta2 for spatial.field -0.8415
##
## Expected number of effective parameters(std dev): 75.05(3.627)
## Number of equivalent replicates : 14.18
##
## Marginal log-Likelihood: -365.15
## Posterior marginals for linear predictor and fitted values computed
n.grid <- 50
dx <- diff(pretty(dataset$rx, n.grid)[1:2])
dy <- diff(pretty(dataset$ry, n.grid)[1:2])
delta <- max(dx, dy)
grid <- expand.grid(
rx = seq(
floor(min(dataset$rx) / delta) * delta,
max(dataset$rx) + delta,
by = delta
),
ry = seq(
floor(min(dataset$ry) / delta) * delta,
max(dataset$ry) + delta,
by = delta
)
)
A.grid <- inla.spde.make.A(mesh = mesh, loc = as.matrix(grid))
stack.grid <- inla.stack(
data = list(hail = NA),
A = list(A.grid),
effects = list(
c(
s.index,
list(intercept = rep(1, spde$n.spde))
)
),
tag = "grid"
)
stack.join <- inla.stack(stack, stack.grid)
m3 <- inla(
hail ~ 0 + intercept + temp + prec + srad + age + f(spatial.field, model = spde),
data = inla.stack.data(stack.join),
family = "binomial",
control.predictor = list(
A = inla.stack.A(stack.join),
compute = TRUE
)
)
summary(m3)
##
## Call:
## c("inla(formula = hail ~ 0 + intercept + temp + prec + srad + age + ", " f(spatial.field, model = spde), family = \"binomial\", data = inla.stack.data(stack.join), ", " control.predictor = list(A = inla.stack.A(stack.join), compute = TRUE))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.3001 17.1765 0.5508 18.0274
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## intercept -2.4264 0.8388 -4.1598 -2.3997 -0.8450 -2.3491 0
## temp 1.0562 0.2385 0.6040 1.0504 1.5418 1.0385 0
## prec 0.5120 1.8517 -3.2311 0.5417 4.0859 0.5982 0
## srad -1.8995 0.8336 -3.5636 -1.8903 -0.2877 -1.8719 0
## age -0.0134 0.0109 -0.0352 -0.0132 0.0078 -0.0129 0
##
## Random effects:
## Name Model
## spatial.field SPDE2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant 0.975quant
## Theta1 for spatial.field -2.103 0.5588 -3.297 -2.0592 -1.1182
## Theta2 for spatial.field -0.745 0.3441 -1.362 -0.7683 -0.0179
## mode
## Theta1 for spatial.field -1.9192
## Theta2 for spatial.field -0.8414
##
## Expected number of effective parameters(std dev): 75.05(3.626)
## Number of equivalent replicates : 14.18
##
## Marginal log-Likelihood: -365.15
## Posterior marginals for linear predictor and fitted values computed
grid.index <- inla.stack.index(stack.join, tag = "grid")$data
grid$mean <- m3$summary.fitted.values[grid.index, "mean"]
ggplot(grid, aes(x = rx, y = ry, fill = mean)) +
geom_tile() +
scale_fill_gradient2() +
coord_fixed()
grid$lcl <- m3$summary.fitted.values[grid.index, "0.025quant"]
ggplot(grid, aes(x = rx, y = ry, fill = lcl)) +
geom_tile() +
scale_fill_gradient2() +
coord_fixed()
grid$ucl <- m3$summary.fitted.values[grid.index, "0.975quant"]
ggplot(grid, aes(x = rx, y = ry, fill = ucl)) +
geom_tile() +
scale_fill_gradient2() +
coord_fixed()
grid %>%
gather(key = "type", value = "estimate", mean:ucl) %>%
mutate(estimate = plogis(estimate)) %>%
ggplot(aes(x = rx, y = ry, fill = estimate)) +
geom_tile() +
scale_fill_gradient2(
"Probabily of hail\nat reference values",
midpoint = 0.5,
limits = 0:1,
label = percent
) +
coord_fixed() +
facet_wrap(~type, nrow = 1)