Update Mixture processes – using Barrier model as prototype

0 Introduction

This file demonstrate 2 update method for dealing with mixture processes. We use the prototype model and code from Bakka’s Barrier model in INLA. The update methods are

Model as a single stationary process and mask the land with desired values
Joint mixture model

1 Simulate toy data

First we simulate some toy data on a rectangular region with a rectangular hole insde the region as the “barrier”.

## load libraries and source codes
library(INLA); library(rgdal); library(maptools); library(GEOmap); library(rgl)
source("C:/ZSwork/glbm/Experiment1b/mixture_model/BarrierModel/functions-barriers-dt-models-march2017.R")
set.seed(18)

Create the study regions: a \([0,1] \times [0, 1]\) rectangle and a smaller one inside as the barrier.

## boundaries and interiors
loc.bnd = matrix(c(0,0, 1,0, 1,1, 0,1), 4, 2, byrow=TRUE)
loc.int = matrix(c(0.3,0.3, 0.8,0.4, 0.7,0.9, 0.2,0.8, 0.3,0.3), 5, 2, byrow=TRUE)
segm.bnd = inla.mesh.segment(loc.bnd)
segm.int = inla.mesh.segment(loc.int, is.bnd=FALSE)
int.Poly <- SpatialPolygons(list(Polygons(list(Polygon(loc.int)), ID = "in")))
out.Poly <- SpatialPolygons(list(Polygons(list(Polygon(loc.bnd)), ID = "out")))

NOTES: Why use interior to set up the boundary of the inner rectangle? Because in INLA mesh generator, the interior lines will be inluded as the edges of the triangles.

Now generate points inside these region for creating a desired mesh. We want dense mesh outside and sparse inside and a smooth transition between the two.

# first create uniformaly dense points
loc0 <- spsample(out.Poly, n = 1000, type = "hexagonal", offset = c(0, 0))
# remove points inside the polygon
loc0_id <- unlist(over(int.Poly, loc0, returnList=T))
loc0a <- loc0[-loc0_id,]

# sparse grid for the inside
loc1 <- spsample(int.Poly, n = 25, type = "hexagonal", offset = c(0,0))

## add more points along the interior lines
intLines <- Lines(list(Line(int.Poly@polygons[[1]]@Polygons[[1]]@coords)), ID = 1)
intLines <- SpatialLines(list(intLines))
loc2 <- spsample(intLines, n = 12, type = "regular", offset = c(0,0))
loc2@coords <- jitter(loc2@coords)
loc <- rbind(loc0a, loc1, loc2)
plot(loc)

mesh = inla.mesh.2d(loc = loc, boundary = segm.bnd, interior = segm.int, max.edge = 0.5)
# Note that when using interior here offset and max.edge can only be scalars.
# Bondary defines the boundary of the domain of interest
# interior defines a set of segments inside the domain of interest that are desired to the triagle edges.
plot(mesh, asp = 1)

## Do another iteration to make the boudary smooth
loc_new <- mesh$loc
mesh = inla.mesh.2d(loc = loc_new, boundary = segm.bnd, cutoff = 0.02, max.edge = 0.5)
plot(mesh, asp = 1)
plot(int.Poly, add = TRUE, border = "red", lwd = 2)

Plot the mesh and see check which triangles are inside the polygons

mesh = dt.mesh.addon.posTri(mesh)
# - Add on mesh$posTri
# - - contains the positions of the triangles

TinP = unlist(over(int.Poly, SpatialPoints(mesh$posTri), returnList=T))
# - checking which mesh triangles are inside the barrier area
Omega = dt.Omega(list(TinP, 1:mesh$t), mesh)
Omega.SP = dt.polygon.omega(mesh, Omega)

plot(mesh, main ="Mesh and Omega", asp = 1)
plot(Omega.SP[[1]], add=T, col='grey')
plot(Omega.SP[[2]], add=T, col='lightblue')
plot(mesh, add=T)

Now use this mesh to build the model and simulate data.

## Create the precsion matrix function
Q.function = dt.create.Q(mesh, Omega)
ranges = c(10, 0.17)
# - the first range is for the barrier area
# - - it is not sensitive to the exact value here, 
# - the second range is for the normal area
Q = Q.function(theta = c(log(1), log(ranges)))
# - the precision matrix for fixed ranges

## Simulate the field
u = inla.qsample(n=1, Q=Q, seed = 3)
u = u[ ,1]

## A wrapper for plotting the result
local.plot.field = function(field, ...){
  xlim = c(0, 1); ylim = xlim;
  proj = inla.mesh.projector(mesh, xlim = xlim, 
                             ylim = ylim, dims=c(300, 300))
  field.proj = inla.mesh.project(proj, field)
  image.plot(list(x = proj$x, y=proj$y, z = field.proj), 
             xlim = xlim, ylim = ylim, ...)  
}

## Plot the simulated field
local.plot.field(u, main="The true (simulated) spatial field", asp = 1, zlim = c(-6, 2.5))

Now sample observations from this fied. The real observations are usually all over the study region but for our “pseudo-polygons”, we normally do not have real observations but we know what values they should be.

We try two different schemes here for the polygon area:

Observatiosn along the boundary only;
Use one single Polygon observations.

The following code sample the observations uisng shceme 1.

## Scheme 1: boundary points
obs0 <- spsample(out.Poly, n = 200, type = "random")
ids <- unlist(over(int.Poly, obs0, returnList=T))
obs0 <- obs0[-ids,]

## add more points along the interior lines
obs1 <- spsample(intLines, n = 40, type = "random")
obs1@coords <- jitter(obs1@coords)
obs1 <- rbind(obs0, obs1)
plot(obs1)

A1.data = inla.spde.make.A(mesh, obs1)
# - the projector matrix required for any spatial model
# - this matrix can transform the field-defined-on-the-mesh 
#   to the field-defined-on-the-data-locations

u.data1 = A1.data %*% u
# - project the field from the finite element  
#   representation to the data locations
df1 = data.frame(obs1) # - df is the dataframe used for modeling
names(df1) = c('locx', 'locy')
sigma.epsilon = 0.2 # - size of the iid noise in the Gaussian likelihood
df1$y = drop(u.data1 + sigma.epsilon*rnorm(nrow(df1)))
# - sample observations with gaussian noise

summary(df1)

##       locx               locy                y          
##  Min.   :0.001906   Min.   :0.002095   Min.   :-5.1199  
##  1st Qu.:0.216716   1st Qu.:0.236435   1st Qu.:-4.2959  
##  Median :0.450305   Median :0.478055   Median :-1.3445  
##  Mean   :0.473828   Mean   :0.496441   Mean   :-1.7372  
##  3rd Qu.:0.747539   3rd Qu.:0.784840   3rd Qu.: 0.1356  
##  Max.   :0.999100   Max.   :0.999572   Max.   : 2.2089

The following code sample the observations uisng shceme 2.

## Scheme 2: polygon observation
obs2 <- obs0

## build the polygon block
Vt_in <- unlist(over(int.Poly, SpatialPoints(coords = mesh$loc[,1:2]), returnList=T))

## use a polygon observations -- average of triangle vertices inside the polygon
obs2b <- SpatialPoints(coords = matrix(rep(c(0.5, 0.6), length(Vt_in)), ncol =2, byrow=TRUE)) # use a single location to represent this polygon at this stage
obs2 <- rbind(obs2, obs2b)

block_ind <- c(1:length(obs0), rep(length(obs0)+1, length(Vt_in)))
A2.data <- inla.spde.make.A(mesh = mesh, loc = obs2, block = block_ind, block.rescale = "count") #polygon -- average





u.data2 = A2.data %*% u
# - project the field from the finite element  
#   representation to the data locations
df2 = data.frame(obs2[1:(length(obs0)+1)]) # - df is the dataframe used for modeling
names(df2) = c('locx', 'locy')
sigma.epsilon = 0.2 # - size of the iid noise in the Gaussian likelihood for the point data
sigma.epsilon2 = 0.05 # - size of the iid noise in the Gaussian likelihood for the polygon data
df2$y = drop(u.data2 + sigma.epsilon*rnorm(nrow(df2)))
df2$y[nrow(df2)] = drop(u.data2[nrow(df2)] + sigma.epsilon2*rnorm(1))
# - sample observations with gaussian noise

summary(df2)

##       locx               locy                y          
##  Min.   :0.001906   Min.   :0.002095   Min.   :-4.8002  
##  1st Qu.:0.188813   1st Qu.:0.175777   1st Qu.:-2.0528  
##  Median :0.465152   Median :0.399768   Median :-0.6674  
##  Mean   :0.481471   Mean   :0.465724   Mean   :-0.9834  
##  3rd Qu.:0.767413   3rd Qu.:0.771381   3rd Qu.: 0.3515  
##  Max.   :0.999100   Max.   :0.999572   Max.   : 1.9281

2 Inference – stationary

stk1 <- inla.stack(data=list(y=df1$y), A=list(A1.data),
                  effects=list(s=1:mesh$n), 
                  remove.unused = FALSE, tag='est')

stk2 <- inla.stack(data=list(y=df2$y), A=list(A2.data),
                  effects=list(s=1:mesh$n), 
                  remove.unused = FALSE, tag='est')

model.stat = inla.spde2.matern(mesh)
# - Set up the model component for the spatial SPDE model: 
#   Stationary Matern model
# - I assume you are somewhat familiar with this model

formula <- y ~ -1 + f(s, model=model.stat)
# - Remove the default intercept
# - - Having it in the stack instead improves the numerical 
#     accuracy of the INLA algorithm
# - Fixed effects + random effects

hyper <- list(prec = list(fixed = TRUE, initial = log(1)))
  
  
res.stationary1 <- inla(formula, data=inla.stack.data(stk1),
            control.predictor=list(A = inla.stack.A(stk1)),
            family = 'gaussian', scale = rep(1/0.04, length(obs1)),
            control.family = list(hyper = hyper))

summary(res.stationary1)

## 
## Call:
## c("inla(formula = formula, family = \"gaussian\", data = inla.stack.data(stk1), ",  "    scale = rep(1/0.04, length(obs1)), control.predictor = list(A = inla.stack.A(stk1)), ",  "    control.family = list(hyper = hyper))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##           4.508           3.663           0.266           8.437 
## 
## The model has no fixed effects
## 
## Random effects:
## Name   Model
##  s   SPDE2 model 
## 
## Model hyperparameters:
##                mean     sd 0.025quant 0.5quant 0.975quant   mode
## Theta1 for s -3.904 0.0713     -4.041   -3.905     -3.760 -3.910
## Theta2 for s  1.576 0.2287      1.089    1.593      1.981  1.655
## 
## Expected number of effective parameters(std dev): 142.05(2.676)
## Number of equivalent replicates : 1.338 
## 
## Marginal log-Likelihood:  -211.74

res_pars1 <- inla.spde2.result(res.stationary1, "s", model.stat, do.transf=TRUE)
plot(res_pars1$marginals.range.nominal[[1]], type = "l", main = "range")

plot(res_pars1$marginals.variance.nominal[[1]], type = "l", main = "variance")

prec_scale <- 1/c(rep(0.2^2, length(obs0)), 0.05^2)
res.stationary2 <- inla(formula, data=inla.stack.data(stk2),
            control.predictor=list(A = inla.stack.A(stk2)),
            family = 'gaussian',
            control.family = list(hyper = hyper),
            scale = prec_scale)

summary(res.stationary2)

## 
## Call:
## c("inla(formula = formula, family = \"gaussian\", data = inla.stack.data(stk2), ",  "    scale = prec_scale, control.predictor = list(A = inla.stack.A(stk2)), ",  "    control.family = list(hyper = hyper))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##           4.256           3.568           0.173           7.997 
## 
## The model has no fixed effects
## 
## Random effects:
## Name   Model
##  s   SPDE2 model 
## 
## Model hyperparameters:
##                mean     sd 0.025quant 0.5quant 0.975quant   mode
## Theta1 for s -3.922 0.0803     -4.077   -3.923     -3.761 -3.928
## Theta2 for s  1.826 0.1904      1.428    1.837      2.174  1.875
## 
## Expected number of effective parameters(std dev): 121.77(2.206)
## Number of equivalent replicates : 1.24 
## 
## Marginal log-Likelihood:  -187.32

res_pars2 <- inla.spde2.result(res.stationary2, "s", model.stat, do.transf=TRUE)
plot(res_pars2$marginals.range.nominal[[1]], type = "l", main = "range")

plot(res_pars2$marginals.variance.nominal[[1]], type = "l", main = "variance")

local.plot.field(res.stationary1$summary.random$s$mean, asp =1,  zlim = c(-6, 2.5),
          main="Spatial estimate with the stationary model -- mean")

local.plot.field(res.stationary2$summary.random$s$mean, asp =1,  zlim = c(-6, 2.5),
          main="Spatial estimate with the stationary model -- mean")

local.plot.field(res.stationary1$summary.random$s$sd, asp =1, zlim = c(0, 3),
          main="Spatial estimate with the stationary model -- uncertainty")

local.plot.field(res.stationary2$summary.random$s$sd, asp =1,zlim = c(0, 3),
          main="Spatial estimate with the stationary model -- uncertainty")

2 Inference – barrier model

Q.barrier = dt.create.Q(mesh, Omega, fixed.ranges = c(5, NA))
# - We fix the barrier range to a different value than we 
#   used for simulations
# - - Why? It does not matter, as long as it is 'small' 
#     the models are very
#     similar
# - - This shows that you do not need to know the 
#     true 'barrier range'!

log.prior = dt.create.prior.log.exp(prior.param = c(1,1))
# - The prior parameters are the lambdas in the exponential 
#   priors for standard deviation and inverse-range

barrier.model = dt.inla.model(Q = Q.barrier, log.prior=log.prior)

formula2 <- y ~ -1 + f(s, model=barrier.model)
# - The spatial model component is different from before
# - The rest of the model setup is the same! 
#   (as in the stationary case)
# - - e.g. the inla(...) call below is the same, 
#     only this formula is different

res.barrier1 = inla(formula2, data=inla.stack.data(stk1),
       control.predictor=list(A = inla.stack.A(stk1)),
       family = 'gaussian',
       control.family = list(hyper = hyper),
            scale = 1/0.04)

## Warning in inla.model.properties.generic(inla.trim.family(model), (mm[names(mm) == : Model 'rgeneric' in section 'latent' is marked as 'experimental'; changes may appear at any time.
##   Use this model with extra care!!! Further warnings are disabled.

summary(res.barrier1)

## 
## Call:
## c("inla(formula = formula2, family = \"gaussian\", data = inla.stack.data(stk1), ",  "    scale = 1/0.04, control.predictor = list(A = inla.stack.A(stk1)), ",  "    control.family = list(hyper = hyper))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##           0.929           8.409           0.244           9.582 
## 
## The model has no fixed effects
## 
## Random effects:
## Name   Model
##  s   RGeneric2 
## 
## Model hyperparameters:
##                 mean     sd 0.025quant 0.5quant 0.975quant    mode
## Theta1 for s -0.0261 0.0856    -0.1873  -0.0289     0.1483 -0.0388
## Theta2 for s -1.7775 0.0969    -1.9596  -1.7807    -1.5799 -1.7923
## 
## Expected number of effective parameters(std dev): 114.65(2.668)
## Number of equivalent replicates : 1.657 
## 
## Marginal log-Likelihood:  -129.87

theta1 <- res.barrier1$marginals.hyperpar$`Theta1 for s`
theta2 <- res.barrier1$marginals.hyperpar$`Theta2 for s`

Vmar1 <- inla.tmarginal(exp, theta1)
Rmar1 <- inla.tmarginal(exp, theta2)
plot(Vmar1, type = "l", main = "posterior varianace")

plot(Rmar1, type = "l", main = "posterior range")

res.barrier2 = inla(formula2, data=inla.stack.data(stk2),
       control.predictor=list(A = inla.stack.A(stk2)),
       family = 'gaussian',
       control.family = list(hyper = hyper),
            scale = prec_scale)

summary(res.barrier2)

## 
## Call:
## c("inla(formula = formula2, family = \"gaussian\", data = inla.stack.data(stk2), ",  "    scale = prec_scale, control.predictor = list(A = inla.stack.A(stk2)), ",  "    control.family = list(hyper = hyper))")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##           0.608           8.347           0.136           9.091 
## 
## The model has no fixed effects
## 
## Random effects:
## Name   Model
##  s   RGeneric2 
## 
## Model hyperparameters:
##                 mean     sd 0.025quant 0.5quant 0.975quant    mode
## Theta1 for s  0.0239 0.0949    -0.1535   0.0202     0.2188  0.0067
## Theta2 for s -1.7273 0.1097    -1.9321  -1.7318    -1.5012 -1.7481
## 
## Expected number of effective parameters(std dev): 111.98(2.548)
## Number of equivalent replicates : 1.349 
## 
## Marginal log-Likelihood:  -138.95

theta1 <- res.barrier2$marginals.hyperpar$`Theta1 for s`
theta2 <- res.barrier2$marginals.hyperpar$`Theta2 for s`

Vmar2 <- inla.tmarginal(exp, theta1)
Rmar2 <- inla.tmarginal(exp, theta2)
plot(Vmar2, type = "l", main = "posterior varianace")

plot(Rmar2, type = "l", main = "posterior range")

local.plot.field(res.barrier1$summary.random$s$mean, asp = 1,  zlim = c(-6, 2.5),
                 main="Spatial posterior for Barrier model -- mean")

local.plot.field(res.barrier1$summary.random$s$sd, asp = 1, zlim = c(0, 3),
                 main="Spatial posterior for Barrier model -- uncertainty")

local.plot.field(res.barrier2$summary.random$s$mean, asp = 1,  zlim = c(-6, 2.5),
                 main="Spatial posterior for Barrier model -- mean")

local.plot.field(res.barrier2$summary.random$s$sd, asp = 1,zlim = c(0, 3),
                 main="Spatial posterior for Barrier model -- uncertainty")

Update Mixture processes – using Barrier model as prototype

Zhe Sha (zhe.sha@bristol.ac.uk)

23 Oct 2017

0 Introduction

1 Simulate toy data

2 Inference – stationary

2 Inference – barrier model