Sequential Monte Carlo

Marie Mørch

Theory:

We consider the hidden markov model given by the update formulas:

\[\epsilon_t \sim \mathcal{N}(0, \sigma^2)\]

\[X_t= \max(\alpha 1(X_{t−1} ≤\gamma)+\beta 1(X_{t−1} >\gamma )+\epsilon_t,1)\] \[Y_t \sim Pois(X_t)\]

for parameters \(\alpha, \beta \in \mathbb{R}\) and \(\sigma>0\) and with \(\gamma = \frac{\alpha + \beta}{2}\).

This is a Hidden Markov Model where \(Y_t\) is observed, but \(X_t\) is not.

We will focus on computing \[E(X_t \vert Y_0, . . . , Y_t)\] as a prediction of the \(X\)-process.

We want to implement a filter model using sequential Monte Carlo methods.

Simulation

SimXY <- function(x0, t, alpha, beta, sigma) {
  gamma <- (alpha+beta)/2
  x <- numeric(t)
  y <- numeric(t)
  x[1] <- x0
  y[1] <- rpois(1, x0)
  for(s in 2:t) {
    x[s] <- max(rnorm(1, alpha*(x[s - 1]<=gamma) + beta*(x[s - 1]>gamma), sigma),1)
    y[s] <- rpois(1, x[s])
  }
  list(x = x, y = y)
}

Simulation

sim<-SimXY(1,1000,5,20,3)

No resampling

impfilt<- function(y, x0, n, alpha, beta, sigma) {
  t <- length(y)
  gamma <- (alpha+beta)/2
  x <- matrix(nrow=n, ncol=t)
  u <- w <- matrix(nrow=n, ncol=t)
  x[, 1] <- rep(x0,n)
  w[, 1] <- u[, 1] <- 1/n
  for(s in 2:t) {
    x[, s] <- pmax(rnorm(n, alpha*(x[, s - 1]<=gamma) + beta*(x[, s - 1]>gamma), sigma),rep(1,n))
    #Calculate weigth update factor
    u[, s] <- dpois(y[s], x[, s])
    #Update the weigths
    w[, s] <- u[, s] * w[, s - 1]
    #Normalize
    w[, s] <- w[, s] / sum(w[, s])
  }
  pHat <- colSums(x * w)
  list(x = x, u = u, w = w, pHat = pHat)
}

Bootstrapfilter

bootfilt<- function(y, x0, n, alpha, beta, sigma) {
  t <- length(y)
  gamma <- (alpha+beta)/2
  x <-xre<- matrix(0, n, t)
  u <- matrix(1 / n, n, t)
  x[, 1] <- rep(x0,n)
  xre[, 1] <- x[, 1]
  for(s in 2:t) {
    x[, s] <- pmax(rnorm(n, alpha*(xre[, s - 1]<=gamma) + beta*(xre[, s - 1]>gamma), sigma),rep(1,n))
    u[, s] <- dpois(y[s], x[, s])
    resamp <- sample(n, n, replace = TRUE, prob = u[, s])
    xre[ , s] <- x[resamp, s]
  }
  pHat <- colSums(x * u)/colSums(u)
  list(x = x, u = u, pHat = pHat)
}

Effective sample size filter

Here we introduce a threshold that governs the lowest tolerable ratio of effective sample size to actual sample size.

ESSfilt<- function(y, x0, n, alpha, beta, sigma, a) {
  t <- length(y)
  gamma <- (alpha+beta)/2
  x <- matrix(0, n, t)
  u <- w<- matrix(1 / n, n, t)
  x[, 1] <- rep(x0,n)
  re <- 0
  for(s in 2:t) {
    x[, s] <- pmax(rnorm(n, alpha*(x[, s - 1]<=gamma) + beta*(x[, s - 1]>gamma), sigma),rep(1,n))
    u[, s] <- dpois(y[s], x[, s])
    w[, s] <- u[, s] * w[, s - 1]
    w[, s] <- w[, s] / sum(w[, s])
    N <- 1/sum(w[ ,s]^2)
    if(N< a*n){
          resamp <- sample(n, n, replace = TRUE, prob = u[, s])
          x[ , s] <- x[resamp, s]
          w[, s] <- rep(1/n, n)
          re <- re+1
    }
  }
  pHat <- colSums(x * u)/colSums(u)
  list(x = x, u = u, pHat = pHat, re= re)
}

Test

Red is ESS, blue is bootstrap and green is no filter.

##       Nofilt     Boot      ESS
## ssd 1193.832 393.4132 451.6339
##       Nofilt    Boot      ESS
## ssd 10276.93 1409.46 1426.568
##       Nofilt     Boot      ESS
## ssd 33318.37 5419.696 5680.498

Weight degeneracy

Weights degenerate as t increases:

Most of the weigths are effectively zero. Resampling remove particles with low weights.

Effect of resampling

intervalfilt<- function(y, x0, n, alpha, beta, sigma,k) {
  t <- length(y)
  gamma <- (alpha+beta)/2
  x <- matrix(0, n, t)
  u <- matrix(1 / n, n, t)
  x[, 1] <- rep(x0,n)
  re <- 0
  for(s in 2:t) {
    x[, s] <- pmax(rnorm(n, alpha*(x[, s - 1]<=gamma) + beta*(x[, s - 1]>gamma), sigma),rep(1,n))
    u[, s] <- dpois(y[s], x[, s])
     if(s %% k==0) {
      resamp <- sample(n, n, replace = TRUE, prob = u[, s])
      x[ , s] <- x[resamp, s]
   }
  }
  pHat <- colSums(x * u)/colSums(u)
  list(x = x, u = u, pHat = pHat)
}

Benchmarking

library(microbenchmark)
sim <- SimXY(1,1000,alpha,beta, sigma)
microbenchmark(impfilt(sim$y, 1, 1000, alpha, beta, sigma), bootfilt(sim$y, 1, 1000, alpha, beta, sigma), ESSfilt(sim$y, 1, 1000, alpha, beta, sigma, a=0.3)) 
## Unit: milliseconds
##                                                  expr     min       lq
##           impfilt(sim$y, 1, 1000, alpha, beta, sigma) 383.589 422.7641
##          bootfilt(sim$y, 1, 1000, alpha, beta, sigma) 385.657 444.7529
##  ESSfilt(sim$y, 1, 1000, alpha, beta, sigma, a = 0.3) 440.411 482.4630
##      mean   median       uq      max neval
##  445.5887 444.5556 465.6983 523.7103   100
##  469.4601 470.6066 488.5625 556.3263   100
##  514.5947 504.9835 537.1274 676.4153   100

Number of particles

Running time

Profiling

http://rpubs.com/mamo3007/326269

Bottleneck:

Optimization:

Avoid resampling:

bootfiltFast<- function(y, x0, n, alpha, beta, sigma) {
  t <- length(y)
  gamma <- (alpha+beta)/2
  x <-xre<- matrix(0, n, t)
  u <- matrix(1 / n, n, t)
  x[, 1] <- rep(x0,n)
  xre[, 1] <- x[, 1]
  e <- matrix(rnorm(n*t,0,sigma), n, t)
  for(s in 2:t) {
    x[, s] <- pmax(alpha*(xre[, s - 1]<=gamma) + beta*(xre[, s - 1]>gamma)+e[, s],rep(1,n))
    u[, s] <- dpois(y[s], x[, s])
    resamp <- sample(n, n, replace = TRUE, prob = u[, s])
    xre[ , s] <- x[resamp, s]
  }
  pHat <- colSums(x * u)/colSums(u)
  list(x = x, u = u, pHat = pHat)
}
sim <- SimXY(1,1000,alpha,beta, sigma)
microbenchmark(bootfilt(sim$y, 1, 1000, alpha, beta, sigma), bootfiltFast(sim$y, 1, 1000, alpha, beta, sigma))
## Unit: milliseconds
##                                              expr      min       lq
##      bootfilt(sim$y, 1, 1000, alpha, beta, sigma) 412.1979 430.7838
##  bootfiltFast(sim$y, 1, 1000, alpha, beta, sigma) 422.2585 448.9855
##      mean   median       uq      max neval
##  466.1453 453.4577 493.3871 747.5039   100
##  480.0587 483.9262 507.2143 559.7064   100

Implementation in RCPP:

// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadilloExtensions/sample.h>
#include <Rcpp.h>
#include <Rmath.h>
#include <R.h>
using namespace Rcpp;
// [[Rcpp::export]]
List bootFast(NumericVector y, double x0, int n,double alpha, double beta, double sigma) {
  int t = y.size();
  double gamma = (alpha+beta)/2;
  double c;
  NumericMatrix x(n, t), xre(n,t), u(n, t);
  IntegerVector resamp(n), indices = seq_len(n) - 1;
  NumericVector e1 = rnorm(n*t , 0, sigma);
  NumericMatrix e(n, t, e1.begin()); 
  NumericVector pHat(t);
  for(int i = 0; i < n; i++){
    x(i,0) = x0; 
    xre(i,0) = x0;
    u(i,0) = 1/(double) n;
  }
  for(int s = 1; s < t; s++) {
    for(int k = 0; k<n; k++){
      if(xre(k,s - 1)<=gamma){
        c = alpha;
      }else{
        c = beta;
      }
      x(k, s) = std::max(c + e(k,s),1.0);
      u(k, s) = R::dpois(y[s], x(k,s), false);
    }
    resamp = RcppArmadillo::sample(indices, n, TRUE, u(_,s));
    for(int k = 0; k < n; k++) {
    xre(k, s) = x(resamp(k), s);
    }
  }
  List ret; ret["x"] = x; ret["u"] = u;
  return ret;
}

Benchmarking

Benchmarking

sim <- SimXY(1,1000,alpha,beta, sigma)
microbenchmark(bootfilt(sim$y, 1, 1000, alpha, beta, sigma), bootFast(sim$y, 1, 1000, alpha, beta, sigma)) 
## Unit: milliseconds
##                                          expr      min       lq     mean
##  bootfilt(sim$y, 1, 1000, alpha, beta, sigma) 406.1229 428.5150 506.4502
##  bootFast(sim$y, 1, 1000, alpha, beta, sigma) 263.0286 326.5685 334.9695
##    median       uq       max neval
##  482.7573 500.3594 2058.3986   100
##  334.2294 348.0820  560.9867   100

Test

Test that we get the same result with the new implementation:

set.seed(12345)
b1<-bootFast(sim$y, 1, 1000, alpha, beta, sigma)
set.seed(12345)
b2<-bootfiltFast(sim$y,1,1000,alpha,beta,sigma)

all.equal(b1$x, b2$x)
## [1] TRUE
all.equal(b1$u, b2$u)
## [1] TRUE

Object Oriented implementation

hmm <- structure(list(y = sim$y,
                  x0=1, 
                  xproc=function(x, alpha= 5,beta= 20,sigma =3){
                    gamma=(alpha+beta)/2
                    n <- length(x)
                    pmax(rnorm(n, alpha*(x<=gamma) + beta*(x>gamma),
                               sigma),rep(1,n))}, 
                  yproc = function(x, y){
                    dpois(x,y)
                  }, 
                  pHat=NULL, 
                  u= NULL, 
                  x = NULL),
  class = "HMM")

bootFilt <- function(hm,n) UseMethod("bootFilt")

bootFilt.HMM <- function(hm,n){
  t <- length(hm$y)
  x <-xre<- matrix(0, n, t)
  u <- matrix(1 / n, n, t)
  x[, 1] <- rep(hm$x0,n)
  xre[, 1] <- x[, 1]
  for(s in 2:t) {
    x[, s] <- hm$xproc(xre[, s-1])
    u[, s] <- hm$yproc(hm$y[s], x[, s])
    resamp <- sample(n, n, replace = TRUE, prob = u[, s])
    xre[ , s] <- x[resamp, s]
  }
  hm$pHat <- colSums(x * u)/colSums(u)
  hm$x <- x
  hm$u <- u
  hm
}

Test:

set.seed(12345)
hmm<-bootFilt(hmm,1000)
set.seed(12345)
b<-bootfilt(sim$y,1,1000, alpha=5, beta=20, sigma=3)

#Test that we get the same result
all.equal(hmm$pHat, b$pHat)
## [1] TRUE
microbenchmark(bootFilt(hmm,1000),bootfilt(sim$y,1,1000, alpha=5, beta=20, sigma=3))
## Unit: milliseconds
##                                                       expr      min
##                                        bootFilt(hmm, 1000) 521.6083
##  bootfilt(sim$y, 1, 1000, alpha = 5, beta = 20, sigma = 3) 479.1359
##        lq     mean   median       uq      max neval
##  564.9764 666.4483 598.9520 658.7425 1857.248   100
##  568.3594 676.3574 620.2402 686.2773 2261.884   100

Additional methods:

print.HMM <- function(x) {
  cat("y process: \n")
  print(hmm$y)
  cat("\n Estimated  hidden x process: \n")
  print(hmm$pHat)
}

plot.HMM <- function(hmm) {
  par(mfrow=c(1,2))
  plot(hmm$pHat, type = "l", col="red", main="X process", xlab="t", ylab="Conditional mean")
  plot(hmm$y,type="l", col="blue", main = "Y process",xlab="t", ylab = "y")
}

Use on the SMC data

The SMC data set contains 500 observations of the \(Y_t\) for the model with \(\alpha = 5\), \(\beta = 20\) and \(\sigma = 3\). We can assume that \(X_0 = 1\).

hmmObj <- structure(list(y = SMC$y,
                  x0=1, 
                  xproc=function(x, alpha= 5,beta= 20,sigma =3){
                    gamma=(alpha+beta)/2
                    n <- length(x)
                    pmax(rnorm(n, alpha*(x<=gamma) + beta*(x>gamma),
                               sigma),rep(1,n))}, 
                  yproc = function(x, y){
                    dpois(x,y)
                  }, 
                  pHat=NULL, 
                  u= NULL, 
                  x = NULL),
  class = "HMM")

hmmObj <-bootFilt(hmmObj,1000) 
plot(hmmObj)

plot(rpois(500,hmmObj$pHat), type="l", xlab="t", ylab="Y")
lines(SMC$y, col="red")