We consider the hidden markov model given by the update formulas:
for parameters \( \alpha, \beta \in \mathbb{R} \) and \( \sigma>0 \) and with \( \gamma = \frac{\alpha + \beta}{2} \).
This is a Hidden Markov Model where \( Y_t \) is observed, but \( X_t \) is not.
We want to implement a filter model using sequential Monte Carlo methods.
\[ X_{1:t} = (X_0, X_1, ..., X_t) \]
SimXY <- function(x0, t, alpha, beta, sigma) {
gamma <- (alpha+beta)/2
x <- rep(0, t)
y <- rep(0, t)
x[1] <- x0
y[1] <- rpois(1, x0)
for(s in 2:t) {
x[s] <- max(rnorm(1, alpha*(x[s - 1]<=gamma) + beta*(x[s - 1]>gamma), sigma),1)
y[s] <- rpois(1, x[s])
}
list(x = x, y = y)
}
sim<-SimXY(1,1000,5,20,3)
impfilt<- function(y, x0, n, alpha, beta, sigma) {
t <- length(y)
gamma <- (alpha+beta)/2
x <- matrix(nrow=n, ncol=t)
u <- w <- matrix(nrow=n, ncol=t)
x[, 1] <- rep(x0,n)
w[, 1] <- u[, 1] <- 1/n
for(s in 2:t) {
x[, s] <- pmax(rnorm(n, alpha*(x[, s - 1]<=gamma) + beta*(x[, s - 1]>gamma), sigma),rep(1,n))
u[, s] <- dpois(y[s], x[, s])
w[, s] <- u[, s] * w[, s - 1]
w[, s] <- w[, s] / sum(w[, s])
}
pHat <- colSums(x * w)
list(x = x, u = u, w = w, pHat = pHat)
}
bootfilt<- function(y, x0, n, alpha, beta, sigma) {
t <- length(y)
gamma <- (alpha+beta)/2
x <-xre<- matrix(0, n, t)
u <- matrix(1 / n, n, t)
x[, 1] <- rep(x0,n)
xre[, 1] <- x[, 1]
for(s in 2:t) {
x[, s] <- pmax(rnorm(n, alpha*(xre[, s - 1]<=gamma) + beta*(xre[, s - 1]>gamma), sigma),rep(1,n))
u[, s] <- dpois(y[s], x[, s])
resamp <- sample(n, n, replace = TRUE, prob = u[, s])
xre[ , s] <- x[resamp, s]
}
pHat <- colSums(x * u)/colSums(u)
list(x = x, u = u, pHat = pHat)
}
SISfilt<- function(y, x0, n, alpha, beta, sigma, a) {
t <- length(y)
gamma <- (alpha+beta)/2
x <- matrix(0, n, t)
u <- w<- matrix(1 / n, n, t)
x[, 1] <- rep(x0,n)
re <- 0
for(s in 2:t) {
x[, s] <- pmax(rnorm(n, alpha*(x[, s - 1]<=gamma) + beta*(x[, s - 1]>gamma), sigma),rep(1,n))
u[, s] <- dpois(y[s], x[, s])
w[, s] <- u[, s] * w[, s - 1]
w[, s] <- w[, s] / sum(w[, s])
N <- 1/sum(w[ ,s]^2)
if(N< a*n){
resamp <- sample(n, n, replace = TRUE, prob = u[, s])
x[ , s] <- x[resamp, s]
w[, s] <- rep(1/n, n)
re <- re+1
}
}
pHat <- colSums(x * u)/colSums(u)
list(x = x, u = u, pHat = pHat, re= re)
}
Nofilt Boot SIS
ssd 1070.416 623.339 693.0226
Weights degenerate as t increases:
Most of the weigths are effectively zero. Resampling remove particles with low weights.
library(microbenchmark)
sim <- SimXY(1,1000,alpha,beta, sigma)
microbenchmark(impfilt(sim$y, 1, 1000, alpha, beta, sigma), bootfilt(sim$y, 1, 1000, alpha, beta, sigma), SISfilt(sim$y, 1, 1000, alpha, beta, sigma, a=0.3))
LINK
Bottleneck:
Optimization:
bootfiltFast<- function(y, x0, n, alpha, beta, sigma) {
t <- length(y)
gamma <- (alpha+beta)/2
x <-xre<- matrix(0, n, t)
u <- matrix(1 / n, n, t)
x[, 1] <- rep(x0,n)
xre[, 1] <- x[, 1]
e <- matrix(rnorm(n*t,0,sigma), n, t)
for(s in 2:t) {
x[, s] <- pmax(alpha*(xre[, s - 1]<=gamma) + beta*(xre[, s - 1]>gamma)+e[, s],rep(1,n))
u[, s] <- dpois(y[s], x[, s])
resamp <- sample(n, n, replace = TRUE, prob = u[, s])
xre[ , s] <- x[resamp, s]
}
pHat <- colSums(x * u)/colSums(u)
list(x = x, u = u, pHat = pHat)
}
library(microbenchmark)
sim <- SimXY(1,1000,alpha,beta, sigma)
microbenchmark(bootfilt(sim$y, 1, 1000, alpha, beta, sigma), bootfiltFast(sim$y, 1, 1000, alpha, beta, sigma))
Unit: milliseconds
expr min lq
bootfilt(sim$y, 1, 1000, alpha, beta, sigma) 414.6727 443.6720
bootfiltFast(sim$y, 1, 1000, alpha, beta, sigma) 427.5535 470.5089
mean median uq max neval
479.6061 476.0410 505.1454 630.0359 100
497.0876 493.2846 513.7842 699.8366 100
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadilloExtensions/sample.h>
#include <Rcpp.h>
#include <Rmath.h>
#include <R.h>
using namespace Rcpp;
// [[Rcpp::export]]
List bootFast(NumericVector y, double x0, int n,double alpha, double beta, double sigma) {
int t = y.size();
double gamma = (alpha+beta)/2;
double c;
NumericMatrix x(n, t), xre(n,t), u(n, t);
IntegerVector resamp(n), indices = seq_len(n) - 1;
NumericVector e1 = rnorm(n*t , 0, sigma);
NumericMatrix e(n, t, e1.begin());
NumericVector pHat(t);
for(int i = 0; i < n; i++){
x(i,0) = x0;
xre(i,0) = x0;
u(i,0) = 1/(double) n;
}
for(int s = 1; s < t; s++) {
for(int k = 0; k<n; k++){
if(xre(k,s - 1)<=gamma){
c = alpha;
}else{
c = beta;
}
x(k, s) = std::max(c + e(k,s),1.0);
u(k, s) = R::dpois(y[s], x(k,s), false);
}
resamp = RcppArmadillo::sample(indices, n, TRUE, u(_,s));
for(int k = 0; k < n; k++) {
xre(k, s) = x(resamp(k), s);
}
}
List ret; ret["x"] = x; ret["u"] = u;
return ret;
}
Benchmarking
sim <- SimXY(1,1000,alpha,beta, sigma)
microbenchmark(bootfilt(sim$y, 1, 1000, alpha, beta, sigma), bootFast(sim$y, 1, 1000, alpha, beta, sigma))
Unit: milliseconds
expr min lq mean
bootfilt(sim$y, 1, 1000, alpha, beta, sigma) 410.1434 446.3238 467.7234
bootFast(sim$y, 1, 1000, alpha, beta, sigma) 257.2370 275.9854 294.0761
median uq max neval
462.7166 485.5113 546.2902 100
284.5205 314.1366 360.2201 100
Test that we get the same result with the new implementation:
set.seed(12345)
b1<-bootFast(sim$y, 1, 1000, alpha, beta, sigma)
set.seed(12345)
b2<-bootfiltFast(sim$y,1,1000,alpha,beta,sigma)
all.equal(b1$x, b2$x)
[1] TRUE
all.equal(b1$u, b2$u)
[1] TRUE
hmm <- structure(list(y = sim$y,
x0=1,
xproc=function(x, alpha= 5,beta= 20,sigma =3){
gamma=(alpha+beta)/2
n <- length(x)
pmax(rnorm(n, alpha*(x<=gamma) + beta*(x>gamma),
sigma),rep(1,n))},
yproc = function(x, y){
dpois(x,y)
},
pHat=NULL,
u= NULL,
x = NULL),
class = "HMM")
bootFilt <- function(hm,n) UseMethod("bootFilt")
bootFilt.HMM <- function(hm,n){
t <- length(hm$y)
x <-xre<- matrix(0, n, t)
u <- matrix(1 / n, n, t)
x[, 1] <- rep(hm$x0,n)
xre[, 1] <- x[, 1]
for(s in 2:t) {
x[, s] <- hm$xproc(xre[, s-1])
u[, s] <- hm$yproc(hm$y[s], x[, s])
resamp <- sample(n, n, replace = TRUE, prob = u[, s])
xre[ , s] <- x[resamp, s]
}
hm$pHat <- colSums(x * u)/colSums(u)
hm$x <- x
hm$u <- u
hm
}
set.seed(12345)
hmm<-bootFilt(hmm,1000)
set.seed(12345)
b<-bootfilt(sim$y,1,1000, alpha=5, beta=20, sigma=3)
#Test that we get the same result
all.equal(hmm$pHat, b$pHat)
[1] TRUE
microbenchmark(bootFilt(hmm,1000),bootfilt(sim$y,1,1000, alpha=5, beta=20, sigma=3))
Unit: milliseconds
expr min
bootFilt(hmm, 1000) 417.8634
bootfilt(sim$y, 1, 1000, alpha = 5, beta = 20, sigma = 3) 419.7231
lq mean median uq max neval
440.2433 460.5373 451.8778 482.9670 559.712 100
439.3482 463.0547 452.5277 490.7033 530.687 100
print.HMM <- function(x) {
cat("y process: \n")
print(hmm$y)
cat("\n Estimated hidden x process: \n")
print(hmm$pHat)
}
plot.HMM <- function(hmm) {
par(mfrow=c(1,2))
plot(hmm$pHat, type = "l", col="red", main="X process", xlab="t", ylab="Conditional mean")
plot(hmm$y,type="l", col="blue", main = "Y process",xlab="t", ylab = "y")
}