Before we can simulate the die being rolled, we need to intialize the Hidden Markov model. For the decoding and evaluation problem, this means we need to input the emission and transmission matrices. The function initHMM() intializes the model, giving us the intial state, or \(\pi\).

#install.packages("HMM")
library(HMM)
nSim = 20
States = c("Fair", "Unfair")
Symbols = 1:6
transProbs = matrix(c(0.99, 0.01, 0.02, 0.98), c(length(States),
     length(States)), byrow = TRUE)
emissionProbs = matrix(c(rep(1/6, 6), c(rep(0.1, 5), 0.5)),
     c(length(States), length(Symbols)), byrow = TRUE)
hmm = initHMM(States, Symbols, transProbs = transProbs, emissionProbs = emissionProbs)
sim = simHMM(hmm, nSim)
vit = viterbi(hmm, sim$observation)
f = forward(hmm, sim$observation)
x = list(hmm = hmm, sim = sim, vit = vit)

The first thing we would like to calculate is the probability of observing the sequence of throws we observe. This is done with the “forward algorithm”. The function forward() will give us the log probability of observing each state at each throw. To get the probability we simply exponentiate these values. This is a sequence of conditional probabilities. To get the probability of observing the sequence we observed, we simply need to sum the final probabilities over both states.

forwardprob=exp(f)
print(forwardprob)
##         index
## states            1          2           3            4            5
##   Fair   0.08333333 0.01458333 0.002488194 0.0004186313 6.987421e-05
##   Unfair 0.25000000 0.02458333 0.002423750 0.0002400157 1.197008e-04
##         index
## states              6            7            8            9           10
##   Fair   1.192825e-05 2.007496e-06 3.351314e-07 5.723851e-08 9.635769e-09
##   Unfair 1.180056e-05 1.168383e-06 5.825451e-07 5.742455e-08 5.684844e-09
##         index
## states             11           12           13           14           15
##   Fair   1.608851e-09 2.749063e-10 5.001482e-11 1.053811e-11 1.962951e-12
##   Unfair 2.833753e-09 1.396583e-09 6.857002e-10 6.724863e-11 6.600904e-12
##         index
## states             16           17           18           19           20
##   Fair   3.458899e-13 6.788602e-14 1.226214e-14 2.544247e-15 4.708990e-16
##   Unfair 3.244258e-12 3.182832e-13 1.562982e-13 1.532948e-14 1.504834e-15
sum(forwardprob[,nSim])
## [1] 1.975733e-15

Now we reinitialize the HMM to simulate the die being rolled, but this time we roll the die many more times. This is to ensure we get a long and interesting sequence, with a variety of fair and unfair dice.

nSim = 2000
States = c("Fair", "Unfair")
Symbols = 1:6
transProbs = matrix(c(0.99, 0.01, 0.02, 0.98), c(length(States),
     length(States)), byrow = TRUE)
emissionProbs = matrix(c(rep(1/6, 6), c(rep(0.1, 5), 0.5)),
     c(length(States), length(Symbols)), byrow = TRUE)
hmm = initHMM(States, Symbols, transProbs = transProbs, emissionProbs = emissionProbs)
sim = simHMM(hmm, nSim)
 vit = viterbi(hmm, sim$observation)
 f = forward(hmm, sim$observation)
 x = list(hmm = hmm, sim = sim, vit = vit)
 ##Plotting simulated throws at top
 mn = "Fair and unfair die"
 xlb = "Throw nr."
 ylb = ""

Then we plot the frequency of the observed throws. Next we show the sequence of throws and the sequence that the model predicts. The final plot shows the difference between what was observed and what the model predicts.

plot(x$sim$observation, ylim = c(-7.5, 6), pch = 3, main = mn,
    xlab = xlb, ylab = ylb, bty = "n", yaxt = "n")
axis(2, at = 1:6)

#######Simulated, which die was used (truth)####################
text(0, -1.2, adj = 0, cex = 0.8, col = "black", "True: green = fair die")
for (i in 1:nSim) {
    if (x$sim$states[i] == "Fair")
        rect(i, -1, i + 1, 0, col = "green", border = NA)
    else rect(i, -1, i + 1, 0, col = "red", border = NA)
   }
########Most probable path (viterbi)#######################
text(0, -3.2, adj = 0, cex = 0.8, col = "black", "Most probable path")
for (i in 1:nSim) {
    if (x$vit[i] == "Fair")
        rect(i, -3, i + 1, -2, col = "green", border = NA)
    else rect(i, -3, i + 1, -2, col = "red", border = NA)
}
##################Differences:
text(0, -5.2, adj = 0, cex = 0.8, col = "black", "Difference")
differing = !(x$sim$states == x$vit)
for (i in 1:nSim) {
    if (differing[i])
        rect(i, -5, i + 1, -4, col = rgb(0.3, 0.3, 0.3),
            border = NA)
    else rect(i, -5, i + 1, -4, col = rgb(0.9, 0.9, 0.9),
        border = NA)
       }

baumWelch(hmm, sim$observation, maxIterations=100, delta=1E-9, pseudoCount=0)
## $hmm
## $hmm$States
## [1] "Fair"   "Unfair"
## 
## $hmm$Symbols
## [1] 1 2 3 4 5 6
## 
## $hmm$startProbs
##   Fair Unfair 
##    0.5    0.5 
## 
## $hmm$transProbs
##         to
## from           Fair     Unfair
##   Fair   0.98962143 0.01037857
##   Unfair 0.03133091 0.96866909
## 
## $hmm$emissionProbs
##         symbols
## states            1          2          3          4         5         6
##   Fair   0.15134852 0.15687749 0.18613768 0.16562686 0.1688982 0.1711112
##   Unfair 0.09242536 0.08248709 0.08771433 0.08644012 0.0867502 0.5641829
## 
## 
## $difference
##  [1] 5.709456e-02 2.187790e-02 9.111243e-03 3.925724e-03 1.751676e-03
##  [6] 8.009743e-04 3.727256e-04 1.761204e-04 8.465071e-05 4.161305e-05
## [11] 2.113321e-05 1.126084e-05 6.403585e-06 3.897353e-06 2.487568e-06
## [16] 1.623745e-06 1.067824e-06 7.030666e-07 4.624790e-07 3.037885e-07
## [21] 1.992884e-07 1.305895e-07 8.550479e-08 5.594777e-08 3.659195e-08
## [26] 2.392433e-08 1.563970e-08 1.021988e-08 6.677336e-09 4.365253e-09
## [31] 2.851649e-09 1.863746e-09 1.216477e-09 7.966543e-10
transProbs
##      [,1] [,2]
## [1,] 0.99 0.01
## [2,] 0.02 0.98
emissionProbs
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]
## [1,] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## [2,] 0.1000000 0.1000000 0.1000000 0.1000000 0.1000000 0.5000000

Source: theory: https://s3-ap-southeast-1.amazonaws.com/erbuc/files/c893615b-5f1e-422e-833c-10ac70bd39c9.pdf

code: http://web.stanford.edu/class/stats366/hmmR2.html