df <- read.csv("data.txt", sep = " ")
x <- df$x
x1 <- x[1:(length(x)-1)]
x2 <- x[2:length(x)]
data.frame(from = x1, to = x2) %>%
group_by(from, to) %>%
summarise(N = n(),
.groups = "drop") %>%
arrange(from, to) %>%
group_by(from) %>%
mutate(Probability = N / sum(N)) -> df.transition_prob
df.transition_prob %>%
knitr::kable()
| from | to | N | Probability |
|---|---|---|---|
| 1 | 1 | 2381 | 0.5098501 |
| 1 | 2 | 1129 | 0.2417559 |
| 1 | 3 | 1160 | 0.2483940 |
| 2 | 1 | 734 | 0.3307796 |
| 2 | 2 | 722 | 0.3253718 |
| 2 | 3 | 763 | 0.3438486 |
| 3 | 1 | 1555 | 0.4998393 |
| 3 | 2 | 368 | 0.1182899 |
| 3 | 3 | 1188 | 0.3818708 |
df.transition_prob$Probability %>%
matrix(ncol = 3, byrow = F) -> transition.mat
rownames(transition.mat) <- c(1, 2, 3)
colnames(transition.mat) <- c(1, 2, 3)
diagram::plotmat(transition.mat %>% round(3), pos = c(1,2),
lwd = 1, box.lwd = 2, cex.txt = 0.8,
box.size = 0.1, box.type = "circle",
box.prop = 0.5, box.col = "light blue",
arr.length=.1, arr.width=.1,
self.cex = .4, self.shifty = -.01,
self.shiftx = .13, main = "Transition Probability of Markov Chain")
# Record the last state in the given dataset
last_state <- df$x[nrow(df)]
sprintf("The last state is: %d\n", last_state) %>% cat()
## The last state is: 1
# Set a seed for reproducibility
set.seed(1)
# initialize one empty vector for storing the generated value in next 50 steps
x_next_50_steps <- rep(NA, 50)
# Use a for loop to continue the Markov chain for another 50 steps
for (i in 1:50){
if (i==1){
df.transition_prob %>%
filter(from == last_state) %>%
pull(Probability) -> prob_vec
# use the `sample` function in R to generate sample based on the probability vector
x_next_50_steps[i] <- sample(1:3, size = 1, prob = prob_vec)
} else{
df.transition_prob %>%
filter(from == x_next_50_steps[i-1]) %>%
pull(Probability) -> prob_vec
# use the `sample` function in R to generate sample based on the probability vector
x_next_50_steps[i] <- sample(1:3, size = 1, prob = prob_vec)
}
}
# Display the generated 50 steps
print(x_next_50_steps)
## [1] 1 1 3 2 3 2 2 1 3 1 1 1 3 1 2 1 3 2 1 2 2 3 3 1 1 1 1 1 2 3 1 3 1 1 2 1 2 3
## [39] 3 1 2 1 2 1 3 3 1 1 3 3
Since Markov chain assumes the transition probability matrix won’t vary with time, we can first estimate the transition probability matrix based on the given generated sample. Specifically, count the frequency of each transition such as from state 1 to state 2, and then calculate the probability of transiting from a state to another state. Then, based on the last state of the given sample, we can continue the Markov chain by sample from 1 to 3 with the calculated probability.