1 Load Dataset and Calculate the State Transition Probability

df <- read.csv("data.txt", sep = " ")
x <- df$x
x1 <- x[1:(length(x)-1)]
x2 <- x[2:length(x)]
data.frame(from = x1, to = x2) %>% 
  group_by(from, to) %>% 
  summarise(N = n(), 
            .groups = "drop") %>% 
  arrange(from, to) %>% 
  group_by(from) %>% 
  mutate(Probability = N / sum(N)) -> df.transition_prob
df.transition_prob %>% 
  knitr::kable()

from	to	N	Probability
1	1	2381	0.5098501
1	2	1129	0.2417559
1	3	1160	0.2483940
2	1	734	0.3307796
2	2	722	0.3253718
2	3	763	0.3438486
3	1	1555	0.4998393
3	2	368	0.1182899
3	3	1188	0.3818708

df.transition_prob$Probability %>% 
  matrix(ncol = 3, byrow = F) -> transition.mat
rownames(transition.mat) <- c(1, 2, 3)
colnames(transition.mat) <- c(1, 2, 3)

diagram::plotmat(transition.mat %>% round(3), pos = c(1,2), 
                 lwd = 1, box.lwd = 2, cex.txt = 0.8, 
                 box.size = 0.1, box.type = "circle", 
                 box.prop = 0.5, box.col = "light blue",
                 arr.length=.1, arr.width=.1, 
                 self.cex = .4, self.shifty = -.01,
                 self.shiftx = .13, main = "Transition Probability of Markov Chain")

2 Generate and Continue the Markov Chain for Another 50 Steps

# Record the last state in the given dataset
last_state <- df$x[nrow(df)]
sprintf("The last state is: %d\n", last_state) %>% cat()

## The last state is: 1

# Set a seed for reproducibility
set.seed(1)
# initialize one empty vector for storing the generated value in next 50 steps
x_next_50_steps <- rep(NA, 50)
# Use a for loop to continue the Markov chain for another 50 steps
for (i in 1:50){
  if (i==1){
    df.transition_prob %>% 
      filter(from == last_state) %>% 
      pull(Probability) -> prob_vec
    # use the `sample` function in R to generate sample based on the probability vector
    x_next_50_steps[i] <- sample(1:3, size = 1, prob = prob_vec)
  } else{
    df.transition_prob %>% 
      filter(from == x_next_50_steps[i-1]) %>% 
      pull(Probability) -> prob_vec
    # use the `sample` function in R to generate sample based on the probability vector
    x_next_50_steps[i] <- sample(1:3, size = 1, prob = prob_vec)
  }
}
# Display the generated 50 steps
print(x_next_50_steps)

##  [1] 1 1 3 2 3 2 2 1 3 1 1 1 3 1 2 1 3 2 1 2 2 3 3 1 1 1 1 1 2 3 1 3 1 1 2 1 2 3
## [39] 3 1 2 1 2 1 3 3 1 1 3 3

Since Markov chain assumes the transition probability matrix won’t vary with time, we can first estimate the transition probability matrix based on the given generated sample. Specifically, count the frequency of each transition such as from state 1 to state 2, and then calculate the probability of transiting from a state to another state. Then, based on the last state of the given sample, we can continue the Markov chain by sample from 1 to 3 with the calculated probability.

Analysis

1 Load Dataset and Calculate the State Transition Probability

2 Generate and Continue the Markov Chain for Another 50 Steps