Include libraries

set.seed(111)
library(readr)
library(ggplot2)
library(rstan)
## Loading required package: StanHeaders
## 
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
library(rstudioapi)
## Warning: package 'rstudioapi' was built under R version 4.5.3

Read data

m <- read_csv("CI202603.csv")
## Rows: 495 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (2): Leading, Coincident
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Composite Leading Index,  Coincident Index Data
# Date,LeadingIndex
# 1985-1-1,84.6,90.8
# 1985-2-1,84.9,90.5
# ...............
# Peaks and Troughs for business cycles
recess.df = read.table(textConnection(
  "Peak, Trough
 1985-06-01, 1986-11-01
 1991-02-01, 1993-10-01
 1997-05-01, 1999-01-01
 2000-11-01, 2002-01-01
 2008-02-01, 2009-03-01
 2012-03-01, 2012-11-01
 2018-10-01, 2020-05-01 "), sep=',',
colClasses=c('Date', 'Date'), header=TRUE)

Time series plot of Composite Index (shaded area: recession)

month <- as.vector(t(m[,1]))
ci    <- as.vector(t(m[,3])) 
#
ci.df <-data.frame(dates = as.Date(month), value = ci)  
T     <- length(ci)
#
g1 <- ggplot(ci.df) + geom_line(aes(x=dates, y=value)) + 
  xlab("Time") +  ylab("Composite Index") 
g1 = g1 + geom_rect(data=recess.df,aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill = "red", alpha=0.2)
g1

Time series plot of Change in Composite Index (%)

y     <- ( log(ci[2:T]) - log(ci[1:T-1]) ) * 100
month <- month[2:T]
T     <- T-1
cirate.df <-data.frame(dates = as.Date(month), value = y)  
#
g2 <- ggplot(cirate.df) + geom_line(aes(x=dates, y=value)) + 
  xlab("Time") + ylab("Composite Index Change")
g2 = g2 + geom_rect(data=recess.df,aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill = "red", alpha=0.2)
g2

Markov Switching Model using rstan

mydata <- list( T = T, y = y);
myinit <- function() { 
  list( mu1 = -1, mu2 = 1, sigma2 = 1, p11 = 0.90, p22 = 0.95)
}
fit  <- stan(file = 'markov.stan', data = mydata, init = myinit,
             chains = 4, warmup = 1000, iter = 11000, thin=10, cores = parallel::detectCores())

Estimation results

print(fit, pars = c("mu1","mu2","sigma2","p11","p22","loglik"))
## Inference for Stan model: anon_model.
## 4 chains, each with iter=11000; warmup=1000; thin=10; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##           mean se_mean   sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
## mu1      -0.58    0.00 0.21   -1.03   -0.70   -0.56   -0.43   -0.23  4021    1
## mu2       0.42    0.00 0.10    0.26    0.36    0.42    0.48    0.62  3694    1
## sigma2    0.60    0.00 0.06    0.50    0.56    0.60    0.64    0.72  3731    1
## p11       0.91    0.00 0.04    0.80    0.89    0.91    0.94    0.96  4078    1
## p22       0.96    0.00 0.02    0.91    0.95    0.96    0.97    0.99  3651    1
## loglik -782.48    0.03 1.75 -786.88 -783.34 -782.10 -781.19 -780.28  3993    1
## 
## Samples were drawn using NUTS(diag_e) at Fri Jun  5 15:02:35 2026.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
param <-c("mu1","mu2","sigma2","p11","p22")
stan_dens(fit,  pars = param, nrow = 3, ncol = 2, separate_chains = TRUE)

stan_trace(fit, pars = param, nrow = 3, ncol = 2)

stan_ac(fit, pars = param, nrow = 3, ncol = 2, separate_chains = TRUE)

AIC, BIC. # param = 5 (mu1, mu2, sigma2, p11, p22)

mcmcout = rstan::extract(fit)
cat("AIC=",-2*max(mcmcout$loglik)+2*5,"\n")
## AIC= 1569.902
cat("BIC=",-2*max(mcmcout$loglik)+log(T)*5)
## BIC= 1590.914

Compute and plot the median of smoothed probabilities

p_med = rep(0, T);
for(t in 1:T){
  p_med[t] = quantile(mcmcout$spr1[,t], 0.5);
}
prob.df <-data.frame(dates = as.Date(month), med = p_med)  
#
g3 <- ggplot(prob.df) + geom_line(aes(x=dates, y=med)) + 
  xlab("Time") + 
  ylab("Smoothed Pr. of Recession") + theme_bw()
g3 = g3 + geom_rect(data=recess.df,aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill = "red", alpha=0.2)
g3