Read Topix Data 2021/1/4-2026/5/29

library(readr)
topix <- read_csv("topix.csv")
## Rows: 1320 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (1): close
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
topix$price <- as.numeric(topix$close)
topix$Date  <- as.Date(topix$date)
n <- length(topix$price)

return <- 100 * diff(log(topix$price))
mydate <- topix$Date[2:n]
n <- n-1

Draw time series plot of TOPIX returns

topix.df <-data.frame(dates = mydate, value = return) 
library(ggplot2)
ggplot(topix.df, aes(x = mydate, y = return)) +
  geom_line(linewidth = 0.4) +
  geom_hline(yintercept = 0, linetype = "dashed", linewidth = 0.5) +
  geom_hline(yintercept = c(2, -2), linetype = "dotted", linewidth = 0.7) +
  labs(title = "TOPIX Daily Log Returns", 
       subtitle = "Dotted lines indicate ±2%",
       x = "Date", y = "Log return (%)") + 
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")

Fit EGARCH(1,1) model using rstan

library(rstan)
## Loading required package: StanHeaders
## 
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
library(rstudioapi)
mydata <- list( n = n, y = return);

myinit <- function() { 
  list( omega = 0, alpha = 0, beta = 0.5, gamma = 0, mu = 0)
}
fit  <- stan(file = 'egarch.stan', data = mydata, init = myinit,
             chains = 4, warmup = 1000, iter = 11000, thin=10, cores = parallel::detectCores())

param <-c("omega","alpha","beta", "gamma", "mu")
print(fit, pars = param, digits=3)
## Inference for Stan model: anon_model.
## 4 chains, each with iter=11000; warmup=1000; thin=10; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##         mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
## omega  0.025   0.000 0.011  0.006  0.018  0.024  0.032  0.050  3925    1
## alpha -0.160   0.000 0.023 -0.204 -0.175 -0.160 -0.145 -0.115  3713    1
## beta   0.866   0.001 0.035  0.787  0.846  0.871  0.891  0.921  3782    1
## gamma  0.226   0.001 0.043  0.152  0.197  0.223  0.254  0.319  3849    1
## mu     0.027   0.000 0.028 -0.026  0.008  0.027  0.045  0.081  4067    1
## 
## Samples were drawn using NUTS(diag_e) at Sat May 30 11:14:48 2026.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
stan_dens(fit,  pars = param, nrow = 3, ncol = 2, separate_chains = TRUE)

stan_trace(fit, pars = param, nrow = 3, ncol = 2)

stan_ac(fit, pars = param, nrow = 3, ncol = 2, separate_chains = TRUE)
## Warning in (function (mapping = NULL, data = NULL, stat = "count", position =
## "stack", : Ignoring unknown parameters: `size`

mcmcout <- rstan::extract(fit)
nparam  <- length(param)
cat("AIC=",-2*max(mcmcout$loglik)+2*nparam,"BIC=",-2*max(mcmcout$loglik)+nparam*log(n))
## AIC= 3891.084 BIC= 3917.007

Plot the median of conditional variances

v_med = rep(0, n);
for(t in 1:n){
  v_med[t] = quantile(mcmcout$sigma2[,t], 0.5);
}
variance.df <-data.frame(dates = mydate, med = v_med)  
#
ggplot(variance.df) + geom_line(aes(x=dates, y=v_med)) + 
  xlab("Date") + 
  ylab("Estimated Conditional Variance") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")