On going IBM

Leitura e Pre-processamento dos Dados (IBM_data.csv)

dt.intra <- read.csv(
  "IBM_data.csv",
  header           = TRUE,
  stringsAsFactors = FALSE,
  sep              = ",",
  dec              = "."
)

dt.intra$Period <- mdy_hm(paste(dt.intra$Date, dt.intra$Time))

dt1 <- as_tibble(dt.intra) %>%
  arrange(Period) %>%
  filter(!(hour(Period) == 9 & minute(Period) < 40)) %>%
  filter(hour(Period) < 16) %>%
  filter(!(hour(Period) == 15 & minute(Period) >= 56)) %>%
  group_by(date(Period)) %>%
  filter(n() > 100) %>%
  ungroup() %>%
  arrange(Period) %>%
  mutate(
    Ret.1min   = (Close - lag(Close)) / lag(Close),
    Close.1min = Close
  ) %>%
  filter(!is.na(Ret.1min))

ret_sd       <- sd(dt1$Ret.1min, na.rm = TRUE)
ret_mean     <- mean(dt1$Ret.1min, na.rm = TRUE)
outlier_mask <- abs(dt1$Ret.1min - ret_mean) > 10 * ret_sd
dt1$Ret.1min[outlier_mask] <- NA
dt1$Ret.1min <- zoo::na.locf(dt1$Ret.1min, na.rm = FALSE)

ret.1min <- as.xts(dt1$Ret.1min, order.by = dt1$Period)

cat("Total de observacoes apos limpeza:", nrow(dt1), "\n")
Total de observacoes apos limpeza: 2644941 
cat("Periodo:", format(min(dt1$Period)), "a", format(max(dt1$Period)), "\n")
Periodo: 1998-01-02 09:41:00 a 2026-02-20 15:55:00 

Observacao Inicial

plot(dt1$Close.1min,
     main = "IBM - Precos de Fechamento (1 min) - Serie Completa",
     col  = "black", type = "l",
     xlab = "Indice", ylab = "Preco")

boxplot(as.double(ret.1min),
        main = "Boxplot dos Retornos 1min - IBM (Serie Completa)",
        ylab = "Retorno")

Preparacao da Serie Diaria de Retornos

PRE_DAYS  <- 60   # dias de pregao antes do breakpoint
POST_DAYS <- 60   # dias de pregao depois do breakpoint

n_total   <- nrow(dt1)
rets_full <- dt1$Ret.1min


daily_close <- dt1 %>%
  mutate(trade_date = as.Date(Period)) %>%
  group_by(trade_date) %>%
  summarise(close_last = last(Close.1min), .groups = "drop") %>%
  arrange(trade_date) %>%
  mutate(
    close_prev = lag(close_last),
    close_diff = close_last - close_prev,
    close_pct  = abs(close_diff / close_prev)
  )

dt1_indexed <- dt1 %>%
  mutate(global_idx = row_number(), trade_date = as.Date(Period))

last_idx_per_day <- dt1_indexed %>%
  group_by(trade_date) %>%
  summarise(last_global_idx = max(global_idx), .groups = "drop")

daily_close <- daily_close %>%
  left_join(last_idx_per_day, by = "trade_date")

dc_clean <- daily_close %>%
  filter(!is.na(close_diff), !is.na(last_global_idx)) %>%
  mutate(
    close_prev2 = close_last - close_diff,
    daily_ret   = ifelse(close_prev2 > 0, close_diff / close_prev2, NA_real_)
  ) %>%
  filter(!is.na(daily_ret))

ret_daily <- dc_clean$daily_ret
n_daily   <- length(ret_daily)
log_price <- log(dc_clean$close_last)

cat(sprintf("Serie diaria: %d obs  |  %s  a  %s\n",
            n_daily,
            format(min(dc_clean$trade_date)),
            format(max(dc_clean$trade_date))))
Serie diaria: 7076 obs  |  1998-01-05  a  2026-02-20
# ---- janela segura (opera em indices diarios) ----
safe_window_d <- function(start, end, n_max) {
  s <- max(1L, as.integer(start))
  e <- min(n_max, as.integer(end))
  if (s > e) return(NULL)
  s:e
}

Bai-Perron: Quebras na Media dos Retornos Diarios

Bai-Perron (1998, 2003) — Multiplas Quebras na Media

Generaliza o Teste de Chow para m quebras simultaneas desconhecidas. O modelo linear com \(m\) quebras e: \[y_t = \mathbf{x}_t' \boldsymbol{\beta}_j + \varepsilon_t, \quad t = T_{j-1}+1, \ldots, T_j, \quad j = 1,\ldots,m+1\] onde os pontos \(T_1 < T_2 < \cdots < T_m\) sao estimados conjuntamente por: \[\{\hat{T}_1, \ldots, \hat{T}_m\} = \arg\min_{T_1,\ldots,T_m} \sum_{j=1}^{m+1} \sum_{t=T_{j-1}+1}^{T_j} (y_t - \bar{y}_j)^2\]

O numero otimo de quebras \(m^*\) e selecionado pelo criterio BIC ou pelo teste sequencial de Bai-Perron, que controla a probabilidade de detectar quebras espurias. A busca e feita por programacao dinamica em \(O(n^2)\).

Entrada: \(\log(r_t^2)\) (log do quadrado dos retornos diarios), que aproxima a serie a uma distribuicao normal e e estacionario, tornando o procedimento valido. A transformacao \(\log(r_t^2) = \log(\sigma_t^2) + \log(z_t^2)\) lineariza a volatilidade, com o segundo termo atuando como ruido quasi-gaussiano de media conhecida.

O que detecta: mudancas persistentes e significativas no nivel medio de log-volatilidade ao longo de multiplos periodos sem datas pre-fixadas. Tres execucoes com \(h \in \{0.05,\, 0.10,\, 0.01\}\) permitem comparar a sensibilidade do detector ao tamanho minimo do segmento.

bp_override <- data.frame(
  h          = c("h = 0.05", "h = 0.05", "h = 0.05"),
  rank       = c(1L, 2L, 3L),
  trade_date = c("2003-07-30", "2007-10-15", "2009-06-03"),
  log_r2     = c(-8.8983, -17.2661 ,-9.3474),
  daily_ret  = c(-1.1689, -1.1689, -0.9338)
)
# ^^^^^^^^ Para usar o modelo ao invés desses dados, mude para: bp_override <- NULL

# Transformacao: log(r_t^2) aproxima a distribuicao a normal e lineariza a estrutura de volatilidade.
log_ret2 <- log(ret_daily^2 + 1e-10)   # pequeno offset para evitar log(0)

run_bp <- function(series, h_val, max_breaks = 3) {
  fit    <- strucchange::breakpoints(series ~ 1, breaks = max_breaks, h = h_val)
  bp_idx <- fit$breakpoints
  bp_idx <- bp_idx[!is.na(bp_idx)]
  list(fit = fit, bp_idx = bp_idx, h = h_val)
}

if (is.null(bp_override)) {
  # ---- Roda o modelo normalmente ----
  bp_h05 <- run_bp(log_ret2, h_val = 0.05)
  bp_daily_idx <- bp_h05$bp_idx

  make_bp_summary <- function(bp_obj, label) {
    idx <- bp_obj$bp_idx
    if (length(idx) == 0) return(data.frame())
    data.frame(
      h          = label,
      rank       = seq_along(idx),
      trade_date = as.character(dc_clean$trade_date[idx]),
      log_r2     = round(log_ret2[idx], 4),
      daily_ret  = round(ret_daily[idx] * 100, 4)
    )
  }

  bp_all_summary <- make_bp_summary(bp_h05, "h = 0.05")

} else {
  # ---- Usa os dados do override ----
  bp_all_summary <- bp_override

  # Reconstroi bp_daily_idx a partir das datas do override para os chunks seguintes
  override_dates <- as.Date(bp_override$trade_date[bp_override$h == "h = 0.05"])
  bp_daily_idx   <- which(dc_clean$trade_date %in% override_dates)
}

cat("Bai-Perron — quebras na media de log(r_t^2)\n")
Bai-Perron — quebras na media de log(r_t^2)
knitr::kable(bp_all_summary,
             col.names = c("h", "Rank", "Data", "log(r_t^2)", "Ret diario (%)"),
             caption   = "Bai-Perron: quebras estruturais em log(r_t^2) — serie diaria")
Bai-Perron: quebras estruturais em log(r_t^2) — serie diaria
h Rank Data log(r_t^2) Ret diario (%)
h = 0.05 1 2003-07-30 -8.8983 -1.1689
h = 0.05 2 2007-10-15 -17.2661 -1.1689
h = 0.05 3 2009-06-03 -9.3474 -0.9338
# --- Grafico: log(r_t^2) com os breakpoints ---
bp_plot_df <- if (length(bp_daily_idx) > 0)
  data.frame(
    x     = bp_daily_idx,
    label = paste0("h05_", seq_along(bp_daily_idx)),
    h     = "h = 0.05"
  ) else data.frame()

color_h <- c("h = 0.05" = "#2980B9", "h = 0.10" = "#E67E22", "h = 0.01" = "#27AE60")
lty_h   <- c("h = 0.05" = "solid",   "h = 0.10" = "dashed",  "h = 0.01" = "dotted")

ggplot(data.frame(t = seq_len(n_daily), y = log_ret2),
       aes(x = t, y = y)) +
  geom_line(color = "grey40", linewidth = 0.3) +
  geom_vline(data = bp_plot_df,
             aes(xintercept = x, color = h, linetype = h),
             linewidth = 0.8) +
  geom_label(data = bp_plot_df,
             aes(x = x, y = max(log_ret2, na.rm = TRUE) * 0.92,
                 label = label, color = h),
             size = 2.5, show.legend = FALSE) +
  scale_color_manual(values = color_h) +
  scale_linetype_manual(values = lty_h) +
  labs(title    = "Bai-Perron: Quebras Estruturais na Media de log(r_t^2)",
       subtitle = "Serie diaria de retornos",
       x = "Dia (indice diario)", y = "log(r_t^2)",
       color = "h", linetype = "h") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"), legend.position = "bottom")

15/10/2007 Início da turbulência da crise financeira global Crescimento da crise do subprime nos EUA Aumento da volatilidade das ações de tecnologia, incluindo IBM Período associado ao começo da transição para a crise de 2008 03/06/2009 Instabilidade durante a recuperação pós-crise de 2008 Economia mundial ainda em recessão após a quebra do Lehman Brothers Dados fracos de emprego nos EUA 28/09/2018 Volatilidade causada por juros e guerra comercialW Tensões da guerra comercial entre EUA e China Aumento das taxas de juros pelo Federal Reserve System

Teste ARCH-LM: Clustering de Volatilidade nos Breakpoints

Teste ARCH-LM (Engle, 1982) — Clustering de Volatilidade

O Multiplicador de Lagrange de Engle testa se os quadrados dos residuos \(\hat{\varepsilon}_t^2\) possuem autocorrelacao, indicando heterocedasticidade condicional (variancia nao constante ao longo do tempo).

O modelo ARCH(\(q\)) postula: \[\sigma_t^2 = \omega + \sum_{i=1}^{q} \alpha_i\,\varepsilon_{t-i}^2\]

O teste LM regride \(\hat{\varepsilon}_t^2\) em suas \(q\) defasagens: \[LM = n \cdot R^2 \;\sim\; \chi^2(q)\]

Hipoteses:

  • \(H_0\): \(\alpha_1 = \cdots = \alpha_q = 0\) — sem efeito ARCH (variancia constante)
  • \(H_1\): pelo menos um \(\alpha_i \neq 0\) — volatilidade depende do passado

Calibracao: defasagens fixadas em \(q = 5\) (5 dias de retornos diarios), capturando clustering de volatilidade de curto prazo. Janelas de PRE_DAYS e POST_DAYS dias antes e depois de cada breakpoint. Nivel de significancia \(\alpha = 0.05\).

O que detecta: rejeitar \(H_0\) confirma que a volatilidade e heterocedástica ao redor do breakpoint — movimentos grandes tendem a ser seguidos de movimentos grandes — validando a relevancia economica do ponto de quebra. O teste e aplicado separadamente para os breakpoints de cada valor de \(h\).

run_arch_lm <- function(r, lag = 5) {
  r <- as.numeric(na.omit(r))
  if (length(r) < lag + 10) return(list(stat = NA_real_, pval = NA_real_))
  tryCatch({
    fit <- FinTS::ArchTest(r, lags = lag)
    list(stat = unname(fit$statistic), pval = fit$p.value)
  }, error = function(e) list(stat = NA_real_, pval = NA_real_))
}

# ---- Constroi linhas ARCH para um conjunto de breakpoints (escala diaria) ----
make_arch_rows <- function(didx_vec, prefix) {
  do.call(rbind, lapply(seq_along(didx_vec), function(k) {
    D      <- didx_vec[k]
    r_pre  <- ret_daily[safe_window_d(D - PRE_DAYS,  D - 1,           n_daily)]
    r_post <- ret_daily[safe_window_d(D + 1,          D + POST_DAYS,   n_daily)]
    a_pre  <- run_arch_lm(r_pre)
    a_post <- run_arch_lm(r_post)
    data.frame(
      h          = prefix,
      evento     = paste0(prefix, "_", k),
      daily_idx  = D,
      trade_date = as.character(dc_clean$trade_date[min(D, n_daily)]),
      LM_pre     = round(a_pre$stat,  4),
      p_pre      = round(a_pre$pval,  6),
      LM_post    = round(a_post$stat, 4),
      p_post     = round(a_post$pval, 6),
      ARCH_pre   = ifelse(!is.na(a_pre$pval)  & a_pre$pval  < 0.05, "Sim", "Nao"),
      ARCH_post  = ifelse(!is.na(a_post$pval) & a_post$pval < 0.05, "Sim", "Nao")
    )
  }))
}

arch_tbl <- bind_rows(
  make_arch_rows(bp_daily_idx,      "BP h=0.05"),
  #make_arch_rows(bp_daily_idx_h10,  "BP h=0.10"),
  #make_arch_rows(bp_daily_idx_h01,  "BP h=0.01")
) %>% arrange(h, daily_idx)

cat("=== Teste ARCH-LM (lags=5) — pre vs pos breakpoint (serie diaria) ===\n")
=== Teste ARCH-LM (lags=5) — pre vs pos breakpoint (serie diaria) ===
cat("H0: sem efeito ARCH (variancia constante)  |  Rejeitar H0 se p < 0.05\n\n")
H0: sem efeito ARCH (variancia constante)  |  Rejeitar H0 se p < 0.05
knitr::kable(arch_tbl,
             col.names = c("h","Evento","Idx diario","Data",
                           "LM (pre)","p (pre)","LM (pos)","p (pos)",
                           "ARCH pre?","ARCH pos?"),
             caption = "Teste ARCH-LM(5): clustering de volatilidade pre vs pos breakpoint — serie diaria")
Teste ARCH-LM(5): clustering de volatilidade pre vs pos breakpoint — serie diaria
h Evento Idx diario Data LM (pre) p (pre) LM (pos) p (pos) ARCH pre? ARCH pos?
Chi-squared BP h=0.05 BP h=0.05_1 1400 2003-07-30 2.6574 0.752624 1.8234 0.873006 Nao Nao
Chi-squared1 BP h=0.05 BP h=0.05_2 2460 2007-10-15 5.2679 0.384065 5.2828 0.382354 Nao Nao
Chi-squared2 BP h=0.05 BP h=0.05_3 2871 2009-06-03 7.3441 0.196283 12.2253 0.031828 Nao Sim
# ---- Grafico: LM-stat pre vs pos por h ----
chi_crit <- qchisq(0.95, df = 5)

arch_long <- arch_tbl %>%
  select(h, evento, daily_idx, LM_pre, LM_post) %>%
  pivot_longer(cols = c(LM_pre, LM_post),
               names_to  = "fase",
               values_to = "LM_stat") %>%
  mutate(fase = ifelse(fase == "LM_pre", "pre", "post"),
         fase = factor(fase, levels = c("pre", "post")))

print(
  ggplot(arch_long %>% filter(!is.na(LM_stat)),
         aes(x = reorder(evento, daily_idx), y = LM_stat, fill = fase)) +
    geom_col(position = "dodge", width = 0.65, alpha = 0.85) +
    geom_hline(yintercept = chi_crit, linetype = "dashed",
               color = "red", linewidth = 0.7) +
    facet_wrap(~ h, scales = "free_x", ncol = 1) +
    scale_fill_manual(values = c(pre = "#2ECC71", post = "#E74C3C")) +
    labs(title    = "ARCH-LM (q=5) — Estatistica LM: pre vs pos breakpoint (serie diaria)",
         subtitle = paste0("Linha vermelha = chi^2(5) critico (p=0.05, val=",
                           round(chi_crit, 2), ") | Janela: ", PRE_DAYS, " dias pre / ", POST_DAYS, " dias pos"),
         x = "Evento", y = "LM-estatistica", fill = "Fase") +
    theme_minimal(base_size = 12) +
    theme(axis.text.x = element_text(angle = 35, hjust = 1),
          plot.title  = element_text(face = "bold"),
          strip.text  = element_text(face = "bold"))
)

Visualizacao Comparativa: Bai-Perron — Comparacao entre h

price_daily_max <- max(dc_clean$close_last, na.rm = TRUE)

# ---- Monta data.frame com todos os breakpoints (escala diaria) ----
make_marks <- function(didx_vec, h_label, y_frac) {
  if (length(didx_vec) == 0) return(NULL)
  data.frame(
    x      = didx_vec,
    h      = h_label,
    label  = paste0(gsub("h = ", "h", h_label), "_", seq_along(didx_vec)),
    y_pos  = price_daily_max * y_frac,
    stringsAsFactors = FALSE
  )
}

all_marks <- bind_rows(
  make_marks(bp_daily_idx,     "h = 0.05", 0.89),
  #make_marks(bp_daily_idx_h10, "h = 0.10", 0.76),
  #make_marks(bp_daily_idx_h01, "h = 0.01", 0.63)
) %>%
  mutate(h = factor(h, levels = c("h = 0.05", "h = 0.10", "h = 0.01")))

color_h  <- c("h = 0.05" = "#2980B9", "h = 0.10" = "#E67E22", "h = 0.01" = "#27AE60")
lty_h    <- c("h = 0.05" = "solid",   "h = 0.10" = "dashed",  "h = 0.01" = "dotted")

print(
  ggplot(data.frame(index = seq_len(n_daily), price = dc_clean$close_last),
         aes(x = index, y = price)) +
    geom_line(color = "black", linewidth = 0.4) +
    geom_vline(data = all_marks,
               aes(xintercept = x, color = h, linetype = h),
               linewidth = 0.75, alpha = 0.85) +
    geom_label(data = all_marks,
               aes(x = x, y = y_pos, label = label, color = h),
               size = 2.6, show.legend = FALSE) +
    scale_color_manual(values = color_h) +
    scale_linetype_manual(values = lty_h) +
    labs(
      title    = "IBM - Bai-Perron: Comparacao entre valores de h (serie diaria)",
      subtitle = "Aplicado sobre log(r_t^2) | h controla o tamanho minimo do segmento | Eixo X = dias de pregao",
      x = "Dia de pregao (indice diario)", y = "Preco de Fechamento Diario",
      color = "h", linetype = "h"
    ) +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"), legend.position = "bottom")
)

Visualizacao: Janelamentos dos Breakpoints

fill_colors_seg <- c(pre = "#E8FA22", during = "#FC886D", post = "#6DD6FC")

# ---- Funcao auxiliar: plota janelas diarias para um conjunto de breakpoints ----
plot_windows_for_h <- function(didx_vec, h_label, line_color) {
  top5_vec <- head(didx_vec, 5)
  if (length(top5_vec) == 0) {
    cat(sprintf("[%s] Nenhum breakpoint disponivel.\n", h_label))
    return(invisible(NULL))
  }

  for (k in seq_along(top5_vec)) {
    D <- top5_vec[k]

    segs_df <- do.call(rbind, lapply(c("pre", "during", "post"), function(nm) {
      win <- switch(nm,
        pre    = safe_window_d(D - PRE_DAYS,            D - 1,              n_daily),
        during = safe_window_d(D,                        D + POST_DAYS,      n_daily),
        post   = safe_window_d(D + POST_DAYS + 1,        D + 2 * POST_DAYS,  n_daily)
      )
      if (is.null(win)) return(NULL)
      data.frame(janela = nm, xmin = win[1] - 0.5, xmax = tail(win, 1) + 0.5)
    }))

    zoom_s  <- max(1L,      D - PRE_DAYS  - 10L)
    zoom_e  <- min(n_daily, D + 2 * POST_DAYS + 10L)
    plot_df <- dc_clean[zoom_s:zoom_e, ] %>%
      mutate(index = zoom_s:zoom_e)

    crash_date <- dc_clean$trade_date[min(D, n_daily)]
    crash_ret  <- ret_daily[min(D, n_daily)]

    print(
      ggplot(plot_df, aes(x = index, y = close_last)) +
        geom_rect(data = segs_df,
                  aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf, fill = janela),
                  inherit.aes = FALSE, alpha = 0.30) +
        geom_line(color = "black", linewidth = 0.5) +
        geom_vline(xintercept = D, color = line_color,
                   linewidth = 0.9, linetype = "solid") +
        scale_fill_manual(name = "Janela", values = fill_colors_seg) +
        labs(
          title    = paste0("[", h_label, "] Evento_", k,
                            " — Janelamentos sobre Preco Diario IBM"),
          subtitle = paste0(
            "Linha = breakpoint (", format(crash_date, "%Y-%m-%d"),
            ") | Ret diario: ", round(crash_ret * 100, 3), "%",
            "\npre=[D-", PRE_DAYS, "d, D-1]  |  during=[D, D+", POST_DAYS,
            "d]  |  post=[D+", POST_DAYS + 1, "d, D+", 2 * POST_DAYS, "d]"
          ),
          x = "Dia de pregao (indice diario)", y = "Preco de Fechamento Diario"
        ) +
        theme_minimal(base_size = 11) +
        theme(plot.title = element_text(face = "bold"), legend.position = "top") +
        coord_cartesian(expand = FALSE)
    )
  }
}

# ---- Plota para cada valor de h ----
plot_windows_for_h(bp_daily_idx,     "h = 0.05", "#2980B9")

#plot_windows_for_h(bp_daily_idx_h10, "h = 0.10", "#E67E22")
#plot_windows_for_h(bp_daily_idx_h01, "h = 0.01", "#27AE60")

Construcao dos Eventos a partir dos Breakpoints Bai-Perron

# Constroi a lista 'events' com os 3 breakpoints diarios identificados pelo
# Bai-Perron, usando as mesmas janelas PRE_DAYS / POST_DAYS da serie diaria.
# Cada evento expoe:
#   crash_idx   : indice diario do breakpoint em dc_clean
#   crash_date  : data do breakpoint
#   crash_ret   : retorno diario no ponto de quebra
#   event_label : "Evento_k"
#   pre         : indices diarios [D - PRE_DAYS, D - 1]
#   during      : indices diarios [D, D + POST_DAYS]
#   post        : indices diarios [D + POST_DAYS + 1, D + 2*POST_DAYS]

bp_events <- vector("list", length(bp_daily_idx))

for (k in seq_along(bp_daily_idx)) {
  D <- bp_daily_idx[k]

  bp_events[[k]] <- list(
    crash_idx   = D,
    crash_date  = dc_clean$trade_date[min(D, n_daily)],
    crash_ret   = ret_daily[min(D, n_daily)],
    event_label = paste0("Evento_", k),
    pre         = safe_window_d(D - PRE_DAYS,            D - 1,             n_daily),
    during      = safe_window_d(D,                        D + POST_DAYS,     n_daily),
    post        = safe_window_d(D + POST_DAYS + 1,        D + 2 * POST_DAYS, n_daily)
  )
}

bp_summary_events <- do.call(rbind, lapply(bp_events, function(ev) {
  data.frame(
    evento      = ev$event_label,
    idx_diario  = ev$crash_idx,
    data_bp     = format(ev$crash_date, "%Y-%m-%d"),
    retorno_pct = round(ev$crash_ret * 100, 4),
    n_pre       = if (!is.null(ev$pre))    length(ev$pre)    else NA_integer_,
    n_during    = if (!is.null(ev$during)) length(ev$during) else NA_integer_,
    n_post      = if (!is.null(ev$post))   length(ev$post)   else NA_integer_
  )
}))

knitr::kable(bp_summary_events,
             col.names = c("Evento", "Idx diario", "Data BP",
                           "Ret (%)", "n pre", "n during", "n post"),
             caption = "Eventos Bai-Perron: janelas diarias pre / during / post")
Eventos Bai-Perron: janelas diarias pre / during / post
Evento Idx diario Data BP Ret (%) n pre n during n post
Evento_1 1400 2003-07-30 -1.1689 60 61 60
Evento_2 2460 2007-10-15 -0.0178 60 61 60
Evento_3 2871 2009-06-03 -0.9338 60 61 60

Ajuste do Modelo de Volatilidade Estocastica — por Evento

for (k in 1:3) {
  ev <- bp_events[[k]]
  for (nm in c("pre", "during", "post")) {
    mdl <- sv_models[[k]][[nm]]
    if (is.null(mdl)) next
    plot(mdl, showobs = FALSE)
    title(main = paste0(ev$event_label, " - SV [", nm, "] | ",
                        format(ev$crash_date, "%Y-%m-%d")))
  }
}

Volatilidade Realizada Acumulada — por Evento

# Volatilidade realizada acumulada calculada sobre os retornos DIARIOS
# de cada segmento (pre / during / post) dos 3 breakpoints Bai-Perron.

for (ev in bp_events) {
  vol_list <- lapply(c("pre", "during", "post"), function(nm) {
    idx <- ev[[nm]]
    if (is.null(idx)) return(NULL)
    r_seg    <- ret_daily[idx]
    vol_acum <- sqrt(cumsum(r_seg^2))
    data.frame(index = seq_along(idx), vol = vol_acum, janela = nm)
  })
  df_vol <- bind_rows(Filter(Negate(is.null), vol_list)) %>%
    mutate(janela = factor(janela, levels = c("pre", "during", "post")))

  print(
    ggplot(df_vol, aes(x = index, y = vol, color = janela)) +
      geom_line() +
      scale_color_manual(values = c(pre = "#2ECC71", during = "#E74C3C", post = "#3498DB")) +
      labs(title    = paste0(ev$event_label, " - Vol. Realizada Acumulada (diaria)"),
           subtitle = paste0("Breakpoint: ", format(ev$crash_date, "%Y-%m-%d"),
                             " | Ret diario: ", round(ev$crash_ret * 100, 3), "%"),
           x = "Indice relativo (dias)", y = "sqrt(cumsum(r^2))", color = "Janela") +
      theme_minimal()
  )
}

Realized Variance, Bipower, Saltos e Testes Estatisticos — por Evento

dias_ano <- 252   # dias uteis por ano (retornos diarios)

calc_RV <- function(r) sum(r^2, na.rm = TRUE)

calc_BV <- function(r) {
  if (length(r) < 2) return(NA_real_)
  (pi / 2)^(-1) * sum(abs(r[-1]) * abs(r[-length(r)]), na.rm = TRUE)
}

annualize_vol_from_rv <- function(rv, n_obs) {
  if (n_obs <= 0) return(NA_real_)
  sqrt(rv / n_obs) * sqrt(dias_ano)
}

bootstrap_RV <- function(r, R = 1500) {
  r <- na.omit(as.numeric(r))
  if (length(r) < 5) return(NULL)
  b  <- boot::boot(data = r, statistic = function(d, i) sum(d[i]^2), R = R)
  ci <- boot::boot.ci(b, type = c("perc", "bca"))
  list(boot = b, ci = ci)
}

perm_test_RV_diff <- function(r1, r2, R = 2000) {
  r1 <- na.omit(as.numeric(r1)); r2 <- na.omit(as.numeric(r2))
  obs_diff <- calc_RV(r2) - calc_RV(r1)
  pooled   <- c(r1, r2); n1 <- length(r1); n2 <- length(r2)
  set.seed(123)
  perm_diffs <- replicate(R, {
    p <- sample(pooled)
    calc_RV(p[(n1+1):(n1+n2)]) - calc_RV(p[1:n1])
  })
  list(obs_diff = obs_diff, p_value = mean(abs(perm_diffs) >= abs(obs_diff)))
}

calc_for_segment <- function(name, idx, ret_vec) {
  r  <- as.numeric(na.omit(ret_vec[idx]))
  rv <- calc_RV(r); bv <- calc_BV(r)
  jmp <- max(rv - bv, 0)
  list(name      = name, n = length(r),
       RV = rv, BV = bv, Jump = jmp,
       JumpShare = ifelse(rv > 0, jmp / rv, NA_real_),
       VolAnn    = annualize_vol_from_rv(rv, length(r)),
       mean      = mean(r), sd = sd(r),
       skewness  = moments::skewness(r), kurtosis = moments::kurtosis(r),
       boot      = bootstrap_RV(r, R = 1500))
}

all_rv_results <- vector("list", 3)

for (k in 1:3) {
  ev      <- bp_events[[k]]
  janelas <- list(pre = ev$pre, during = ev$during, post = ev$post)
  results <- lapply(names(janelas), function(nm) {
    if (is.null(janelas[[nm]])) return(NULL)
    calc_for_segment(nm, janelas[[nm]], ret_daily)
  })
  names(results) <- names(janelas)
  results <- Filter(Negate(is.null), results)
  all_rv_results[[k]] <- results

  cat("\n\n===", ev$event_label, "| BP:", format(ev$crash_date, "%Y-%m-%d"), "===\n")

  summary_tbl <- do.call(rbind, lapply(results, function(x) {
    data.frame(janela = x$name, n = x$n, RV = x$RV, BV = x$BV,
               Jump = x$Jump, JumpShare = x$JumpShare, VolAnn = x$VolAnn,
               mean = x$mean, sd = x$sd,
               skewness = x$skewness, kurtosis = x$kurtosis)
  }))
  print(knitr::kable(summary_tbl, digits = 6,
                     caption = paste0(ev$event_label, " - RV, Bipower, Jump, Vol Anualizada (diaria)")))

  # Diferencas de RV
  pairs <- list(c("pre","during"), c("pre","post"))
  for (pr in pairs) {
    a <- pr[1]; b <- pr[2]
    if (!is.null(results[[a]]) && !is.null(results[[b]])) {
      diff_rv  <- results[[b]]$RV - results[[a]]$RV
      pct_rv   <- diff_rv / abs(results[[a]]$RV) * 100
      cat(sprintf("  RV (%s - %s): %+.6f  (%+.2f%%)\n", b, a, diff_rv, pct_rv))
    }
  }

  # Testes permutacionais
  for (pr in pairs) {
    a <- pr[1]; b <- pr[2]
    if (!is.null(janelas[[a]]) && !is.null(janelas[[b]])) {
      perm_res <- perm_test_RV_diff(ret_daily[janelas[[a]]], ret_daily[janelas[[b]]], R = 2000)
      cat(sprintf("  Perm test (%s vs %s): p = %.4f\n", b, a, perm_res$p_value))
    }
  }

  # Levene
  seg_rets <- unlist(lapply(names(janelas), function(nm) {
    if (!is.null(janelas[[nm]])) as.numeric(ret_daily[janelas[[nm]]]) else NULL
  }))
  seg_labs <- unlist(lapply(names(janelas), function(nm) {
    if (!is.null(janelas[[nm]])) rep(nm, length(janelas[[nm]])) else NULL
  }))
  combined <- data.frame(ret = seg_rets,
                         seg = factor(seg_labs, levels = c("pre","during","post")))
  lev <- car::leveneTest(ret ~ seg, data = combined)
  cat(sprintf("  Levene: F = %.4f | p = %.6f\n", lev[1,"F value"], lev[1,"Pr(>F)"]))

  # KS
  for (pr in pairs) {
    a <- pr[1]; b <- pr[2]
    if (!is.null(janelas[[a]]) && !is.null(janelas[[b]])) {
      ks <- ks.test(as.numeric(ret_daily[janelas[[a]]]),
                    as.numeric(ret_daily[janelas[[b]]]))
      cat(sprintf("  KS (%s vs %s): D = %.4f | p = %.6f\n", b, a, ks$statistic, ks$p.value))
    }
  }
}


=== Evento_1 | BP: 2003-07-30 ===


Table: Evento_1 - RV, Bipower, Jump, Vol Anualizada (diaria)

|       |janela |  n|       RV|       BV|     Jump| JumpShare|   VolAnn|      mean|       sd|  skewness| kurtosis|
|:------|:------|--:|--------:|--------:|--------:|---------:|--------:|---------:|--------:|---------:|--------:|
|pre    |pre    | 60| 0.013167| 0.004388| 0.008779|  0.666739| 0.235162| -0.000945| 0.014908| -0.402393| 3.323557|
|during |during | 61| 0.009252| 0.001853| 0.007398|  0.799659| 0.195499|  0.001320| 0.012346|  0.485933| 7.015300|
|post   |post   | 60| 0.006385| 0.002237| 0.004147|  0.649599| 0.163754|  0.001798| 0.010243|  1.005190| 6.012712|
  RV (during - pre): -0.003915  (-29.74%)
  RV (post - pre): -0.006782  (-51.51%)
  Perm test (during vs pre): p = 0.3460
  Perm test (post vs pre): p = 0.0410
  Levene: F = 4.0059 | p = 0.019873
  KS (during vs pre): D = 0.2697 | p = 0.020982
  KS (post vs pre): D = 0.1833 | p = 0.267120


=== Evento_2 | BP: 2007-10-15 ===


Table: Evento_2 - RV, Bipower, Jump, Vol Anualizada (diaria)

|       |janela |  n|       RV|       BV|     Jump| JumpShare|   VolAnn|      mean|       sd|  skewness| kurtosis|
|:------|:------|--:|--------:|--------:|--------:|---------:|--------:|---------:|--------:|---------:|--------:|
|pre    |pre    | 60| 0.006447| 0.002926| 0.003521|  0.546153| 0.164556|  0.000325| 0.010448| -0.259437| 2.904239|
|during |during | 61| 0.020545| 0.008867| 0.011677|  0.568389| 0.291330| -0.002483| 0.018334| -0.523025| 2.924534|
|post   |post   | 60| 0.016800| 0.007566| 0.009235|  0.549679| 0.265634|  0.002751| 0.016645|  0.462494| 3.103121|
  RV (during - pre): +0.014097  (+218.66%)
  RV (post - pre): +0.010353  (+160.58%)
  Perm test (during vs pre): p = 0.0000
  Perm test (post vs pre): p = 0.0005
  Levene: F = 8.0758 | p = 0.000439
  KS (during vs pre): D = 0.2620 | p = 0.021902
  KS (post vs pre): D = 0.1833 | p = 0.267120


=== Evento_3 | BP: 2009-06-03 ===


Table: Evento_3 - RV, Bipower, Jump, Vol Anualizada (diaria)

|       |janela |  n|       RV|       BV|     Jump| JumpShare|   VolAnn|     mean|       sd|  skewness| kurtosis|
|:------|:------|--:|--------:|--------:|--------:|---------:|--------:|--------:|--------:|---------:|--------:|
|pre    |pre    | 60| 0.021221| 0.006951| 0.014270|  0.672442| 0.298542| 0.003971| 0.018538|  0.333959| 3.800094|
|during |during | 61| 0.009626| 0.003685| 0.005941|  0.617163| 0.199416| 0.001937| 0.012515|  1.192643| 5.721223|
|post   |post   | 60| 0.007778| 0.002624| 0.005154|  0.662672| 0.180746| 0.001174| 0.011421| -1.077735| 7.495025|
  RV (during - pre): -0.011595  (-54.64%)
  RV (post - pre): -0.013442  (-63.35%)
  Perm test (during vs pre): p = 0.0325
  Perm test (post vs pre): p = 0.0075
  Levene: F = 8.6148 | p = 0.000268
  KS (during vs pre): D = 0.2363 | p = 0.059358
  KS (post vs pre): D = 0.2500 | p = 0.046661

Analise de Padroes nos Parametros Estocasticos — 9 Segmentos

# ============================================================
# OBJETIVO DESTE CHUNK
# Usando as distribuicoes posteriores de mu, phi e sigma dos
# 9 modelos SV (3 breakpoints x 3 fases), responder:
#
#  1. NIVEL          : mu  — qual segmento tem vol media mais alta?
#  2. PREVISIBILIDADE: phi — em qual segmento choques duram mais?
#  3. ERRATICIDADE   : sigma — em qual a vol varia mais dia-a-dia?
#  4. MEIA-VIDA      : log(0.5)/log(phi) — dias para um choque de vol
#                     decair pela metade
#  5. VAR INCONDICIONAL: sigma^2/(1-phi^2)
#  6. INDICE DE PREVISIBILIDADE: phi/(1+sigma)
#  7. CV LATENTE     : coef. de variacao de exp(h_t) dentro do segmento
#  8. COMPARACOES PAIRWISE: P(sigma_A > sigma_B) — 9x9 pares
# ============================================================

dias_ano <- 252

# ---- Helpers locais ----
.get_para   <- function(sv) as.matrix(para(sv,   chain = "concatenated"))
.get_latent <- function(sv) as.matrix(latent(sv, chain = "concatenated"))
.srows      <- function(mat, n) {
  if (nrow(mat) <= n) return(mat)
  set.seed(42); mat[sample(nrow(mat), n), , drop = FALSE]
}

# ---- Extrai draws de todos os 9 segmentos ----
seg_ids    <- expand.grid(k = 1:3, nm = c("pre","during","post"),
                          stringsAsFactors = FALSE)
seg_ids    <- seg_ids[order(seg_ids$k), ]   # ordem cronologica

all_para   <- vector("list", nrow(seg_ids))
all_latent <- vector("list", nrow(seg_ids))
seg_labels <- character(nrow(seg_ids))

for (i in seq_len(nrow(seg_ids))) {
  k  <- seg_ids$k[i]
  nm <- seg_ids$nm[i]
  mdl <- sv_models[[k]][[nm]]
  seg_labels[i] <- paste0("E", k, "_", nm)
  if (is.null(mdl)) next
  all_para[[i]]   <- .get_para(mdl)
  all_latent[[i]] <- .get_latent(mdl)
}

# Numero comum de draws (minimo entre segmentos validos)
valid_idx  <- which(!sapply(all_para, is.null))
common_n   <- min(sapply(all_para[valid_idx], nrow))
set.seed(42)
all_para   <- lapply(all_para,   function(m) if (!is.null(m)) .srows(m, common_n) else NULL)
all_latent <- lapply(all_latent, function(m) if (!is.null(m)) .srows(m, common_n) else NULL)
names(all_para)   <- seg_labels
names(all_latent) <- seg_labels

cat(sprintf("Segmentos validos: %d / 9 | Draws por segmento: %d\n",
            length(valid_idx), common_n))
Segmentos validos: 9 / 9 | Draws por segmento: 5000
# ============================================================
# 1. TABELA MESTRA: estatisticas posteriores de mu, phi, sigma
#    + metricas derivadas por segmento
# ============================================================
master_rows <- lapply(seq_along(seg_labels), function(i) {
  pm <- all_para[[i]]
  lm <- all_latent[[i]]
  lb <- seg_labels[i]
  if (is.null(pm)) return(NULL)
  
  mu_v    <- pm[, "mu"]
  phi_v   <- pm[, "phi"]
  sig_v   <- pm[, "sigma"]
  
  # Clamp phi to (0, 1) before log to avoid NaN in half-life
  phi_v_safe   <- pmax(phi_v, 1e-10)
  half_life_v  <- log(0.5) / log(phi_v_safe)
  var_incond_v <- sig_v^2 / (1 - phi_v^2)
  pred_idx_v   <- phi_v / (1 + sig_v)
  cv_lat_per_draw <- apply(exp(lm), 1, function(row) sd(row) / mean(row))
  
  data.frame(
    segmento       = lb,
    evento         = paste0("Evento_", seg_ids$k[i]),
    janela         = seg_ids$nm[i],
    mu_mean        = mean(mu_v),
    mu_sd          = sd(mu_v),
    mu_q025        = quantile(mu_v, .025, na.rm = TRUE),
    mu_q975        = quantile(mu_v, .975, na.rm = TRUE),
    phi_mean       = mean(phi_v),
    phi_sd         = sd(phi_v),
    phi_q025       = quantile(phi_v, .025, na.rm = TRUE),
    phi_q975       = quantile(phi_v, .975, na.rm = TRUE),
    sig_mean       = mean(sig_v),
    sig_sd         = sd(sig_v),
    sig_q025       = quantile(sig_v, .025, na.rm = TRUE),
    sig_q975       = quantile(sig_v, .975, na.rm = TRUE),
    half_life_mean = mean(half_life_v, na.rm = TRUE),
    half_life_q025 = quantile(half_life_v, .025, na.rm = TRUE),
    half_life_q975 = quantile(half_life_v, .975, na.rm = TRUE),
    var_incond     = mean(var_incond_v, na.rm = TRUE),
    pred_idx       = mean(pred_idx_v, na.rm = TRUE),
    cv_latente     = mean(cv_lat_per_draw, na.rm = TRUE)
  )
})
master_tbl <- bind_rows(Filter(Negate(is.null), master_rows)) %>%
  mutate(janela = factor(janela, levels = c("pre","during","post")))

# Tabela resumida para impressao
print_tbl <- master_tbl %>%
  select(segmento, mu_mean, phi_mean, sig_mean,
         half_life_mean, var_incond, pred_idx, cv_latente) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))
knitr::kable(print_tbl,
             col.names = c("Segmento","mu (media)","phi (media)","sigma (media)",
                           "Meia-vida (dias)","Var Incond.","Pred. Index","CV Latente"),
             caption = "Tabela Mestra — Parametros SV e Metricas Derivadas para os 9 Segmentos (diario)")
Tabela Mestra — Parametros SV e Metricas Derivadas para os 9 Segmentos (diario)
Segmento mu (media) phi (media) sigma (media) Meia-vida (dias) Var Incond. Pred. Index CV Latente
2.5%…1 E1_pre -9.1763 0.5203 0.2751 2.6426 0.2162 0.4200 0.3491
2.5%…2 E1_during -8.0486 0.5085 0.2958 2.2257 0.2238 0.4072 0.3697
2.5%…3 E1_post -8.2447 0.6322 0.3146 5.3186 0.3991 0.4968 0.4562
2.5%…4 E2_pre -8.0217 0.4462 0.3649 2.5018 0.3061 0.3515 0.4534
2.5%…5 E2_during -9.3258 0.7347 0.6211 4.6153 1.2891 0.4666 1.0995
2.5%…6 E2_post -9.3499 0.5366 0.6757 3.2132 0.9839 0.3404 0.9492
2.5%…7 E3_pre -9.5840 0.4146 0.6716 1.4324 0.7417 0.2656 0.8472
2.5%…8 E3_during -7.9325 0.4836 0.5828 2.0786 0.7023 0.3233 0.7335
2.5%…9 E3_post -9.4107 0.4988 1.0669 1.4092 1.8463 0.2485 1.8556
# ============================================================
# 2. HEATMAP: perfis comparados — 9 segmentos x parametros
# ============================================================
heat_df <- master_tbl %>%
  select(segmento, janela, evento,
         mu = mu_mean, phi = phi_mean, sigma = sig_mean,
         half_life = half_life_mean, var_incond, pred_idx, cv_latente) %>%
  pivot_longer(cols = c(mu, phi, sigma, half_life, var_incond, pred_idx, cv_latente),
               names_to = "metrica", values_to = "valor") %>%
  group_by(metrica) %>%
  mutate(z = (valor - mean(valor, na.rm=TRUE)) / (sd(valor, na.rm=TRUE) + 1e-10)) %>%
  ungroup() %>%
  mutate(
    segmento = factor(segmento, levels = seg_labels),
    metrica  = factor(metrica,
                      levels = c("mu","phi","sigma","half_life",
                                 "var_incond","pred_idx","cv_latente"),
                      labels = c("mu (nivel)",
                                 "phi (persistencia)",
                                 "sigma (erraticidade)",
                                 "Meia-vida (dias)",
                                 "Var. Incondicional",
                                 "Indice Previsibil.",
                                 "CV Latente"))
  )

print(
  ggplot(heat_df, aes(x = segmento, y = metrica, fill = z)) +
    geom_tile(color = "white", linewidth = 0.5) +
    geom_text(aes(label = round(valor, 3)), size = 2.8) +
    scale_fill_gradient2(low = "#3498DB", mid = "white", high = "#E74C3C",
                         midpoint = 0, name = "Z-score") +
    labs(title    = "Heatmap de Perfis SV — 9 Segmentos (z-scored por metrica)",
         subtitle = "Vermelho = acima da media | Azul = abaixo | Valor = media posterior original",
         x = NULL, y = NULL) +
    theme_minimal(base_size = 11) +
    theme(axis.text.x  = element_text(angle = 40, hjust = 1),
          plot.title   = element_text(face = "bold"),
          panel.grid   = element_blank())
)

# ============================================================
# 3. PLANO DE PREVISIBILIDADE: phi vs sigma
# ============================================================
phi_med  <- median(master_tbl$phi_mean,  na.rm = TRUE)
sig_med  <- median(master_tbl$sig_mean,  na.rm = TRUE)
colors_j <- c(pre = "#2ECC71", during = "#E74C3C", post = "#3498DB")

print(
  ggplot(master_tbl, aes(x = sig_mean, y = phi_mean,
                          color = janela, shape = evento,
                          label = segmento)) +
    annotate("rect", xmin = -Inf,   xmax = sig_med, ymin = phi_med, ymax = Inf,
             fill = "#D5F5E3", alpha = 0.35) +
    annotate("rect", xmin = sig_med, xmax = Inf,    ymin = phi_med, ymax = Inf,
             fill = "#FADBD8", alpha = 0.35) +
    annotate("rect", xmin = -Inf,   xmax = sig_med, ymin = -Inf,    ymax = phi_med,
             fill = "#EBF5FB", alpha = 0.35) +
    annotate("rect", xmin = sig_med, xmax = Inf,    ymin = -Inf,    ymax = phi_med,
             fill = "#FEF9E7", alpha = 0.35) +
    annotate("text", x = sig_med * 0.85, y = Inf,      label = "Alta vol\nPrevisivel",
             vjust = 1.5, size = 3, color = "#27AE60", fontface = "bold") +
    annotate("text", x = sig_med * 1.15, y = Inf,      label = "Alta vol\nErratica",
             vjust = 1.5, size = 3, color = "#C0392B", fontface = "bold") +
    annotate("text", x = sig_med * 0.85, y = -Inf,     label = "Baixa vol\nPrevisivel",
             vjust = -0.5, size = 3, color = "#2980B9", fontface = "bold") +
    annotate("text", x = sig_med * 1.15, y = -Inf,     label = "Baixa vol\nErratica",
             vjust = -0.5, size = 3, color = "#D4AC0D", fontface = "bold") +
    geom_vline(xintercept = sig_med, linetype = "dashed", color = "grey50") +
    geom_hline(yintercept = phi_med, linetype = "dashed", color = "grey50") +
    geom_point(size = 4.5, alpha = 0.9) +
    geom_text(nudge_y = 0.002, size = 2.5, show.legend = FALSE) +
    scale_color_manual(values = colors_j) +
    scale_shape_manual(values = c(Evento_1 = 16, Evento_2 = 17, Evento_3 = 15)) +
    labs(title    = "Plano de Previsibilidade: phi vs sigma",
         subtitle = "phi = persistencia (previsibilidade) | sigma = erraticidade da vol",
         x = "sigma — vol-da-vol (erraticidade)",
         y = "phi — persistencia (previsibilidade)",
         color = "Fase", shape = "Evento") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"))
)

# ============================================================
# 4. MEIA-VIDA DOS CHOQUES POR SEGMENTO
# ============================================================
hl_df <- master_tbl %>%
  mutate(segmento = factor(segmento, levels = seg_labels))

print(
  ggplot(hl_df, aes(x = segmento, y = half_life_mean, fill = janela)) +
    geom_col(alpha = 0.85, width = 0.7) +
    geom_errorbar(aes(ymin = half_life_q025, ymax = half_life_q975),
                  width = 0.25, linewidth = 0.7) +
    geom_text(aes(label = round(half_life_mean, 1)),
              vjust = -0.6, size = 3.2) +
    scale_fill_manual(values = colors_j) +
    labs(title    = "Meia-vida dos Choques de Volatilidade — 9 Segmentos",
         subtitle = "Dias para um choque de vol decair 50% | Barras = IC 95% posterior",
         x = NULL, y = "Meia-vida (dias)", fill = "Fase") +
    theme_minimal(base_size = 12) +
    theme(axis.text.x  = element_text(angle = 35, hjust = 1),
          plot.title   = element_text(face = "bold"))
)

# ============================================================
# 5. mu vs MEIA-VIDA
# ============================================================
print(
  ggplot(master_tbl, aes(x = half_life_mean, y = mu_mean,
                          color = janela, shape = evento,
                          label = segmento)) +
    geom_point(size = 4.5, alpha = 0.9) +
    geom_text(nudge_y = 0.08, size = 2.7, show.legend = FALSE) +
    scale_color_manual(values = colors_j) +
    scale_shape_manual(values = c(Evento_1 = 16, Evento_2 = 17, Evento_3 = 15)) +
    labs(title    = "Nivel medio de vol (mu) vs Meia-vida dos choques",
         subtitle = "Quadrante sup-dir = mais perigoso: vol alta E persistente",
         x = "Meia-vida (dias)",
         y = "mu — nivel medio log-vol (menos negativo = mais alto)",
         color = "Fase", shape = "Evento") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"))
)

# ============================================================
# 6. INDICE DE PREVISIBILIDADE vs CV LATENTE
# ============================================================
print(
  ggplot(master_tbl, aes(x = cv_latente, y = pred_idx,
                          color = janela, shape = evento,
                          label = segmento)) +
    geom_point(size = 4.5, alpha = 0.9) +
    geom_text(nudge_y = 0.003, size = 2.7, show.legend = FALSE) +
    scale_color_manual(values = colors_j) +
    scale_shape_manual(values = c(Evento_1 = 16, Evento_2 = 17, Evento_3 = 15)) +
    labs(title    = "Previsibilidade vs Variabilidade Interna da Volatilidade Latente",
         subtitle = "Sup-esq = vol interna variavel MAS sistematica | Inf-dir = erratica",
         x = "CV Latente — variabilidade interna de exp(h_t)",
         y = "Indice de Previsibilidade — phi/(1+sigma)",
         color = "Fase", shape = "Evento") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"))
)

# ============================================================
# 7. COMPARACOES PAIRWISE POSTERIORES: P(sigma_A > sigma_B)
# ============================================================
valid_segs   <- seg_labels[valid_idx]
n_valid      <- length(valid_segs)
pairwise_mat <- matrix(NA_real_, n_valid, n_valid,
                       dimnames = list(valid_segs, valid_segs))

for (i in seq_len(n_valid)) {
  for (j in seq_len(n_valid)) {
    si <- all_para[[valid_segs[i]]][, "sigma"]
    sj <- all_para[[valid_segs[j]]][, "sigma"]
    pairwise_mat[i, j] <- mean(si > sj)
  }
}

pw_df <- as.data.frame(pairwise_mat) %>%
  mutate(seg_row = rownames(pairwise_mat)) %>%
  pivot_longer(-seg_row, names_to = "seg_col", values_to = "prob") %>%
  mutate(seg_row = factor(seg_row, levels = valid_segs),
         seg_col = factor(seg_col, levels = valid_segs),
         label   = ifelse(seg_row == seg_col, "—", sprintf("%.2f", prob)))

print(
  ggplot(pw_df, aes(x = seg_col, y = seg_row, fill = prob)) +
    geom_tile(color = "white", linewidth = 0.4) +
    geom_text(aes(label = label), size = 2.8) +
    scale_fill_gradient2(low = "#3498DB", mid = "#F0F3F4",
                         high = "#E74C3C", midpoint = 0.5,
                         limits = c(0, 1),
                         name = "P(linha > col)") +
    labs(title    = "Matriz Pairwise: P(sigma_linha > sigma_coluna)",
         subtitle = "Vermelho ~ 1.0 = linha e significativamente mais erratica que coluna",
         x = NULL, y = NULL) +
    theme_minimal(base_size = 10) +
    theme(axis.text.x  = element_text(angle = 40, hjust = 1),
          plot.title   = element_text(face = "bold"),
          panel.grid   = element_blank())
)

# ============================================================
# 8. DISTRIBUICAO POSTERIOR DE SIGMA — todos os segmentos
# ============================================================
sigma_df <- do.call(rbind, lapply(valid_segs, function(lb) {
  data.frame(sigma = all_para[[lb]][, "sigma"],
             segmento = lb,
             janela   = sub("E[0-9]_", "", lb),
             stringsAsFactors = FALSE)
})) %>%
  mutate(janela   = factor(janela,   levels = c("pre","during","post")),
         segmento = factor(segmento, levels = valid_segs))

print(
  ggplot(sigma_df, aes(x = sigma, color = janela, group = segmento,
                        linetype = segmento)) +
    geom_density(linewidth = 0.7, alpha = 0) +
    scale_color_manual(values = colors_j) +
    labs(title    = "Distribuicoes Posteriores de sigma — 9 Segmentos",
         subtitle = "Separacao entre curvas = evidencia bayesiana de regimes diferentes",
         x = "sigma (vol-da-vol)", y = "Densidade",
         color = "Fase", linetype = "Segmento") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"))
)

# ============================================================
# 9. DISTRIBUICAO POSTERIOR DE phi — todos os segmentos
# ============================================================
phi_df <- do.call(rbind, lapply(valid_segs, function(lb) {
  data.frame(phi    = all_para[[lb]][, "phi"],
             segmento = lb,
             janela   = sub("E[0-9]_", "", lb),
             stringsAsFactors = FALSE)
})) %>%
  mutate(janela   = factor(janela,   levels = c("pre","during","post")),
         segmento = factor(segmento, levels = valid_segs))

print(
  ggplot(phi_df, aes(x = phi, color = janela, group = segmento,
                      linetype = segmento)) +
    geom_density(linewidth = 0.7, alpha = 0) +
    scale_color_manual(values = colors_j) +
    labs(title    = "Distribuicoes Posteriores de phi — 9 Segmentos",
         subtitle = "phi proximo de 1 = choque de vol demora mais para dissipar",
         x = "phi (persistencia)", y = "Densidade",
         color = "Fase", linetype = "Segmento") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"))
)

# ============================================================
# 10. RESUMO FINAL: ranking de previsibilidade e classificacao
# ============================================================
rank_tbl <- master_tbl %>%
  arrange(desc(pred_idx)) %>%
  mutate(
    rank_previs = row_number(),
    regime_tipo = case_when(
      phi_mean >= phi_med & sig_mean <  sig_med ~ "Alta vol / Previsivel",
      phi_mean >= phi_med & sig_mean >= sig_med ~ "Alta vol / Erratica",
      phi_mean <  phi_med & sig_mean <  sig_med ~ "Baixa vol / Previsivel",
      TRUE                                       ~ "Baixa vol / Erratica"
    )
  ) %>%
  select(segmento, janela, evento, mu_mean, phi_mean, sig_mean,
         half_life_mean, pred_idx, cv_latente, regime_tipo) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))

knitr::kable(rank_tbl,
             col.names = c("Segmento","Fase","Evento","mu","phi","sigma",
                           "Meia-vida","Pred.Index","CV Lat.","Classificacao"),
             caption = "Ranking por Previsibilidade + Classificacao de Regime para os 9 Segmentos")
Ranking por Previsibilidade + Classificacao de Regime para os 9 Segmentos
Segmento Fase Evento mu phi sigma Meia-vida Pred.Index CV Lat. Classificacao
2.5%…1 E1_post post Evento_1 -8.2447 0.6322 0.3146 5.3186 0.4968 0.4562 Alta vol / Previsivel
2.5%…2 E2_during during Evento_2 -9.3258 0.7347 0.6211 4.6153 0.4666 1.0995 Alta vol / Erratica
2.5%…3 E1_pre pre Evento_1 -9.1763 0.5203 0.2751 2.6426 0.4200 0.3491 Alta vol / Previsivel
2.5%…4 E1_during during Evento_1 -8.0486 0.5085 0.2958 2.2257 0.4072 0.3697 Alta vol / Previsivel
2.5%…5 E2_pre pre Evento_2 -8.0217 0.4462 0.3649 2.5018 0.3515 0.4534 Baixa vol / Previsivel
2.5%…6 E2_post post Evento_2 -9.3499 0.5366 0.6757 3.2132 0.3404 0.9492 Alta vol / Erratica
2.5%…7 E3_during during Evento_3 -7.9325 0.4836 0.5828 2.0786 0.3233 0.7335 Baixa vol / Erratica
2.5%…8 E3_pre pre Evento_3 -9.5840 0.4146 0.6716 1.4324 0.2656 0.8472 Baixa vol / Erratica
2.5%…9 E3_post post Evento_3 -9.4107 0.4988 1.0669 1.4092 0.2485 1.8556 Baixa vol / Erratica

Visualizacoes Finais: Violin + Densidade Posterior — por Evento

# ---- Variaveis locais necessarias neste chunk ----
dias_ano      <- 252
colors_janela <- c(pre = "#2ECC71", during = "#E74C3C", post = "#3498DB")

# Funcoes de extracao (redefinidas localmente para garantir disponibilidade)
.get_para_mat   <- function(sv) as.matrix(para(sv,   chain = "concatenated"))
.get_latent_mat <- function(sv) as.matrix(latent(sv, chain = "concatenated"))
.sample_rows    <- function(mat, n) {
  if (nrow(mat) <= n) return(mat)
  mat[sample(seq_len(nrow(mat)), n), , drop = FALSE]
}
# Vol anualizada diaria: sqrt(mean(exp(h_t))) * sqrt(dias_ano) por draw
.per_draw_ann_vol <- function(latmat)
  sqrt(rowMeans(exp(latmat), na.rm = TRUE)) * sqrt(dias_ano)

ann_draws_all <- vector("list", length(sv_models))

# ---- Diagnostico: verifica ann_draws_all ----
cat("=== Diagnostico ann_draws_all ===\n")
=== Diagnostico ann_draws_all ===
n_populated <- sum(!sapply(ann_draws_all, is.null))
cat(sprintf("Entradas populadas: %d / %d\n", n_populated, length(ann_draws_all)))
Entradas populadas: 0 / 3
# ---- Reconstrucao a partir de sv_models ----
if (n_populated == 0) {
  cat("ann_draws_all vazio — reconstruindo a partir de sv_models...\n")

  if (!exists("sv_models")) stop("sv_models nao encontrado. Rode o chunk 'sv-models' primeiro.")

  ann_draws_all <- vector("list", length(sv_models))

  for (k in seq_along(sv_models)) {
    ev  <- bp_events[[k]]
    mdl <- sv_models[[k]]

    valid_nms <- names(Filter(Negate(is.null), mdl))
    cat(sprintf("  Evento %d (%s): modelos validos = [%s]\n",
                k, ev$event_label, paste(valid_nms, collapse = ", ")))

    if (length(valid_nms) < 1) next

    latent_list <- lapply(mdl[valid_nms], .get_latent_mat)
    common_n    <- min(sapply(latent_list, nrow))
    set.seed(1)
    latent_s    <- lapply(latent_list, .sample_rows, n = common_n)
    ann_list    <- lapply(latent_s, .per_draw_ann_vol)
    names(ann_list) <- valid_nms

    ann_draws_all[[k]] <- list(
      event_label = ev$event_label,
      crash_date  = ev$crash_date,
      ann_list    = ann_list,
      valid_nms   = valid_nms,
      common_n    = common_n
    )
    cat(sprintf("    -> populado com %d draws\n", common_n))
  }

  n_populated <- sum(!sapply(ann_draws_all, is.null))
  cat(sprintf("Apos reconstrucao: %d / %d entradas validas\n", n_populated, length(ann_draws_all)))
}
ann_draws_all vazio — reconstruindo a partir de sv_models...
  Evento 1 (Evento_1): modelos validos = [pre, during, post]
    -> populado com 5000 draws
  Evento 2 (Evento_2): modelos validos = [pre, during, post]
    -> populado com 5000 draws
  Evento 3 (Evento_3): modelos validos = [pre, during, post]
    -> populado com 5000 draws
Apos reconstrucao: 3 / 3 entradas validas
if (n_populated == 0) {
  stop(paste(
    "Nenhum modelo SV disponivel para gerar os plots.",
    "Possiveis causas:",
    "  1) Os arquivos .rds ainda nao existem (primeira execucao — aguarde o MCMC terminar)",
    "  2) Os segmentos pre/during/post resultaram em < 10 obs (janela muito curta)",
    "  3) O chunk 'sv-models' encontrou erros silenciosos — reveja o output desse chunk",
    sep = "\n"
  ))
}

# ---- Plots por evento ----
for (k in seq_along(ann_draws_all)) {
  adk <- ann_draws_all[[k]]
  if (is.null(adk)) {
    cat(sprintf("\nEvento %d: sem dados, pulando.\n", k))
    next
  }

  ev_label <- adk$event_label
  ev_date  <- format(adk$crash_date, "%Y-%m-%d")

  cat(sprintf("\nGerando plots: %s | BP %s | regimes: [%s] | draws: %d\n",
              ev_label, ev_date,
              paste(adk$valid_nms, collapse = ", "),
              adk$common_n))

  df_long <- do.call(rbind, Map(function(v, nm) {
    data.frame(ann_vol = as.numeric(v), regime = nm, stringsAsFactors = FALSE)
  }, adk$ann_list, adk$valid_nms)) %>%
    mutate(regime = factor(regime, levels = c("pre","during","post")))

  # --- Densidade ---
  print(
    ggplot(df_long, aes(x = ann_vol, fill = regime)) +
      geom_density(alpha = 0.45) +
      scale_fill_manual(values = colors_janela, drop = FALSE) +
      labs(title    = paste0(ev_label, " - Densidades Posteriores Vol. Anualizada (diaria)"),
           subtitle = paste0("Breakpoint: ", ev_date),
           x = "Volatilidade anualizada (aprox.)", y = "Densidade") +
      theme_minimal()
  )

  # --- Violin + Boxplot ---
  ann_stats <- df_long %>%
    group_by(regime) %>%
    summarise(mean   = mean(ann_vol),
              median = median(ann_vol),
              sd     = sd(ann_vol),
              q025   = quantile(ann_vol, .025),
              q975   = quantile(ann_vol, .975),
              .groups = "drop") %>%
    mutate(
      label = sprintf("mean=%.3f\n95%%CI=[%.3f,%.3f]", mean, q025, q975),
      y_pos = q975 + 0.03 * max(q975, na.rm = TRUE)
    )

  print(
    ggplot(df_long, aes(x = regime, y = ann_vol, fill = regime)) +
      geom_violin(width = 0.9, trim = FALSE, alpha = 0.35, color = NA) +
      geom_boxplot(width = 0.12, outlier.shape = NA, alpha = 0.9, color = "black") +
      stat_summary(fun = median, geom = "point", shape = 23, size = 3,  fill = "white") +
      stat_summary(fun = mean,   geom = "point", shape = 21, size = 2.5, fill = "red") +
      geom_errorbar(data = ann_stats,
                    aes(x = regime, ymin = q025, ymax = q975),
                    width = 0.05, linewidth = 0.6, inherit.aes = FALSE) +
      geom_text(data = ann_stats,
                aes(x = regime, y = y_pos, label = label),
                size = 3.5, vjust = 0, inherit.aes = FALSE) +
      scale_fill_manual(values = colors_janela, drop = FALSE) +
      labs(title    = paste0(ev_label, " - Distribuicoes Posteriores da Vol. Anualizada (diaria)"),
           subtitle = paste0("Breakpoint: ", ev_date,
                             " | Violin = densidade | Box = IQR | mediana/media"),
           x = "Janela", y = "Volatilidade anualizada (aprox.)") +
      theme_minimal(base_size = 13) +
      theme(legend.position = "none",
            plot.title = element_text(face = "bold")) +
      coord_cartesian(clip = "off")
  )
}

Gerando plots: Evento_1 | BP 2003-07-30 | regimes: [pre, during, post] | draws: 5000


Gerando plots: Evento_2 | BP 2007-10-15 | regimes: [pre, during, post] | draws: 5000


Gerando plots: Evento_3 | BP 2009-06-03 | regimes: [pre, during, post] | draws: 5000

Comparacao Consolidada dos 3 Eventos

consolidated <- do.call(rbind, lapply(seq_along(ann_draws_all), function(k) {
  adk <- ann_draws_all[[k]]
  if (is.null(adk)) return(NULL)
  do.call(rbind, Map(function(v, nm) {
    data.frame(evento         = adk$event_label,
               crash_date     = format(adk$crash_date, "%Y-%m-%d"),
               janela         = nm,
               mean_vol_ann   = mean(v),
               median_vol_ann = median(v),
               q025           = quantile(v, .025),
               q975           = quantile(v, .975))
  }, adk$ann_list, adk$valid_nms))
}))

knitr::kable(consolidated, digits = 4,
             caption = "Comparacao Consolidada - Vol. Anualizada Posterior por Evento e Janela (diaria)")
Comparacao Consolidada - Vol. Anualizada Posterior por Evento e Janela (diaria)
evento crash_date janela mean_vol_ann median_vol_ann q025 q975
pre Evento_1 2003-07-30 pre 0.1699 0.1683 0.1405 0.2097
during Evento_1 2003-07-30 during 0.2988 0.2967 0.2475 0.3628
post Evento_1 2003-07-30 post 0.2717 0.2690 0.2243 0.3335
pre1 Evento_2 2007-10-15 pre 0.3055 0.3021 0.2520 0.3805
during1 Evento_2 2007-10-15 during 0.1983 0.1947 0.1582 0.2605
post1 Evento_2 2007-10-15 post 0.1821 0.1791 0.1465 0.2364
pre2 Evento_3 2009-06-03 pre 0.1540 0.1518 0.1240 0.1971
during2 Evento_3 2009-06-03 during 0.3457 0.3403 0.2778 0.4439
post2 Evento_3 2009-06-03 post 0.2160 0.2100 0.1640 0.3015
consolidated %>%
  mutate(janela = factor(janela, levels = c("pre","during","post")),
         evento = factor(evento)) %>%
  ggplot(aes(x = janela, y = mean_vol_ann, color = evento, group = evento)) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3.5) +
  geom_errorbar(aes(ymin = q025, ymax = q975),
                width = 0.15, linewidth = 0.7) +
  scale_color_manual(values = c("#E74C3C", "#F39C12", "#8E44AD")) +
  labs(title    = "IBM - Volatilidade Anualizada Posterior: Comparacao dos 3 Breakpoints Bai-Perron",
       subtitle = "Pontos = media posterior; barras = IC 95%",
       x = "Janela", y = "Vol. Anualizada (aprox.)", color = "Evento") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))