Ao longo deste notebook, iremos explorar dados financeiros de ações blue chips e small caps. Nosso objetivo é analisar as propriedades estatísticas da entropia de Shannon calculada a partir de padrões ordinais de Bandt-Pompe. Apresentaremos uma possível aplicação: um teste de hipótese para verificar se séries temporais distintas produzem padrões ordinais com a mesma entropia.

require(ggplot2)
require(ggthemes)
require(dplyr)
require(magrittr)
require(ggpubr)

source("InformationTheory.R")
source("Bandt-Pompe.R")

Média e variância da distribuição assintótica

#' Normalized Shannon entropy used as the mean of the asymptotic distribution.
#'
#' Computes -sum(p * log(p)) / log(k). Zero-probability entries are dropped
#' before taking logarithms: their contribution is 0 by the usual
#' 0 * log(0) = 0 convention, whereas evaluating it numerically yields NaN.
#' This mirrors the `p > 0` filtering done in `AsymptoticVariance`.
#'
#' @param p Numeric vector of probabilities (may contain zeros).
#' @param k Normalizing constant; the entropy is divided by log(k).
#' @return A single numeric value.
AsymptoticMean <- function(p, k) {
  p.nonzero <- p[p > 0]
  -sum(p.nonzero * log(p.nonzero)) / log(k)
}

#' Asymptotic variance of the (unnormalized) Shannon entropy.
#'
#' Evaluates
#'   [ sum_i p_i (1 - p_i) (log p_i + 1)^2
#'     - 2 * sum_{j < i} p_j p_i (log p_j + 1) (log p_i + 1) ] / n
#' over the strictly positive entries of `p`.
#'
#' The pairwise sum is computed in closed form instead of with a double loop.
#' Only the positive entries are ever indexed, so a `p` containing zeros no
#' longer produces NA, and a single positive entry is handled correctly.
#'
#' @param p Numeric vector of probabilities (may contain zeros).
#' @param n Sample size used to scale the variance.
#' @return A single numeric value.
AsymptoticVariance <- function(p, n) {
  # log(0) is -Inf, so zero-probability cells are excluded up front.
  p.nonzero <- p[p > 0]

  log.term <- log(p.nonzero) + 1
  weighted <- p.nonzero * log.term

  diagonal <- sum(p.nonzero * (1 - p.nonzero) * log.term^2)

  # sum_{j < i} w_j * w_i == (sum(w)^2 - sum(w^2)) / 2
  cross <- (sum(weighted)^2 - sum(weighted^2)) / 2

  (diagonal - 2 * cross) / n
}

Hiperparâmetros

# Parameters passed to bandt.pompe() throughout the notebook.
D <- 3
Tau <- 1
# Normalizing constant for the entropy: D! (used as log(k) below).
k <- factorial(D)

Analisando diferentes ações da mesma empresa (Usiminas - blue chip)

# Load the full quote history of both Usiminas tickers.
usim3 <- read.csv("../../Data/R/stock markets/USIM3.SA.csv")
usim5 <- read.csv("../../Data/R/stock markets/USIM5.SA.csv")

# Same row window for both tickers, so the two series have equal length.
usim_3 <- usim3[2729:2977, ]
usim_5 <- usim5[2729:2977, ]
n_usim <- nrow(usim_5)

# Ordinal-pattern distribution and normalized entropy of each closing series.
p_usim3 <- bandt.pompe(unlist(usim_3["Close"]), D, Tau)
H_usim_3 <- shannon.entropy.normalized(p_usim3)

p_usim5 <- bandt.pompe(unlist(usim_5["Close"]), D, Tau)
H_usim_5 <- shannon.entropy.normalized(p_usim5)

# Draw from the asymptotic normal distribution of each entropy. The standard
# deviation is divided by log(k) to match the normalized entropy scale.
AsymptoticMean_usim3 <- AsymptoticMean(p_usim3, k)
AsymptoticSTD_usim3 <- sqrt(AsymptoticVariance(p_usim3, n_usim)) / log(k)
x_usim3 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_usim3,
  sd = AsymptoticSTD_usim3
)

AsymptoticMean_usim5 <- AsymptoticMean(p_usim5, k)
AsymptoticSTD_usim5 <- sqrt(AsymptoticVariance(p_usim5, n_usim)) / log(k)
x_usim5 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_usim5,
  sd = AsymptoticSTD_usim5
)

# Overlay both sampled densities; vertical lines mark the observed entropies.
ggplot() +
  geom_density(
    aes(x = x_usim3),
    kernel = "epanechnikov", alpha = .3, color = "blue", fill = "blue"
  ) +
  geom_density(
    aes(x = x_usim5),
    kernel = "epanechnikov", alpha = .3, color = "red", fill = "red"
  ) +
  geom_vline(aes(xintercept = H_usim_3, color = "blue")) +
  geom_vline(aes(xintercept = H_usim_5, color = "red")) +
  xlab(expression(italic(H))) +
  ylab("") +
  ggtitle(paste0("N = ", n_usim)) +
  scale_color_manual(
    name = "Ações",
    values = c("blue", "red"),
    labels = c("USIM3", "USIM5")
  ) +
  theme_few(base_size = 12, base_family = "serif") +
  theme(plot.title = element_text(hjust = 0.5))

Analisando ações em diferentes anos (Usiminas - blue chip)


# Compare USIM3 with itself over two different row windows (labelled
# 2021-2022 and 2018-2019 in the legend below).
#
# Fixes relative to the previous version:
#   * the second window is now sliced from `usim3` (it was taken from `usim5`);
#   * its ordinal-pattern distribution is computed from that window (it was
#     computed from `usim_5`, the USIM5 series of the previous section);
#   * each series uses its own length when scaling the asymptotic variance,
#     since the two windows do not have the same number of rows.
usim_3_1 <- usim3[2729:2977, ]
usim_3_2 <- usim3[1987:2232, ]
n_usim_3_1 <- nrow(usim_3_1)
n_usim_3_2 <- nrow(usim_3_2)
# Kept so that `n_usim` still holds the length of the second window, as before.
n_usim <- n_usim_3_2

# Ordinal-pattern distribution and normalized entropy of each window.
p_usim_3_1 <- bandt.pompe(unlist(usim_3_1["Close"]), D, Tau)
H_usim_3_1 <- shannon.entropy.normalized(p_usim_3_1)

p_usim_3_2 <- bandt.pompe(unlist(usim_3_2["Close"]), D, Tau)
H_usim_3_2 <- shannon.entropy.normalized(p_usim_3_2)

# Draw from the asymptotic normal distribution of each entropy. The standard
# deviation is divided by log(k) to match the normalized entropy scale.
AsymptoticMean_usim_3_1 <- AsymptoticMean(p_usim_3_1, k)
AsymptoticSTD_usim_3_1 <-
  sqrt(AsymptoticVariance(p_usim_3_1, n_usim_3_1)) / log(k)
x_usim_3_1 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_usim_3_1,
  sd = AsymptoticSTD_usim_3_1
)

AsymptoticMean_usim_3_2 <- AsymptoticMean(p_usim_3_2, k)
AsymptoticSTD_usim_3_2 <-
  sqrt(AsymptoticVariance(p_usim_3_2, n_usim_3_2)) / log(k)
x_usim_3_2 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_usim_3_2,
  sd = AsymptoticSTD_usim_3_2
)

# Overlay both sampled densities; vertical lines mark the observed entropies.
ggplot() +
  geom_density(
    aes(x = x_usim_3_1),
    kernel = "epanechnikov", alpha = .3, color = "blue", fill = "blue"
  ) +
  geom_density(
    aes(x = x_usim_3_2),
    kernel = "epanechnikov", alpha = .3, color = "red", fill = "red"
  ) +
  geom_vline(aes(xintercept = H_usim_3_1, color = "blue")) +
  geom_vline(aes(xintercept = H_usim_3_2, color = "red")) +
  xlab(expression(italic(H))) +
  ylab("") +
  scale_color_manual(
    name = "Ações",
    values = c("blue", "red"),
    labels = c("USIM3 (2021-2022)", "USIM3 (2018-2019)")
  ) +
  theme_few(base_size = 12, base_family = "serif") +
  theme(plot.title = element_text(hjust = 0.5))

# Compare USIM3 with itself over two different row windows (labelled
# 2021-2022 and 2020-2021 in the legend below).
#
# Fixes relative to the previous version:
#   * the second window is now sliced from `usim3` (it was taken from `usim5`);
#   * its ordinal-pattern distribution is computed from that window (it was
#     computed from `usim_5`, the USIM5 series of an earlier section);
#   * each series uses its own length when scaling the asymptotic variance,
#     since the two windows do not have the same number of rows.
usim_3_1 <- usim3[2729:2977, ]
usim_3_2 <- usim3[2481:2728, ]
n_usim_3_1 <- nrow(usim_3_1)
n_usim_3_2 <- nrow(usim_3_2)
# Kept so that `n_usim` still holds the length of the second window, as before.
n_usim <- n_usim_3_2

# Ordinal-pattern distribution and normalized entropy of each window.
p_usim_3_1 <- bandt.pompe(unlist(usim_3_1["Close"]), D, Tau)
H_usim_3_1 <- shannon.entropy.normalized(p_usim_3_1)

p_usim_3_2 <- bandt.pompe(unlist(usim_3_2["Close"]), D, Tau)
H_usim_3_2 <- shannon.entropy.normalized(p_usim_3_2)

# Draw from the asymptotic normal distribution of each entropy. The standard
# deviation is divided by log(k) to match the normalized entropy scale.
AsymptoticMean_usim_3_1 <- AsymptoticMean(p_usim_3_1, k)
AsymptoticSTD_usim_3_1 <-
  sqrt(AsymptoticVariance(p_usim_3_1, n_usim_3_1)) / log(k)
x_usim_3_1 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_usim_3_1,
  sd = AsymptoticSTD_usim_3_1
)

AsymptoticMean_usim_3_2 <- AsymptoticMean(p_usim_3_2, k)
AsymptoticSTD_usim_3_2 <-
  sqrt(AsymptoticVariance(p_usim_3_2, n_usim_3_2)) / log(k)
x_usim_3_2 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_usim_3_2,
  sd = AsymptoticSTD_usim_3_2
)

# Overlay both sampled densities; vertical lines mark the observed entropies.
ggplot() +
  geom_density(
    aes(x = x_usim_3_1),
    kernel = "epanechnikov", alpha = .3, color = "blue", fill = "blue"
  ) +
  geom_density(
    aes(x = x_usim_3_2),
    kernel = "epanechnikov", alpha = .3, color = "red", fill = "red"
  ) +
  geom_vline(aes(xintercept = H_usim_3_1, color = "blue")) +
  geom_vline(aes(xintercept = H_usim_3_2, color = "red")) +
  xlab(expression(italic(H))) +
  ylab("") +
  scale_color_manual(
    name = "Ações",
    values = c("blue", "red"),
    labels = c("USIM3 (2021-2022)", "USIM3 (2020-2021)")
  ) +
  theme_few(base_size = 12, base_family = "serif") +
  theme(plot.title = element_text(hjust = 0.5))

Análise de sequências do setor financeiro (blue chips)

# Load three bank tickers and keep the same row window for each of them
# (the plot below is titled "2021-2022").
bbas3 <- read.csv("../../Data/R/stock markets/BBAS3.SA.csv")
bbdc4 <- read.csv("../../Data/R/stock markets/BBDC4.SA.csv")
itub4 <- read.csv("../../Data/R/stock markets/ITUB4.SA.csv")
bbas3 <- bbas3[2729:2977, ]
bbdc4 <- bbdc4[2729:2977, ]
itub4 <- itub4[2729:2977, ]
# All three windows use the same row range, so one length serves all series.
n <- nrow(itub4)

# Ordinal-pattern distribution and normalized entropy of each closing series.
p_bbas3 <- bandt.pompe(unlist(bbas3["Close"]), D, Tau)
H_bbas3 <- shannon.entropy.normalized(p_bbas3)

p_bbdc4 <- bandt.pompe(unlist(bbdc4["Close"]), D, Tau)
H_bbdc4 <- shannon.entropy.normalized(p_bbdc4)

p_itub4 <- bandt.pompe(unlist(itub4["Close"]), D, Tau)
H_itub4 <- shannon.entropy.normalized(p_itub4)

# Draw from the asymptotic normal distribution of each entropy. The standard
# deviation is divided by log(k) to match the normalized entropy scale.
AsymptoticMean_bbas3 <- AsymptoticMean(p_bbas3, k)
AsymptoticSTD_bbas3 <- sqrt(AsymptoticVariance(p_bbas3, n)) / log(k)
x_bbas3 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_bbas3,
  sd = AsymptoticSTD_bbas3
)

AsymptoticMean_bbdc4 <- AsymptoticMean(p_bbdc4, k)
AsymptoticSTD_bbdc4 <- sqrt(AsymptoticVariance(p_bbdc4, n)) / log(k)
x_bbdc4 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_bbdc4,
  sd = AsymptoticSTD_bbdc4
)

AsymptoticMean_itub4 <- AsymptoticMean(p_itub4, k)
AsymptoticSTD_itub4 <- sqrt(AsymptoticVariance(p_itub4, n)) / log(k)
x_itub4 <- rnorm(
  n = 10000,
  mean = AsymptoticMean_itub4,
  sd = AsymptoticSTD_itub4
)

# Overlay the sampled densities; vertical lines mark the observed entropies.
ggplot() +
  geom_density(
    aes(x = x_bbas3),
    kernel = "epanechnikov", alpha = .3, color = "blue", fill = "blue"
  ) +
  geom_density(
    aes(x = x_bbdc4),
    kernel = "epanechnikov", alpha = .3, color = "red", fill = "red"
  ) +
  geom_density(
    aes(x = x_itub4),
    kernel = "epanechnikov", alpha = .3, color = "orange", fill = "orange"
  ) +
  geom_vline(aes(xintercept = H_bbas3, color = "blue")) +
  geom_vline(aes(xintercept = H_bbdc4, color = "red")) +
  geom_vline(aes(xintercept = H_itub4, color = "orange")) +
  xlab(expression(italic(H))) +
  ylab("") +
  ggtitle("2021-2022") +
  # The aes() colour keys are plain strings, and a discrete scale orders them
  # alphabetically (blue, orange, red). Unnamed `values`/`labels` would
  # therefore swap the colours and legend entries of BBDC4 and ITUB4. Explicit
  # `breaks` plus named `values` pin each key to its own colour and label.
  scale_color_manual(
    name = "Ações",
    breaks = c("blue", "red", "orange"),
    values = c(blue = "blue", red = "red", orange = "orange"),
    labels = c("bbas3", "bbdc4", "itub4")
  ) +
  theme_few(base_size = 12, base_family = "serif") +
  theme(plot.title = element_text(hjust = 0.5))

Análise de sequências do setor financeiro (blue chips & small caps)

# Load both tickers and keep one row window from each. The windows differ
# (247:494 for BMGB4, 2729:2977 for BRSR6); `n` is the BRSR6 window length.
bmgb4 <- read.csv("../../Data/R/stock markets/BMGB4.SA.csv")[247:494, ]
brsr6 <- read.csv("../../Data/R/stock markets/BRSR6.SA.csv")[2729:2977, ]
n <- nrow(brsr6)

Análise de ações de diferentes setores (blue chips)

Análise de sequências do setor de companhias aéreas (small caps)

