arima_sp500

Especificando as bibliotecas

library(yfR)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggthemes)
library(e1071)
library(FinTS)

## Carregando pacotes exigidos: zoo
## 
## Anexando pacote: 'zoo'
## 
## Os seguintes objetos são mascarados por 'package:base':
## 
##     as.Date, as.Date.numeric

library(WriteXLS)
library(xtable)
library(tbl2xts)
library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## 
## Anexando pacote: 'forecast'
## 
## O seguinte objeto é mascarado por 'package:FinTS':
## 
##     Acf

library(tseries)
library(timeSeries)

## Carregando pacotes exigidos: timeDate
## 
## Anexando pacote: 'timeDate'
## 
## O seguinte objeto é mascarado por 'package:xtable':
## 
##     align
## 
## Os seguintes objetos são mascarados por 'package:e1071':
## 
##     kurtosis, skewness
## 
## 
## Anexando pacote: 'timeSeries'
## 
## O seguinte objeto é mascarado por 'package:zoo':
## 
##     time<-
## 
## O seguinte objeto é mascarado por 'package:dplyr':
## 
##     lag
## 
## Os seguintes objetos são mascarados por 'package:graphics':
## 
##     lines, points

library(xts)

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Anexando pacote: 'xts'
## 
## Os seguintes objetos são mascarados por 'package:dplyr':
## 
##     first, last

library(corrplot)

## corrplot 0.95 loaded

library(knitr)
library(kableExtra)

## 
## Anexando pacote: 'kableExtra'
## 
## O seguinte objeto é mascarado por 'package:dplyr':
## 
##     group_rows

library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Limpando o diretório

rm(list=ls())

Obtendo todas as acoes sp500

sp500_data[is.na(sp500_data)] = 0 #lidando com valores NA

head(sp500_data)

Filtrando os dados

#melhor usar dplyr::select
sp500_data <- sp500_data %>%
  select(ref_date, ticker, ret_closing_prices) %>%
  rename(Data = ref_date, Ativo = ticker, Retorno = ret_closing_prices)

sp500_data <- sp500_data %>%
  replace_na(list(Retorno = 0))

Removendo os dados duplicados

sp500_data <- sp500_data %>%
  distinct(Data, Ativo, .keep_all = TRUE)

Criando um data frame cujas colunas sao separadas por ativo

sp500_wide <- sp500_data %>%
  pivot_wider(
    names_from = Ativo,
    values_from = Retorno
  )

# Separando as datas
datas <- sp500_wide$Data

# Dados sem a coluna de data
sp500_dados_matriz <- sp500_wide[ , -1]

# Substituir NA por 0
sp500_dados_matriz[is.na(sp500_dados_matriz)] <- 0

sp500_dados_xts <- xts(sp500_dados_matriz, order.by = as.Date(datas))

# Lista de tickers (nomes das colunas, exceto Data)
tickers <- colnames(sp500_dados_xts)

# lista para resultados
resultados <- list()

for (i in seq_along(tickers)) {
  ativo <- tickers[i]
  serie <- sp500_dados_xts[, i]
  
  # Ajustar o modelo e capturar falhas
  resultado <- tryCatch({
    fit <- auto.arima(serie)
    
    # Parâmetros AR, MA e i
    ar <- fit$arma[1]
    ma <- fit$arma[2]
    d  <- fit$arma[6]
    
    # Previsão de t+1
    prev <- predict(fit, n.ahead = 1)$pred[1]
    
    # Tibble com o resultado
    tibble(Ativo = ativo, AR = ar, I = d, MA = ma, Previsao = prev)
    
  }, error = function(e) {
    tibble(Ativo = ativo, AR = NA, I = NA, MA = NA, Previsao = NA)
  })
  
  resultados[[i]] <- resultado
}

Unindo todos os resultados em um único data frame

df_modelos <- bind_rows(resultados)

print(head(df_modelos))

## # A tibble: 6 × 5
##   Ativo    AR     I    MA Previsao
##   <chr> <int> <int> <int>    <dbl>
## 1 A         0     0     0  0      
## 2 AAPL      0     0     0  0      
## 3 ABBV      0     0     0  0      
## 4 ABNB      0     0     0  0      
## 5 ABT       0     0     0  0      
## 6 ACGL      2     0     2 -0.00375

df_modelos$Previsao[is.na(df_modelos$Previsao)] <- 0

# Filtrando apenas os ativos com previsão positiva
# Se for necessário, recria df_modelos como um tibble
df_modelos <- tibble(df_modelos)
colnames(df_modelos) <- trimws(colnames(df_modelos))

# Filtrando as previsões positivas
final_positivos <- dplyr::filter(df_modelos, Previsao > 0)

head(final_positivos)

# Calculando a média da previsão desses ativos
media_retorno <- mean(final_positivos$Previsao, na.rm = TRUE)

# Ativos e média
print(final_positivos)

## # A tibble: 153 × 5
##    Ativo    AR     I    MA  Previsao
##    <chr> <int> <int> <int>     <dbl>
##  1 AES       3     0     2 0.00652  
##  2 AJG       0     0     0 0.000971 
##  3 AMT       2     0     2 0.00217  
##  4 AMZN      0     0     0 0.00152  
##  5 APH       0     0     0 0.00137  
##  6 APO       0     0     0 0.00141  
##  7 ATO       1     0     2 0.00209  
##  8 AVGO      0     0     4 0.00542  
##  9 AVY       0     0     1 0.0000423
## 10 AXON      0     0     0 0.00247  
## # ℹ 143 more rows

print(media_retorno)

## [1] 0.001911001

# Ordenar os ativos pela maior previsão de retorno e pegar os 10 primeiros
top_10_ativos <- final_positivos %>%
  arrange(desc(Previsao)) %>%
  head(10)

top_10_ativos

# Pegar os nomes dos 10 ativos com maior previsão
ativos_top10 <- top_10_ativos$Ativo

# Filtrar os dados originais para conter só esses ativos
retornos_top10 <- sp500_data %>%
  dplyr::filter(Ativo %in% ativos_top10) %>%
  pivot_wider(names_from = Ativo, values_from = Retorno) %>%
  select(-Data)  # remover coluna de data para as correlações

Fatos estilizados

ativos_top10 <- c("PODD", "CCL", "ZBRA", "CME", "PLTR", "SMCI", "BLDR", "PKG", "NWS", "SPG")

assets <- yf_get(
  tickers = ativos_top10,
  first_date = "2020-01-01",
  last_date = Sys.Date(),
  type_return = "log",
  freq_data = "daily"
)

##

## ── Running yfR for 10 stocks | 2020-01-01 --> 2025-05-14 (1960 days) ──

##

## ℹ Downloading data for benchmark ticker ^GSPC

## ℹ (1/10) Fetching data for BLDR

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Looking good!

## ℹ (2/10) Fetching data for CCL

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Youre doing good!

## ℹ (3/10) Fetching data for CME

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Mais contente que cusco de cozinheira!

## ℹ (4/10) Fetching data for NWS

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- You got it agariel!

## ℹ (5/10) Fetching data for PKG

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Youre doing good!

## ℹ (6/10) Fetching data for PLTR

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1160 valid rows (2020-09-30 --> 2025-05-13)

## ✔    - got 86% of valid prices -- Time for some tea?

## ℹ (7/10) Fetching data for PODD

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Looking good!

## ℹ (8/10) Fetching data for SMCI

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Well done agariel!

## ℹ (9/10) Fetching data for SPG

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Good stuff!

## ℹ (10/10) Fetching data for ZBRA

## ✔    - found cache file (2023-01-03 --> 2025-05-13)

## !    - need new data (cache doesnt match query)

## ✔    - got 1348 valid rows (2020-01-02 --> 2025-05-13)

## ✔    - got 100% of valid prices -- Youre doing good!

## ℹ Binding price data

##

## ── Diagnostics ─────────────────────────────────────────────────────────────────

## ✔ Returned dataframe with 13292 rows -- Time for some tea?

## ℹ Using 30.5 MB at /tmp/Rtmpy16JFj/yf_cache for 504 cache files

## ℹ Out of 10 requested tickers, you got 10 (100%)

assets <- assets %>%
  select(ref_date, ticker, ret_closing_prices, price_close) %>%
  rename(Data = ref_date, Ativo = ticker, Retorno = ret_closing_prices, Preco = price_close)

Precos

ggplot(assets, aes(x = Data, y = Preco)) +
  geom_line(color = 'steelblue') +
  facet_wrap(~Ativo, scales = "free_y") +
  labs(title = "Preços Diários", y = "Preço Fechamento")

Retornos

ggplot(assets, aes(x = Data, y = Retorno)) +
  geom_line(color = 'tomato') +
  facet_wrap(~Ativo) +
  labs(title = "Retornos Diários")

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

Retornos absolutos

ggplot(assets, aes(x = Data, y = abs(Retorno))) +
  geom_line(color = 'orchid3') +
  facet_wrap(~Ativo) +
  labs(title = "Retornos Absolutos Diários")

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

qqplot

ggplot(assets, aes(sample = Retorno)) +
  stat_qq() + stat_qq_line() +
  facet_wrap(~Ativo) +
  labs(title = "QQplot dos Retornos")

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_qq()`).

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_qq_line()`).

histograma

ggplot(assets) +
  geom_histogram(aes(x = Retorno, y = after_stat(density)), bins = 30,
                 fill = "mintcream", color = "grey60") +
  geom_density(aes(x = Retorno), color = "black") +
  facet_wrap(~Ativo) +
  labs(title = "Histogramas dos Retornos")

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_density()`).

# Partindo do objeto `assets`, que tem Data, Ativo, Retorno

atv.dados <- assets %>%
  dplyr::filter(Ativo %in% ativos_top10) %>%
  select(Data, Ativo, Retorno) %>%
  pivot_wider(
    names_from = Ativo,
    values_from = Retorno
  )
atv.dados[is.na(atv.dados)] <- 0

# Calcular os fatos estilizados corretamente
estatisticas <- lapply(atv.dados[, -1], function(x) {
  c(
    Desvio_Padrao = sd(x, na.rm = TRUE),
    Variancia     = var(x, na.rm = TRUE),
    Curtose       = kurtosis(x, na.rm = TRUE),
    Assimetria    = skewness(x, na.rm = TRUE)
  )
})

# Converter para data frame
fatos_estilizados <- as.data.frame(do.call(rbind, estatisticas))
fatos_estilizados <- tibble::rownames_to_column(fatos_estilizados, var = "Ativo")

# Arredondar valores
fatos_estilizados <- fatos_estilizados %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))

# Mostrar com kable
kable(fatos_estilizados, format = "markdown", align = "l", caption = "Fatos estilizados das 10 ações com melhor previsão") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover"))

Fatos estilizados das 10 ações com melhor previsão
Ativo	Desvio_Padrao	Variancia	Curtose	Assimetria
BLDR	0.0337	0.0011	8.9782	-0.5327
CCL	0.0468	0.0022	12.1492	-0.9305
CME	0.0177	0.0003	23.6752	-1.5992
NWS	0.0214	0.0005	7.1941	-0.3753
PKG	0.0187	0.0004	8.1916	0.0843
PLTR	0.0420	0.0018	5.7912	0.7224
PODD	0.0282	0.0008	6.8392	0.2964
SMCI	0.0496	0.0025	10.3215	-0.0456
SPG	0.0302	0.0009	22.4039	-0.9399
ZBRA	0.0263	0.0007	8.5240	-0.7243

# Gráfico de pares
ggpairs(retornos_top10,
        upper = list(continuous = wrap("cor", size = 4)),
        diag = list(continuous = wrap("densityDiag")),
        lower = list(continuous = wrap("points", alpha = 0.5, size = 1)))

# Calcular matriz de correlação
matriz_cor <- cor(retornos_top10)

# Plotar mapa de calor
corrplot(matriz_cor,
         method = "color",
         type = "lower",
         addCoef.col = "black",
         tl.col = "black",
         tl.srt = 45,
         number.cex = 0.8)

dados <- data.frame(
  Código = c("BLDR", "CCL", "CME", "PLTR", "PKG", "SMCI", "PODD", "NWS", "SPG", "ZBRA"),

  Empresa = c(
    "Builders FirstSource", "Carnival Corp.", 
    "CME Group", "Palantir Technologies", "Packaging Corporation of America",
    "Super Micro Computer","Insulet Corporation",
    "News Corp", "Simon Property Group", "Zebra Technologies"
  ),
  Setor = c(
    "Construção / Materiais", "Turismo / Cruzeiros",
    "Financeiro / Bolsa e Derivativos", "Tecnologia / Big Data e Defesa",
    "Indústria / Embalagens", "Tecnologia / Servidores",
    "Saúde / Tecnologia médica", "Mídia / Comunicação", "REIT / Shoppings", "Tecnologia / Automação"
  ),
  Comentário = c(
    "Cíclica e sensível ao setor. ARIMA(5,1,0).",
    "Alta volatilidade. ARIMA(2,0,2). GARCH recomendado.",
    "Estável e previsível. ARIMA(0,0,1). Ideal para curto prazo.",
    "Tendência com ruído. ARIMA(0,0,1). Modelo simples.",
    "Setor defensivo. ARIMA(2,0,2). Volatilidade baixa.",
    "Alta expansão em 2023. ARIMA(0,0,3). Volatilidade acentuada.",
    "Tendência forte e estável. ARIMA(2,0,2). Reação rápida a choques.",
    "Sensível a mídia e ciclos. ARIMA(2,0,3).",
    "REIT tradicional. ARIMA(2,0,2). Estabilidade esperada.",
    "Volátil, mas previsível. ARIMA(1,0,2)."
  )
)

kable(dados, format = "markdown", align = "l", caption = "Top 10 ações com melhor previsão ARIMA — descrição por setor e modelo") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover"))

Top 10 ações com melhor previsão ARIMA — descrição por setor e modelo
Código	Empresa	Setor	Comentário
BLDR	Builders FirstSource	Construção / Materiais	Cíclica e sensível ao setor. ARIMA(5,1,0).
CCL	Carnival Corp.	Turismo / Cruzeiros	Alta volatilidade. ARIMA(2,0,2). GARCH recomendado.
CME	CME Group	Financeiro / Bolsa e Derivativos	Estável e previsível. ARIMA(0,0,1). Ideal para curto prazo.
PLTR	Palantir Technologies	Tecnologia / Big Data e Defesa	Tendência com ruído. ARIMA(0,0,1). Modelo simples.
PKG	Packaging Corporation of America	Indústria / Embalagens	Setor defensivo. ARIMA(2,0,2). Volatilidade baixa.
SMCI	Super Micro Computer	Tecnologia / Servidores	Alta expansão em 2023. ARIMA(0,0,3). Volatilidade acentuada.
PODD	Insulet Corporation	Saúde / Tecnologia médica	Tendência forte e estável. ARIMA(2,0,2). Reação rápida a choques.
NWS	News Corp	Mídia / Comunicação	Sensível a mídia e ciclos. ARIMA(2,0,3).
SPG	Simon Property Group	REIT / Shoppings	REIT tradicional. ARIMA(2,0,2). Estabilidade esperada.
ZBRA	Zebra Technologies	Tecnologia / Automação	Volátil, mas previsível. ARIMA(1,0,2).

arima_sp500

Agariel

2025-05-12