Fatos Estilizados

A. Gráficos de preços B. Gráficos de retornos C. Histograma dos retornos

  1. Você deve escolher 4 (quatro) variáveis disponíveis no Yahoo Finance e verificar os fatos estilizados de séries temporais financeiras, seguindo aproximadamente o que foi mostrado na aula.

  2. Extra: Calcule as principais estatísticas descritivas das variáveis: média, desvio-padrão, variância, curtose, assimetria, etc.

  3. Extra: Calcule o coeficiente de correlação entre dois retornos e faça o gráfico de dispersão.

  4. Extra: Você pode fazer também o mapa de calor para as correlações entre os quatro ativos escolhidos.

Instalações necessárias

# install.packages("quantmod")
# install.packages("dplyr")
# install.packages("BatchGetSymbols")
# install.packages("GGally")
# install.packages("corrplot")
# install.packages("e1071")

Carregamento das bibliotecas

suppressMessages(library(quantmod))
## Warning: package 'quantmod' was built under R version 4.3.3
suppressMessages(library(dplyr))
## Warning: package 'dplyr' was built under R version 4.3.3
suppressMessages(library(ggplot2))
## Warning: package 'ggplot2' was built under R version 4.3.3
suppressMessages(library(BatchGetSymbols))
## Warning: package 'BatchGetSymbols' was built under R version 4.3.3
suppressMessages(library(GGally))
## Warning: package 'GGally' was built under R version 4.3.3
suppressMessages(library(corrplot))
## Warning: package 'corrplot' was built under R version 4.3.3
suppressMessages(library(e1071))
## Warning: package 'e1071' was built under R version 4.3.3

Baixando do yahoo finance

# Tickers to download from Yahoo Finance.
symbols <- c("AAPL", "MSFT", "GOOG", "AMZN")
# Fetch the price history of every ticker and collect the results in a list
# whose elements are named after the tickers. auto.assign = FALSE makes
# getSymbols() hand the data back as a value, which is what gets stored.
data_list <- lapply(
  setNames(symbols, symbols),
  function(ticker) getSymbols(ticker, src = "yahoo", auto.assign = FALSE)
)

Combinando cada df dentro da lista de dataframes em um único!

# Column-bind every per-ticker object stored in data_list into one wide
# object, then show its first four rows.
df <- do.call(cbind, data_list)
print(head(df, 4))
##            AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2007-01-03  3.081786  3.092143 2.925000   2.992857  1238319600      2.533751
## 2007-01-04  3.001786  3.069643 2.993571   3.059286   847260400      2.589989
## 2007-01-05  3.063214  3.078571 3.014286   3.037500   834741600      2.571545
## 2007-01-08  3.070000  3.090357 3.045714   3.052500   797106800      2.584246
##            MSFT.Open MSFT.High MSFT.Low MSFT.Close MSFT.Volume MSFT.Adjusted
## 2007-01-03     29.91     30.25    29.40      29.86    76935100      21.39744
## 2007-01-04     29.70     29.97    29.44      29.81    45774500      21.36159
## 2007-01-05     29.63     29.75    29.45      29.64    44607200      21.23977
## 2007-01-08     29.65     30.10    29.53      29.93    50220200      21.44760
##            GOOG.Open GOOG.High GOOG.Low GOOG.Close GOOG.Volume GOOG.Adjusted
## 2007-01-03  11.60650  11.87200 11.48470   11.64610   309415434      11.63286
## 2007-01-04  11.68122  12.05357 11.66503   12.03638   316686586      12.02270
## 2007-01-05  12.01746  12.14199 11.90812   12.13427   275914333      12.12047
## 2007-01-08  12.14672  12.20102 12.00998   12.04436   190888826      12.03067
##            AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted
## 2007-01-03    1.9340    1.9530   1.9025     1.9350   248102000        1.9350
## 2007-01-04    1.9295    1.9570   1.9130     1.9450   126368000        1.9450
## 2007-01-05    1.9360    1.9395   1.8800     1.9185   132394000        1.9185
## 2007-01-08    1.9110    1.9155   1.8585     1.8750   135660000        1.8750
# Count the missing values in each column to see whether any of them would
# need to be fixed before the analysis.
colSums(is.na(df))
##     AAPL.Open     AAPL.High      AAPL.Low    AAPL.Close   AAPL.Volume 
##             0             0             0             0             0 
## AAPL.Adjusted     MSFT.Open     MSFT.High      MSFT.Low    MSFT.Close 
##             0             0             0             0             0 
##   MSFT.Volume MSFT.Adjusted     GOOG.Open     GOOG.High      GOOG.Low 
##             0             0             0             0             0 
##    GOOG.Close   GOOG.Volume GOOG.Adjusted     AMZN.Open     AMZN.High 
##             0             0             0             0             0 
##      AMZN.Low    AMZN.Close   AMZN.Volume AMZN.Adjusted 
##             0             0             0             0

Filtrando os dados para o preço de ajuste e realizando um auto-plot

# Keep only the adjusted-price columns, i.e. those whose name contains
# the literal text "Adjusted".
adj_cols <- grep("Adjusted", colnames(df), fixed = TRUE)
df_adj <- df[, adj_cols]
print(head(df_adj))
##            AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03      2.533751      21.39744      11.63286        1.9350
## 2007-01-04      2.589989      21.36159      12.02270        1.9450
## 2007-01-05      2.571545      21.23977      12.12047        1.9185
## 2007-01-08      2.584246      21.44760      12.03067        1.8750
## 2007-01-09      2.798918      21.46908      12.07843        1.8890
## 2007-01-10      2.932862      21.25411      12.17695        1.8575
# Quick look at all adjusted-price series using autoplot(), with a title.
adj_plot <- autoplot(df_adj)
adj_plot + labs(title = "Auto-plot das series temporais")

Plotando os preços ajustados com ggplot

# Build a long-format table (one row per date/series pair) so ggplot2 can
# map every series to its own colour.
# The dates are read straight from the time index of df_adj and repeated once
# per column. A `date` column is deliberately NOT added to df_adj itself:
# NOTE(review): df_adj is presumably an xts object (the default return of
# getSymbols), whose data live in a single-typed matrix, so a Date cannot be
# stored there as such; any extra column would also flow into the long table
# below and into every later calculation made on df_adj.
df_long <- data.frame(
  date = rep(index(df_adj), times = ncol(df_adj)),
  variable = rep(colnames(df_adj), each = nrow(df_adj)),
  value = as.vector(coredata(df_adj))
)
# Line chart with one line per series.
ggplot(data = df_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(title = "Series Temporais",
       x = "Data",
       y = "Valor",
       color = "Empresa") +
  theme_minimal()

# Log-prices: natural logarithm of every adjusted-price series.
log_prices <- log(df_adj)
print(head(log_prices, 4))
##            AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03     0.9297010      3.063271      2.453834     0.6601073
## 2007-01-04     0.9516537      3.061595      2.486797     0.6652620
## 2007-01-05     0.9445070      3.055875      2.494896     0.6515436
## 2007-01-08     0.9494338      3.065613      2.487459     0.6286087
# Log-returns are the first difference of the log-prices:
#   r_t = log(P_t) - log(P_{t-1})
# The difference must be stored in df_ret, because df_ret is the object that
# the code further down cleans and plots as the return series.
df_ret <- diff(log_prices)
print(df_ret)
##            AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03            NA            NA            NA            NA
## 2007-01-04   0.021952694  -0.001676507   0.032962925  0.0051547071
## 2007-01-05  -0.007146674  -0.005719192   0.008099363 -0.0137184012
## 2007-01-08   0.004926725   0.009737225  -0.007437316 -0.0229349434
## 2007-01-09   0.079799251   0.001001213   0.003962550  0.0074389599
## 2007-01-10   0.046745832  -0.010063723   0.008123390 -0.0168161448
## 2007-01-11  -0.012448192   0.034463198   0.020745213  0.0067069589
## 2007-01-12  -0.012393824   0.016476143   0.010510455  0.0211647912
## 2007-01-16   0.025872188  -0.001603504  -0.001426797  0.0119699594
## 2007-01-17  -0.022390940  -0.001927314  -0.013978363 -0.0203821610
##        ...                                                        
## 2024-06-05   0.007790460   0.018903530   0.011016637  0.0107593654
## 2024-06-06  -0.007121841   0.001202030   0.007202714  0.0203130345
## 2024-06-07   0.012315886  -0.001579460  -0.013548081 -0.0037909438
## 2024-06-10  -0.019333462   0.009439766   0.004994646  0.0148645267
## 2024-06-11   0.070131407   0.011179008   0.008793234  0.0009083768
## 2024-06-12   0.028177646   0.019182507   0.007658990 -0.0018175796
## 2024-06-13   0.005476123   0.001178259  -0.015829643 -0.0165087778
## 2024-06-14  -0.008201953   0.002239487   0.009180260 -0.0009251853
## 2024-06-17   0.019480496   0.013020113   0.002295976  0.0021755360
## 2024-06-18  -0.011045244  -0.004537788  -0.013118460 -0.0068144294
# The first row has no previous observation, so its return is NA; drop it.
df_ret <- na.omit(df_ret)
# Same long-format reshape and line chart as for the prices, now fed with
# df_ret: dates repeated once per column, column names repeated once per row.
df_long <- data.frame(
  date = rep(index(df_ret), times = ncol(df_ret)),
  variable = rep(colnames(df_ret), each = nrow(df_ret)),
  value = as.vector(coredata(df_ret))
)
# One coloured line per series on a minimal theme; labels added last.
ret_plot <- ggplot(df_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  theme_minimal()
ret_plot +
  labs(title = "Series Temporais", x = "Data", y = "Valor", color = "Empresa")

Testando retornos logarítmicos direto, do jeito que a professora passou! (ATALHO)

# Tickers for the batch download (the same four used earlier in the script).
symbols <- c("AAPL", "MSFT", "GOOG", "AMZN")

# Sample window: from the start of 2021 up to the day the script is run.
first.date <- "2021-01-01"
last.date <- Sys.Date()

# Download daily data for all tickers in a single call, asking for
# logarithmic returns (type.return = "log").
# NOTE: the call emits a deprecation warning that points to yfR::yf_get()
# as the replacement for BatchGetSymbols().
stocks_list <- BatchGetSymbols(
  tickers = symbols,
  first.date = first.date,
  last.date = last.date,
  freq.data = "daily",
  type.return = "log"
)
## Warning: `BatchGetSymbols()` was deprecated in BatchGetSymbols 2.6.4.
## ℹ Please use `yfR::yf_get()` instead.
## ℹ 2022-05-01: Package BatchGetSymbols will soon be replaced by yfR.  More
##   details about the change is available at github
##   <<www.github.com/msperlin/yfR> You can install yfR by executing:
## 
## remotes::install_github('msperlin/yfR')
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 
## Running BatchGetSymbols for:
##    tickers =AAPL, MSFT, GOOG, AMZN
##    Downloading data for benchmark ticker
## ^GSPC | yahoo (1|1) | Not Cached | Saving cache
## AAPL | yahoo (1|4) | Not Cached | Saving cache - Got 100% of valid prices | Boa!
## MSFT | yahoo (2|4) | Not Cached | Saving cache - Got 100% of valid prices | OK!
## GOOG | yahoo (3|4) | Not Cached | Saving cache - Got 100% of valid prices | Good job!
## AMZN | yahoo (4|4) | Not Cached | Saving cache - Got 100% of valid prices | You got it!

Entendendo as variáveis

# Pull the per-ticker table out of the list returned by BatchGetSymbols.
# as.data.frame() guarantees a plain data frame for reshape() and for the
# row names set at the end, whatever table class the package returns.
stocks_df <- as.data.frame(stocks_list$df.tickers)
# Keep only the date, the ticker and the return column.
stocks_df <- stocks_df[, c("ref.date", "ticker", "ret.adjusted.prices")]
# Pivot from long to wide: one row per date and one return column per ticker.
stocks_df <- reshape(stocks_df, idvar = "ref.date", timevar = "ticker",
                     direction = "wide")
# The first observation of a return series has no previous price, so it is
# NA; drop the rows that contain missing returns.
stocks_df <- na.omit(stocks_df)
# Strip the "ret.adjusted.prices." prefix so each return column is named
# after its ticker. The dots are escaped so they match literal dots only.
colnames(stocks_df) <- sub("^ret\\.adjusted\\.prices\\.", "",
                           colnames(stocks_df))
# Label every row with its date. A data frame has no separate index object;
# row names are the closest equivalent, and ref.date stays available as a
# regular column. Dates are unique here because the wide table has exactly
# one row per ref.date.
rownames(stocks_df) <- as.character(stocks_df$ref.date)

Inspecionando a série de dados de retornos

# One logical flag per column (TRUE for numeric columns), then a
# five-number-plus-mean summary of the flagged columns only.
mask <- vapply(stocks_df, is.numeric, logical(1))
summary(stocks_df[, mask])
##       AAPL                 MSFT                 GOOG           
##  Min.   :-0.0604716   Min.   :-0.0802954   Min.   :-0.1013131  
##  1st Qu.:-0.0084917   1st Qu.:-0.0079333   1st Qu.:-0.0095666  
##  Median : 0.0006939   Median : 0.0006785   Median : 0.0016291  
##  Mean   : 0.0006020   Mean   : 0.0008589   Mean   : 0.0008228  
##  3rd Qu.: 0.0101846   3rd Qu.: 0.0109729   3rd Qu.: 0.0108726  
##  Max.   : 0.0852364   Max.   : 0.0790589   Max.   : 0.0949936  
##       AMZN           
##  Min.   :-0.1513979  
##  1st Qu.:-0.0117953  
##  Median : 0.0003131  
##  Mean   : 0.0001582  
##  3rd Qu.: 0.0121914  
##  Max.   : 0.1269489

Calculando Média, Mediana, Desvio Padrão, Variância, Assimetria e Curtose!

# Descriptive statistics of the return columns, one value per ticker.
numeric_cols <- vapply(stocks_df, is.numeric, logical(1))
returns_only <- stocks_df[, numeric_cols]
# Apply a scalar statistic to every return column; the result is a numeric
# vector named after the columns.
col_stat <- function(stat) vapply(returns_only, stat, numeric(1))

# Mean
means <- colMeans(returns_only)
cat("Medias:\n")
## Medias:
print(means)
##         AAPL         MSFT         GOOG         AMZN 
## 0.0006019867 0.0008588702 0.0008228403 0.0001581823
# Median
medians <- col_stat(median)
cat("Medianas:\n")
## Medianas:
print(medians)
##         AAPL         MSFT         GOOG         AMZN 
## 0.0006938855 0.0006785121 0.0016290875 0.0003131233
# Standard deviation
desv <- col_stat(sd)
cat("Desvios Pad:\n")
## Desvios Pad:
print(desv)
##       AAPL       MSFT       GOOG       AMZN 
## 0.01723295 0.01689546 0.01968002 0.02262417
# Variance, computed directly ...
variances <- col_stat(var)
cat("Variancias:\n")
## Variancias:
print(variances)
##         AAPL         MSFT         GOOG         AMZN 
## 0.0002969746 0.0002854565 0.0003873033 0.0005118530
# ... and again as the squared standard deviation, as a cross-check.
cat("Variancias:\n")
## Variancias:
print(desv^2)
##         AAPL         MSFT         GOOG         AMZN 
## 0.0002969746 0.0002854565 0.0003873033 0.0005118530
# Skewness (asymmetry of the distribution)
skewnesses <- col_stat(skewness)
cat("Assimetria:\n")
## Assimetria:
print(skewnesses)
##        AAPL        MSFT        GOOG        AMZN 
##  0.10289515 -0.04407316 -0.12745262 -0.11849811
# Kurtosis. Per the e1071 documentation, kurtosis() reports excess kurtosis
# (3 already subtracted), so positive values mean heavier tails than a
# normal distribution.
kurtoses <- col_stat(kurtosis)
cat("Curtose:\n")
## Curtose:
print(kurtoses)
##     AAPL     MSFT     GOOG     AMZN 
## 1.830035 1.717124 3.262288 5.110315

Criando histogramas dos retornos

# Histograms of the returns: one panel per numeric column on a grid that is
# two panels wide.

# Select the numeric columns. drop = FALSE keeps a data frame even when only
# one column qualifies, so the column-wise code below always works.
numeric_cols <- vapply(stocks_df, is.numeric, logical(1))
numeric_df <- stocks_df[, numeric_cols, drop = FALSE]

# Grid dimensions: two columns and as many rows as needed (at least one, so
# the layout stays valid even when there is nothing to plot).
num_plots <- ncol(numeric_df)
num_cols <- 2
num_rows <- max(1, ceiling(num_plots / num_cols))

# par() returns the previous settings; keep them so the layout that was
# active before this block can be put back afterwards.
old_par <- par(mfrow = c(num_rows, num_cols))

# Draw one histogram per numeric column.
for (colname in names(numeric_df)) {
  hist(numeric_df[[colname]], main = paste("Histograma de", colname),
       xlab = colname, ylab = "Freq", col = "blue", border = "black")
}

# Restore the plotting layout that was in place before.
par(old_par)

Criando um gráfico de dispersão entre as colunas

# Base-graphics scatterplot matrix of the return columns: calling plot() on
# the table is all that is needed.
plot(numeric_df)

# Number of columns to include in the ggpairs matrix.
n <- ncol(numeric_df)
# NOTE(review): repr.plot.* look like notebook (Jupyter/IRkernel) figure-size
# options and presumably have no effect in other front ends -- confirm.
options(repr.plot.width = 16, repr.plot.height = 8)
# Scatterplot matrix with ggpairs over every column. seq_len(n) is used
# instead of 1:n so that an empty table yields an empty selection rather
# than the invalid index vector c(1, 0).
ggpairs(numeric_df, columns = seq_len(n))

Mapa simples de correlação

# Correlation matrix of the return columns (cor() defaults to Pearson).
correlations <- cor(numeric_df)
# NOTE(review): repr.plot.* look like notebook (Jupyter/IRkernel) figure-size
# options -- confirm they matter for the front end in use.
options(repr.plot.width = 10, repr.plot.height = 8)
# Diverging palette: blue for low values, white in the middle, red for high.
corr_palette <- colorRampPalette(c("blue", "white", "red"))(100)
# Heat map of the correlations.
heatmap(correlations,
        col = corr_palette,
        # Treat the matrix as symmetric: rows and columns get the same
        # ordering. The full matrix is still drawn, not just one triangle.
        symm = TRUE,
        # Plot the correlations as they are; no row/column standardisation.
        scale = "none",
        # Forwarded to image(): pin the colour scale to the full correlation
        # range so white means zero correlation. Without it the palette is
        # stretched over the observed values only.
        zlim = c(-1, 1),
        # Space reserved for the column and row labels.
        margins = c(5, 5))

Repetindo o correlograma com mais beleza

# Correlation plot with corrplot: coloured cells, upper triangle only,
# variables ordered by hierarchical clustering, coefficients printed in
# black and axis labels in black rotated by 45 degrees.
blue_white_red <- colorRampPalette(c("blue", "white", "red"))(100)
corrplot(
  correlations,
  method = "color",
  type = "upper",
  order = "hclust",
  col = blue_white_red,
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)