A. Gráficos de preços B. Gráficos de retornos C. Histograma dos retornos
Você deve escolher 4 (quatro) variáveis disponíveis no Yahoo Finance e verificar os fatos estilizados de séries temporais financeiras, seguindo aproximadamente o que foi mostrado na aula.
Extra: Calcule as principais estatísticas descritivas das variáveis: média, desvio-padrão, variância, curtose, assimetria, etc.
Extra: Calcule o coeficiente de correlação entre dois retornos e faça o gráfico de dispersão.
Extra: Você pode fazer também o mapa de calor para as correlações entre os quatro ativos escolhidos.
# install.packages("quantmod")
# install.packages("dplyr")
# install.packages("BatchGetSymbols")
# install.packages("GGally")
# install.packages("corrplot")
# install.packages("e1071")
suppressMessages(library(quantmod))
## Warning: package 'quantmod' was built under R version 4.3.3
suppressMessages(library(dplyr))
## Warning: package 'dplyr' was built under R version 4.3.3
suppressMessages(library(ggplot2))
## Warning: package 'ggplot2' was built under R version 4.3.3
suppressMessages(library(BatchGetSymbols))
## Warning: package 'BatchGetSymbols' was built under R version 4.3.3
suppressMessages(library(GGally))
## Warning: package 'GGally' was built under R version 4.3.3
suppressMessages(library(corrplot))
## Warning: package 'corrplot' was built under R version 4.3.3
suppressMessages(library(e1071))
## Warning: package 'e1071' was built under R version 4.3.3
# Tickers of the stocks to download.
symbols <- c("AAPL", "MSFT", "GOOG", "AMZN")
# Fetch the Yahoo history of every ticker; each list entry is named after
# its ticker so the series can be looked up by symbol.
data_list <- lapply(
  setNames(symbols, symbols),
  function(ticker) getSymbols(ticker, src = "yahoo", auto.assign = FALSE)
)
# Bind all downloaded series column-wise into one object and preview it.
df <- do.call(cbind, data_list)
print(head(df, n = 4))
## AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
## 2007-01-03 3.081786 3.092143 2.925000 2.992857 1238319600 2.533751
## 2007-01-04 3.001786 3.069643 2.993571 3.059286 847260400 2.589989
## 2007-01-05 3.063214 3.078571 3.014286 3.037500 834741600 2.571545
## 2007-01-08 3.070000 3.090357 3.045714 3.052500 797106800 2.584246
## MSFT.Open MSFT.High MSFT.Low MSFT.Close MSFT.Volume MSFT.Adjusted
## 2007-01-03 29.91 30.25 29.40 29.86 76935100 21.39744
## 2007-01-04 29.70 29.97 29.44 29.81 45774500 21.36159
## 2007-01-05 29.63 29.75 29.45 29.64 44607200 21.23977
## 2007-01-08 29.65 30.10 29.53 29.93 50220200 21.44760
## GOOG.Open GOOG.High GOOG.Low GOOG.Close GOOG.Volume GOOG.Adjusted
## 2007-01-03 11.60650 11.87200 11.48470 11.64610 309415434 11.63286
## 2007-01-04 11.68122 12.05357 11.66503 12.03638 316686586 12.02270
## 2007-01-05 12.01746 12.14199 11.90812 12.13427 275914333 12.12047
## 2007-01-08 12.14672 12.20102 12.00998 12.04436 190888826 12.03067
## AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted
## 2007-01-03 1.9340 1.9530 1.9025 1.9350 248102000 1.9350
## 2007-01-04 1.9295 1.9570 1.9130 1.9450 126368000 1.9450
## 2007-01-05 1.9360 1.9395 1.8800 1.9185 132394000 1.9185
## 2007-01-08 1.9110 1.9155 1.8585 1.8750 135660000 1.8750
# Count the missing values in each column; anything above zero would need
# to be fixed before going on.
missing_per_column <- colSums(is.na(df))
print(missing_per_column)
## AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume
## 0 0 0 0 0
## AAPL.Adjusted MSFT.Open MSFT.High MSFT.Low MSFT.Close
## 0 0 0 0 0
## MSFT.Volume MSFT.Adjusted GOOG.Open GOOG.High GOOG.Low
## 0 0 0 0 0
## GOOG.Close GOOG.Volume GOOG.Adjusted AMZN.Open AMZN.High
## 0 0 0 0 0
## AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted
## 0 0 0 0
# Keep only the columns whose name contains "Adjusted" (the adjusted prices).
adjusted_names <- grep("Adjusted", colnames(df), value = TRUE)
df_adj <- df[, adjusted_names]
print(head(df_adj))
## AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03 2.533751 21.39744 11.63286 1.9350
## 2007-01-04 2.589989 21.36159 12.02270 1.9450
## 2007-01-05 2.571545 21.23977 12.12047 1.9185
## 2007-01-08 2.584246 21.44760 12.03067 1.8750
## 2007-01-09 2.798918 21.46908 12.07843 1.8890
## 2007-01-10 2.932862 21.25411 12.17695 1.8575
# Quick look at the adjusted prices through the series' autoplot method.
autoplot(df_adj) + labs(title = "Auto-plot das series temporais")
# Reshape the adjusted prices to long format (one row per date/ticker pair)
# for ggplot2. The dates are read straight from the series index, so df_adj
# itself is not modified and keeps only its numeric price columns for the
# return calculations that follow.
n_obs <- nrow(df_adj)
n_series <- ncol(df_adj)
df_long <- data.frame(
  # coredata() is flattened column by column, so the full date vector is
  # repeated once per series and each name is repeated once per observation.
  date = rep(index(df_adj), times = n_series),
  variable = rep(colnames(df_adj), each = n_obs),
  value = as.vector(coredata(df_adj))
)
# One line per series, coloured by series name.
ggplot(data = df_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(
    title = "Series Temporais",
    x = "Data",
    y = "Valor",
    color = "Empresa"
  ) +
  theme_minimal()
# Log returns are built from log prices: start by taking the natural log of
# the adjusted prices.
df_ret <- log(df_adj)
print(head(df_ret, n = 4))
## AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03 0.9297010 3.063271 2.453834 0.6601073
## 2007-01-04 0.9516537 3.061595 2.486797 0.6652620
## 2007-01-05 0.9445070 3.055875 2.494896 0.6515436
## 2007-01-08 0.9494338 3.065613 2.487459 0.6286087
# The first difference subtracts row i from row i + 1. The result is only
# displayed here; df_ret itself is not changed by this call.
diff(df_ret, lag = 1, differences = 1)
## AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03 NA NA NA NA
## 2007-01-04 0.021952694 -0.001676507 0.032962925 0.0051547071
## 2007-01-05 -0.007146674 -0.005719192 0.008099363 -0.0137184012
## 2007-01-08 0.004926725 0.009737225 -0.007437316 -0.0229349434
## 2007-01-09 0.079799251 0.001001213 0.003962550 0.0074389599
## 2007-01-10 0.046745832 -0.010063723 0.008123390 -0.0168161448
## 2007-01-11 -0.012448192 0.034463198 0.020745213 0.0067069589
## 2007-01-12 -0.012393824 0.016476143 0.010510455 0.0211647912
## 2007-01-16 0.025872188 -0.001603504 -0.001426797 0.0119699594
## 2007-01-17 -0.022390940 -0.001927314 -0.013978363 -0.0203821610
## ...
## 2024-06-05 0.007790460 0.018903530 0.011016637 0.0107593654
## 2024-06-06 -0.007121841 0.001202030 0.007202714 0.0203130345
## 2024-06-07 0.012315886 -0.001579460 -0.013548081 -0.0037909438
## 2024-06-10 -0.019333462 0.009439766 0.004994646 0.0148645267
## 2024-06-11 0.070131407 0.011179008 0.008793234 0.0009083768
## 2024-06-12 0.028177646 0.019182507 0.007658990 -0.0018175796
## 2024-06-13 0.005476123 0.001178259 -0.015829643 -0.0165087778
## 2024-06-14 -0.008201953 0.002239487 0.009180260 -0.0009251853
## 2024-06-17 0.019480496 0.013020113 0.002295976 0.0021755360
## 2024-06-18 -0.011045244 -0.004537788 -0.013118460 -0.0068144294
# At this point df_ret still holds the log prices.
head(df_ret, 4)
## AAPL.Adjusted MSFT.Adjusted GOOG.Adjusted AMZN.Adjusted
## 2007-01-03 0.9297010 3.063271 2.453834 0.6601073
## 2007-01-04 0.9516537 3.061595 2.486797 0.6652620
## 2007-01-05 0.9445070 3.055875 2.494896 0.6515436
## 2007-01-08 0.9494338 3.065613 2.487459 0.6286087
# Turn the log prices into log returns by storing their first difference,
# then drop the leading row, which is NA because the first observation has
# no previous value to be compared with.
df_ret <- na.omit(diff(df_ret))
# Redraw the same line chart as before, this time from df_ret.
# Long format: one row per date/series pair.
obs_dates <- index(df_ret)
series_names <- colnames(df_ret)
df_long <- data.frame(
  date = rep(obs_dates, times = length(series_names)),
  variable = rep(series_names, each = nrow(df_ret)),
  value = as.vector(coredata(df_ret))
)
# One line per series, coloured by series name.
ggplot(data = df_long, aes(x = date, y = value, color = variable)) +
  geom_line() +
  labs(
    title = "Series Temporais",
    x = "Data",
    y = "Valor",
    color = "Empresa"
  ) +
  theme_minimal()
# Tickers to analyse.
symbols <- c("AAPL", "MSFT", "GOOG", "AMZN")
# Sample window: from the start of 2021 up to the day the script is run.
first.date <- "2021-01-01"
last.date <- Sys.Date()
# Download daily data for every ticker, requesting logarithmic returns.
stocks_list <- BatchGetSymbols(
  tickers = symbols,
  first.date = first.date,
  last.date = last.date,
  freq.data = "daily",
  type.return = "log"
)
## Warning: `BatchGetSymbols()` was deprecated in BatchGetSymbols 2.6.4.
## ℹ Please use `yfR::yf_get()` instead.
## ℹ 2022-05-01: Package BatchGetSymbols will soon be replaced by yfR. More
## details about the change is available at github
## <<www.github.com/msperlin/yfR> You can install yfR by executing:
##
## remotes::install_github('msperlin/yfR')
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##
## Running BatchGetSymbols for:
## tickers =AAPL, MSFT, GOOG, AMZN
## Downloading data for benchmark ticker
## ^GSPC | yahoo (1|1) | Not Cached | Saving cache
## AAPL | yahoo (1|4) | Not Cached | Saving cache - Got 100% of valid prices | Boa!
## MSFT | yahoo (2|4) | Not Cached | Saving cache - Got 100% of valid prices | OK!
## GOOG | yahoo (3|4) | Not Cached | Saving cache - Got 100% of valid prices | Good job!
## AMZN | yahoo (4|4) | Not Cached | Saving cache - Got 100% of valid prices | You got it!
# Take the per-ticker data frame out of the list returned by BatchGetSymbols.
stocks_df <- stocks_list$df.tickers
# Keep only the date, the ticker and the return column.
stocks_df <- stocks_df[, c("ref.date", "ticker", "ret.adjusted.prices")]
# Reshape from long to wide: one row per date, one return column per ticker.
stocks_df <- reshape(stocks_df, idvar = "ref.date", timevar = "ticker", direction = "wide")
# Drop every row that has a missing return (the first date has no previous
# price, so its return is NA).
stocks_df <- na.omit(stocks_df)
# Strip the "ret.adjusted.prices." prefix so each return column is named
# after its ticker only.
colnames(stocks_df) <- sub("^ret.adjusted.prices\\.", "", colnames(stocks_df))
# Label the rows by their reference date. A data frame has no ".index"
# attribute to assign to; row names are the way to index rows by date.
rownames(stocks_df) <- as.character(stocks_df$ref.date)
# Flag the numeric columns (this leaves the date column out) and print the
# usual five-number summary plus mean for each of them.
mask <- vapply(stocks_df, is.numeric, logical(1))
summary(stocks_df[, mask])
## AAPL MSFT GOOG
## Min. :-0.0604716 Min. :-0.0802954 Min. :-0.1013131
## 1st Qu.:-0.0084917 1st Qu.:-0.0079333 1st Qu.:-0.0095666
## Median : 0.0006939 Median : 0.0006785 Median : 0.0016291
## Mean : 0.0006020 Mean : 0.0008589 Mean : 0.0008228
## 3rd Qu.: 0.0101846 3rd Qu.: 0.0109729 3rd Qu.: 0.0108726
## Max. : 0.0852364 Max. : 0.0790589 Max. : 0.0949936
## AMZN
## Min. :-0.1513979
## 1st Qu.:-0.0117953
## Median : 0.0003131
## Mean : 0.0001582
## 3rd Qu.: 0.0121914
## Max. : 0.1269489
# One logical flag per column: TRUE for the numeric (return) columns.
numeric_cols <- vapply(stocks_df, is.numeric, logical(1))
# Mean of every numeric column.
means <- colMeans(stocks_df[numeric_cols])
cat("Medias:\n")
## Medias:
print(means)
## AAPL MSFT GOOG AMZN
## 0.0006019867 0.0008588702 0.0008228403 0.0001581823
# Median of every numeric column.
medians <- vapply(stocks_df[numeric_cols], median, numeric(1))
cat("Medianas:\n")
## Medianas:
print(medians)
## AAPL MSFT GOOG AMZN
## 0.0006938855 0.0006785121 0.0016290875 0.0003131233
# Standard deviation of every numeric column.
desv <- vapply(stocks_df[numeric_cols], sd, numeric(1))
cat("Desvios Pad:\n")
## Desvios Pad:
print(desv)
## AAPL MSFT GOOG AMZN
## 0.01723295 0.01689546 0.01968002 0.02262417
# Variance of every numeric column.
variances <- vapply(stocks_df[numeric_cols], var, numeric(1))
cat("Variancias:\n")
## Variancias:
print(variances)
## AAPL MSFT GOOG AMZN
## 0.0002969746 0.0002854565 0.0003873033 0.0005118530
# Cross-check: squaring the standard deviations must give the same numbers.
cat("Variancias:\n")
## Variancias:
print(desv^2)
## AAPL MSFT GOOG AMZN
## 0.0002969746 0.0002854565 0.0003873033 0.0005118530
# Skewness of every numeric column.
skewnesses <- vapply(stocks_df[numeric_cols], skewness, numeric(1))
cat("Assimetria:\n")
## Assimetria:
print(skewnesses)
## AAPL MSFT GOOG AMZN
## 0.10289515 -0.04407316 -0.12745262 -0.11849811
# Kurtosis of every numeric column.
# NOTE(review): assuming `kurtosis` resolves to e1071::kurtosis, its default
# is excess kurtosis (a normal distribution gives 0, not 3) - confirm
# before comparing these values against the usual benchmark of 3.
kurtoses <- vapply(stocks_df[numeric_cols], kurtosis, numeric(1))
cat("Curtose:\n")
## Curtose:
print(kurtoses)
## AAPL MSFT GOOG AMZN
## 1.830035 1.717124 3.262288 5.110315
# Work only with the numeric columns of stocks_df.
numeric_cols <- vapply(stocks_df, is.numeric, logical(1))
numeric_df <- stocks_df[, numeric_cols]
# Panel grid: two histograms per row and as many rows as needed to fit one
# histogram per column.
num_cols <- 2
num_plots <- length(numeric_df)
num_rows <- ceiling(num_plots / num_cols)
par(mfrow = c(num_rows, num_cols))
# Draw one histogram per numeric column, titled with the column name.
series_names <- names(numeric_df)
for (i in seq_along(series_names)) {
  current <- series_names[i]
  hist(
    numeric_df[[current]],
    main = paste("Histograma de", current),
    xlab = current,
    ylab = "Freq",
    col = "blue",
    border = "black"
  )
}
# Go back to a single plot per device page.
par(mfrow = c(1, 1))
# Plotting the data frame directly is already enough to get a quick
# base-graphics view of the columns against each other.
plot(numeric_df)
# Number of columns to include in the ggpairs matrix.
n <- ncol(numeric_df)
# Plot-size options (repr.* names; presumably only honoured by notebook
# front ends - confirm).
options(repr.plot.width = 16, repr.plot.height = 8)
# Scatter-plot matrix of all columns using ggpairs.
ggpairs(numeric_df, columns = seq_len(n))
# Correlation matrix of the numeric columns.
correlations <- cor(numeric_df)
options(repr.plot.width = 10, repr.plot.height = 8)
# Shared blue-white-red palette with 100 colour steps for both charts.
corr_palette <- colorRampPalette(c("blue", "white", "red"))(100)
# Base-graphics heat map of the correlations.
heatmap(
  correlations,
  col = corr_palette,
  symm = TRUE,       # treat the square matrix symmetrically (does not hide half of it)
  scale = "none",    # plot the raw correlations, no row/column rescaling
  margins = c(5, 5)  # margins around the chart
)
# corrplot version: upper triangle only, variables ordered by hierarchical
# clustering, with the coefficients written in black on the tiles.
corrplot(
  correlations,
  method = "color",
  type = "upper",
  col = corr_palette,
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45
)