Instalar Bibliotecas

#  Importar bibliotecas e obter os dados do Yahoo Finance
#install.packages('cowplot')
#install.packages('dplyr')
#install.packages('moments')
#install.packages("e1071")
#install.packages("plyr")
library(BatchGetSymbols)
## Warning: package 'BatchGetSymbols' was built under R version 4.3.1
## Carregando pacotes exigidos: rvest
## Carregando pacotes exigidos: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
library(rugarch)
## Warning: package 'rugarch' was built under R version 4.3.1
## Carregando pacotes exigidos: parallel
## 
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
## 
##     sigma
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.2     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()         masks stats::filter()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ purrr::reduce()         masks rugarch::reduce()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.1
library(ggplot2)
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.1
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(dplyr)
library(moments)
# Run this analysis in a fresh R session (or let knitr create one) instead of
# calling rm(list = ls()): that call deletes the caller's objects without
# resetting loaded packages or options, so it does not give a clean state.

1. Escolha 4 variáveis disponíveis no Yahoo Finance, prepare os dados e entenda como eles foram armazenados nesse dataset.

# The four stocks chosen for the study (Yahoo Finance symbols)
my.tickers <- c("EGIE3.SA", "PETR4.SA", "PSSA3.SA", "WEGE3.SA")

# Benchmark ticker and sample window; the window ends on the day the script runs
my.bench <- "^BVSP"
first.date <- "2014-01-01"
last.datetoday <- Sys.Date()

# Download daily data and log returns for every ticker in a single call
my.l <- BatchGetSymbols(
  tickers = my.tickers,
  first.date = first.date,
  last.date = last.datetoday,
  bench.ticker = my.bench,
  type.return = "log",
  freq.data = "daily"
)
## Warning: `BatchGetSymbols()` was deprecated in BatchGetSymbols 2.6.4.
## ℹ Please use `yfR::yf_get()` instead.
## ℹ 2022-05-01: Package BatchGetSymbols will soon be replaced by yfR.  More
##   details about the change is available at github
##   <<www.github.com/msperlin/yfR> You can install yfR by executing:
## 
## remotes::install_github('msperlin/yfR')
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 
## Running BatchGetSymbols for:
##    tickers =EGIE3.SA, PETR4.SA, PSSA3.SA, WEGE3.SA
##    Downloading data for benchmark ticker
## ^BVSP | yahoo (1|1) | Not Cached | Saving cache
## EGIE3.SA | yahoo (1|4) | Not Cached | Saving cache - Got 100% of valid prices | Looking good!
## PETR4.SA | yahoo (2|4) | Not Cached | Saving cache - Got 100% of valid prices | Got it!
## PSSA3.SA | yahoo (3|4) | Not Cached | Saving cache - Got 100% of valid prices | You got it!
## WEGE3.SA | yahoo (4|4) | Not Cached | Saving cache - Got 100% of valid prices | Looking good!

1.1 - Verificando se os dados estão corretos: analisar as variáveis ticker, src, download.status, total.obs, perc.benchmark.dates e threshold.decision

# Inspect the download result: the control table (ticker, src,
# download.status, total.obs, perc.benchmark.dates, threshold.decision)
# and the stacked price/return data
my.l %>% glimpse()
## List of 2
##  $ df.control: tibble [4 × 6] (S3: tbl_df/tbl/data.frame)
##   ..$ ticker              : chr [1:4] "EGIE3.SA" "PETR4.SA" "PSSA3.SA" "WEGE3.SA"
##   ..$ src                 : chr [1:4] "yahoo" "yahoo" "yahoo" "yahoo"
##   ..$ download.status     : chr [1:4] "OK" "OK" "OK" "OK"
##   ..$ total.obs           : int [1:4] 2360 2360 2360 2360
##   ..$ perc.benchmark.dates: num [1:4] 1 1 1 1
##   ..$ threshold.decision  : chr [1:4] "KEEP" "KEEP" "KEEP" "KEEP"
##  $ df.tickers:'data.frame':  9440 obs. of  10 variables:
##   ..$ price.open         : num [1:9440] 28.8 29.4 29.2 28.9 28.6 ...
##   ..$ price.high         : num [1:9440] 29.3 30.8 29.6 29.1 28.8 ...
##   ..$ price.low          : num [1:9440] 28.6 29.2 28.5 28.5 28.2 ...
##   ..$ price.close        : num [1:9440] 29.2 29.4 28.8 28.6 28.5 ...
##   ..$ volume             : num [1:9440] 702875 4576875 988500 765000 739375 ...
##   ..$ price.adjusted     : num [1:9440] 15.7 15.8 15.5 15.3 15.3 ...
##   ..$ ref.date           : Date[1:9440], format: "2014-01-02" "2014-01-03" ...
##   ..$ ticker             : chr [1:9440] "EGIE3.SA" "EGIE3.SA" "EGIE3.SA" "EGIE3.SA" ...
##   ..$ ret.adjusted.prices: num [1:9440] NA 0.00601 -0.01924 -0.00808 -0.00224 ...
##   ..$ ret.closing.prices : num [1:9440] NA 0.00601 -0.01924 -0.00808 -0.00224 ...

1.2 - Os dados das ações estão ok; porém, os dados de todas as ações estão empilhados em uma única tabela, e será necessário filtrá-los por ação mais à frente.

# The rows of every stock are stacked in one long data frame.
# Take that table and drop every row containing an NA in any column
# (na.omit works row-wise).
myassets <- na.omit(my.l$df.tickers)

# Quick checks, kept for interactive use:
# print(myassets)
# head(myassets$ret.closing.prices)

1.3 - Verificar se todos os dados estão corretos.

# Count how many tickers were kept or dropped according to threshold.decision

table(my.l[["df.control"]][["threshold.decision"]])
## 
## KEEP 
##    4

2. Escolha 4 variáveis disponíveis no Yahoo Finance e verifique os fatos estilizados de séries temporais financeiras, de forma semelhante ao que foi mostrado na aula.

2.1 - Criando uma função para plotar os gráficos para as 4 ações escolhidas

# Plot a 2 x 2 panel for one stock: daily returns, QQ-plot of the returns,
# absolute returns and a histogram with a kernel density overlay.
#
# Args:
#   acao: data frame holding the rows of a single ticker. The function reads
#         the columns `ref.date`, `ticker` and `ret.closing.prices`.
#
# Returns:
#   The grid object built by plot_grid() (printed when called at top level).
plot_graficos <- function(acao) {
  # Fail early with a clear message, e.g. when a filter matched no ticker
  stopifnot(is.data.frame(acao), nrow(acao) > 0)

  nome_acao <- acao$ticker[1]

  # Build the period label from the data itself, so it stays correct whatever
  # date range was downloaded
  periodo <- paste(
    "Período: de", format(min(acao$ref.date, na.rm = TRUE), "%d/%m/%Y"),
    "a", format(max(acao$ref.date, na.rm = TRUE), "%d/%m/%Y")
  )

  daily_returns <- acao %>% select(ref.date, ticker, ret.closing.prices)

  # Panel 1: daily returns
  plot_returns <- ggplot(daily_returns) +
    geom_line(aes(x = ref.date, y = ret.closing.prices), color = "#006600") +
    labs(x = "", y = "Retornos",
         title = paste("Retornos da", nome_acao),
         subtitle = periodo,
         caption = "Fonte: B3") +
    theme_economist()

  # Panel 2: absolute returns
  plot_volatility <- ggplot(daily_returns) +
    geom_line(aes(x = ref.date, y = abs(ret.closing.prices)),
              color = "#006600") +
    labs(x = "", y = "Retornos absolutos",
         title = paste("Retornos abs da", nome_acao),
         subtitle = periodo,
         caption = "Fonte: B3") +
    theme_economist()

  # Panel 3: QQ-plot of the returns against the default theoretical
  # distribution of stat_qq()
  qqplot <- ggplot(daily_returns, aes(sample = ret.closing.prices)) +
    stat_qq() +
    stat_qq_line() +
    labs(x = "Teórico", y = "Amostra",
         title = "QQplot",
         subtitle = paste("Retornos diários da", nome_acao),
         caption = "Fonte: Elaborado a partir de dados da B3") +
    theme_economist()

  # Panel 4: histogram on the density scale plus kernel density.
  # after_stat(density) replaces the deprecated `..density..` notation.
  histogram <- ggplot(daily_returns) +
    geom_histogram(aes(x = ret.closing.prices, y = after_stat(density)),
                   color = "white", fill = "Dark grey", linetype = "solid",
                   alpha = 0.8, binwidth = 0.02) +
    geom_density(aes(x = ret.closing.prices, y = after_stat(density)),
                 color = "black") +
    labs(x = "", y = "Densidade",
         title = "Histograma",
         subtitle = paste("Retornos diários da", nome_acao),
         caption = "Fonte: Elaborado a partir de dados da B3") +
    theme_economist()

  # Arrange the four panels in two rows
  plot_grid(plot_returns, qqplot, plot_volatility, histogram, nrow = 2)
}

2.2 - Chamando a função plot_graficos() para plotar gráfico EGIE3.SA

myassets %>% filter(ticker == "EGIE3.SA") %>% plot_graficos()
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

2.3 - Chamando a função plot_graficos() para plotar gráfico PETR4.SA

myassets %>% filter(ticker == "PETR4.SA") %>% plot_graficos()

2.4 - Chamando a função plot_graficos() para plotar gráfico PSSA3.SA

myassets %>% filter(ticker == "PSSA3.SA") %>% plot_graficos()

2.5 - Chamando a função plot_graficos() para plotar gráfico WEGE3.SA

myassets %>% filter(ticker == "WEGE3.SA") %>% plot_graficos()

2. Calcule as principais estatísticas descritivas das variáveis: média \(\mu\), desvio padrão \(\sigma\), variância \(\sigma^2\), curtose, assimetria, etc.

2.1 - Criar uma função para calcular as principais estatísticas descritivas com base nos retornos das ações

# Column-wise summary of the stacked data (all tickers pooled together)
myassets %>% summary()
##    price.open      price.high      price.low      price.close    
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   : 4.024  
##  1st Qu.:14.34   1st Qu.:14.57   1st Qu.:14.12   1st Qu.:14.350  
##  Median :24.26   Median :24.70   Median :23.92   Median :24.280  
##  Mean   :23.25   Mean   :23.58   Mean   :22.91   Mean   :23.249  
##  3rd Qu.:29.68   3rd Qu.:30.09   3rd Qu.:29.29   3rd Qu.:29.652  
##  Max.   :54.10   Max.   :54.33   Max.   :53.32   Max.   :54.100  
##      volume          price.adjusted      ref.date             ticker         
##  Min.   :        0   Min.   : 1.546   Min.   :2014-01-03   Length:9436       
##  1st Qu.:  1247000   1st Qu.: 8.124   1st Qu.:2016-05-24   Class :character  
##  Median :  2888580   Median :14.799   Median :2018-09-25   Mode  :character  
##  Mean   : 17935110   Mean   :16.857   Mean   :2018-10-02                     
##  3rd Qu.: 18547330   3rd Qu.:22.724   3rd Qu.:2021-02-19                     
##  Max.   :698950612   Max.   :45.950   Max.   :2023-06-30                     
##  ret.adjusted.prices  ret.closing.prices  
##  Min.   :-0.3523668   Min.   :-0.3523667  
##  1st Qu.:-0.0108594   1st Qu.:-0.0110052  
##  Median : 0.0003856   Median : 0.0002636  
##  Mean   : 0.0006632   Mean   : 0.0004018  
##  3rd Qu.: 0.0122364   3rd Qu.: 0.0121354  
##  Max.   : 0.2006706   Max.   : 0.2006706

2.2 - Calcular estatísticas descritivas para cada ação

# Descriptive statistics of the closing-price returns, one row per ticker:
# mean, standard deviation, variance, kurtosis and skewness
estatisticas <- summarise(
  group_by(myassets, ticker),
  media = mean(ret.closing.prices),
  desvio_padrao = sd(ret.closing.prices),
  variancia = var(ret.closing.prices),
  curtose = kurtosis(ret.closing.prices),
  assimetria = skewness(ret.closing.prices)
)

print(estatisticas)
## # A tibble: 4 × 6
##   ticker      media desvio_padrao variancia curtose assimetria
##   <chr>       <dbl>         <dbl>     <dbl>   <dbl>      <dbl>
## 1 EGIE3.SA 0.000190        0.0156  0.000244    6.27    -0.0551
## 2 PETR4.SA 0.000240        0.0327  0.00107    13.7     -0.991 
## 3 PSSA3.SA 0.000276        0.0199  0.000396    4.97    -0.0619
## 4 WEGE3.SA 0.000902        0.0211  0.000443   13.4     -0.503

2.3 - Função para calcular os graus de liberdade da distribuição t-Student

# Choose the degrees of freedom used for the Student-t curve.
#
# Args:
#   data: vector of observations; only its length is used.
#
# Returns:
#   length(data) - 1 when there are fewer than 30 observations, otherwise 30.
#
# NOTE(review): this is a fixed sample-size rule, not an estimate fitted to
# the data - confirm it is what the analysis intends.
df_t_student <- function(data) {
  n <- length(data)
  # `n` is a scalar, so a plain if/else is the right tool (ifelse() is meant
  # for vectors)
  if (n < 30) n - 1 else 30
}

# Plot, for each ticker, a histogram of the returns together with a kernel
# density, a fitted normal curve and a Student-t curve.
#
# Args:
#   data:    data frame with the columns `ticker` and `ret.closing.prices`.
#   tickers: character vector of tickers to plot. Defaults to every ticker
#            present in `data`.
#
# Returns:
#   The ggplot object, invisibly; the plot is also printed.
plot_estatisticas_histograma <- function(data, tickers = unique(data$ticker)) {
  # Keep only the requested tickers
  data <- data[data$ticker %in% tickers, , drop = FALSE]
  stopifnot(nrow(data) > 0)

  # Theoretical curves are computed separately for each ticker, so every
  # facet is compared with a normal / t curve that uses that ticker's own
  # mean and standard deviation.
  curvas <- lapply(split(data, data$ticker), function(grupo) {
    ret <- grupo$ret.closing.prices
    mu <- mean(ret)
    s <- sd(ret)
    gl <- df_t_student(ret)
    # dt() is the standard t density; rescale it to the return scale so its
    # standard deviation matches the sample (valid when gl > 2)
    escala_t <- if (gl > 2) s * sqrt((gl - 2) / gl) else s
    x <- seq(min(ret), max(ret), length.out = 200)
    data.frame(
      ticker = grupo$ticker[1],
      x = x,
      normal = dnorm(x, mean = mu, sd = s),
      t_student = dt((x - mu) / escala_t, df = gl) / escala_t
    )
  })
  curvas <- do.call(rbind, curvas)

  grafico <- ggplot(data = data) +
    # after_stat(density) replaces the deprecated `..density..` notation;
    # bins = 30 is the ggplot2 default, stated explicitly
    geom_histogram(aes(x = ret.closing.prices, y = after_stat(density)),
                   bins = 30, color = "black", fill = "white") +
    facet_wrap(~ticker, ncol = 2) +
    geom_density(aes(x = ret.closing.prices),
                 color = "blue", fill = "lightblue", alpha = 0.5) +
    geom_line(data = curvas, aes(x = x, y = normal),
              color = "red", linetype = "dashed") +
    geom_line(data = curvas, aes(x = x, y = t_student),
              color = "green", linetype = "dotted") +
    labs(x = "Retornos", y = "Densidade",
         title = "Distribuição Normal e t-Student",
         subtitle = "Comparação das distribuições para cada ação",
         caption = "Fonte: Dados da Ação") +
    theme_minimal()

  print(grafico)
  invisible(grafico)
}

# Draw the comparison histograms for the four chosen stocks
tickers <- c("EGIE3.SA", "PETR4.SA", "PSSA3.SA", "WEGE3.SA")
plot_estatisticas_histograma(data = myassets, tickers = tickers)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3. Calcule o coeficiente de correlação de Pearson \(r\) entre dois retornos e faça o gráfico de dispersão.

# Build the PSSA3 / EGIE3 return pairs, matched on the trading date so each
# row holds the two returns of the same day (pairing by row position would
# silently misalign the series if one ticker missed a date).
data <- inner_join(
  myassets %>%
    filter(ticker == "PSSA3.SA") %>%
    select(ref.date, PSSA3 = ret.closing.prices),
  myassets %>%
    filter(ticker == "EGIE3.SA") %>%
    select(ref.date, EGIE3 = ret.closing.prices),
  by = "ref.date"
) %>%
  select(PSSA3, EGIE3)

# Date-aligned return vectors for each stock
pssa3 <- data$PSSA3
egie3 <- data$EGIE3

# NOTE(review): `geral` is not used anywhere in the visible code - confirm
# whether it is still needed.
geral <- pssa3 + egie3

# Pearson correlation coefficient between the two return series
correlation <- cor(pssa3, egie3)

# Scatter plot of the paired returns; the coefficient is rounded for display
scatter_plot <- ggplot(data, aes(x = PSSA3, y = EGIE3)) +
  geom_point() +
  labs(x = "Retorno PSSA3", y = "Retorno EGIE3",
       title = "Gráfico de Dispersão - PSSA3 e EGIE3",
       subtitle = paste("Coeficiente de correlação: ", round(correlation, 4))) +
  theme_economist()

print(scatter_plot)

4. Faça também o mapa de calor para correlações entre os quatro ativos escolhidos.

# Load and install heatmaply package
#install.packages("heatmaply")
library(heatmaply)
## Warning: package 'heatmaply' was built under R version 4.3.1
## Carregando pacotes exigidos: plotly
## Warning: package 'plotly' was built under R version 4.3.1
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Carregando pacotes exigidos: viridis
## Warning: package 'viridis' was built under R version 4.3.1
## Carregando pacotes exigidos: viridisLite
## 
## ======================
## Welcome to heatmaply version 1.4.2
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
# Put the returns of the four stocks side by side, one row per trading date.
# Aligning on ref.date (instead of pairing vectors by position) guarantees
# that each row compares returns of the same day; dates missing for any of
# the four tickers are dropped.
retornos_por_data <- myassets %>%
  dplyr::filter(
    ticker %in% c("PSSA3.SA", "EGIE3.SA", "PETR4.SA", "WEGE3.SA")
  ) %>%
  dplyr::select(ref.date, ticker, ret.closing.prices) %>%
  tidyr::pivot_wider(names_from = ticker, values_from = ret.closing.prices) %>%
  tidyr::drop_na()

# Date-aligned return vectors for each stock
pssa3 <- retornos_por_data[["PSSA3.SA"]]
egie3 <- retornos_por_data[["EGIE3.SA"]]
petr4 <- retornos_por_data[["PETR4.SA"]]
wege3 <- retornos_por_data[["WEGE3.SA"]]

# Data frame with the returns of the four stocks
mystock4 <- data.frame(PSSA3 = pssa3, EGIE3 = egie3, PETR4 = petr4, WEGE3 = wege3)

# Interactive heat map of the correlation matrix
heatmaply_cor(x = cor(mystock4), xlab = "Ações", ylab = "Ações", k_col = 4, k_row = 4)
#install.packages("plotly")
#install.packages("ggcorrplot")
library(plotly)
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.1
# p-values matching each pairwise correlation of the four return series
p_mat <- cor_pmat(mystock4)

# Correlation matrix rounded to two decimals, drawn as a lower-triangle
# correlogram with the p-value matrix supplied to ggcorrplot()
corr_mat <- ggcorrplot(
  round(cor(mystock4), 2),
  p.mat = p_mat,
  type = "lower",
  hc.order = TRUE,
  outline.col = "white"
)

# Convert the static plot into an interactive one
ggplotly(corr_mat)