library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(lubridate)

# Analysis window: start three calendar months before the current date.
today <- Sys.Date()
date <- today %m-% months(3)
print(date)
## [1] "2021-10-08"
# Get stock information: the index holdings first, then their daily prices.
tickers_df <- tq_index("SP500")
## Getting holdings for SP500
# The holdings list writes share-class tickers with a dot ("BRK.B", "BF.B"),
# while Yahoo Finance expects a hyphen ("BRK-B", "BF-B"). Passed unchanged,
# those symbols fail to import and tq_get() removes them with a warning, so
# the symbols are normalised on the copy handed to tq_get(); tickers_df itself
# is left untouched.
# `date` is the start of the three-month window computed above.
tickers_df2 <- tickers_df %>%
  mutate(symbol = gsub(".", "-", symbol, fixed = TRUE)) %>%
  tq_get(from = date)
# Preview the first rows of the downloaded price table.
head(tickers_df2)
## # A tibble: 6 × 15
##   symbol company    identifier sedol   weight sector  shares_held local_currency
##   <chr>  <chr>      <chr>      <chr>    <dbl> <chr>         <dbl> <chr>         
## 1 AAPL   Apple Inc. 03783310   2046251 0.0674 Inform…   175321170 USD           
## 2 AAPL   Apple Inc. 03783310   2046251 0.0674 Inform…   175321170 USD           
## 3 AAPL   Apple Inc. 03783310   2046251 0.0674 Inform…   175321170 USD           
## 4 AAPL   Apple Inc. 03783310   2046251 0.0674 Inform…   175321170 USD           
## 5 AAPL   Apple Inc. 03783310   2046251 0.0674 Inform…   175321170 USD           
## 6 AAPL   Apple Inc. 03783310   2046251 0.0674 Inform…   175321170 USD           
## # … with 7 more variables: date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>
# Daily returns per security, kept in their own table.
# Grouping by company / sector / symbol makes tq_transmute() work on each
# security separately; the result is ungrouped again before it is stored.
# Later steps read the return column as `daily.returns`.
daily_sector <- tickers_df2 %>%
  group_by(company, sector, symbol) %>%
  tq_transmute(
    select = adjusted,
    mutate_fun = periodReturn,
    period = "daily"
  ) %>%
  ungroup()

# Average-return table: one row per company / sector pair.
# Volatility is approximated by the standard deviation of the daily returns.
# Rows are ordered by rounded average return, ties broken by volatility.
avg_return <- daily_sector %>%
  group_by(company, sector) %>%
  summarise(
    avg_return = round(mean(daily.returns), 4),
    Volatility = sd(daily.returns),
    # Without this the result stays grouped by `company`, which makes later
    # group-aware verbs operate per company instead of on the whole table.
    .groups = "drop"
  ) %>%
  arrange(desc(avg_return), desc(Volatility))
avg_return %>% head()
## # A tibble: 6 × 4
##   company              sector                 avg_return Volatility
##   <chr>                <chr>                       <dbl>      <dbl>
## 1 Ford Motor Company   Consumer Discretionary     0.0081     0.0309
## 2 Teradyne Inc.        Information Technology     0.0061     0.0246
## 3 Qualcomm Inc         Information Technology     0.0059     0.0263
## 4 Dollar Tree Inc.     Consumer Discretionary     0.0059     0.0255
## 5 Arista Networks Inc. Information Technology     0.0057     0.0328
## 6 HP Inc.              Information Technology     0.0055     0.0221
# Average-return table that also carries the ticker symbol, one row per
# symbol / company / sector. Same measures and ordering as the table above:
# rounded mean daily return, and standard deviation of daily returns as
# Volatility.
avg_return3 <- daily_sector %>%
  group_by(symbol, company, sector) %>%
  summarise(
    avg_return3 = round(mean(daily.returns), 4),
    Volatility = sd(daily.returns),
    # Without this the result stays grouped by `symbol` and `company`, so
    # later group-aware verbs would run on single-row groups.
    .groups = "drop"
  ) %>%
  arrange(desc(avg_return3), desc(Volatility))
avg_return3 %>% head()
## # A tibble: 6 × 5
##   symbol company              sector                 avg_return3 Volatility
##   <chr>  <chr>                <chr>                        <dbl>      <dbl>
## 1 F      Ford Motor Company   Consumer Discretionary      0.0081     0.0309
## 2 TER    Teradyne Inc.        Information Technology      0.0061     0.0246
## 3 QCOM   Qualcomm Inc         Information Technology      0.0059     0.0263
## 4 DLTR   Dollar Tree Inc.     Consumer Discretionary      0.0059     0.0255
## 5 ANET   Arista Networks Inc. Information Technology      0.0057     0.0328
## 6 HPQ    HP Inc.              Information Technology      0.0055     0.0221
# question 1
# Horizontal bar chart of the 20 companies at the top of the average-return
# table (the table is already sorted by descending return). Bars are coloured
# by return; the legend is hidden.
avg_return %>%
  head(20) %>%
  ggplot(
    aes(
      x = reorder(company, -avg_return),
      y = avg_return,
      fill = avg_return
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Securities With Highest Average Returns In SP500 Over Past 3 Month",
    x = "Security",
    y = "Average Return"
  ) +
  theme_classic() +
  theme(legend.position = "none")

# question 2
# Flag the three most volatile securities so they stand out in the scatter
# plot. The flag is derived from the data rather than from a fixed ticker
# list: the three-month window moves with Sys.Date(), so the ranking changes
# between runs and a hard-coded list would mislabel the points.
avg_return3 <- avg_return3 %>%
  # Rank across the whole table; if per-symbol groups were still attached,
  # every row would rank first within its own single-row group.
  ungroup() %>%
  mutate(
    Indicator = case_when(
      # A missing Volatility gives an NA rank and falls through to the default.
      row_number(desc(Volatility)) <= 3 ~ "3 Highest volatility companies",
      TRUE ~ "The Rest of the SP500"
    )
  )

# Scatter of average return against volatility, drawn with ticker symbols
# instead of points and coloured by the indicator above.
plot <- avg_return3 %>%
  ggplot(aes(x = avg_return3, y = Volatility, color = Indicator)) +
  geom_text(aes(label = symbol), size = 3) +
  labs(
    title = "Average Return vs Volatility Over Last 3 Months In SP500",
    x = "Average Return",
    subtitle = "Data Source: Yahoo Finance"
  ) +
  theme_minimal()

plot