library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
rm(list=ls())
SP500 <- read_tsv("SP500-Stocks_long.txt")
## Rows: 829710 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): price.adjusted,ref.date,ticker
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
today <- Sys.Date()
date <-  today %m+% months(-3)
print(date)
## [1] "2021-10-08"
SP500_1 <- tq_get("^GSPC", from = date)
SP500_1 %>% 
  head()
## # A tibble: 6 × 8
##   symbol date        open  high   low close     volume adjusted
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>      <dbl>    <dbl>
## 1 ^GSPC  2021-10-08 4407. 4412. 4386. 4391. 2401890000    4391.
## 2 ^GSPC  2021-10-11 4385. 4416. 4361. 4361. 2580000000    4361.
## 3 ^GSPC  2021-10-12 4368. 4375. 4342. 4351. 2608150000    4351.
## 4 ^GSPC  2021-10-13 4358. 4373. 4330. 4364. 2926460000    4364.
## 5 ^GSPC  2021-10-14 4387. 4440. 4387. 4438. 2642920000    4438.
## 6 ^GSPC  2021-10-15 4448. 4476. 4448. 4471. 3000560000    4471.
url <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
tickers <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="constituents"]') %>% 
  html_table()
sp500tickers <- tickers[[1]]
sp500tickers = sp500tickers %>% mutate(Symbol = case_when(Symbol == "BRK.B" ~ "BRK-B",
                                           Symbol == "BF.B" ~ "BF-B",
                                            TRUE ~ as.character(Symbol)))
symbol = sp500tickers$Symbol
get_symbols <-  function(ticker = "AAPL"){
  df = tq_get(ticker, from = date) %>% mutate(symbol = rep(ticker, length(date)))
  return(df)
}
tickers_df = map(symbol, get_symbols) %>% bind_rows()
tickers_df = tickers_df %>% 
  left_join(sp500tickers, by = c('symbol' = 'Symbol')) %>% 
  clean_names() 
tickers_df %>% 
  head()
## # A tibble: 6 × 16
##   symbol date        open  high   low close volume adjusted security sec_filings
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl> <chr>    <chr>      
## 1 MMM    2021-10-08  178.  178.  177.  177. 2.47e6     176. 3M       reports    
## 2 MMM    2021-10-11  178.  179.  176.  176. 2.69e6     175. 3M       reports    
## 3 MMM    2021-10-12  176.  177.  175.  176. 2.16e6     174. 3M       reports    
## 4 MMM    2021-10-13  176.  178.  175.  177. 2.03e6     176. 3M       reports    
## 5 MMM    2021-10-14  178   180.  178.  180. 2.28e6     179. 3M       reports    
## 6 MMM    2021-10-15  181.  183   181.  182. 2.16e6     180. 3M       reports    
## # … with 6 more variables: gics_sector <chr>, gics_sub_industry <chr>,
## #   headquarters_location <chr>, date_first_added <chr>, cik <int>,
## #   founded <chr>
#1. Based on the data of SP500, create plot of 20 companies with the highest average returns in the last 3 months of the data range. 
daily_sector = tickers_df %>% group_by(security, gics_sector, symbol) %>% 
tq_transmute(select     = adjusted, 
              mutate_fun = periodReturn, 
              period     = "daily") %>% 
              ungroup()
avg_return =daily_sector %>% 
  group_by(security, gics_sector, symbol) %>%
  summarise(avg_return = round(mean(daily.returns), 4),Volatility =   sd(daily.returns)) %>%
  arrange(desc(avg_return), desc(Volatility))
## `summarise()` has grouped output by 'security', 'gics_sector'. You can override using the `.groups` argument.
avg_return %>% head()
## # A tibble: 6 × 5
## # Groups:   security, gics_sector [6]
##   security        gics_sector            symbol avg_return Volatility
##   <chr>           <chr>                  <chr>       <dbl>      <dbl>
## 1 Ford            Consumer Discretionary F          0.0081     0.0309
## 2 Teradyne        Information Technology TER        0.0061     0.0246
## 3 Qualcomm        Information Technology QCOM       0.0059     0.0263
## 4 Dollar Tree     Consumer Discretionary DLTR       0.0059     0.0255
## 5 Arista Networks Information Technology ANET       0.0057     0.0328
## 6 HP              Information Technology HPQ        0.0055     0.0221
avg_return %>% head(20) %>% ggplot(aes(reorder(security, -avg_return), avg_return, fill = avg_return))+
  geom_col()+
  coord_flip()+
  labs(title = "20 companies in SP500 with the highest average returns in the last 3 months", x = "Company", y = "Average Return")+
  theme_classic()+
  theme(legend.position="none")

#2.  Based on the data of SP500, create plot of average returns vs. volatility in the last 3 months of the data range.  Also highlight 3 highest volatility companies in the plot as well. 
plot <- avg_return %>% ggplot(aes(avg_return, Volatility))+
  geom_text(aes(label = symbol), size = 3)+
  labs(title = "Average Return vs Volatility Over Last 3 Months In SP500", x = "Average Return", subtitle = "Data Source: Yahoo Finance")+ theme_minimal()
plot

avg_return = avg_return %>% 
  mutate(Indicator = case_when(symbol %in% c('MRNA', 'ENPH', 'TSLA') ~ "3 highest volatility companies", TRUE ~ "The Rest of the SP500"))
plot = avg_return %>% ggplot(aes(avg_return, Volatility, color = Indicator))+
  geom_text(aes(label = symbol), size = 3)+
  labs(title = "Average Return vs Volatility Over Last 3 Months In SP500", x = "Average Return", subtitle = "Data Source: Yahoo Finance")+ theme_minimal()
plot