library(rvest)
library(tidyverse)
library(tidyquant)
library(janitor)

# Start of the analysis window: three calendar months before the current date.
# NOTE: `date` is read further down the script (e.g. by get_symbols()).
today <- Sys.Date()
date <- today %m+% months(-3)
print(date)
## [1] "2021-10-08"
# Download the ^GSPC series from the start of the window and preview it.
one_ticker <- tq_get("^GSPC", from = date)
head(one_ticker)
## # A tibble: 6 × 8
##   symbol date        open  high   low close     volume adjusted
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>      <dbl>    <dbl>
## 1 ^GSPC  2021-10-08 4407. 4412. 4386. 4391. 2401890000    4391.
## 2 ^GSPC  2021-10-11 4385. 4416. 4361. 4361. 2580000000    4361.
## 3 ^GSPC  2021-10-12 4368. 4375. 4342. 4351. 2608150000    4351.
## 4 ^GSPC  2021-10-13 4358. 4373. 4330. 4364. 2926460000    4364.
## 5 ^GSPC  2021-10-14 4387. 4440. 4387. 4438. 2642920000    4438.
## 6 ^GSPC  2021-10-15 4448. 4476. 4448. 4471. 3000560000    4471.
#get the URL for the wikipedia page with all SP500 symbols
url <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# use that URL to scrape the SP500 table: read the HTML from the webpage

# Parse the page, select the element whose id is "constituents" and convert
# it to a list of tables.
tickers <- html_table(
  html_nodes(read_html(url), xpath = '//*[@id="constituents"]')
)

# Keep the first table and rewrite the two dotted share-class symbols with a
# dash; every other symbol is passed through as character.
sp500tickers <- tickers[[1]] %>%
  mutate(
    Symbol = case_when(
      Symbol == "BRK.B" ~ "BRK-B",
      Symbol == "BF.B" ~ "BF-B",
      TRUE ~ as.character(Symbol)
    )
  )

# Vector of ticker symbols to download.
symbol <- sp500tickers[["Symbol"]]
# Download the price history for a single ticker.
#
# Arguments:
#   ticker  Symbol passed to tq_get().
#   from    Start date passed to tq_get(). Defaults to the script-level
#           `date`, which is what the function previously read implicitly.
#
# Returns the tibble produced by tq_get() with its `symbol` column set to
# `ticker`. When tq_get() does not return a data frame (NOTE(review): it is
# expected to return NA with a warning on a failed download -- confirm for the
# installed tidyquant version), a warning is raised and NULL is returned so
# that one bad ticker does not abort a map() over many; bind_rows() skips
# NULL elements.
get_symbols <- function(ticker = "AAPL", from = date) {
  prices <- tq_get(ticker, from = from)
  if (!is.data.frame(prices)) {
    warning("No price data returned for ", ticker, call. = FALSE)
    return(NULL)
  }
  # A length-one value is recycled to every row, so no rep() is needed.
  prices %>% mutate(symbol = ticker)
}


# Download every ticker, stack the results into one table, attach the
# company metadata scraped above and normalise the column names.
tickers_df <- symbol %>%
  map(get_symbols) %>%
  bind_rows() %>%
  left_join(sp500tickers, by = c("symbol" = "Symbol")) %>%
  clean_names()
head(tickers_df)
## # A tibble: 6 × 16
##   symbol date        open  high   low close volume adjusted security sec_filings
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl> <chr>    <chr>      
## 1 MMM    2021-10-08  178.  178.  177.  177. 2.47e6     176. 3M       reports    
## 2 MMM    2021-10-11  178.  179.  176.  176. 2.69e6     175. 3M       reports    
## 3 MMM    2021-10-12  176.  177.  175.  176. 2.16e6     174. 3M       reports    
## 4 MMM    2021-10-13  176.  178.  175.  177. 2.03e6     176. 3M       reports    
## 5 MMM    2021-10-14  178   180.  178.  180. 2.28e6     179. 3M       reports    
## 6 MMM    2021-10-15  181.  183   181.  182. 2.16e6     180. 3M       reports    
## # … with 6 more variables: gics_sector <chr>, gics_sub_industry <chr>,
## #   headquarters_location <chr>, date_first_added <chr>, cik <int>,
## #   founded <chr>
# Daily returns of the adjusted price, computed separately for each
# security / sector / symbol group; grouping is removed afterwards.
daily_sector <- tickers_df %>%
  group_by(security, gics_sector, symbol) %>%
  tq_transmute(
    select = adjusted,
    mutate_fun = periodReturn,
    period = "daily"
  ) %>%
  ungroup()

# Per-company summary of the daily returns: mean (rounded to 4 decimals) and
# standard deviation, sorted by mean return and then volatility, both
# descending.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by `security`, which leaks into every later verb applied to it.
avg_return <- daily_sector %>%
  group_by(security, gics_sector) %>%
  summarise(
    avg_return = round(mean(daily.returns), 4),
    Volatility = sd(daily.returns),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_return), desc(Volatility))

avg_return %>% head()
## # A tibble: 6 × 4
## # Groups:   security [6]
##   security        gics_sector            avg_return Volatility
##   <chr>           <chr>                       <dbl>      <dbl>
## 1 Ford            Consumer Discretionary     0.0081     0.0309
## 2 Teradyne        Information Technology     0.0061     0.0246
## 3 Qualcomm        Information Technology     0.0059     0.0263
## 4 Dollar Tree     Consumer Discretionary     0.0059     0.0255
## 5 Arista Networks Information Technology     0.0057     0.0328
## 6 HP              Information Technology     0.0055     0.0221
# Horizontal bar chart of the first 20 rows of avg_return (the table is
# already sorted by average return, descending); bars are filled by the
# return value and the legend is hidden.
ggplot(
  head(avg_return, 20),
  aes(
    x = reorder(security, -avg_return),
    y = avg_return,
    fill = avg_return
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "20 companies in SP500 with the highest average returns in the last 3 months",
    x = "Company",
    y = "Average Return"
  ) +
  theme_classic() +
  theme(legend.position = "none")

# Average return vs. volatility, one text label per company.
# avg_return has no `symbol` column, so aes(label = symbol) used to fall back
# to the global vector `symbol` (the full ticker list), which is not aligned
# with the sorted summary rows. Join the ticker symbols from daily_sector so
# each label comes from the row being plotted.
plot <- avg_return %>%
  ungroup() %>%
  left_join(
    distinct(daily_sector, security, gics_sector, symbol),
    by = c("security", "gics_sector")
  ) %>%
  ggplot(aes(avg_return, Volatility)) +
  geom_text(aes(label = symbol), size = 3) +
  labs(
    title = "Average Return vs Volatility Over Last 3 Months In SP500",
    x = "Average Return",
    subtitle = "Data Source: Yahoo Finance"
  ) +
  theme_minimal()
plot