library(rvest)
library(tidyverse)
library(tidyquant)
library(janitor)
# Start of the look-back window: three months before today.
today <- Sys.Date()
date <- today %m-% months(3)
print(date)
## [1] "2021-10-08"
# Fetch the "^GSPC" series from the window start onwards and preview the
# first rows.
one_ticker <- tq_get("^GSPC", from = date)
head(one_ticker)
## # A tibble: 6 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ^GSPC 2021-10-08 4407. 4412. 4386. 4391. 2401890000 4391.
## 2 ^GSPC 2021-10-11 4385. 4416. 4361. 4361. 2580000000 4361.
## 3 ^GSPC 2021-10-12 4368. 4375. 4342. 4351. 2608150000 4351.
## 4 ^GSPC 2021-10-13 4358. 4373. 4330. 4364. 2926460000 4364.
## 5 ^GSPC 2021-10-14 4387. 4440. 4387. 4438. 2642920000 4438.
## 6 ^GSPC 2021-10-15 4448. 4476. 4448. 4471. 3000560000 4471.
# Wikipedia page that lists the S&P 500 constituents.
url <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# Read the page, select the element whose id is "constituents", and parse it
# into a list of tables.
tickers <- html_table(
  html_nodes(read_html(url), xpath = '//*[@id="constituents"]')
)
# Keep the first parsed table and rewrite the two dotted symbols with a
# hyphen (presumably the form the price source expects); every other symbol
# is left unchanged.
sp500tickers <- tickers[[1]] %>%
  mutate(
    Symbol = case_when(
      Symbol == "BRK.B" ~ "BRK-B",
      Symbol == "BF.B" ~ "BF-B",
      TRUE ~ as.character(Symbol)
    )
  )
# Vector of symbols to download.
symbol <- pull(sp500tickers, Symbol)
# Download the price history for one ticker.
#
# Arguments:
#   ticker  Symbol passed to tq_get().
#   from    Start date passed to tq_get(). Defaults to the script-level
#           `date`, which this function previously read implicitly.
#
# Returns the data frame from tq_get() with its `symbol` column set to
# `ticker`, or an empty tibble when tq_get() did not return a data frame.
get_symbols <- function(ticker = "AAPL", from = date) {
  prices <- tq_get(ticker, from = from)
  # tq_get() reports a failed download by returning NA with a warning instead
  # of raising an error. Calling mutate() on that NA would abort the whole
  # map() over the index; an empty tibble is simply skipped by bind_rows().
  if (!is.data.frame(prices)) {
    return(tibble())
  }
  # A length-one value is recycled to every row, so rep() is unnecessary.
  mutate(prices, symbol = ticker)
}
# Download every symbol, stack the per-ticker results, attach the scraped
# constituent table by symbol, and normalise the column names.
tickers_df <- symbol %>%
  map(get_symbols) %>%
  bind_rows() %>%
  left_join(sp500tickers, by = c("symbol" = "Symbol")) %>%
  clean_names()
head(tickers_df)
## # A tibble: 6 × 16
## symbol date open high low close volume adjusted security sec_filings
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 MMM 2021-10-08 178. 178. 177. 177. 2.47e6 176. 3M reports
## 2 MMM 2021-10-11 178. 179. 176. 176. 2.69e6 175. 3M reports
## 3 MMM 2021-10-12 176. 177. 175. 176. 2.16e6 174. 3M reports
## 4 MMM 2021-10-13 176. 178. 175. 177. 2.03e6 176. 3M reports
## 5 MMM 2021-10-14 178 180. 178. 180. 2.28e6 179. 3M reports
## 6 MMM 2021-10-15 181. 183 181. 182. 2.16e6 180. 3M reports
## # … with 6 more variables: gics_sector <chr>, gics_sub_industry <chr>,
## # headquarters_location <chr>, date_first_added <chr>, cik <int>,
## # founded <chr>
# Daily returns per company, computed from the adjusted price column.
daily_sector <- tickers_df %>%
  group_by(security, gics_sector, symbol) %>%
  tq_transmute(
    select = adjusted,
    mutate_fun = periodReturn,
    period = "daily"
  ) %>%
  ungroup()
# Mean daily return (rounded to four decimals) and standard deviation of
# daily returns per company, sorted from highest to lowest. summarise() drops
# only the last grouping level, so the result stays grouped by `security`.
avg_return <- daily_sector %>%
  group_by(security, gics_sector) %>%
  summarise(
    avg_return = round(mean(daily.returns), 4),
    Volatility = sd(daily.returns)
  ) %>%
  arrange(desc(avg_return), desc(Volatility))
head(avg_return)
## # A tibble: 6 × 4
## # Groups: security [6]
## security gics_sector avg_return Volatility
## <chr> <chr> <dbl> <dbl>
## 1 Ford Consumer Discretionary 0.0081 0.0309
## 2 Teradyne Information Technology 0.0061 0.0246
## 3 Qualcomm Information Technology 0.0059 0.0263
## 4 Dollar Tree Consumer Discretionary 0.0059 0.0255
## 5 Arista Networks Information Technology 0.0057 0.0328
## 6 HP Information Technology 0.0055 0.0221
# Horizontal bar chart of the first 20 rows of `avg_return`, with bars ordered
# and filled by average return and the legend hidden.
avg_return %>%
  head(20) %>%
  ggplot(
    aes(
      x = reorder(security, -avg_return),
      y = avg_return,
      fill = avg_return
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "20 companies in SP500 with the highest average returns in the last 3 months",
    x = "Company",
    y = "Average Return"
  ) +
  theme_classic() +
  theme(legend.position = "none")

# Scatter of average daily return against volatility, drawn as one ticker
# label per company.
#
# `avg_return` is summarised by security and sector only, so it has no ticker
# column. Using `symbol` inside aes() on that data falls through to the global
# `symbol` vector, which is in scraped-table order rather than the sorted row
# order of `avg_return`: labels end up on the wrong points, or the plot fails
# when the lengths differ. The tickers are therefore joined back from
# `daily_sector` so each label belongs to its own row.
plot <- avg_return %>%
  ungroup() %>%
  left_join(
    distinct(daily_sector, security, gics_sector, symbol),
    by = c("security", "gics_sector")
  ) %>%
  ggplot(aes(x = avg_return, y = Volatility)) +
  geom_text(aes(label = symbol), size = 3) +
  labs(
    title = "Average Return vs Volatility Over Last 3 Months In SP500",
    x = "Average Return",
    subtitle = "Data Source: Yahoo Finance"
  ) +
  theme_minimal()
plot
