#for scraping
library(rvest)
#blanket import for core tidyverse packages
library(tidyverse)
#tidy financial analysis
library(tidyquant)
#tidy data cleaning functions
library(janitor)
# The workspace is deliberately NOT cleared here: wiping every object with
# rm(list = ls()) destroys whatever the caller had loaded and does not give a
# clean session anyway (attached packages and options are left untouched).

# Local copy of the long-format S&P 500 price file. The path is specific to
# one machine, and nothing in the visible part of this script reads
# SP500.Stocks_long, so a missing file only produces a warning instead of
# aborting the whole analysis.
sp500_long_path <- "C:/Users/HP/Downloads/SP500-Stocks_long.txt/SP500-Stocks_long.txt"
if (file.exists(sp500_long_path)) {
  SP500.Stocks_long <- read.csv(sp500_long_path)
} else {
  warning(
    "Local file not found, SP500.Stocks_long was not loaded: ",
    sp500_long_path,
    call. = FALSE
  )
}
# Analysis window: starts three calendar months before today.
today <- Sys.Date()
date <- today %m-% months(3)
print(date)

# Quotes for the S&P 500 index itself (ticker ^GSPC) from the window start
# onwards, followed by a quick look at the first rows.
SP500_1 <- tq_get("^GSPC", from = date)
head(SP500_1)
# Scrape the S&P 500 constituents table (HTML id "constituents") from
# Wikipedia.
url <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
tickers <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="constituents"]') %>%
  html_table()

# Fail with a readable message if the page layout changed, rather than with
# "subscript out of bounds" on the extraction below.
if (length(tickers) == 0) {
  stop("No element with id 'constituents' found at ", url, call. = FALSE)
}
sp500tickers <- tickers[[1]]

# Wikipedia writes share classes with a dot (BRK.B, BF.B) while the download
# step needs a hyphen (BRK-B, BF-B). Every dot is replaced, so any dotted
# ticker added to the index later is handled without editing this script.
sp500tickers <- sp500tickers %>%
  mutate(Symbol = gsub(".", "-", as.character(Symbol), fixed = TRUE))

# Plain character vector of tickers, used to drive the per-ticker download.
symbol <- sp500tickers$Symbol
# Download prices for a single ticker and tag every row with that ticker.
#
# Arguments:
#   ticker  Ticker symbol handed to tq_get().
#   from    Start date of the download. Defaults to the script-level `date`,
#           which is what the original one-argument version always used.
#
# Returns the tq_get() result with a `symbol` column equal to `ticker`.
# If tq_get() does not hand back a data frame (e.g. the download failed),
# a warning is raised and NULL is returned; bind_rows() skips NULL elements,
# so one bad ticker no longer aborts the download of all the others.
get_symbols <- function(ticker = "AAPL", from = date) {
  prices <- tq_get(ticker, from = from)

  if (!is.data.frame(prices)) {
    warning(
      "No price data returned for ticker '", ticker, "'; skipping.",
      call. = FALSE
    )
    return(NULL)
  }

  # A length-one value is recycled to every row. The previous
  # rep(ticker, length(date)) only worked because `date` happened to be
  # masked by the data frame's own `date` column inside mutate().
  mutate(prices, symbol = ticker)
}
# Download every constituent, stack the per-ticker results into one table,
# attach the scraped constituent details by ticker, and tidy the column
# names with clean_names().
tickers_df <- symbol %>%
  map(get_symbols) %>%
  bind_rows() %>%
  left_join(sp500tickers, by = c('symbol' = 'Symbol')) %>%
  clean_names()

# Quick look at the first rows of the combined table.
head(tickers_df)
# Solution 1.
# Daily returns per constituent: periodReturn is applied to the adjusted
# price within each (security, gics_sector, symbol) group, and the grouping
# is removed again afterwards.
daily_sector <- ungroup(
  tq_transmute(
    group_by(tickers_df, security, gics_sector, symbol),
    select = adjusted,
    mutate_fun = periodReturn,
    period = "daily"
  )
)
# Mean daily return (rounded to 4 decimals) and volatility (standard
# deviation of daily returns) per constituent, best performers first.
#
# `symbol` is part of the grouping so the ticker survives summarise().
# Without it, later expressions such as aes(label = symbol) or
# symbol %in% ... silently pick up the global `symbol` vector, whose order
# does not match this sorted table.
# The result is ungrouped so later mutate()/filter() calls work on the whole
# table rather than one security at a time.
avg_return <- daily_sector %>%
  group_by(security, gics_sector, symbol) %>%
  summarise(
    avg_return = round(mean(daily.returns), 4),
    Volatility = sd(daily.returns)
  ) %>%
  ungroup() %>%
  arrange(desc(avg_return), desc(Volatility))

# Quick look at the top of the ranking.
head(avg_return)
# Horizontal bar chart of the first 20 rows of avg_return (the table is
# already sorted by descending average return). Bars are ordered and
# coloured by average return; the fill legend is hidden.
ggplot(
  head(avg_return, 20),
  aes(x = reorder(security, -avg_return), y = avg_return, fill = avg_return)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "20 Companies with the Highest Average Returns in the S&P500 During the Last 3 Months",
    x = "Company",
    y = "Average Return"
  ) +
  theme_classic() +
  theme(legend.position = "none")
# Solution 2.
# Scatter of average return against volatility, drawn as ticker labels
# instead of points.
# NOTE(review): label = symbol expects avg_return to carry a `symbol`
# column; if it does not, ggplot2 falls back to the global `symbol` vector.
plot <- ggplot(avg_return, aes(x = avg_return, y = Volatility)) +
  geom_text(aes(label = symbol), size = 3) +
  labs(
    title = "Average Return vs. Volatility Over Last 3 Months In S&P500",
    subtitle = "Data Source: Yahoo Finance",
    x = "Average Return"
  ) +
  theme_minimal()
plot
# At first glance, what stands out most in the scatter plot above is the
# high volatility of XYL, PYG and AMD, so those three are highlighted.
# First attempt: flag them in a new Indicator column.
# NOTE(review): `symbol` must be a column of avg_return for this test to be
# evaluated row by row - confirm.
avg_return <- mutate(
  avg_return,
  Indicator = if_else(
    symbol %in% c('XYL', 'PYG', 'AMD'),
    "Top 3 Highest Firms",
    "The Rest of the S&P500"
  )
)
# Same return-vs-volatility scatter of ticker labels, now coloured by the
# Indicator column so the flagged firms stand out from the rest.
plot <- ggplot(
  avg_return,
  aes(x = avg_return, y = Volatility, color = Indicator)
) +
  geom_text(aes(label = symbol), size = 3) +
  labs(
    title = "Average Return vs Volatility Over Last 3 Months In SP500",
    subtitle = "Data Source: Yahoo Finance",
    x = "Average Return"
  ) +
  theme_minimal()
plot
# Second attempt: filter the data frame down to the rows to be highlighted
# (volatility above 0.04 and average return of at least -0.003).
highlight_df <- avg_return %>%
  filter(Volatility > 0.04, avg_return >= -0.003)

# Every firm is drawn as a faint grey point for context, and only the
# highlighted firms get a red ticker label on top. Previously the single
# layer used highlight_df alone, so the rest of avg_return was never drawn
# and the highlighted firms had nothing to stand out from.
plot <- avg_return %>%
  ggplot(aes(avg_return, Volatility)) +
  geom_point(color = "grey70", alpha = 0.6) +
  geom_text(
    data = highlight_df,
    aes(label = symbol),
    color = "red",
    size = 3
  ) +
  labs(
    title = "Average Return vs. Volatility Over Last 3 Months In S&P500",
    x = "Average Return",
    subtitle = "Data Source: Yahoo Finance"
  ) +
  theme_minimal()
plot