library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rvest)
## Warning: package 'rvest' was built under R version 4.0.5
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.0.5
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 4.0.5
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Warning: package 'PerformanceAnalytics' was built under R version 4.0.5
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.0.5
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.5
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Warning: package 'quantmod' was built under R version 4.0.5
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.0.5
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## == Need to Learn tidyquant? ====================================================
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(janitor)
## Warning: package 'janitor' was built under R version 4.0.5
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(readr)
SP500_Stocks_long <- read_csv("C:/Windows/System32/SP500-Stocks_long.txt")
## Rows: 829710 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): ticker
## dbl (1): price.adjusted
## date (1): ref.date
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
today <- Sys.Date()
date <- today %m+% months(-3)
print(date)
## [1] "2021-10-08"
SP500_1 <- tq_get("^GSPC", from = date)
SP500_1 %>%
head()
## # A tibble: 6 x 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ^GSPC 2021-10-08 4407. 4412. 4386. 4391. 2401890000 4391.
## 2 ^GSPC 2021-10-11 4385. 4416. 4361. 4361. 2580000000 4361.
## 3 ^GSPC 2021-10-12 4368. 4375. 4342. 4351. 2608150000 4351.
## 4 ^GSPC 2021-10-13 4358. 4373. 4330. 4364. 2926460000 4364.
## 5 ^GSPC 2021-10-14 4387. 4440. 4387. 4438. 2642920000 4438.
## 6 ^GSPC 2021-10-15 4448. 4476. 4448. 4471. 3000560000 4471.
url <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
tickers <- url %>%
read_html() %>%
html_nodes(xpath = '//*[@id="constituents"]') %>%
html_table()
sp500tickers <- tickers[[1]]
sp500tickers = sp500tickers %>% mutate(Symbol = case_when(Symbol == "BRK.B" ~ "BRK-B",
Symbol == "BF.B" ~ "BF-B",
TRUE ~ as.character(Symbol)))
symbol = sp500tickers$Symbol
get_symbols <- function(ticker = "AAPL"){
df = tq_get(ticker, from = date) %>% mutate(symbol = rep(ticker, length(date)))
return(df)
}
tickers_df = map(symbol, get_symbols) %>% bind_rows()
tickers_df = tickers_df %>%
left_join(sp500tickers, by = c('symbol' = 'Symbol')) %>%
clean_names()
tickers_df %>%
head()
## # A tibble: 6 x 16
## symbol date open high low close volume adjusted security sec_filings
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 MMM 2021-10-08 178. 178. 177. 177. 2.47e6 176. 3M reports
## 2 MMM 2021-10-11 178. 179. 176. 176. 2.69e6 175. 3M reports
## 3 MMM 2021-10-12 176. 177. 175. 176. 2.16e6 174. 3M reports
## 4 MMM 2021-10-13 176. 178. 175. 177. 2.03e6 176. 3M reports
## 5 MMM 2021-10-14 178 180. 178. 180. 2.28e6 179. 3M reports
## 6 MMM 2021-10-15 181. 183 181. 182. 2.16e6 180. 3M reports
## # ... with 6 more variables: gics_sector <chr>, gics_sub_industry <chr>,
## # headquarters_location <chr>, date_first_added <chr>, cik <int>,
## # founded <chr>
#1 Based on the data of SP500, create plot of 20 companies with the highest average returns in the last 3 months of the data range.
daily_sector = tickers_df %>% group_by(security, gics_sector, symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "daily") %>%
ungroup()
avg_return =daily_sector %>%
group_by(security, gics_sector) %>%
summarise(avg_return = round(mean(daily.returns), 4),Volatility = sd(daily.returns)) %>%
arrange(desc(avg_return), desc(Volatility))
## `summarise()` has grouped output by 'security'. You can override using the `.groups` argument.
avg_return %>% head()
## # A tibble: 6 x 4
## # Groups: security [6]
## security gics_sector avg_return Volatility
## <chr> <chr> <dbl> <dbl>
## 1 Ford Consumer Discretionary 0.0081 0.0309
## 2 Teradyne Information Technology 0.0061 0.0246
## 3 Qualcomm Information Technology 0.0059 0.0263
## 4 Dollar Tree Consumer Discretionary 0.0059 0.0255
## 5 Arista Networks Information Technology 0.0057 0.0328
## 6 HP Information Technology 0.0055 0.0221
avg_return %>% head(20) %>% ggplot(aes(reorder(security, -avg_return), avg_return, fill = avg_return))+
geom_col()+
coord_flip()+
labs(title = "20 companies in SP500 with the highest average returns in the last 3 months", x = "Company", y = "Average Return")+
theme_classic()+
theme(legend.position="none")

#2 Based on the data of SP500, create plot of average returns vs. volatility in the last 3 months of the data range. Also highlight 3 highest volatility companies in the plot as well.
plot <- avg_return %>% ggplot(aes(avg_return, Volatility))+
geom_text(aes(label = symbol), size = 3)+
labs(title = "Average Return vs Volatility Over Last 3 Months In SP500", x = "Average Return", subtitle = "Data Source: Yahoo Finance")+ theme_minimal()
plot
