0. Load Libraries and Define Functions
# 0.0 Load libraries ----
# Loads tidyquant, lubridate, xts, quantmod, TTR, and PerformanceAnalytics
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.28 ✔ xts 0.14.1── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
#0.1 Define functions ----
#The following functions explicitly compute annual, monthly and weekly returns
get_annual_returns <- function(stock.returns) {
stock.returns %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
type = "log",
period = "yearly")
}
get_monthly_returns <- function(stock.returns) {
stock.returns %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
type = "log",
period = "monthly")
}
get_weekly_returns <- function(stock.returns) {
stock.returns %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
type = "log",
period = "weekly")
}
get_daily_returns <- function(stock.returns) {
stock.returns %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
type = "log",
period = "daily")
}
1. Collect stock price data from yahoo —-
1.1 Get data on all stocks in the SP00 —-
stocks_tbl <- tq_index("SP500")
## Getting holdings for SP500
stocks_tbl
## # A tibble: 504 × 8
## symbol company identifier sedol weight sector shares_held local_currency
## <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr>
## 1 NVDA NVIDIA CORP 67066G104 2379… 0.0725 - 292827172 USD
## 2 AAPL APPLE INC 037833100 2046… 0.0702 - 178100766 USD
## 3 MSFT MICROSOFT C… 594918104 2588… 0.0616 - 89205769 USD
## 4 AMZN AMAZON.COM … 023135106 2000… 0.0373 - 116468378 USD
## 5 GOOGL ALPHABET IN… 02079K305 BYVY… 0.0300 - 69807871 USD
## 6 AVGO BROADCOM INC 11135F101 BDZ7… 0.0267 - 56445022 USD
## 7 META META PLATFO… 30303M102 B7TL… 0.0245 - 26028317 USD
## 8 GOOG ALPHABET IN… 02079K107 BYY8… 0.0242 - 56039675 USD
## 9 TSLA TESLA INC 88160R101 B616… 0.0228 - 33674627 USD
## 10 BRK-B BERKSHIRE H… 084670702 2073… 0.0161 - 22004519 USD
## # ℹ 494 more rows
# # Symbol "BRK.B" gives error from tq_get
# # Change to "BRK-B"
# rowindex_0<-which(stocks_tbl$symbol=="BRK.B")
# stocks_tbl$symbol[rowindex_0]<-"BRK-B"
#
# # Change to "BF.B" to "BF-B"
# rowindex_0<-which(stocks_tbl$symbol=="BF.B")
# stocks_tbl$symbol[rowindex_0]<-"BF-B"
#
# # Drop symbol "-"
index_dash<-which(stocks_tbl$symbol=="-")
stocks_tbl0<-stocks_tbl[-c( index_dash),]
# Collect all symbols in stocks_tbl0
stocks_symbols<-stocks_tbl0$symbol
1.2 Set start and end dates for price data —-
date_start= "2019-01-01"
date_end= "2024-08-31"
# Collecting data from large group can take
# significant amount of time
# (on 9/12/2023, less than 3 minutes)
# To monitor progress we split large group into subgroups
1.3 Split large group of stocks into subgroups —-
stocks_tbl0$group<-floor((c(1:nrow(stocks_tbl0))-1)/50)
list_group<-unique(stocks_tbl0$group)
#for (group0 in list_group[-c(1:4)]){
for (group0 in list_group) {
stocks_data <- stocks_tbl0 %>%
filter(group==group0) %>%
select(symbol, company) %>%
tq_get(from = date_start, to=date_end)
assign(paste("stocks_data",group0,sep="."),
stocks_data)
print(c("End for group ", as.character(group0)))
}
## [1] "End for group " "0"
## [1] "End for group " "1"
## [1] "End for group " "2"
## [1] "End for group " "3"
## [1] "End for group " "4"
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'SNDK', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "SNDK", env = <environment>, verbose = FALSE, : Unable to import "SNDK".
## cannot open the connection
## Removing SNDK.
## [1] "End for group " "5"
## [1] "End for group " "6"
## [1] "End for group " "7"
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'Q', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "Q", env = <environment>, verbose = FALSE, : Unable to import "Q".
## cannot open the connection
## Removing Q.
## [1] "End for group " "8"
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'SOLS', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "SOLS", env = <environment>, verbose = FALSE, : Unable to import "SOLS".
## cannot open the connection
## Removing SOLS.
## [1] "End for group " "9"
## [1] "End for group " "10"
# If any warnings occur, determine which symbol generated message
# Revise symbol name (as above, replacing "." by "-", or delete
# as was case for symbol = "-", which corresponded to US dollar).
1.4 Bind together data from all groups
stocks_data<-rbind(
stocks_data.0,
stocks_data.1,
stocks_data.2,
stocks_data.3,
stocks_data.4,
stocks_data.5,
stocks_data.6,
stocks_data.7,
stocks_data.8,
stocks_data.9,
stocks_data.10)
dim(stocks_data)
## [1] 701329 9
1.5 Convert stocks_data to time series matrix
stocks_data1 <- stocks_data %>%
dplyr::select(symbol, date, adjusted) %>%
dplyr::filter(!is.na(adjusted)) %>%
dplyr::rename(price = adjusted)
stocks_tsmatrix<- stocks_data1 %>%
pivot_wider(names_from = symbol, values_from = price)
dim(stocks_tsmatrix)
## [1] 1426 501
# On 9/10/2024
# dim(stocks_tsmatrix)
# [1] 1426 504
names(stocks_tsmatrix)
## [1] "date" "NVDA" "AAPL" "MSFT" "AMZN" "GOOGL" "AVGO" "META" "GOOG"
## [10] "TSLA" "BRK-B" "JPM" "LLY" "V" "WMT" "JNJ" "XOM" "MA"
## [19] "PLTR" "NFLX" "ABBV" "COST" "BAC" "HD" "PG" "AMD" "GE"
## [28] "CSCO" "UNH" "WFC" "ORCL" "CVX" "IBM" "KO" "GS" "CAT"
## [37] "MU" "MRK" "PM" "CRM" "RTX" "MCD" "ABT" "TMO" "MS"
## [46] "PEP" "C" "AXP" "ISRG" "AMAT" "DIS" "LIN" "LRCX" "QCOM"
## [55] "INTU" "APP" "AMGN" "T" "TJX" "BKNG" "VZ" "ACN" "GEV"
## [64] "NEE" "UBER" "SCHW" "NOW" "BA" "SPGI" "TXN" "INTC" "KLAC"
## [73] "APH" "BLK" "COF" "GILD" "ADBE" "PFE" "DHR" "UNP" "BSX"
## [82] "LOW" "ADI" "PGR" "HON" "WELL" "ANET" "MDT" "ETN" "PANW"
## [91] "SYK" "DE" "PLD" "COP" "CRWD" "CB" "VRTX" "BX" "CMCSA"
## [100] "PH" "NEM" "BMY" "ADP" "CEG" "MCK" "MO" "CVS" "CME"
## [109] "LMT" "SBUX" "TMUS" "SO" "ICE" "MMC" "DUK" "KKR" "HOOD"
## [118] "MMM" "CDNS" "TT" "GD" "SNPS" "USB" "AMT" "PNC" "DASH"
## [127] "WM" "BK" "ORLY" "NKE" "ELV" "HCA" "HWM" "MCO" "GM"
## [136] "REGN" "JCI" "SHW" "NOC" "AON" "UPS" "EMR" "EQIX" "CI"
## [145] "RCL" "WMB" "TDG" "MAR" "WBD" "MDLZ" "HLT" "FCX" "CMI"
## [154] "CSX" "APO" "ITW" "ECL" "GLW" "COR" "NSC" "TEL" "TRV"
## [163] "TFC" "CL" "AJG" "CTAS" "ADSK" "MSI" "PWR" "AEP" "FDX"
## [172] "SPG" "ROST" "STX" "PCAR" "WDC" "PYPL" "SLB" "ABNB" "EOG"
## [181] "AZO" "SRE" "NXPI" "BDX" "IDXX" "COIN" "ALL" "APD" "ZTS"
## [190] "PSX" "AFL" "MPC" "MNST" "O" "KMI" "LHX" "F" "FTNT"
## [199] "D" "URI" "VST" "VLO" "EW" "CMG" "ROP" "FAST" "AIG"
## [208] "DLR" "CAH" "CBRE" "WDAY" "AME" "MET" "OKE" "AMP" "EA"
## [217] "DAL" "CTVA" "GWW" "TGT" "EXC" "BKR" "DDOG" "ROK" "MPWR"
## [226] "RSG" "MSCI" "XEL" "AXON" "YUM" "NDAQ" "PSA" "DHI" "FICO"
## [235] "CARR" "TTWO" "ETR" "CTSH" "PRU" "PEG" "TRGP" "DELL" "HIG"
## [244] "A" "KR" "CCI" "KDP" "PAYX" "EBAY" "VMC" "IQV" "GEHC"
## [253] "MLM" "FISV" "NUE" "WAB" "SYY" "ED" "STT" "RMD" "VTR"
## [262] "UAL" "ACGL" "XYZ" "OTIS" "CPRT" "MCHP" "FIS" "ARES" "PCG"
## [271] "KMB" "KEYS" "EQT" "WEC" "EXPE" "XYL" "KVUE" "GRMN" "MTB"
## [280] "WTW" "FITB" "HPE" "HUM" "SYF" "VRSK" "CCL" "DG" "VICI"
## [289] "TER" "OXY" "FANG" "NRG" "ODFL" "RJF" "MTD" "TSCO" "IR"
## [298] "EXR" "HSY" "ADM" "IBKR" "HBAN" "CSGP" "FSLR" "ATO" "EFX"
## [307] "DTE" "DOV" "AEE" "BR" "EME" "CBOE" "NTRS" "AWK" "OMC"
## [316] "ULTA" "CINF" "EXE" "GIS" "AVB" "EL" "DXCM" "PPL" "TPR"
## [325] "BIIB" "CFG" "CNP" "ES" "VLTO" "STE" "DLTR" "RF" "LEN"
## [334] "PHM" "STLD" "BRO" "HAL" "TDY" "DVN" "PPG" "IRM" "FE"
## [343] "LDOS" "JBL" "HUBB" "EIX" "HPQ" "TROW" "WSM" "LULU" "WAT"
## [352] "DRI" "NTAP" "CPAY" "ON" "LYV" "EQR" "LUV" "NVR" "LH"
## [361] "CMS" "KHC" "CHD" "PTC" "WRB" "SBAC" "VRSN" "LVS" "IP"
## [370] "PODD" "EXPD" "DGX" "GPN" "STZ" "SW" "CTRA" "TYL" "NI"
## [379] "CHTR" "WST" "KEY" "AMCR" "IT" "CHRW" "CDW" "TRMB" "FTV"
## [388] "PKG" "ZBH" "CNC" "PFG" "L" "SNA" "GPC" "ROL" "MKC"
## [397] "GDDY" "WY" "DD" "ESS" "TPL" "EVRG" "LNT" "PNR" "IFF"
## [406] "APTV" "TSN" "TTD" "HOLX" "COO" "DOW" "INCY" "MAA" "ALB"
## [415] "J" "GEN" "TXT" "LII" "INVH" "DECK" "SMCI" "FOXA" "FFIV"
## [424] "JBHT" "TKO" "AVY" "BBY" "BALL" "RL" "JKHY" "KIM" "ALLE"
## [433] "EG" "DPZ" "MAS" "VTRS" "IEX" "ZBRA" "BG" "CF" "HII"
## [442] "NDSN" "UHS" "CLX" "AKAM" "AIZ" "SOLV" "HST" "IVZ" "CPT"
## [451] "DOC" "BLDR" "FDS" "EPAM" "LYB" "REG" "GL" "SWK" "RVTY"
## [460] "UDR" "DAY" "SJM" "HAS" "PNW" "ALGN" "WYNN" "MRNA" "BXP"
## [469] "NWSA" "BAX" "CRL" "AES" "NCLH" "SWKS" "FOX" "TECH" "APA"
## [478] "MOH" "CAG" "PAYC" "GNRC" "LW" "TAP" "FRT" "LKQ" "HSIC"
## [487] "MTCH" "POOL" "AOS" "MOS" "MGM" "ARE" "BEN" "ERIE" "HRL"
## [496] "MHK" "CPB" "BF-B" "DVA" "PSKY" "NWS"
1.6 Subset out stocks with no missing data
missingcount_bydate<-apply(is.na(stocks_tsmatrix),1,sum)
plot(x=stocks_tsmatrix$date, y=missingcount_bydate,type="l")

# On 9/12/2022, 11 stocks had some missing values on whole period from date_start to date_end
# Define stocks_tsmatrix2 to be stocks with no missing values
missingcount_bystock<-apply(is.na(stocks_tsmatrix),2,sum)
plot(x=c(1:ncol(stocks_tsmatrix)),
y=missingcount_bystock,type="l")

sum(missingcount_bystock==0)
## [1] 479
# 488 stocks had no missing prices on period
which_cols_nomissing<-which(missingcount_bystock==0)
# Extract data for stocks with no missing prices ----
dim(stocks_tsmatrix)
## [1] 1426 501
stocks_tsmatrix0<-stocks_tsmatrix[,which_cols_nomissing]
1.7 Create stocks_tbl0 corresponding to symbols in
stocks_tsmatrix
# names(stocks_tsmatrix0)
dim(stocks_tsmatrix0) # columns equal to 1(date) + (number of stocks)
## [1] 1426 479
which_symbols_tsmatrix0<-match(names(stocks_tsmatrix0)[-1],
stocks_tbl0$symbol,nomatch=0)
stocks_tbl0_tsmatrix0<- stocks_tbl0[which_symbols_tsmatrix0,]
dim(stocks_tsmatrix0)
## [1] 1426 479