This is a note from Matt’s PP slides https://github.com/business-science/presentations/blob/master/2019_05_17_RFinance_Tidyquant_Portfolio_Optimization/R_Finance_tidyquant_matt_dancho.pdf?utm_source=Business+Science+-+Combined+List&utm_campaign=7407a74aed-RFINANCE_TALK_EMAIL&utm_medium=email&utm_term=0_a4e5b7c52f-7407a74aed-62996495&mc_cid=7407a74aed&mc_eid=d9cda1f377
Tidyquant is a workflow that manages the process of systematically evaluating stocks & portfolios.
# Load packages
library(tidyquant)
library(tidyverse)
from <- today() - years(5)
stock_prices <- c("AAPL", "MSFT", "FB", "NFLX") %>%
tq_get(get = "stock.prices",
from = from)
stock_prices
## # A tibble: 5,032 x 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2015-03-02 129. 130. 128. 129. 48096700 119.
## 2 AAPL 2015-03-03 129. 130. 128. 129. 37816300 119.
## 3 AAPL 2015-03-04 129. 130. 128. 129. 31666300 118.
## 4 AAPL 2015-03-05 129. 129. 126. 126. 56517100 116.
## 5 AAPL 2015-03-06 128. 129. 126. 127. 72842100 117.
## 6 AAPL 2015-03-09 128. 130. 125. 127. 88528500 117.
## 7 AAPL 2015-03-10 126. 127. 124. 125. 68856600 115.
## 8 AAPL 2015-03-11 125. 125. 122. 122. 68939000 112.
## 9 AAPL 2015-03-12 122. 125. 122. 124. 48362700 115.
## 10 AAPL 2015-03-13 124. 125. 123. 124. 51827300 114.
## # … with 5,022 more rows
stock_returns_monthly <- stock_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
col_rename = "Ra")
stock_returns_monthly
## # A tibble: 240 x 3
## # Groups: symbol [4]
## symbol date Ra
## <chr> <date> <dbl>
## 1 AAPL 2015-03-31 -0.0361
## 2 AAPL 2015-04-30 0.00579
## 3 AAPL 2015-05-29 0.0453
## 4 AAPL 2015-06-30 -0.0372
## 5 AAPL 2015-07-31 -0.0329
## 6 AAPL 2015-08-31 -0.0662
## 7 AAPL 2015-09-30 -0.0218
## 8 AAPL 2015-10-30 0.0834
## 9 AAPL 2015-11-30 -0.00580
## 10 AAPL 2015-12-31 -0.110
## # … with 230 more rows
# Baseline Period Returns
baseline_returns_monthly <- "^GSPC" %>%
tq_get(get = "stock.prices",
from = from) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
col_rename = "Rb")
baseline_returns_monthly
## # A tibble: 60 x 2
## date Rb
## <date> <dbl>
## 1 2015-03-31 -0.0234
## 2 2015-04-30 0.00852
## 3 2015-05-29 0.0105
## 4 2015-06-30 -0.0210
## 5 2015-07-31 0.0197
## 6 2015-08-31 -0.0626
## 7 2015-09-30 -0.0264
## 8 2015-10-30 0.0830
## 9 2015-11-30 0.000505
## 10 2015-12-31 -0.0175
## # … with 50 more rows
# scaling a single portfolio to num_port number
num_port = 1
stock_returns_monthly_multi <- stock_returns_monthly %>%
tq_repeat_df(n = num_port)
stock_returns_monthly_multi
## # A tibble: 240 x 4
## # Groups: portfolio [1]
## portfolio symbol date Ra
## <int> <chr> <date> <dbl>
## 1 1 AAPL 2015-03-31 -0.0361
## 2 1 AAPL 2015-04-30 0.00579
## 3 1 AAPL 2015-05-29 0.0453
## 4 1 AAPL 2015-06-30 -0.0372
## 5 1 AAPL 2015-07-31 -0.0329
## 6 1 AAPL 2015-08-31 -0.0662
## 7 1 AAPL 2015-09-30 -0.0218
## 8 1 AAPL 2015-10-30 0.0834
## 9 1 AAPL 2015-11-30 -0.00580
## 10 1 AAPL 2015-12-31 -0.110
## # … with 230 more rows
# Create Vector of Weights
# not all symbols need to be specified. Any symbol not specified by default gets a weight of zero.
# The following is what I tried based on the info on the Web.
# https://stackoverflow.com/questions/5622608/choosing-n-numbers-with-fixed-sum
# n = number of stocks in the portfolio
# Choose n - 1 numbers between 0 and 1
# Sort them and calculate the distance between each number
# Create a data frame # of columns = # of stocks in the portfolio and # of rows = # of portfolio
# n = # of stocks in the portfolio
# Case of two portfolios with three stocks
num_stocks <- stock_returns_monthly %>%
distinct(symbol) %>%
n_distinct()
num_stocks
## [1] 4
# Initialize data frame
weights <- list()
# Set the seed of random numbers so that they can be reproduced
set.seed(06182019)
# Loop through the values 1 to number of portfolios, to generate weights for stocks in the portfolio
for(i in 1:num_port){
# Generate n-1 number of random numbers b/t 0 and 1
weights[[i]] = runif(num_stocks-1, 0, 1)
# Add 0 and 1
weights[[i]] = append(weights[[i]], c(0, 1))
# Sort the random numbers
weights[[i]] = sort(weights[[i]])
# Calculate the differences b/t the random numbers
weights[[i]] = diff(weights[[i]])
}
weights[[1]]
## [1] 0.06385721 0.32707606 0.36002330 0.24904342
# Convert the list to a vector
weights <- unlist(weights)
weights
## [1] 0.06385721 0.32707606 0.36002330 0.24904342
#
weights_table <- stock_returns_monthly %>%
distinct(symbol) %>%
tq_repeat_df(n = num_port) %>%
bind_cols(tibble(weights)) %>%
group_by(portfolio)
weights_table
## # A tibble: 4 x 3
## # Groups: portfolio [1]
## portfolio symbol weights
## <int> <chr> <dbl>
## 1 1 AAPL 0.0639
## 2 1 MSFT 0.327
## 3 1 FB 0.360
## 4 1 NFLX 0.249
# Aggregate a Portfolio using Vector of Weights
portfolio_returns_monthly_multi <-
stock_returns_monthly_multi %>%
tq_portfolio(assets_col = symbol,
returns_col = Ra,
weights = weights_table,
col_rename = "Ra")
portfolio_returns_monthly_multi
## # A tibble: 60 x 3
## # Groups: portfolio [1]
## portfolio date Ra
## <int> <date> <dbl>
## 1 1 2015-03-31 -0.0481
## 2 1 2015-04-30 0.123
## 3 1 2015-05-29 0.0269
## 4 1 2015-06-30 0.0219
## 5 1 2015-07-31 0.115
## 6 1 2015-08-31 -0.0350
## 7 1 2015-09-30 -0.0300
## 8 1 2015-10-30 0.120
## 9 1 2015-11-30 0.0609
## 10 1 2015-12-31 -0.0205
## # … with 50 more rows
# Merging Ra and Rb
RaRb_multi_portfolio <- left_join(portfolio_returns_monthly_multi ,
baseline_returns_monthly,
by = "date")
RaRb_multi_portfolio
## # A tibble: 60 x 4
## # Groups: portfolio [1]
## portfolio date Ra Rb
## <int> <date> <dbl> <dbl>
## 1 1 2015-03-31 -0.0481 -0.0234
## 2 1 2015-04-30 0.123 0.00852
## 3 1 2015-05-29 0.0269 0.0105
## 4 1 2015-06-30 0.0219 -0.0210
## 5 1 2015-07-31 0.115 0.0197
## 6 1 2015-08-31 -0.0350 -0.0626
## 7 1 2015-09-30 -0.0300 -0.0264
## 8 1 2015-10-30 0.120 0.0830
## 9 1 2015-11-30 0.0609 0.000505
## 10 1 2015-12-31 -0.0205 -0.0175
## # … with 50 more rows
# Beta and Alpha
RaRb_multi_portfolio %>%
tq_performance(Ra = Ra, Rb = Rb, performance_fun = table.CAPM) %>%
select(Alpha, Beta)
## # A tibble: 1 x 3
## # Groups: portfolio [1]
## portfolio Alpha Beta
## <int> <dbl> <dbl>
## 1 1 0.0165 1.16
library(broom)
# In the linear regression model below, Ra (portfolio returns of your interest, ESG) is the dependent variable, while Rb (returns of the benchmark portfolio, S&P500) is the independent variable
lm(Ra ~ Rb, RaRb_multi_portfolio) %>%
tidy()
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.0165 0.00661 2.50 0.0152
## 2 Rb 1.16 0.185 6.26 0.0000000499
Alpha is the intercept, which is statistically significant at 5% (i.e, its p-value is smaller than 0.05). Beta is the coefficient, which is statistically significant at 1% (i.e, its p-value is smaller than 0.01).