This is a note from Matt’s PP slides https://github.com/business-science/presentations/blob/master/2019_05_17_RFinance_Tidyquant_Portfolio_Optimization/R_Finance_tidyquant_matt_dancho.pdf?utm_source=Business+Science+-+Combined+List&utm_campaign=7407a74aed-RFINANCE_TALK_EMAIL&utm_medium=email&utm_term=0_a4e5b7c52f-7407a74aed-62996495&mc_cid=7407a74aed&mc_eid=d9cda1f377

Overview

Tidyquant is a workflow that manages the process of systematically evaluating stocks & portfolios.

Import

# Load packages  
library(tidyquant)
library(tidyverse)

from <- today() - years(5)
stock_prices <- c("AAPL", "MSFT", "FB", "NFLX") %>%
    tq_get(get  = "stock.prices",
           from = from)
stock_prices
## # A tibble: 5,032 x 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AAPL   2015-03-02  129.  130.  128.  129. 48096700     119.
##  2 AAPL   2015-03-03  129.  130.  128.  129. 37816300     119.
##  3 AAPL   2015-03-04  129.  130.  128.  129. 31666300     118.
##  4 AAPL   2015-03-05  129.  129.  126.  126. 56517100     116.
##  5 AAPL   2015-03-06  128.  129.  126.  127. 72842100     117.
##  6 AAPL   2015-03-09  128.  130.  125.  127. 88528500     117.
##  7 AAPL   2015-03-10  126.  127.  124.  125. 68856600     115.
##  8 AAPL   2015-03-11  125.  125.  122.  122. 68939000     112.
##  9 AAPL   2015-03-12  122.  125.  122.  124. 48362700     115.
## 10 AAPL   2015-03-13  124.  125.  123.  124. 51827300     114.
## # … with 5,022 more rows

Manipulate

stock_returns_monthly <- stock_prices %>%
    group_by(symbol) %>%
    tq_transmute(select     = adjusted, 
                 mutate_fun = periodReturn, 
                 period     = "monthly", 
                 col_rename = "Ra")
stock_returns_monthly
## # A tibble: 240 x 3
## # Groups:   symbol [4]
##    symbol date             Ra
##    <chr>  <date>        <dbl>
##  1 AAPL   2015-03-31 -0.0361 
##  2 AAPL   2015-04-30  0.00579
##  3 AAPL   2015-05-29  0.0453 
##  4 AAPL   2015-06-30 -0.0372 
##  5 AAPL   2015-07-31 -0.0329 
##  6 AAPL   2015-08-31 -0.0662 
##  7 AAPL   2015-09-30 -0.0218 
##  8 AAPL   2015-10-30  0.0834 
##  9 AAPL   2015-11-30 -0.00580
## 10 AAPL   2015-12-31 -0.110  
## # … with 230 more rows

# Baseline Period Returns
baseline_returns_monthly <- "^GSPC" %>%
    tq_get(get  = "stock.prices",
           from = from) %>%
    tq_transmute(select     = adjusted, 
                 mutate_fun = periodReturn, 
                 period     = "monthly", 
                 col_rename = "Rb")
baseline_returns_monthly
## # A tibble: 60 x 2
##    date              Rb
##    <date>         <dbl>
##  1 2015-03-31 -0.0234  
##  2 2015-04-30  0.00852 
##  3 2015-05-29  0.0105  
##  4 2015-06-30 -0.0210  
##  5 2015-07-31  0.0197  
##  6 2015-08-31 -0.0626  
##  7 2015-09-30 -0.0264  
##  8 2015-10-30  0.0830  
##  9 2015-11-30  0.000505
## 10 2015-12-31 -0.0175  
## # … with 50 more rows

Aggregate

# scaling a single portfolio to num_port number 
num_port = 1

stock_returns_monthly_multi <- stock_returns_monthly %>%
    tq_repeat_df(n = num_port)
stock_returns_monthly_multi
## # A tibble: 240 x 4
## # Groups:   portfolio [1]
##    portfolio symbol date             Ra
##        <int> <chr>  <date>        <dbl>
##  1         1 AAPL   2015-03-31 -0.0361 
##  2         1 AAPL   2015-04-30  0.00579
##  3         1 AAPL   2015-05-29  0.0453 
##  4         1 AAPL   2015-06-30 -0.0372 
##  5         1 AAPL   2015-07-31 -0.0329 
##  6         1 AAPL   2015-08-31 -0.0662 
##  7         1 AAPL   2015-09-30 -0.0218 
##  8         1 AAPL   2015-10-30  0.0834 
##  9         1 AAPL   2015-11-30 -0.00580
## 10         1 AAPL   2015-12-31 -0.110  
## # … with 230 more rows

# Create Vector of Weights
# not all symbols need to be specified. Any symbol not specified by default gets a weight of zero.
# The following is what I tried based on the info on the Web.
# https://stackoverflow.com/questions/5622608/choosing-n-numbers-with-fixed-sum
# n = number of stocks in the portfolio
# Choose n - 1 numbers between 0 and 1
# Sort them and calculate the distance between each number

# Create a data frame # of columns = # of stocks in the portfolio and # of rows = # of portfolio
# n = # of stocks in the portfolio
# Case of two portfolios with three stocks
num_stocks <- stock_returns_monthly %>%
  distinct(symbol) %>%
  n_distinct()
num_stocks
## [1] 4

# Initialize data frame
weights <- list()

# Set the seed of random numbers so that they can be reproduced
set.seed(06182019)

# Loop through the values 1 to number of portfolios, to generate weights for stocks in the portfolio
for(i in 1:num_port){
  # Generate n-1 number of random numbers b/t 0 and 1
  weights[[i]] = runif(num_stocks-1, 0, 1)
  # Add 0 and 1
  weights[[i]] = append(weights[[i]], c(0, 1))
  # Sort the random numbers
  weights[[i]] = sort(weights[[i]])
  # Calculate the differences b/t the random numbers
  weights[[i]] = diff(weights[[i]])
}
weights[[1]]
## [1] 0.06385721 0.32707606 0.36002330 0.24904342

# Convert the list to a vector
weights <- unlist(weights)
weights
## [1] 0.06385721 0.32707606 0.36002330 0.24904342

# 
weights_table <-  stock_returns_monthly %>%
  distinct(symbol) %>%
    tq_repeat_df(n = num_port) %>%
    bind_cols(tibble(weights)) %>%
    group_by(portfolio)
weights_table
## # A tibble: 4 x 3
## # Groups:   portfolio [1]
##   portfolio symbol weights
##       <int> <chr>    <dbl>
## 1         1 AAPL    0.0639
## 2         1 MSFT    0.327 
## 3         1 FB      0.360 
## 4         1 NFLX    0.249

# Aggregate a Portfolio using Vector of Weights
portfolio_returns_monthly_multi  <-
  stock_returns_monthly_multi %>%
    tq_portfolio(assets_col  = symbol, 
                 returns_col = Ra, 
                 weights     = weights_table, 
                 col_rename  = "Ra")
portfolio_returns_monthly_multi 
## # A tibble: 60 x 3
## # Groups:   portfolio [1]
##    portfolio date            Ra
##        <int> <date>       <dbl>
##  1         1 2015-03-31 -0.0481
##  2         1 2015-04-30  0.123 
##  3         1 2015-05-29  0.0269
##  4         1 2015-06-30  0.0219
##  5         1 2015-07-31  0.115 
##  6         1 2015-08-31 -0.0350
##  7         1 2015-09-30 -0.0300
##  8         1 2015-10-30  0.120 
##  9         1 2015-11-30  0.0609
## 10         1 2015-12-31 -0.0205
## # … with 50 more rows

Calculate performance

# Merging Ra and Rb
RaRb_multi_portfolio <- left_join(portfolio_returns_monthly_multi , 
                                   baseline_returns_monthly,
                                   by = "date")
RaRb_multi_portfolio
## # A tibble: 60 x 4
## # Groups:   portfolio [1]
##    portfolio date            Ra        Rb
##        <int> <date>       <dbl>     <dbl>
##  1         1 2015-03-31 -0.0481 -0.0234  
##  2         1 2015-04-30  0.123   0.00852 
##  3         1 2015-05-29  0.0269  0.0105  
##  4         1 2015-06-30  0.0219 -0.0210  
##  5         1 2015-07-31  0.115   0.0197  
##  6         1 2015-08-31 -0.0350 -0.0626  
##  7         1 2015-09-30 -0.0300 -0.0264  
##  8         1 2015-10-30  0.120   0.0830  
##  9         1 2015-11-30  0.0609  0.000505
## 10         1 2015-12-31 -0.0205 -0.0175  
## # … with 50 more rows

# Beta and Alpha
RaRb_multi_portfolio %>%
  tq_performance(Ra = Ra, Rb = Rb, performance_fun = table.CAPM) %>%
  select(Alpha, Beta)
## # A tibble: 1 x 3
## # Groups:   portfolio [1]
##   portfolio  Alpha  Beta
##       <int>  <dbl> <dbl>
## 1         1 0.0165  1.16

Statistical significance of Alpha and Beta

library(broom)
# In the linear regression model below, Ra (portfolio returns of your interest, ESG) is the dependent variable, while Rb (returns of the benchmark portfolio, S&P500) is the independent variable
lm(Ra ~ Rb, RaRb_multi_portfolio) %>%
  tidy()
## # A tibble: 2 x 5
##   term        estimate std.error statistic      p.value
##   <chr>          <dbl>     <dbl>     <dbl>        <dbl>
## 1 (Intercept)   0.0165   0.00661      2.50 0.0152      
## 2 Rb            1.16     0.185        6.26 0.0000000499

Alpha is the intercept, which is statistically significant at 5% (i.e, its p-value is smaller than 0.05). Beta is the coefficient, which is statistically significant at 1% (i.e, its p-value is smaller than 0.01).