1. Load Libraries and data

1.1 Load libraries —-

1.2 Load RData file with stocks_prices and stocks_tbl —-

# Restore the objects saved in SP500_data_subset.RData into the calling
# environment; per the section heading these are stocks_prices and stocks_tbl.
load(file="SP500_data_subset.RData")

2. Process data

dim(stocks_prices)
## [1] 1426  488
# On 9/10/2024: 1426 days (rows) and 488 columns, i.e. the date column plus 487 stocks
head(stocks_prices)
## # A tibble: 6 × 488
##   date        AAPL  MSFT  NVDA  AMZN  META GOOGL `BRK-B`  GOOG   LLY   JPM  AVGO
##   <date>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019-01-02  37.8  95.5  3.38  77.0  135.  52.6    203.  52.2  105.  83.9  21.3
## 2 2019-01-03  34.0  92.0  3.17  75.0  131.  51.1    192.  50.7  102.  82.7  19.4
## 3 2019-01-04  35.4  96.3  3.38  78.8  138.  53.8    195.  53.4  105.  85.7  19.6
## 4 2019-01-07  35.4  96.4  3.56  81.5  138.  53.7    197.  53.3  106.  85.8  20.0
## 5 2019-01-08  36.0  97.1  3.47  82.8  142.  54.1    196.  53.7  107.  85.6  19.9
## 6 2019-01-09  36.6  98.5  3.54  83.0  144.  53.9    196.  53.6  108.  85.5  20.7
## # ℹ 476 more variables: TSLA <dbl>, UNH <dbl>, XOM <dbl>, V <dbl>, PG <dbl>,
## #   JNJ <dbl>, MA <dbl>, COST <dbl>, HD <dbl>, ABBV <dbl>, WMT <dbl>,
## #   MRK <dbl>, NFLX <dbl>, KO <dbl>, BAC <dbl>, ADBE <dbl>, PEP <dbl>,
## #   CVX <dbl>, CRM <dbl>, TMO <dbl>, AMD <dbl>, ORCL <dbl>, LIN <dbl>,
## #   ACN <dbl>, MCD <dbl>, ABT <dbl>, PM <dbl>, CSCO <dbl>, WFC <dbl>,
## #   IBM <dbl>, TXN <dbl>, GE <dbl>, QCOM <dbl>, VZ <dbl>, DHR <dbl>,
## #   INTU <dbl>, NOW <dbl>, AMGN <dbl>, ISRG <dbl>, NEE <dbl>, PFE <dbl>, …

2.1 Extract prices only (all dates, without the date column) into matrix stocks_prices0 —-

# Price matrix: every column of stocks_prices except the leading date column
stocks_prices0 <- as.matrix(stocks_prices[, -1])
# Peek at the first rows of the first ten stocks
head(stocks_prices0[, seq_len(10)])
##          AAPL     MSFT     NVDA    AMZN     META    GOOGL  BRK-B     GOOG
## [1,] 37.75009 95.50132 3.378612 76.9565 135.4018 52.60375 202.80 52.16439
## [2,] 33.98989 91.98801 3.174486 75.0140 131.4698 51.14685 191.66 50.67854
## [3,] 35.44090 96.26631 3.377868 78.7695 137.6671 53.77036 195.20 53.40435
## [4,] 35.36202 96.38908 3.556695 81.4755 137.7669 53.66312 196.91 53.28863
## [5,] 36.03613 97.08797 3.468149 82.8290 142.2377 54.13446 196.31 53.68216
## [6,] 36.64807 98.47630 3.536356 82.9710 143.9342 53.94891 196.37 53.60136
##           LLY      JPM
## [1,] 105.4700 83.85518
## [2,] 102.1924 82.66345
## [3,] 105.2680 85.71089
## [4,] 105.8372 85.77045
## [5,] 106.8104 85.60875
## [6,] 107.6091 85.46404

2.2 Create portfolio investing $1000, equal weighted across SP500 stocks

2.2.1 Define q vector of shares with equal dollar values at first date

# Shares per stock for an equal-dollar split of $1000 at the first date.
# The budget is divided by the number of price columns actually held
# (every column of stocks_prices except date) rather than by
# nrow(stocks_tbl), so the initial holdings sum to $1000 even if stocks_tbl
# lists symbols that have no price column (assuming no missing first-day
# prices).
q <- (1000 / (ncol(stocks_prices) - 1)) / stocks_prices[1, -1]
# Share counts as a one-column matrix, in the column order of stocks_prices0
q0 <- as.matrix(as.numeric(q))
# Daily portfolio value: prices (days x stocks) times shares (stocks x 1)
v1000_0 <- stocks_prices0 %*% q0
v1000_0.xts <- xts(x = v1000_0, order.by = stocks_prices$date)
# First date as text, used in plot titles
date_start <- as.character(stocks_prices$date[1])

2.2.2 Define data frame df_port_SP500

# One row per date: the portfolio's dollar value plus a label identifying
# the portfolio, so it can be stacked with other portfolios later
df_port_SP500 <- data.frame(
  date = stocks_prices[["date"]],
  value = v1000_0,
  portfolio = "Equal-Weighted SP500"
)

2.2.3 Plot Value of $1000 invested at start

# Line chart of the equal-weighted portfolio's value over time; the title
# carries the first date of the data and the y axis is shown in dollars
ggplot(df_port_SP500, aes(x = date, y = value)) +
  geom_line() +
  ggtitle(paste0("Value $1000 Invested ", date_start, " \n",
                 "Equal-Weighted Portfolio")) +
  scale_y_continuous(labels = scales::dollar_format())

2.3 Create portfolio investing $1000, equal weighted across AANG stocks

# Tickers making up the four-stock AANG portfolio
list_AANG <- c("AMZN", "AAPL", "NFLX", "GOOG")
# Column positions of those tickers in the price matrix (0 when absent)
index_AANG <- match(list_AANG, colnames(stocks_prices0), nomatch = 0)

2.3.1 Define q vector of shares with equal dollar values

# Start from an all-zero share vector with the same shape and names as q
q_AANG <- q * 0
# First-date AAPL price, shown as a check of name-based indexing
stocks_prices0[1, "AAPL"]
##     AAPL 
## 37.75009
# Give each AANG symbol an equal slice of the $1000 at first-date prices,
# printing the symbol and its share count along the way
for (symbol in list_AANG) {
  print(symbol)
  q_symbol <- (1000 / length(index_AANG)) / stocks_prices0[1, symbol]
  print(q_symbol)
  q_AANG[symbol] <- q_symbol
}
## [1] "AMZN"
##     AMZN 
## 3.248589 
## [1] "AAPL"
##   AAPL 
## 6.6225 
## [1] "NFLX"
##      NFLX 
## 0.9340208 
## [1] "GOOG"
##     GOOG 
## 4.792541
# Share counts as a one-column matrix (zero for every non-AANG stock)
q_AANG_0 <- matrix(as.numeric(q_AANG), ncol = 1)
# Daily AANG portfolio value: prices (days x stocks) times shares
v1000_AANG_0 <- stocks_prices0 %*% q_AANG_0

2.3.2 Define data frame df_port_AANG

# Same layout as df_port_SP500, holding the AANG portfolio's daily value
df_port_AANG <- data.frame(
  date = stocks_prices[["date"]],
  value = v1000_AANG_0,
  portfolio = "Equal-Weighted AANG"
)

# Stack both portfolios (SP500 rows first) so they can share one plot
df_port_all <- rbind(df_port_SP500, df_port_AANG)

2.3.3 Plot Value of $1000 equal-weighted invested at start

# Compare both portfolios over time, one coloured line per portfolio.
# The title takes the investment date from date_start (the first date in
# stocks_prices) instead of a hard-coded date that did not match the data.
ggplot(df_port_all, aes(x = date, y = value, col = portfolio)) +
  geom_line() +
  ggtitle(paste0("Value $1000 Invested ", date_start, "\n",
                 "Equal-Weighted SP500 \nvs Equal-Weighted AANG")) +
  scale_y_continuous(labels = scales::dollar_format()) +
  theme(legend.position = "bottom")

# Last rows of the stacked frame (these belong to the AANG portfolio)
tail(df_port_all)
##            date    value           portfolio
## 2847 2024-08-23 3520.158 Equal-Weighted AANG
## 2848 2024-08-26 3521.398 Equal-Weighted AANG
## 2849 2024-08-27 3518.676 Equal-Weighted AANG
## 2850 2024-08-28 3480.847 Equal-Weighted AANG
## 2851 2024-08-29 3509.794 Equal-Weighted AANG
## 2852 2024-08-30 3541.757 Equal-Weighted AANG

2.3.4 Find final and initial values of components

# Row index of the last date in the price matrix
n0 <- dim(stocks_prices0)[1]
# Dollar value held in each stock on the first date; only the AANG
# positions are non-zero, listed in the column order of stocks_prices0
Values_initial <- q_AANG_0 * stocks_prices0[1, ]
Values_initial[Values_initial != 0]
## [1] 250 250 250 250
# The same holdings valued at the last date's prices
Values_final <- q_AANG_0 * stocks_prices0[n0, ]
Values_final[Values_final != 0]
## [1] 1516.5526  579.8731  790.2563  655.0754

2.3.5 Plot time series of values of individual components

# Build one long data frame with the daily dollar value of each AANG
# position (shares held times price), labelled by symbol.
# The per-symbol frames are created first and bound once at the end,
# rather than growing df_all with rbind() inside a loop guarded by an
# initialisation flag.
df_all <- do.call(
  rbind,
  lapply(list_AANG, function(sym) {
    data.frame(
      date = stocks_prices$date,
      Value = as.numeric(
        as.numeric(q_AANG[sym]) * stocks_prices0[, sym]),
      symbol = sym
    )
  })
)

tail(df_all)
##            date    Value symbol
## 5699 2024-08-23 801.3603   GOOG
## 5700 2024-08-26 803.7534   GOOG
## 5701 2024-08-27 796.3348   GOOG
## 5702 2024-08-28 787.3366   GOOG
## 5703 2024-08-29 782.0717   GOOG
## 5704 2024-08-30 790.2563   GOOG
# One line per AANG symbol showing how its $250 position evolved.
# The title uses date_start (the first date in stocks_prices) so the stated
# investment date matches the data instead of a hard-coded date.
ggplot(df_all, aes(x = date, y = Value, col = symbol)) +
  geom_line() +
  theme(legend.position = "bottom") +
  ggtitle(paste0("Value of $250 Invested on ", date_start)) +
  scale_y_continuous(labels = scales::dollar_format())