# Load packages

# Core
library(tidyverse)
library(tidyquant)

# Source function

1 Import stock prices

Revise the code below.

symbols <- c("AAPL", "CL=F", "ROKU")

prices <- tq_get(x    = symbols,
                 get  = "stock.prices",    
                 from = "2012-12-31",
                 to   = "2023-10-31")

2 Convert prices to returns

asset_returns_tbl <- prices %>%
    
    group_by(symbol) %>%
    
    tq_transmute(select     = adjusted, 
                 mutate_fun = periodReturn, 
                 period     = "monthly",
                 type       = "log") %>%
    
    slice(-1) %>%
    
    ungroup() %>%
    
    set_names(c("asset", "date", "returns"))

3 Assign a weight to each asset

Revise the code for weights.

# symbols
symbols <- asset_returns_tbl %>% distinct(asset) %>% pull()
symbols
## [1] "AAPL" "CL=F" "ROKU"
# weights
weights <- c(0.5, 0.3, 0.2)
weights
## [1] 0.5 0.3 0.2
w_tbl <- tibble(symbols, weights)
w_tbl
## # A tibble: 3 × 2
##   symbols weights
##   <chr>     <dbl>
## 1 AAPL        0.5
## 2 CL=F        0.3
## 3 ROKU        0.2

4 Build a portfolio

portfolio_returns_tbl <- asset_returns_tbl %>%
    
    tq_portfolio(assets_col = asset, 
                 returns_col = returns, 
                 weights = w_tbl, 
                 rebalance_on = "months", 
                 col_rename = "returns")

portfolio_returns_tbl
## # A tibble: 130 × 2
##    date        returns
##    <date>        <dbl>
##  1 2013-01-31 -0.0598 
##  2 2013-02-28 -0.0300 
##  3 2013-03-28  0.0178 
##  4 2013-04-30 -0.0117 
##  5 2013-05-31  0.00626
##  6 2013-06-28 -0.0483 
##  7 2013-07-31  0.0913 
##  8 2013-08-30  0.0476 
##  9 2013-09-30 -0.0261 
## 10 2013-10-31  0.0280 
## # ℹ 120 more rows

5 Simulating growth of a dollar

# Get mean portfolio return
mean_port_return <- mean(portfolio_returns_tbl$returns)
mean_port_return
## [1] 0.009936184
# Get standard deviation of portfolio returns
stddev_port_return <- sd(portfolio_returns_tbl$returns)
stddev_port_return
## [1] 0.07833957

6 Simulation function

simulate_accumulation <- function(initial_value, N, mean_return, sd_return) {
  
  # Add a dollar
  simulated_returns_add_1 <- tibble(returns = c(initial_value, 1 + rnorm(N, mean_return, sd_return))) 
  
  # Calculate the cumulative growth of a dollar
  simulated_growth <- simulated_returns_add_1 %>%
    mutate(growth = accumulate(returns, function(x, y) x*y)) %>%
    select(growth)
  
  return(simulated_growth)
  
}

simulate_accumulation(initial_value = 100, N = 120, mean_return = 0.005, sd_return = 0.01) %>%
  tail()
## # A tibble: 6 × 1
##   growth
##    <dbl>
## 1   186.
## 2   186.
## 3   188.
## 4   190.
## 5   193.
## 6   191.
dump(list = c("simulate_accumulation"),
     file = "../00_scripts/simulate_accumulation.R")

7 Running multiple simulations

# Create a vector of 1s as a starting point
sims <- 51
starts <- rep(100, sims) %>%
  set_names(paste0("sim", 1:sims))

starts
##  sim1  sim2  sim3  sim4  sim5  sim6  sim7  sim8  sim9 sim10 sim11 sim12 sim13 
##   100   100   100   100   100   100   100   100   100   100   100   100   100 
## sim14 sim15 sim16 sim17 sim18 sim19 sim20 sim21 sim22 sim23 sim24 sim25 sim26 
##   100   100   100   100   100   100   100   100   100   100   100   100   100 
## sim27 sim28 sim29 sim30 sim31 sim32 sim33 sim34 sim35 sim36 sim37 sim38 sim39 
##   100   100   100   100   100   100   100   100   100   100   100   100   100 
## sim40 sim41 sim42 sim43 sim44 sim45 sim46 sim47 sim48 sim49 sim50 sim51 
##   100   100   100   100   100   100   100   100   100   100   100   100
# Simulate
# for reproducible research
set.seed(1234)

monte_carlo_sim_51 <- starts %>%
  
  # Simulate
  map_dfc(simulate_accumulation,
          N = 240,
          mean = mean_port_return,
          sd_return = stddev_port_return) %>%
  
  # Add the column, month
  mutate(month = seq(1:nrow(.))) %>%
  
  # Arrange column names
  select(month, everything()) %>%
  set_names(c("month", names(starts))) %>%
  
  pivot_longer(cols = -month, names_to = "sim", values_to = "growth")

monte_carlo_sim_51
## # A tibble: 12,291 × 3
##    month sim   growth
##    <int> <chr>  <dbl>
##  1     1 sim1     100
##  2     1 sim2     100
##  3     1 sim3     100
##  4     1 sim4     100
##  5     1 sim5     100
##  6     1 sim6     100
##  7     1 sim7     100
##  8     1 sim8     100
##  9     1 sim9     100
## 10     1 sim10    100
## # ℹ 12,281 more rows
# Calculate the quantiles for simulated values

probs <- c(0, 0.25, 0.5, 0.75, 1)

monte_carlo_sim_51 %>%
  
  group_by(sim) %>%
  summarise(growth = last(growth)) %>%
  ungroup() %>%
  pull(growth) %>%
  
  # Find the quantiles
  quantile(probs = probs) %>%
  round(2)
##      0%     25%     50%     75%    100% 
##   41.39  320.47  754.03 1517.09 4490.51

8 Visualizing simulations with ggplot

Line plot with max, median, and min

# Step 1 Summarize data into max, median, and min of last value
sim_summary <- monte_carlo_sim_51 %>%
  
  group_by(sim) %>%
  summarise(growth = last(growth)) %>%
  ungroup() %>%
  
  summarise(max    = max(growth),
            median = median(growth),
            min    = min(growth))

sim_summary
## # A tibble: 1 × 3
##     max median   min
##   <dbl>  <dbl> <dbl>
## 1 4491.   754.  41.4
monte_carlo_sim_51 %>%
  
  group_by(sim) %>%
  filter(last(growth) == sim_summary$max |
         last(growth) == sim_summary$median |
         last(growth) == sim_summary$min) %>%
  ungroup() %>%
  
  # Plot
  ggplot(aes(x = month, y = growth, col = sim)) +
  geom_line() +
  
  theme(legend.position = "none") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(plot.subtitle = element_text(hjust = 0.5)) +
  
  labs(title = "Simulating Growth of $100 over 240 months",
       subtitle = "Max, Median, and Minimum Simulation")

Based on the Monte Carlo simulation results, how much should you expect from your $100 investment after 20 years? The median results were $754, which is what I should expect What is the best-case scenario? The best-case scenario is $4,491! What is the worst-case scenario? The worst case scenario is $41, which is a loss of almost $60. What are limitations of this simulation analysis? The limitations to this analysis is that it is probably more optimistic than real. The second limitation is about the data set we used. During the first five years, we were in a raising bull market, which means that the mean is probably on the high side for the first half of the graph, but since we have results until present time, the analysis is a bit more accurate.