# Load packages

# Core
library(tidyverse)
library(tidyquant)

1 Import stock prices

Revise the code below.

symbols <- c("SPY", "EFA", "IJS", "EEM", "AGG")

prices <- tq_get(x    = symbols,
                 get  = "stock.prices",    
                 from = "2012-12-31",
                 to   = "2017-12-31")

2 Convert prices to returns

asset_returns_tbl <- prices %>%
    
    group_by(symbol) %>%
    
    tq_transmute(select     = adjusted, 
                 mutate_fun = periodReturn, 
                 period     = "monthly",
                 type       = "log") %>%
    
    slice(-1) %>%
    
    ungroup() %>%
    
    set_names(c("asset", "date", "returns"))

3 Assign a weight to each asset

Revise the code for weights.

# symbols
symbols <- asset_returns_tbl %>% distinct(asset) %>% pull()
symbols
## [1] "AGG" "EEM" "EFA" "IJS" "SPY"
# weights
weights <- c(0.25, 0.25, 0.2, 0.2, 0.1)
weights
## [1] 0.25 0.25 0.20 0.20 0.10
w_tbl <- tibble(symbols, weights)
w_tbl
## # A tibble: 5 Ă— 2
##   symbols weights
##   <chr>     <dbl>
## 1 AGG        0.25
## 2 EEM        0.25
## 3 EFA        0.2 
## 4 IJS        0.2 
## 5 SPY        0.1

4 Build a portfolio

# ?tq_portfolio

portfolio_returns_tbl <- asset_returns_tbl %>%
    
    tq_portfolio(assets_col = asset, 
                 returns_col = returns, 
                 weights = w_tbl, 
                 rebalance_on = "months", 
                 col_rename = "returns")

portfolio_returns_tbl
## # A tibble: 60 Ă— 2
##    date        returns
##    <date>        <dbl>
##  1 2013-01-31  0.0204 
##  2 2013-02-28 -0.00239
##  3 2013-03-28  0.0121 
##  4 2013-04-30  0.0174 
##  5 2013-05-31 -0.0128 
##  6 2013-06-28 -0.0247 
##  7 2013-07-31  0.0321 
##  8 2013-08-30 -0.0224 
##  9 2013-09-30  0.0511 
## 10 2013-10-31  0.0301 
## # ℹ 50 more rows

5 Simulating growth of a dollar

# Get mean portfolio return
mean_port_return <- mean(portfolio_returns_tbl$returns)
mean_port_return
## [1] 0.005899132
# Get standard deviation of portfolio returns
stddev_port_return <- sd(portfolio_returns_tbl$returns)
stddev_port_return
## [1] 0.02347493
# Construct a normal distribution
simulated_monthly_returns <- rnorm(120, mean_port_return, stddev_port_return)
simulated_monthly_returns
##   [1]  0.0222412170  0.0383759888  0.0183399239  0.0143930967  0.0370389848
##   [6] -0.0007300877  0.0042532121 -0.0163597987 -0.0320716765 -0.0122004990
##  [11]  0.0025834904 -0.0076160435  0.0271044034 -0.0136054598  0.0201268340
##  [16]  0.0273221349 -0.0260620144  0.0205040922  0.0061316767 -0.0062621334
##  [21] -0.0122030170  0.0462428756  0.0329807877  0.0462951038  0.0189364387
##  [26] -0.0151261311  0.0143853267  0.0095828317 -0.0129709480  0.0345309274
##  [31] -0.0006621876  0.0075842253  0.0350110238  0.0559959445  0.0012460039
##  [36]  0.0143933641  0.0280510888  0.0041407274 -0.0019236887  0.0088747492
##  [41] -0.0232482588  0.0400093852  0.0222571658  0.0403749086  0.0339810527
##  [46]  0.0072623449 -0.0093291705  0.0148681797  0.0438333202 -0.0182160613
##  [51] -0.0217574485  0.0138607721 -0.0277365918  0.0288505754 -0.0169009364
##  [56] -0.0186767407 -0.0077390828  0.0178973876 -0.0138267275  0.0222417510
##  [61]  0.0280083112  0.0266230753  0.0072731589 -0.0146804335  0.0155728726
##  [66]  0.0407817528 -0.0087583557  0.0561380508  0.0034015288 -0.0072961169
##  [71]  0.0065478314  0.0192109107  0.0617179474 -0.0193414509  0.0214597699
##  [76]  0.0080583052  0.0440215712 -0.0304617213 -0.0722646712  0.0280837098
##  [81] -0.0018306322  0.0444219133  0.0196235036  0.0108498335 -0.0325571895
##  [86]  0.0288950714  0.0413541742  0.0270157542  0.0157397037 -0.0314248614
##  [91] -0.0006876908 -0.0023692899  0.0025358132 -0.0383695934  0.0341578188
##  [96] -0.0132406932  0.0306871846 -0.0246796006  0.0026039050  0.0273435707
## [101] -0.0088142153  0.0091615564 -0.0074881598  0.0073428032  0.0267008340
## [106] -0.0196576413  0.0004329239  0.0318423513  0.0201150139 -0.0044951843
## [111]  0.0010223751 -0.0049488856  0.0316536878  0.0153109944 -0.0031987540
## [116]  0.0125930564  0.0721231238 -0.0141746711 -0.0291194612 -0.0073125565
# Add a dollar
simulated_returns_add_1 <- tibble(returns = c(1, 1 + simulated_monthly_returns))
simulated_returns_add_1
## # A tibble: 121 Ă— 1
##    returns
##      <dbl>
##  1   1    
##  2   1.02 
##  3   1.04 
##  4   1.02 
##  5   1.01 
##  6   1.04 
##  7   0.999
##  8   1.00 
##  9   0.984
## 10   0.968
## # ℹ 111 more rows
# Calculate the cumulative growth of a dollar
simulated_growth <- simulated_returns_add_1 %>%
    mutate(growth = accumulate(returns, function(x, y) x*y)) %>%
    select(growth)

simulated_growth
## # A tibble: 121 Ă— 1
##    growth
##     <dbl>
##  1   1   
##  2   1.02
##  3   1.06
##  4   1.08
##  5   1.10
##  6   1.14
##  7   1.14
##  8   1.14
##  9   1.12
## 10   1.09
## # ℹ 111 more rows
# Check the compound annual growth rate
cagr <- ((simulated_growth$growth[nrow(simulated_growth)]^(1/10)) - 1) * 100
cagr
## [1] 10.64269

6 Simulation function

simulate_accumulation <- function(initial_value, N, mean_return, sd_return) {
    
    # Add a dollar
    simulated_returns_add_1 <- tibble(returns = c(initial_value, 1 + rnorm(N, mean_return, sd_return)))
    
    # Calculate the cumulative growth of a dollar
    simulated_growth <- simulated_returns_add_1 %>%
        mutate(growth = accumulate(returns, function(x, y) x*y)) %>%
        select(growth)
    
    return(simulated_growth)    
    
}

simulate_accumulation(initial_value = 100, N = 240, mean_return = 0.005, sd_return = 0.01) %>%
    tail()
## # A tibble: 6 Ă— 1
##   growth
##    <dbl>
## 1   421.
## 2   426.
## 3   427.
## 4   428.
## 5   422.
## 6   423.

7 Running multiple simulations

# Create a vector of 1s as a starting point
sims <- 51
starts <- rep(1, sims) %>%
    set_names(paste0("sim", 1:sims))

starts
##  sim1  sim2  sim3  sim4  sim5  sim6  sim7  sim8  sim9 sim10 sim11 sim12 sim13 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## sim14 sim15 sim16 sim17 sim18 sim19 sim20 sim21 sim22 sim23 sim24 sim25 sim26 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## sim27 sim28 sim29 sim30 sim31 sim32 sim33 sim34 sim35 sim36 sim37 sim38 sim39 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## sim40 sim41 sim42 sim43 sim44 sim45 sim46 sim47 sim48 sim49 sim50 sim51 
##     1     1     1     1     1     1     1     1     1     1     1     1
# Simulate
# for reproducible research
set.seed(1234)

monte_carlo_sim_51 <- starts %>%
    
    # Simulate
    map_dfc(.x = .,
            .f = ~simulate_accumulation(initial_value = .x, 
                                        N             = 120, 
                                        mean_return   = mean_port_return, 
                                        sd_return     = stddev_port_return)) %>%
    
    # Add column month
    mutate(month = 1:nrow(.)) %>%
    select(month, everything()) %>%
    
    # Rearrange column names
    set_names(c("month", names(starts))) %>%
    
    # Transform to long form
    pivot_longer(cols = -month, names_to = "sim", values_to = "growth")

monte_carlo_sim_51
## # A tibble: 6,171 Ă— 3
##    month sim   growth
##    <int> <chr>  <dbl>
##  1     1 sim1       1
##  2     1 sim2       1
##  3     1 sim3       1
##  4     1 sim4       1
##  5     1 sim5       1
##  6     1 sim6       1
##  7     1 sim7       1
##  8     1 sim8       1
##  9     1 sim9       1
## 10     1 sim10      1
## # ℹ 6,161 more rows

8 Visualizing simulations with ggplot

monte_carlo_sim_51 %>%
    
    ggplot(aes(x = month, y = growth, color = sim)) +
    geom_line() +
    theme(legend.position = "none") + 
    theme(plot.title = element_text(hjust = 0.5)) +
    
    labs(title = "Simulating growth of $1 over 120 months")

Based on the Monte Carlo simulation results, how much should you expect from your $100 investment after 20 years? What is the best-case scenario? What is the worst-case scenario? What are limitations of this simulation analysis?

The Monte Carlo simulation gives an estimate of how a $100 investment could grow over 20 years, but it has several limitations. It assumes returns follow a normal distribution, even though real markets often experience extreme events and irregular volatility. The simulation also relies on historical averages for returns and risk, which may not reflect future conditions. It treats each month as independent and assumes constant volatility, ignoring market cycles and trends. Finally, it doesn’t account for real-world factors like taxes, fees, or inflation, and the limited number of simulations reduces the accuracy of the results.