# Load packages
# Core
library(tidyverse)
library(tidyquant)
# Source function
source("../00_scripts/simulate_accumulation.R")
Revise the code below.
# Tickers that make up the portfolio.
symbols <- c("LLY", "HD", "UA", "TSLA", "NVDA")

# Download stock prices for every ticker from 2012-12-31 onward
# (no `to` date is passed).
prices <- tq_get(
  x    = symbols,
  get  = "stock.prices",
  from = "2012-12-31"
)

# Turn adjusted prices into monthly log returns, one series per ticker,
# and return a long table with columns asset / date / returns.
asset_returns_tbl <- prices %>%
  group_by(symbol) %>%
  tq_transmute(
    select     = adjusted,
    mutate_fun = periodReturn,
    period     = "monthly",
    type       = "log",
    col_rename = "returns"
  ) %>%
  # Drop the first row of each ticker's return series.
  slice(-1) %>%
  ungroup() %>%
  rename(asset = symbol)
Revise the code for weights.
# Symbols: the distinct asset names found in the returns table.
# The column is named explicitly rather than relying on pull()'s default.
symbols <- asset_returns_tbl %>%
  distinct(asset) %>%
  pull(asset)
symbols
## [1] "HD" "LLY" "NVDA" "TSLA" "UA"

# Weights: equal weighting derived from the number of symbols, so the
# vector always lines up with `symbols` if the ticker list changes.
weights <- rep(1 / length(symbols), times = length(symbols))
# Guard against a weight vector that does not describe a full allocation.
stopifnot(
  length(weights) == length(symbols),
  isTRUE(all.equal(sum(weights), 1))
)
weights
## [1] 0.2 0.2 0.2 0.2 0.2

# Pair each symbol with its weight in a two-column tibble.
w_tbl <- tibble(symbols, weights)
w_tbl
## # A tibble: 5 × 2
## symbols weights
## <chr> <dbl>
## 1 HD 0.2
## 2 LLY 0.2
## 3 NVDA 0.2
## 4 TSLA 0.2
## 5 UA 0.2
# Collapse the per-asset monthly returns into one portfolio return series,
# weighting assets by `w_tbl` and rebalancing every month.
# NOTE(review): the asset returns are computed upstream with type = "log";
# weighted portfolio aggregation is usually defined on arithmetic returns --
# confirm that feeding log returns here is intended.
portfolio_returns_tbl <- tq_portfolio(
  data         = asset_returns_tbl,
  assets_col   = asset,
  returns_col  = returns,
  weights      = w_tbl,
  rebalance_on = "months",
  col_rename   = "returns"
)
portfolio_returns_tbl
## # A tibble: 155 × 2
## date returns
## <date> <dbl>
## 1 2013-01-31 0.0532
## 2 2013-02-28 0.00290
## 3 2013-03-28 0.0320
## 4 2013-04-30 0.0899
## 5 2013-05-31 0.137
## 6 2013-06-28 -0.00517
## 7 2013-07-31 0.0700
## 8 2013-08-30 0.0347
## 9 2013-09-30 0.0381
## 10 2013-10-31 -0.0395
## # ℹ 145 more rows
# Mean of the monthly portfolio returns
mean_port_return <- portfolio_returns_tbl %>%
  pull(returns) %>%
  mean()
mean_port_return
## [1] 0.01916024

# Standard deviation of the monthly portfolio returns
stddev_port_return <- portfolio_returns_tbl %>%
  pull(returns) %>%
  sd()
stddev_port_return
## [1] 0.06584246
No need
# Create the starting values: one $100 initial investment per simulation,
# named sim1, sim2, ..., sim<sims>.
sims <- 51
# seq_len() stays correct (empty) if sims is ever 0, unlike 1:sims.
starts <- setNames(rep(100, sims), paste0("sim", seq_len(sims)))
starts
## sim1 sim2 sim3 sim4 sim5 sim6 sim7 sim8 sim9 sim10 sim11 sim12 sim13
## 100 100 100 100 100 100 100 100 100 100 100 100 100
## sim14 sim15 sim16 sim17 sim18 sim19 sim20 sim21 sim22 sim23 sim24 sim25 sim26
## 100 100 100 100 100 100 100 100 100 100 100 100 100
## sim27 sim28 sim29 sim30 sim31 sim32 sim33 sim34 sim35 sim36 sim37 sim38 sim39
## 100 100 100 100 100 100 100 100 100 100 100 100 100
## sim40 sim41 sim42 sim43 sim44 sim45 sim46 sim47 sim48 sim49 sim50 sim51
## 100 100 100 100 100 100 100 100 100 100 100 100
# Create the starting values: one $100 initial investment per simulation,
# named sim1, sim2, ..., sim<sims>.
# This re-defines `sims` and `starts` with the same values as the
# preceding chunk.
sims <- 51
# seq_len() stays correct (empty) if sims is ever 0, unlike 1:sims.
starts <- setNames(rep(100, sims), paste0("sim", seq_len(sims)))
starts
## sim1 sim2 sim3 sim4 sim5 sim6 sim7 sim8 sim9 sim10 sim11 sim12 sim13
## 100 100 100 100 100 100 100 100 100 100 100 100 100
## sim14 sim15 sim16 sim17 sim18 sim19 sim20 sim21 sim22 sim23 sim24 sim25 sim26
## 100 100 100 100 100 100 100 100 100 100 100 100 100
## sim27 sim28 sim29 sim30 sim31 sim32 sim33 sim34 sim35 sim36 sim37 sim38 sim39
## 100 100 100 100 100 100 100 100 100 100 100 100 100
## sim40 sim41 sim42 sim43 sim44 sim45 sim46 sim47 sim48 sim49 sim50 sim51
## 100 100 100 100 100 100 100 100 100 100 100 100
# Simulate
# Fix the RNG seed so the simulated paths are reproducible.
set.seed(1234)

# Build a long table (month, sim, growth) with one simulated growth path
# per element of `starts`.
monte_carle_sim_51 <- starts %>%
  # One simulate_accumulation() call per starting value, bound column-wise.
  map_dfc(
    .f = ~ simulate_accumulation(
      initial_value = .x,
      N             = 240,
      mean_return   = mean_port_return,
      sd_return     = stddev_port_return
    )
  ) %>%
  # Number the rows as months; row_number() avoids the 1:nrow(.) pitfall,
  # which yields c(1, 0) on an empty table.
  mutate(month = row_number()) %>%
  select(month, everything()) %>%
  # Label the simulation columns with the names carried by `starts`.
  set_names(c("month", names(starts))) %>%
  # Transform to long form: one row per (month, sim) pair.
  pivot_longer(cols = -month, names_to = "sim", values_to = "growth")
# Quantiles (min, quartiles, max) of the final growth value across
# simulations, rounded to two decimals.
monte_carle_sim_51 %>%
  group_by(sim) %>%
  # Keep only the last row of each simulation.
  slice_tail(n = 1) %>%
  ungroup() %>%
  pull(growth) %>%
  quantile(probs = seq(0, 1, by = 0.25)) %>%
  round(digits = 2)
## 0% 25% 50% 75% 100%
## 703.65 3852.30 7817.91 14099.21 34793.23
Line Plot of Simulations with Max, Median, and Min
# Step 1: reduce each simulation to its final growth value, then take the
# max, median, and min of those final values (a 1 x 3 tibble).
sim_summary <- monte_carle_sim_51 %>%
  group_by(sim) %>%
  slice_tail(n = 1) %>%
  ungroup() %>%
  summarise(
    max    = max(growth),
    median = median(growth),
    min    = min(growth)
  )
sim_summary
## # A tibble: 1 × 3
## max median min
## <dbl> <dbl> <dbl>
## 1 34793. 7818. 704.
# Step 2 Plot
# Draw only the simulations whose final value equals the max, median, or
# min in `sim_summary`. The values are matched exactly; with an odd number
# of simulations the median is itself one simulation's final value.
monte_carle_sim_51 %>%
  group_by(sim) %>%
  filter(
    last(growth) %in%
      c(sim_summary$max, sim_summary$median, sim_summary$min)
  ) %>%
  ungroup() %>%
  # One line per retained simulation; legend hidden, titles centred.
  ggplot(aes(x = month, y = growth, color = sim)) +
  geom_line() +
  theme(
    legend.position = "none",
    plot.title      = element_text(hjust = 0.5),
    plot.subtitle   = element_text(hjust = 0.5)
  ) +
  labs(
    title    = "Simulating Growth of $100 over 240 months",
    subtitle = "Maximum, Median, and Minimum Simulation"
  )
Based on the Monte Carlo simulation, the median ending value I can expect after 20 years from a $100 initial investment is about $7,818.
The best case scenario is that my initial investment can grow to $34,793 after 20 years
The worst case scenario is that my initial investment can only grow to $704 after 20 years
The results heavily depend on the assumptions: Monte Carlo simulations are only as good as the assumptions, so unrealistic distributions and incorrect means/variances will produce incorrect outputs.
They typically require a large amount of data to be accurate: You need stronger historical data to estimate realistic probability distributions. If the dataset is small or biased, then the simulated outcomes will not reflect reality.
The random draws change every time you run the analysis unless a seed is fixed (as with set.seed(1234) above): This means that results will vary from run to run and confidence in exact numerical outputs should be limited. You can reduce noise by running more simulations, but this increases computation.