# ============================================================
# AI-Assisted Portfolio Construction and Backtesting
# Strategy: Quality-Momentum Global/Mixed Portfolio
# Benchmark: ACWI (MSCI All Country World ETF)
# Backtest Period: 3 Years
# ============================================================

# --- SECTION 1: Load Required Libraries ---
# Each library serves a specific purpose:
# tidyquant    - download financial data from Yahoo Finance
# PortfolioAnalytics - portfolio optimization framework
# PerformanceAnalytics - calculate risk/return metrics
# ROI + plugin - solver backend for optimization
# tidyverse    - data manipulation
# xts          - time series objects required by PortfolioAnalytics

library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Warning: package 'xts' was built under R version 4.5.3
## Warning: package 'zoo' was built under R version 4.5.2
## Warning: package 'quantmod' was built under R version 4.5.2
## Warning: package 'TTR' was built under R version 4.5.2
## Warning: package 'PerformanceAnalytics' was built under R version 4.5.3
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.12 ──
## ✔ PerformanceAnalytics 2.1.0      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.28     ✔ xts                  0.14.2
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(PortfolioAnalytics)
## Warning: package 'PortfolioAnalytics' was built under R version 4.5.3
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 4.5.3
## Registered S3 method overwritten by 'PortfolioAnalytics':
##   method           from
##   print.constraint ROI
library(PerformanceAnalytics)
library(ROI)
## Warning: package 'ROI' was built under R version 4.5.3
## ROI: R Optimization Infrastructure
## Registered solver plugins: nlminb, symphony, glpk, quadprog.
## Default solver: auto.
## 
## Attaching package: 'ROI'
## 
## The following objects are masked from 'package:PortfolioAnalytics':
## 
##     is.constraint, objective
library(ROI.plugin.quadprog)
## Warning: package 'ROI.plugin.quadprog' was built under R version 4.5.3
library(ROI.plugin.glpk)
## Warning: package 'ROI.plugin.glpk' was built under R version 4.5.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks xts::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::last()       masks xts::last()
## ✖ purrr::when()       masks foreach::when()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(xts)

# --- SECTION 2: Define Tickers and Date Range ---
# Portfolio tickers (10 assets)
tickers <- c("QUAL", "IMTM", "MTUM", "MSCI", "ASML",
             "EWJ", "VWO", "IHI", "IBIT", "GLD")

# Benchmark: ACWI = iShares MSCI All Country World ETF
benchmark_ticker <- "ACWI"

# Set backtest window: 3 years back from today
end_date   <- Sys.Date()
start_date <- end_date - years(3)

cat("Backtest period:", as.character(start_date), "to", as.character(end_date), "\n")
## Backtest period: 2023-05-25 to 2026-05-25
# --- SECTION 3: Download Adjusted Price Data ---
# tq_get() pulls adjusted closing prices from Yahoo Finance
# "adjusted" accounts for dividends and splits — critical for accurate return calculation

# NOTE: IBIT (Bitcoin ETF) only launched January 2024.
# If you get an error or <3 years of data for IBIT, this is expected.
# See handling note below.

raw_prices <- tq_get(
  c(tickers, benchmark_ticker),
  from = start_date,
  to   = end_date,
  get  = "stock.prices"
) %>%
  select(symbol, date, adjusted)

# Check for any tickers with missing or short data
data_check <- raw_prices %>%
  group_by(symbol) %>%
  summarise(
    start     = min(date),
    end       = max(date),
    n_obs     = n(),
    pct_complete = round(n() / as.numeric(end_date - start_date) * 100, 1)
  )
print(data_check)
## # A tibble: 11 × 5
##    symbol start      end        n_obs pct_complete
##    <chr>  <date>     <date>     <int>        <dbl>
##  1 ACWI   2023-05-25 2026-05-22   751         68.5
##  2 ASML   2023-05-25 2026-05-22   751         68.5
##  3 EWJ    2023-05-25 2026-05-22   751         68.5
##  4 GLD    2023-05-25 2026-05-22   751         68.5
##  5 IBIT   2024-01-11 2026-05-22   593         54.1
##  6 IHI    2023-05-25 2026-05-22   751         68.5
##  7 IMTM   2023-05-25 2026-05-22   751         68.5
##  8 MSCI   2023-05-25 2026-05-22   751         68.5
##  9 MTUM   2023-05-25 2026-05-22   751         68.5
## 10 QUAL   2023-05-25 2026-05-22   751         68.5
## 11 VWO    2023-05-25 2026-05-22   751         68.5
# IMPORTANT: Review this table. Any ticker with <80% completeness may distort results.

# --- SECTION 4: Handle IBIT Data Limitation ---
# IBIT launched January 2024 (~16 months of data as of mid-2025).
# For a clean 3-year backtest, replace IBIT with GLD or remove it.
# Decision: We fill IBIT's pre-launch period with NA and then trim the portfolio
# to only include the common date range across ALL tickers.

# Pivot to wide format (dates as rows, tickers as columns)
prices_wide <- raw_prices %>%
  pivot_wider(names_from = symbol, values_from = adjusted) %>%
  arrange(date)

# Find common complete date range (drop rows with ANY NA)
prices_clean <- prices_wide %>%
  drop_na()

cat("\nClean data rows after dropping NAs:", nrow(prices_clean), "\n")
## 
## Clean data rows after dropping NAs: 593
cat("Effective backtest start:", as.character(min(prices_clean$date)), "\n")
## Effective backtest start: 2024-01-11
# NOTE: If IBIT causes your effective start to be Jan 2024 instead of 3 years ago,
# consider removing IBIT from tickers and replacing with another asset.

# --- SECTION 5: Calculate Daily Returns ---
# We use simple (arithmetic) returns for PortfolioAnalytics
# log returns are theoretically preferable but PortfolioAnalytics expects simple returns

returns_wide <- prices_clean %>%
  mutate(across(-date, ~ . / lag(.) - 1)) %>%
  slice(-1)  # remove first row (NA from lag)

# Separate portfolio returns from benchmark returns
portfolio_returns_df <- returns_wide %>%
  select(date, all_of(tickers))

benchmark_returns_df <- returns_wide %>%
  select(date, all_of(benchmark_ticker))

# Convert to xts objects — required format for PortfolioAnalytics
portfolio_xts  <- xts(portfolio_returns_df %>% select(-date),
                      order.by = portfolio_returns_df$date)

benchmark_xts  <- xts(benchmark_returns_df %>% select(-date),
                      order.by = benchmark_returns_df$date)

# --- SECTION 6: Portfolio Specification ---
# Define the portfolio object with constraints

port_spec <- portfolio.spec(assets = tickers)

# Constraint 1: Full investment (weights sum to 100%)
port_spec <- add.constraint(port_spec, type = "full_investment")

# Constraint 2: Long-only (no short selling)
port_spec <- add.constraint(port_spec, type = "long_only")

# Constraint 3: Maximum 20% per asset (per project requirement)
port_spec <- add.constraint(
  port_spec,
  type    = "box",
  min     = 0.02,   # minimum 2% per asset to avoid trivial allocations
  max     = 0.20    # maximum 20% cap per asset
)

# Objective: Maximize Sharpe Ratio
# PortfolioAnalytics maximizes by minimizing negative Sharpe
port_spec <- add.objective(port_spec, type = "return", name = "mean")
port_spec <- add.objective(port_spec, type = "risk",   name = "StdDev")

# --- SECTION 7: Portfolio Optimization ---
# Method: ROI (quadratic programming) — deterministic, faster than random portfolios
# risk_aversion parameter controls the return/risk tradeoff in Sharpe optimization

cat("\nRunning portfolio optimization...\n")
## 
## Running portfolio optimization...
opt_result <- optimize.portfolio(
  R              = portfolio_xts,
  portfolio      = port_spec,
  optimize_method = "ROI",
  maxSR          = TRUE,    # maximize Sharpe Ratio directly
  trace          = TRUE
)

# Extract optimized weights
opt_weights <- extractWeights(opt_result)
cat("\nOptimized Portfolio Weights:\n")
## 
## Optimized Portfolio Weights:
print(round(opt_weights, 4))
##   QUAL   IMTM   MTUM   MSCI   ASML    EWJ    VWO    IHI   IBIT    GLD 
## 0.1526 0.1474 0.2000 0.0200 0.0200 0.0200 0.2000 0.0200 0.0200 0.2000
cat("Sum of weights:", round(sum(opt_weights), 6), "\n")  # should be ~1.0
## Sum of weights: 1
# --- SECTION 8: Calculate Portfolio Returns Using Optimized Weights ---
# Apply the static optimized weights to daily returns
# NOTE: This is a static-weight backtest, not a rolling re-optimization.
# Static weights introduce a mild look-ahead bias because we used the full
# 3-year return series to determine weights, then apply them to that same period.
# Acknowledge this limitation in your report.

portfolio_ret <- Return.portfolio(
  R       = portfolio_xts,
  weights = opt_weights,
  rebalance_on = "months"  # monthly rebalancing assumption
)

colnames(portfolio_ret) <- "Portfolio"

# Combine portfolio and benchmark into one xts object for comparison
combined_ret <- merge(portfolio_ret, benchmark_xts)
colnames(combined_ret) <- c("Portfolio", "Benchmark_ACWI")

# --- SECTION 9: Calculate Required Performance Metrics ---
cat("\n========== PERFORMANCE METRICS ==========\n")
## 
## ========== PERFORMANCE METRICS ==========
# Risk-free rate assumption: 4.5% annualized (approximate US T-bill rate, mid-2025)
# This needs verification — check current 3-month T-bill rate
Rf_annual <- 0.045
Rf_daily  <- Rf_annual / 252

# --- Metric 1: Cumulative Return ---
cum_returns <- Return.cumulative(combined_ret)
cat("\n1. Cumulative Return:\n")
## 
## 1. Cumulative Return:
print(round(cum_returns, 4))
##                   Portfolio Benchmark_ACWI
## Cumulative Return    0.7638         0.5934
# --- Metric 2: Sharpe Ratio (annualized) ---
sharpe <- SharpeRatio.annualized(combined_ret, Rf = Rf_daily)
cat("\n2. Annualized Sharpe Ratio:\n")
## 
## 2. Annualized Sharpe Ratio:
print(round(sharpe, 4))
##                                           Portfolio Benchmark_ACWI
## Annualized Sharpe Ratio (Rf=4.5%, p=95%):    1.3929         1.0984
# --- Metric 3: Maximum Drawdown ---
max_dd <- maxDrawdown(combined_ret)
cat("\n3. Maximum Drawdown:\n")
## 
## 3. Maximum Drawdown:
print(round(max_dd, 4))
##                Portfolio Benchmark_ACWI
## Worst Drawdown    0.1267         0.1655
# --- Metric 4 & 5: Alpha and Beta vs Benchmark ---
# CAPM regression: Portfolio_excess = Alpha + Beta * Benchmark_excess + error
# We use CAPM() from PerformanceAnalytics

capm_table <- CAPM.alpha(combined_ret[, "Portfolio"],
                         combined_ret[, "Benchmark_ACWI"],
                         Rf = Rf_daily)
beta_val   <- CAPM.beta(combined_ret[, "Portfolio"],
                        combined_ret[, "Benchmark_ACWI"],
                        Rf = Rf_daily)

cat("\n4. CAPM Alpha (annualized):\n")
## 
## 4. CAPM Alpha (annualized):
print(round(capm_table, 6))
## [1] 0.000227
cat("\n5. Beta vs ACWI Benchmark:\n")
## 
## 5. Beta vs ACWI Benchmark:
print(round(beta_val, 4))
## [1] 0.9146
# Summary table
summary_metrics <- data.frame(
  Metric = c("Cumulative Return (Portfolio)", "Cumulative Return (Benchmark)",
             "Sharpe Ratio (Portfolio)", "Sharpe Ratio (Benchmark)",
             "Max Drawdown (Portfolio)", "Max Drawdown (Benchmark)",
             "Alpha (annualized)", "Beta vs ACWI"),
  Value = c(
    round(as.numeric(cum_returns["Portfolio"]), 4),
    round(as.numeric(cum_returns["Benchmark_ACWI"]), 4),
    round(as.numeric(sharpe[, "Portfolio"]), 4),
    round(as.numeric(sharpe[, "Benchmark_ACWI"]), 4),
    round(as.numeric(max_dd["Portfolio"]), 4),
    round(as.numeric(max_dd["Benchmark_ACWI"]), 4),
    round(as.numeric(capm_table), 6),
    round(as.numeric(beta_val), 4)
  )
)
cat("\n========== SUMMARY TABLE ==========\n")
## 
## ========== SUMMARY TABLE ==========
print(summary_metrics)
##                          Metric    Value
## 1 Cumulative Return (Portfolio)       NA
## 2 Cumulative Return (Benchmark)       NA
## 3      Sharpe Ratio (Portfolio) 1.392900
## 4      Sharpe Ratio (Benchmark) 1.098400
## 5      Max Drawdown (Portfolio)       NA
## 6      Max Drawdown (Benchmark)       NA
## 7            Alpha (annualized) 0.000227
## 8                  Beta vs ACWI 0.914600
# --- SECTION 10: Chart 1 — Cumulative Return Comparison ---
# Shows growth of $1 invested in portfolio vs benchmark

chart.CumReturns(
  combined_ret,
  main        = "Cumulative Returns: Quality-Momentum Portfolio vs ACWI Benchmark",
  legend.loc  = "topleft",
  colorset    = c("steelblue", "firebrick"),
  lwd         = 2,
  ylab        = "Growth of $1",
  xlab        = "Date"
)
grid()

# --- SECTION 11: Chart 2 — Drawdown Chart ---
# Shows underwater periods — critical for understanding downside risk
# Maximum Drawdown is the deepest trough from any peak

chart.Drawdown(
  combined_ret,
  main        = "Drawdown Comparison: Quality-Momentum Portfolio vs ACWI",
  legend.loc  = "bottomleft",
  colorset    = c("steelblue", "firebrick"),
  lwd         = 2,
  ylab        = "Drawdown",
  xlab        = "Date"
)
grid()

# --- SECTION 12: Bonus Chart — Optimized Portfolio Weights Bar Chart ---
# Useful for the presentation slide showing final allocation

weights_df <- data.frame(
  Ticker = names(opt_weights),
  Weight = as.numeric(opt_weights) * 100
) %>% arrange(desc(Weight))

ggplot(weights_df, aes(x = reorder(Ticker, Weight), y = Weight, fill = Weight)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  scale_fill_gradient(low = "lightblue", high = "steelblue") +
  labs(
    title    = "Optimized Portfolio Weights (Max Sharpe Ratio)",
    subtitle = "Quality-Momentum Global/Mixed Portfolio",
    x        = "Ticker",
    y        = "Weight (%)",
    fill     = "Weight (%)"
  ) +
  theme_minimal() +
  geom_text(aes(label = paste0(round(Weight, 1), "%")), hjust = -0.1) +
  ylim(0, 25)