Cleaned_model

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(openxlsx)
library(tibble)

# Fix random seed so simulation results are reproducible
set.seed(123)

directory <- path.expand("~/Downloads")


files <- list(
  CSCO = file.path(directory, "csco_us_d.csv"),
  AMZN = file.path(directory, "amzn_us_d.csv"),
  SPY  = file.path(directory, "spy_us_d.csv")
)

# Date range used for the forecasting and trading backtest
start_date <- as.Date("2026-02-03")
end_date <- as.Date("2026-03-05")

# Starting portfolio cash and max budget allowed per stock
start_cash <- 100000.0
budget_all <- c(CSCO = 25000.0, AMZN = 25000.0, SPY = 50000.0)

# Forecast simulation settings
num_sims <- 2000     # number of bootstrap simulations
roll_window <- 20    # window used for rolling RMSE/MAPE
slope_h <- 5          # forecast horizon used for trend slope

# Fraction of remaining budget used when buy levels trigger
buy_fracs  <- c(buy1 = 0.05, buy2 = 0.07, buy3 = 0.10)

# Fraction of current holdings sold when sell levels trigger
sell_fracs <- c(sell1 = 0.10, sell2 = 0.20)

# ATR-style volatility settings used to adjust buy/sell thresholds
av_range_n <- 14        # lookback window for volatility
av_range_frac <- 0.20   # near-miss threshold relative to ATR

# Load stock CSV, clean types, sort by date, and keep valid Open,High,Low,Close rows
read_stock_csv <- function(path, ticker) {
  df <- read_csv(path, show_col_types = FALSE)
  names(df) <- trimws(names(df))

  keep <- c("Symbol", "Date", "Open", "High", "Low", "Close")
  df <- df[, keep]

  df$Date <- (as.Date(df$Date, format = "%m/%d/%y"))
  df <- df[!is.na(df$Date), ]

  for (col in c("Open", "High", "Low", "Close")) {
    df[[col]] <- (as.numeric(df[[col]]))
  }

  df <- df[complete.cases(df[, c("Open", "High", "Low", "Close")]), ]
  df <- df %>%
    arrange(Date) %>%
    distinct(Date, .keep_all = TRUE)

  df$Symbol <- ticker
  df
}

# Generate simulated future prices using drift + bootstrapped residuals
bootstrap_drift_forecast <- function(y_hist, h = 1, num_sims = 2000) {
  y_hist <- as.numeric(y_hist)
  y_hist <- y_hist[!is.na(y_hist)]

  if (length(y_hist) < 5) {
    stop("Not enough history for drift bootstrap forecast (need >= 5).")
  }

  window <- 60
  if (length(y_hist) > (window + 1)) {
    y_use <- tail(y_hist, window + 1)
  } else {
    y_use <- y_hist
  }

  diffs <- diff(y_use)
  diffs <- diffs[!is.na(diffs)]

  if (length(diffs) < 2) {
    diffs <- c(0, 0)
  }

  mu <- mean(diffs)
  resid <- diffs - mu

  if (all(abs(resid) < 1e-12)) {
    resid <- 0
  }

  last_val <- tail(y_hist, 1)

  draws <- matrix(
    sample(resid, size = num_sims * h, replace = TRUE),
    nrow = num_sims,
    ncol = h
  )

  sims <- t(apply(draws, 1, function(x) cumsum(mu + x)))

  if (h == 1) {
    sims <- matrix(last_val + as.numeric(sims), ncol = 1)
  } else {
    sims <- last_val + sims
  }

  sims
}

# Compute forecast interval bounds from simulation results
interval_from_sims <- function(sims_1d, levels = c(0.80, 0.95)) {
  out <- list()
  for (lv in levels) {
    lo <- as.numeric(quantile(sims_1d, probs = (1 - lv) / 2, na.rm = TRUE, type = 7))
    hi <- as.numeric(quantile(sims_1d, probs = 1 - (1 - lv) / 2, na.rm = TRUE, type = 7))
    out[[paste0("lo", as.integer(lv * 100))]] <- lo
    out[[paste0("hi", as.integer(lv * 100))]] <- hi
  }
  out
}

# Convert slope into Bullish / Bearish / Neutral label
trend_label <- function(slope, eps = 1e-9) {
  if (slope > eps) return("Bullish")
  if (slope < -eps) return("Bearish")
  "Neutral"
}

# Momentum adjustment based on price vs 5-day lag
mom_mult <- function(close_today, close_5lag) {
  m <- (close_today - close_5lag) / max(close_5lag, 1e-9)
  if (m < -0.01) return(1.25)
  if (m > 0.01) return(0.85)
  1.0
}

# Adjust buy aggressiveness based on forecast trend strength
buy_multiplier_from_trend <- function(slope, close) {
  s <- as.numeric(slope) / max(as.numeric(close), 1e-9)
  if (s > 0.002) return(min(2.0, 1.0 + s / 0.005))
  if (s < -0.002) return(0.60)
  1.0
}

# Adjust sell aggressiveness based on forecast trend strength
sell_multiplier_from_trend <- function(slope, close) {
  s <- as.numeric(slope) / max(as.numeric(close), 1e-9)
  if (s < -0.002) return(1.50)
  if (s > 0.002) return(0.85)
  1.0
}

# Simple volatility estimate = average High-Low over last n days
atr_proxy <- function(df_hist, n = 14) {
  h <- tail(df_hist, n)
  if (nrow(h) == 0) return(0.0)

  tr <- as.numeric(h$High) - as.numeric(h$Low)
  tr <- tr[is.finite(tr)]

  if (length(tr) == 0) return(0.0)
  mean(tr)
}

# Retrieve 20-day momentum value for the current row
get_mom20 <- function(df, idx) {
  if ("Mom 20d" %in% names(df) && !is.na(df[idx, "Mom 20d"])) {
    return(as.numeric(df[idx, "Mom 20d"]))
  }
  0.0
}

# Load each stock dataset and store them in a named list
stocks <- lapply(names(files), function(tkr) read_stock_csv(files[[tkr]], tkr))

## New names:
## • `` -> `...7`

names(stocks) <- names(files)

# Build a shared trading calendar so all stocks use the same dates
cal <- stocks$SPY %>%
  select(Date) %>%
  filter(Date >= start_date, Date <= end_date)

for (tkr in names(stocks)) {
  cal <- inner_join(cal, stocks[[tkr]] %>% select(Date), by = "Date")
}

# Sort dates and remove any duplicates
cal <- cal %>%
  arrange(Date) %>%
  distinct(Date)

# Set up objects to store prior forecasts, forecast errors, and output rows
prev_fc_close <- setNames(as.list(rep(NA_real_, length(stocks))), names(stocks))
close_errors <- setNames(vector("list", length(stocks)), names(stocks))
rows <- list()
row_counter <- 1

for (i in seq_len(nrow(cal))) {
  date_t <- cal$Date[i]

  # Score yesterday's close forecast against today's actual close
  for (tkr in names(stocks)) {
    df <- stocks[[tkr]]
    close_today <- df %>% filter(Date == date_t) %>% pull(Close) %>% as.numeric()

    if (!is.na(prev_fc_close[[tkr]])) {
      close_errors[[tkr]] <- c(close_errors[[tkr]], close_today - prev_fc_close[[tkr]])
    }
  }

  # Build next-day forecasts and 5-day trend forecasts for each stock
  fc_pack <- list()

  for (tkr in names(stocks)) {
    df <- stocks[[tkr]]
    hist <- df %>% filter(Date <= date_t)

    if (nrow(hist) < 50) {
      stop(paste0(tkr, ": need more history before ", as.character(date_t), " (have ", nrow(hist), ")"))
    }

    today_row <- df %>% filter(Date == date_t)
    close_today <- as.numeric(today_row$Close[1])

    fc_vals <- list()

    for (col in c("Open", "High", "Low", "Close")) {
      sims_1 <- bootstrap_drift_forecast(hist[[col]], h = 1, num_sims = num_sims)
      sims_1d <- as.numeric(sims_1[, 1])
      fc_vals[[paste("FC", col)]] <- mean(sims_1d)

      if (col == "Close") {
        ints <- interval_from_sims(sims_1d, levels = c(0.80, 0.95))
        fc_vals <- c(fc_vals, ints)
      }
    }

    # Classify today's close relative to the forecast band
    fc_low <- fc_vals[["FC Low"]]
    fc_high <- fc_vals[["FC High"]]

    zone_band <- if (close_today < fc_low) {
      "Below FC Low (cheap)"
    } else if (close_today > fc_high) {
      "Above FC High (expensive)"
    } else {
      "Inside FC Band (normal)"
    }

    fc_vals[["Zone"]] <- zone_band

    sims_5 <- bootstrap_drift_forecast(hist$Close, h = slope_h, num_sims = num_sims)
    slope_5d <- mean(sims_5[, ncol(sims_5)]) - mean(sims_5[, 1])

    fc_vals[["FC 5 day trend"]] <- slope_5d
    fc_vals[["Trend"]] <- trend_label(slope_5d)

    fc_pack[[tkr]] <- fc_vals
  }

  # Calculate rolling forecast accuracy using recent close forecast errors
  rmse_map <- list()
  mape_map <- list()

  for (tkr in names(stocks)) {
    df <- stocks[[tkr]]
    errs <- tail(close_errors[[tkr]], roll_window)
    errs <- as.numeric(errs)

    if (length(errs) == 0) {
      rmse_map[[tkr]] <- NA_real_
      mape_map[[tkr]] <- NA_real_
    } else {
      rmse_map[[tkr]] <- sqrt(mean(errs^2))

      actuals <- df %>%
        filter(Date <= date_t) %>%
        pull(Close) %>%
        tail(length(errs)) %>%
        as.numeric()

      mape_map[[tkr]] <- mean(abs(errs) / pmax(abs(actuals), 1e-9)) * 100
    }
  }

  # Save today's forecasted close so it can be scored tomorrow
  for (tkr in names(stocks)) {
    prev_fc_close[[tkr]] <- fc_pack[[tkr]][["FC Close"]]
  }

  # Add one output row per stock with actual values, forecasts, and metrics
  for (tkr in names(stocks)) {
    df <- stocks[[tkr]]
    today_row <- df %>% filter(Date == date_t)
    hist <- df %>% filter(Date <= date_t)

    open_t  <- as.numeric(today_row$Open[1])
    high_t  <- as.numeric(today_row$High[1])
    low_t   <- as.numeric(today_row$Low[1])
    close_t <- as.numeric(today_row$Close[1])

    fc_5day_lag <- if (nrow(hist) >= 5) as.numeric(hist$Close[nrow(hist) - 4]) else NA_real_
    close_20lag <- if (nrow(hist) >= 20) as.numeric(hist$Close[nrow(hist) - 19]) else NA_real_

    mom_20d <- if (!is.na(close_20lag) && close_20lag != 0) {
      (close_t - close_20lag) / close_20lag
    } else {
      NA_real_
    }

    fc_low  <- fc_pack[[tkr]][["FC Low"]]
    fc_high <- fc_pack[[tkr]][["FC High"]]
    band_w <- max(fc_high - fc_low, 1e-9)
    sig <- (fc_pack[[tkr]][["FC Close"]] - close_t) / band_w

    rows[[row_counter]] <- tibble(
      Symbol = tkr,
      Date = date_t,
      Open = open_t,
      High = high_t,
      Low = low_t,
      Close = close_t,
      `FC Open` = fc_pack[[tkr]][["FC Open"]],
      `FC High` = fc_pack[[tkr]][["FC High"]],
      `FC Low` = fc_pack[[tkr]][["FC Low"]],
      `FC Close` = fc_pack[[tkr]][["FC Close"]],
      `FC 5 day trend` = fc_pack[[tkr]][["FC 5 day trend"]],
      Zone = fc_pack[[tkr]][["Zone"]],
      Signal = sig,
      `FC 5 day lag` = fc_5day_lag,
      `Close 20d lag` = close_20lag,
      `Mom 20d` = mom_20d,
      RSME = rmse_map[[tkr]],
      MAPE = mape_map[[tkr]],
      `Stocks Bought` = 0L,
      `Stocks Sold` = 0L,
      `Purchased Pr` = NA_real_,
      `Sold Price` = NA_real_,
      `Current Hold` = 0L,
      `Profit Loss` = 0.0,
      Cash = NA_real_
    )
    row_counter <- row_counter + 1
  }
}

# Combine all rows into one output table and sort by date and stock
out <- bind_rows(rows)

out$Symbol <- factor(out$Symbol, levels = c("CSCO", "AMZN", "SPY"), ordered = TRUE)
out <- out %>% arrange(Date, Symbol)

# Trading Block

cash <- start_cash
holdings <- setNames(as.list(rep(0L, length(stocks))), names(stocks))
avg_cost <- setNames(as.list(rep(0.0, length(stocks))), names(stocks))
realized_pnl <- 0.0

# Extract trading dates from forecast table
dates <- unique(out$Date)

# Sanity check: ensure each day has exactly 3 rows (CSCO, AMZN, SPY)
if (nrow(out) != 3 * length(dates)) {
  stop("Expected exactly 3 rows per date (CSCO/AMZN/SPY). Check sorting or missing dates.")
}

# Each trading day occupies 3 rows in the dataset
step <- 3


# Trading loop

# For each trading day, execute trades using yesterday's forecasts
for (day_idx in 2:length(dates)) {

  # Locate the rows corresponding to today and yesterday
  today_start <- (day_idx - 1) * step + 1
  yest_start  <- (day_idx - 2) * step + 1

  # Loop through each stock (CSCO, AMZN, SPY)
  for (j in 1:3) {

    tkr <- c("CSCO", "AMZN", "SPY")[j]

    r_today <- today_start + (j - 1)
    r_yest  <- yest_start + (j - 1)

    # Get today's data

    open_t  <- as.numeric(out$Open[r_today])
    high_t  <- as.numeric(out$High[r_today])
    low_t   <- as.numeric(out$Low[r_today])
    close_t <- as.numeric(out$Close[r_today])

    # Get yesterdays forecasts
  
    fc_low_y   <- as.numeric(out[r_yest, "FC Low"])
    fc_high_y  <- as.numeric(out[r_yest, "FC High"])
    fc_close_y <- as.numeric(out[r_yest, "FC Close"])
    slope_y    <- as.numeric(out[r_yest, "FC 5 day trend"])

    # Estimate volatility using average range

    # Compute recent volatility from historical price range
    date_today <- as.Date(out$Date[r_today])
    hist_today <- stocks[[tkr]] %>% filter(Date <= date_today)

    atr <- max(atr_proxy(hist_today, n = av_range_n), 1e-9)

    # Allow small tolerance for "near misses" of price thresholds
    near <- av_range_frac * atr

    # Define buying and sell levels

    # Buy thresholds are placed below forecast low
    buy1 <- fc_low_y
    buy2 <- fc_low_y - 0.75 * atr
    buy3 <- fc_low_y - 1.50 * atr

    # Sell thresholds are placed above forecast high
    sell1 <- fc_high_y
    sell2 <- fc_high_y + 0.75 * atr

    # Budgeting

    invested <- holdings[[tkr]] * close_t

    # Remaining capital allowed for this stock
    remaining_budget <- max(as.numeric(budget_all[tkr]) - invested, 0.0)

    # Trend and momentum adjustments
    bmult <- buy_multiplier_from_trend(slope_y, close_t)
    smult <- sell_multiplier_from_trend(slope_y, close_t)

    close_5lag <- if (!is.na(out[r_today, "FC 5 day lag"]))
      as.numeric(out[r_today, "FC 5 day lag"])
    else close_t

    mmult <- mom_mult(close_t, close_5lag)

    # 20-day momentum filter
    mom20 <- get_mom20(out, r_today)
    allow_buy <- (mom20 > 0) || (mom20 > -0.005 && slope_y > 0)

    # Stop significant loss

    # If price falls 5% below average cost, sell half the position
    if (holdings[[tkr]] > 0 && avg_cost[[tkr]] > 0 && close_t < 0.95 * avg_cost[[tkr]]) {

      cut_qty <- max(1L, as.integer(0.50 * holdings[[tkr]]))
      cut_px <- close_t

      cash <- cash + cut_qty * cut_px
      holdings[[tkr]] <- holdings[[tkr]] - cut_qty

      realized_pnl <- realized_pnl + cut_qty * (cut_px - avg_cost[[tkr]])

      out[r_today, "Stocks Sold"] <- as.integer(out[r_today, "Stocks Sold"]) + cut_qty
      out[r_today, "Sold Price"] <- cut_px
    }

    # Buy strategy 
    buy_qty <- 0L
    buy_px <- NA_real_
    dollars <- 0.0

    # Allow slightly weaker momentum for AMZN/CSCO
    allow_buy_this <- allow_buy
    if (tkr %in% c("AMZN", "CSCO") && mom20 > -0.05) {
      allow_buy_this <- TRUE
    }

    # Check whether any buy levels were touched during the day
    if (allow_buy_this && remaining_budget > 0 && cash > 0) {

      hit_buy3 <- (low_t <= buy3) || (close_t <= buy3 + near)
      hit_buy2 <- (low_t <= buy2) || (close_t <= buy2 + near)
      hit_buy1 <- (low_t <= buy1) || (close_t <= buy1 + near)

      # Allocate larger capital when price drops deeper
      if (hit_buy3) {
        dollars <- remaining_budget * buy_fracs["buy3"] * bmult * mmult
        buy_px <- min(close_t, buy3 + near)

      } else if (hit_buy2) {
        dollars <- remaining_budget * buy_fracs["buy2"] * bmult * mmult
        buy_px <- min(close_t, buy2 + near)

      } else if (hit_buy1) {
        dollars <- remaining_budget * buy_fracs["buy1"] * bmult * mmult
        buy_px <- min(close_t, buy1 + near)
      }

      # Convert dollar allocation into number of shares
      dollars <- min(dollars, cash)

      if (dollars > 0 && !is.na(buy_px) && buy_px > 0) {
        buy_qty <- floor(dollars / buy_px)
      }
    }

    # Execute buy trade
    if (buy_qty > 0 && !is.na(buy_px) && buy_px > 0) {

      cost <- buy_qty * buy_px

      if (!is.na(cost) && cost <= cash) {

        old_qty <- holdings[[tkr]]
        new_qty <- old_qty + buy_qty

        avg_cost[[tkr]] <- ((old_qty * avg_cost[[tkr]]) + cost) / new_qty
        holdings[[tkr]] <- new_qty

        cash <- cash - cost

        out[r_today, "Stocks Bought"] <- as.integer(out[r_today, "Stocks Bought"]) + buy_qty
        out[r_today, "Purchased Pr"] <- buy_px
      }
    }

    # Sell strategy
    sell_qty <- 0L
    sell_px <- NA_real_

    # Sell if price exceeds forecast high thresholds
    if (holdings[[tkr]] > 0) {

      if (high_t >= sell2) {
        sell_qty <- as.integer(holdings[[tkr]] * sell_fracs["sell2"] * smult)
        sell_px <- sell2

      } else if (high_t >= sell1) {
        sell_qty <- as.integer(holdings[[tkr]] * sell_fracs["sell1"] * smult)
        sell_px <- sell1
      }
    }

    sell_qty <- min(sell_qty, holdings[[tkr]])

    if (sell_qty > 0) {

      proceeds <- sell_qty * sell_px

      cash <- cash + proceeds
      holdings[[tkr]] <- holdings[[tkr]] - sell_qty

      realized_pnl <- realized_pnl + sell_qty * (sell_px - avg_cost[[tkr]])

      out[r_today, "Stocks Sold"] <- as.integer(out[r_today, "Stocks Sold"]) + sell_qty
      out[r_today, "Sold Price"] <- sell_px
    }

    # Portfolio tracking
    out[r_today, "Current Hold"] <- holdings[[tkr]]

    unreal <- holdings[[tkr]] * (close_t - avg_cost[[tkr]])

    out[r_today, "Profit Loss"] <- realized_pnl + unreal
    out[r_today, "Cash"] <- cash
  }
}

# Final column order and export
col_order <- c(
  "Symbol", "Date", "Open", "High", "Low", "Close",
  "FC Open", "FC High", "FC Low", "FC Close",
  "FC 5 day trend", "Zone", "Signal", "FC 5 day lag",
  "RSME", "MAPE",
  "Stocks Bought", "Stocks Sold", "Purchased Pr", "Sold Price",
  "Current Hold", "Profit Loss", "Cash"
)

out <- out[, col_order]

out_file <- file.path(path.expand("~/Downloads"), "forecast_trading_log.xlsx")
write.xlsx(out, out_file, rowNames = FALSE, sheetName = "Log")

cat("Saved:", out_file, "| rows =", nrow(out), "\n")

## Saved: /Users/johnnaschroeder/Downloads/forecast_trading_log.xlsx | rows = 66

Cleaned_model

2026-03-03