# Quantitative strategies on High Frequency Data
# Assessment of the sections (labs)
# prof. Piotr Wójcik
# academic year 2024/2025
# 

# Below is the final code implementing the best strategy for group 1.
# This R Markdown uses a single best approach for:
#   - NQ => (fastEMA=20, slowEMA=60, vol_sd=60, m_=2)
#   - SP => (fastEMA=30, slowEMA=60, vol_sd=60, m_=2)
# Then it COMBINES their PnLs before calculating performance metrics.
###############################################################################
#                       FINAL VOLATILITY BREAKOUT MOM STRATEGY
###############################################################################
# 
# In this strategy:
# - NQ uses fastEMA=20, slowEMA=60, vol_sd=60, m_=2
# - SP uses fastEMA=30, slowEMA=60, vol_sd=60, m_=2
# The gross PnLs are combined BEFORE performance metrics are calculated.

# 1) Load libraries & external functions
rm(list = ls())

library(xts)
library(zoo)
library(TTR)
library(lubridate)
library(tseries)
library(caTools)
library(RColorBrewer)
library(ggplot2)
library(dplyr)

source("https://raw.githubusercontent.com/ptwojcik/HFD/master/function_mySR.R")
source("https://raw.githubusercontent.com/ptwojcik/HFD/master/function_positionVB_new.R")
source("https://raw.githubusercontent.com/ptwojcik/HFD/master/functions_plotHeatmap.R")


###############################################################################
# 2) Flattening function & custom Calmar
###############################################################################
build_pos_flat <- function(x) {
  posf <- xts(rep(0, NROW(x)), order.by = index(x))
  hhmm <- format(index(x), format = "%H:%M")
  # Flatten after 15:40
  posf[hhmm >= "15:40"] <- 1
  # Flatten between 09:31 and 09:55
  posf[hhmm >= "09:31" & hhmm < "09:55"] <- 1
  return(posf)
}

myCalmarRatio <- function(x, scale = 252) {
  avg_ <- mean(coredata(x), na.rm = TRUE)
  dd_obj <- maxdrawdown(cumsum(x))
  if (is.null(dd_obj$maxdrawdown) || dd_obj$maxdrawdown == 0) return(NA)
  return(scale * avg_ / dd_obj$maxdrawdown)
}


###############################################################################
# 3) Hard‐coded best parameters for NQ & SP
###############################################################################
# NQ => (fastEMA=20, slowEMA=60, vol_sd=60, m_=2)
param_NQ <- list(fast_ema = 20, slow_ema = 60, vol_sd = 60, m_ = 2)

# SP => (fastEMA=30, slowEMA=60, vol_sd=60, m_=2)
param_SP <- list(fast_ema = 30, slow_ema = 60, vol_sd = 60, m_ = 2)


###############################################################################
# 4) Data file to process (example single quarter: 2022_Q1)
###############################################################################
selected_quarter <- "2022_Q1"
filename <- paste0("data1_", selected_quarter, ".RData")
if (!file.exists(filename)) {
  stop("File not found: ", filename)
}
load(filename)
object_name <- paste0("data1_", selected_quarter)
if (!exists(object_name)) {
  stop("Object not found after loading: ", object_name)
}
data_group1 <- get(object_name)

if (!all(c("NQ", "SP") %in% colnames(data_group1))) {
  stop("Columns 'NQ' or 'SP' missing in data")
}


###############################################################################
# 5) Pre‐processing
###############################################################################
data_group1$NQ <- na.locf(data_group1$NQ, na.rm = FALSE)
data_group1$SP <- na.locf(data_group1$SP, na.rm = FALSE)

hhmm <- format(index(data_group1), format = "%H:%M")
data_group1[hhmm >= "09:31" & hhmm <= "09:40", ] <- NA
data_group1[hhmm >= "15:51" & hhmm <= "16:00", ] <- NA


###############################################################################
# 6) Build flatten indicators
###############################################################################
pos_flat_NQ <- build_pos_flat(data_group1)
pos_flat_SP <- build_pos_flat(data_group1)


###############################################################################
# 7) NQ position with best parameters
###############################################################################
NQ_ff <- na.locf(data_group1$NQ, na.rm = FALSE)

ema_fast_NQ <- EMA(NQ_ff, n = param_NQ$fast_ema)
ema_slow_NQ <- EMA(NQ_ff, n = param_NQ$slow_ema)
roll_std_NQ <- runsd(NQ_ff, param_NQ$vol_sd, endrule = "NA", align = "right")

posNQ <- positionVB_new(
  signal   = ema_fast_NQ,
  lower    = ema_slow_NQ - param_NQ$m_ * roll_std_NQ,
  upper    = ema_slow_NQ + param_NQ$m_ * roll_std_NQ,
  pos_flat = coredata(pos_flat_NQ),
  strategy = "mom"  # momentum
)
posNQ <- xts(posNQ, order.by = index(data_group1))
posNQ <- na.locf(posNQ, na.rm = FALSE)


###############################################################################
# 8) SP position with best parameters
###############################################################################
SP_ff <- na.locf(data_group1$SP, na.rm = FALSE)

ema_fast_SP <- EMA(SP_ff, n = param_SP$fast_ema)
ema_slow_SP <- EMA(SP_ff, n = param_SP$slow_ema)
roll_std_SP <- runsd(SP_ff, param_SP$vol_sd, endrule = "NA", align = "right")

posSP <- positionVB_new(
  signal   = ema_fast_SP,
  lower    = ema_slow_SP - param_SP$m_ * roll_std_SP,
  upper    = ema_slow_SP + param_SP$m_ * roll_std_SP,
  pos_flat = coredata(pos_flat_SP),
  strategy = "mom"
)
posSP <- xts(posSP, order.by = index(data_group1))
posSP <- na.locf(posSP, na.rm = FALSE)


###############################################################################
# 9) Calculate Gross & Net PnL for each, then combine
###############################################################################
transaction_cost       <- 12.0
contract_multiplier_NQ <- 20
contract_multiplier_SP <- 50

dNQ <- diff.xts(data_group1$NQ)
dSP <- diff.xts(data_group1$SP)
dNQ[is.na(dNQ)] <- 0
dSP[is.na(dSP)] <- 0

# NQ
pnl_grossNQ <- posNQ * dNQ * contract_multiplier_NQ
ntransNQ    <- abs(diff.xts(posNQ))
ntransNQ[is.na(ntransNQ)] <- 0
pnl_netNQ   <- pnl_grossNQ - (ntransNQ * transaction_cost)

# SP
pnl_grossSP <- posSP * dSP * contract_multiplier_SP
ntransSP    <- abs(diff.xts(posSP))
ntransSP[is.na(ntransSP)] <- 0
pnl_netSP   <- pnl_grossSP - (ntransSP * transaction_cost)

# combine gross before metrics
pnl_gross_combined <- pnl_grossNQ + pnl_grossSP
pnl_net_combined   <- pnl_netNQ   + pnl_netSP


###############################################################################
# 10) Aggregate to daily
###############################################################################
scale_annual <- 252
day_ends <- endpoints(data_group1, on = "days")

daily_gross <- period.apply(pnl_gross_combined, INDEX = day_ends, FUN = sum, na.rm = TRUE)
daily_net   <- period.apply(pnl_net_combined,   INDEX = day_ends, FUN = sum, na.rm = TRUE)

daily_ntrans <- period.apply(ntransNQ + ntransSP, INDEX = day_ends, FUN = sum, na.rm = TRUE)


###############################################################################
# 11) Performance metrics on combined approach
###############################################################################
grossSR <- mySR(daily_gross, scale = scale_annual)
netSR   <- mySR(daily_net,   scale = scale_annual)

grossCR <- myCalmarRatio(daily_gross, scale = scale_annual)
netCR   <- myCalmarRatio(daily_net,   scale = scale_annual)

avg_ntrades <- mean(daily_ntrans, na.rm = TRUE)
grossPnL    <- sum(daily_gross, na.rm = TRUE)
netPnL      <- sum(daily_net,   na.rm = TRUE)

# final summary statistic:
stat_ <- if (abs(netPnL) < 1e-10) {
  0
} else {
  netCR * max(0, log(abs(netPnL / 1000)))
}

cat("\n=== Combined NQ+SP (with best param sets) Results for", selected_quarter, "===\n")

=== Combined NQ+SP (with best param sets) Results for 2022_Q1 ===
cat("Gross SR:", grossSR, "\n")
Gross SR: 3.220168 
cat("Net   SR:", netSR,   "\n")
Net   SR: 3.165656 
cat("Gross CR:", grossCR, "\n")
Gross CR: NA 
cat("Net   CR:", netCR,   "\n")
Net   CR: 602.86 
cat("Average daily trades:", avg_ntrades, "\n")
Average daily trades: 0.28125 
cat("Gross PnL:", grossPnL, "\n")
Gross PnL: 7565.15 
cat("Net   PnL:", netPnL,   "\n")
Net   PnL: 7349.15 
cat("Final stat:", stat_, "\n")
Final stat: 1202.455