# Quantitative strategies on High Frequency Data
# Assessment of the sections (labs)
# prof. Piotr Wójcik
# academic year 2024/2025
#
# Below is the final code implementing the best strategy for group 1.
# This R Markdown uses a single best approach for:
# - NQ => (fastEMA=20, slowEMA=60, vol_sd=60, m_=2)
# - SP => (fastEMA=30, slowEMA=60, vol_sd=60, m_=2)
# Then it COMBINES their PnLs before calculating performance metrics.
###############################################################################
# FINAL VOLATILITY BREAKOUT MOM STRATEGY
###############################################################################
#
# In this strategy:
# - NQ uses fastEMA=20, slowEMA=60, vol_sd=60, m_=2
# - SP uses fastEMA=30, slowEMA=60, vol_sd=60, m_=2
# The gross PnLs are combined BEFORE performance metrics are calculated.
# 1) Load libraries & external functions
rm(list = ls())
library(xts)
library(zoo)
library(TTR)
library(lubridate)
library(tseries)
library(caTools)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
source("https://raw.githubusercontent.com/ptwojcik/HFD/master/function_mySR.R")
source("https://raw.githubusercontent.com/ptwojcik/HFD/master/function_positionVB_new.R")
source("https://raw.githubusercontent.com/ptwojcik/HFD/master/functions_plotHeatmap.R")
###############################################################################
# 2) Flattening function & custom Calmar
###############################################################################
build_pos_flat <- function(x) {
posf <- xts(rep(0, NROW(x)), order.by = index(x))
hhmm <- format(index(x), format = "%H:%M")
# Flatten after 15:40
posf[hhmm >= "15:40"] <- 1
# Flatten between 09:31 and 09:55
posf[hhmm >= "09:31" & hhmm < "09:55"] <- 1
return(posf)
}
myCalmarRatio <- function(x, scale = 252) {
avg_ <- mean(coredata(x), na.rm = TRUE)
dd_obj <- maxdrawdown(cumsum(x))
if (is.null(dd_obj$maxdrawdown) || dd_obj$maxdrawdown == 0) return(NA)
return(scale * avg_ / dd_obj$maxdrawdown)
}
###############################################################################
# 3) Hard‐coded best parameters for NQ & SP
###############################################################################
# NQ => (fastEMA=20, slowEMA=60, vol_sd=60, m_=2)
param_NQ <- list(fast_ema = 20, slow_ema = 60, vol_sd = 60, m_ = 2)
# SP => (fastEMA=30, slowEMA=60, vol_sd=60, m_=2)
param_SP <- list(fast_ema = 30, slow_ema = 60, vol_sd = 60, m_ = 2)
###############################################################################
# 4) Data file to process (example single quarter: 2022_Q1)
###############################################################################
selected_quarter <- "2022_Q1"
filename <- paste0("data1_", selected_quarter, ".RData")
if (!file.exists(filename)) {
stop("File not found: ", filename)
}
load(filename)
object_name <- paste0("data1_", selected_quarter)
if (!exists(object_name)) {
stop("Object not found after loading: ", object_name)
}
data_group1 <- get(object_name)
if (!all(c("NQ", "SP") %in% colnames(data_group1))) {
stop("Columns 'NQ' or 'SP' missing in data")
}
###############################################################################
# 5) Pre‐processing
###############################################################################
data_group1$NQ <- na.locf(data_group1$NQ, na.rm = FALSE)
data_group1$SP <- na.locf(data_group1$SP, na.rm = FALSE)
hhmm <- format(index(data_group1), format = "%H:%M")
data_group1[hhmm >= "09:31" & hhmm <= "09:40", ] <- NA
data_group1[hhmm >= "15:51" & hhmm <= "16:00", ] <- NA
###############################################################################
# 6) Build flatten indicators
###############################################################################
pos_flat_NQ <- build_pos_flat(data_group1)
pos_flat_SP <- build_pos_flat(data_group1)
###############################################################################
# 7) NQ position with best parameters
###############################################################################
NQ_ff <- na.locf(data_group1$NQ, na.rm = FALSE)
ema_fast_NQ <- EMA(NQ_ff, n = param_NQ$fast_ema)
ema_slow_NQ <- EMA(NQ_ff, n = param_NQ$slow_ema)
roll_std_NQ <- runsd(NQ_ff, param_NQ$vol_sd, endrule = "NA", align = "right")
posNQ <- positionVB_new(
signal = ema_fast_NQ,
lower = ema_slow_NQ - param_NQ$m_ * roll_std_NQ,
upper = ema_slow_NQ + param_NQ$m_ * roll_std_NQ,
pos_flat = coredata(pos_flat_NQ),
strategy = "mom" # momentum
)
posNQ <- xts(posNQ, order.by = index(data_group1))
posNQ <- na.locf(posNQ, na.rm = FALSE)
###############################################################################
# 8) SP position with best parameters
###############################################################################
SP_ff <- na.locf(data_group1$SP, na.rm = FALSE)
ema_fast_SP <- EMA(SP_ff, n = param_SP$fast_ema)
ema_slow_SP <- EMA(SP_ff, n = param_SP$slow_ema)
roll_std_SP <- runsd(SP_ff, param_SP$vol_sd, endrule = "NA", align = "right")
posSP <- positionVB_new(
signal = ema_fast_SP,
lower = ema_slow_SP - param_SP$m_ * roll_std_SP,
upper = ema_slow_SP + param_SP$m_ * roll_std_SP,
pos_flat = coredata(pos_flat_SP),
strategy = "mom"
)
posSP <- xts(posSP, order.by = index(data_group1))
posSP <- na.locf(posSP, na.rm = FALSE)
###############################################################################
# 9) Calculate Gross & Net PnL for each, then combine
###############################################################################
transaction_cost <- 12.0
contract_multiplier_NQ <- 20
contract_multiplier_SP <- 50
dNQ <- diff.xts(data_group1$NQ)
dSP <- diff.xts(data_group1$SP)
dNQ[is.na(dNQ)] <- 0
dSP[is.na(dSP)] <- 0
# NQ
pnl_grossNQ <- posNQ * dNQ * contract_multiplier_NQ
ntransNQ <- abs(diff.xts(posNQ))
ntransNQ[is.na(ntransNQ)] <- 0
pnl_netNQ <- pnl_grossNQ - (ntransNQ * transaction_cost)
# SP
pnl_grossSP <- posSP * dSP * contract_multiplier_SP
ntransSP <- abs(diff.xts(posSP))
ntransSP[is.na(ntransSP)] <- 0
pnl_netSP <- pnl_grossSP - (ntransSP * transaction_cost)
# combine gross before metrics
pnl_gross_combined <- pnl_grossNQ + pnl_grossSP
pnl_net_combined <- pnl_netNQ + pnl_netSP
###############################################################################
# 10) Aggregate to daily
###############################################################################
scale_annual <- 252
day_ends <- endpoints(data_group1, on = "days")
daily_gross <- period.apply(pnl_gross_combined, INDEX = day_ends, FUN = sum, na.rm = TRUE)
daily_net <- period.apply(pnl_net_combined, INDEX = day_ends, FUN = sum, na.rm = TRUE)
daily_ntrans <- period.apply(ntransNQ + ntransSP, INDEX = day_ends, FUN = sum, na.rm = TRUE)
###############################################################################
# 11) Performance metrics on combined approach
###############################################################################
grossSR <- mySR(daily_gross, scale = scale_annual)
netSR <- mySR(daily_net, scale = scale_annual)
grossCR <- myCalmarRatio(daily_gross, scale = scale_annual)
netCR <- myCalmarRatio(daily_net, scale = scale_annual)
avg_ntrades <- mean(daily_ntrans, na.rm = TRUE)
grossPnL <- sum(daily_gross, na.rm = TRUE)
netPnL <- sum(daily_net, na.rm = TRUE)
# final summary statistic:
stat_ <- if (abs(netPnL) < 1e-10) {
0
} else {
netCR * max(0, log(abs(netPnL / 1000)))
}
cat("\n=== Combined NQ+SP (with best param sets) Results for", selected_quarter, "===\n")
=== Combined NQ+SP (with best param sets) Results for 2022_Q1 ===
cat("Gross SR:", grossSR, "\n")
Gross SR: 3.220168
cat("Net SR:", netSR, "\n")
Net SR: 3.165656
cat("Gross CR:", grossCR, "\n")
Gross CR: NA
cat("Net CR:", netCR, "\n")
Net CR: 602.86
cat("Average daily trades:", avg_ntrades, "\n")
Average daily trades: 0.28125
cat("Gross PnL:", grossPnL, "\n")
Gross PnL: 7565.15
cat("Net PnL:", netPnL, "\n")
Net PnL: 7349.15
cat("Final stat:", stat_, "\n")
Final stat: 1202.455