Submission checklist
1) Knit this.Rmdto HTML or PDF.
2) Export plots (saved automatically intoplots/) and paste into the blue boxes of the Project 3 Worksheet (PDF).
3) Upload this.Rmd, the knitted output, the filled worksheet PDF, and the three CSV data files to Canvas.
# install.packages(c("tidyverse","lubridate","zoo"))
library(tidyverse)
library(lubridate)
library(zoo)
# === FILES ===
files <- c(AMD = "C:\\Users\\bella\\Downloads\\AAPL.csv",
AAPL = "C:\\Users\\bella\\Downloads\\AMD.csv",
NVDA = "C:\\Users\\bella\\Downloads\\NVDA.csv")
# === TRADING RULE PARAMETERS ===
win <- 20 # 20-day lookback
buy_thr <- 0.001 # buy when z <= 0.001
sell_thr <- 2.5 # sell when z >= 2.5
# === OUTPUT PLOT DIR ===
plot_dir <- "plots"
if(!dir.exists(plot_dir)) dir.create(plot_dir, recursive = TRUE)
read_prices <- function(path){
stopifnot(file.exists(path))
D <- read.csv(path, stringsAsFactors = FALSE)
# normalize names
names(D) <- gsub("\\\\.", "", names(D))
names(D) <- tolower(names(D))
# prefer adjusted close
price_col <- dplyr::case_when(
"adjclose" %in% names(D) ~ "adjclose",
"adj_close" %in% names(D) ~ "adj_close",
"close" %in% names(D) ~ "close",
TRUE ~ NA_character_
)
if(is.na(price_col)) stop("No 'Close' or 'Adj Close' column found.")
if(!"date" %in% names(D)) stop("No 'Date' column found.")
D$date <- as.Date(D$date)
D <- D %>% arrange(date) %>%
transmute(Date = date, Price = as.numeric(.data[[price_col]])) %>%
distinct(Date, .keep_all = TRUE)
return(D)
}
compute_signals <- function(D, win = 20, buy_thr = 0.001, sell_thr = 2.5){
D <- D %>% arrange(Date) %>%
mutate(
MA20 = rollapply(Price, win, mean, align = "right", fill = NA),
SD20 = rollapply(Price, win, sd, align = "right", fill = NA),
z = (Price - MA20) / SD20,
ret_d = Price/lag(Price) - 1
)
# one-long-position rule with next-day execution
n <- nrow(D)
sig <- integer(n)
pos <- integer(n)
in_pos <- FALSE
for(i in seq_len(n)){
zt <- D$z[i]
if(!is.na(zt)){
if(!in_pos && zt <= buy_thr){ sig[i] <- 1L; in_pos <- TRUE }
else if(in_pos && zt >= sell_thr){ sig[i] <- -1L; in_pos <- FALSE }
else sig[i] <- 0L
} else sig[i] <- 0L
# position takes effect next day
if(i == 1) pos[i] <- 0L
else {
if(sig[i-1] == 1L) pos[i] <- 1L
else if(sig[i-1] == -1L) pos[i] <- 0L
else pos[i] <- pos[i-1]
}
}
D$signal <- sig
D$position <- pos
D$strat_ret_d <- D$position * D$ret_d
return(D)
}
agg_returns <- function(D){
D2 <- D %>%
mutate(
cum_strat = cumprod(1 + coalesce(strat_ret_d, 0)),
bh_ret_d = ret_d,
cum_bh = cumprod(1 + coalesce(bh_ret_d, 0))
)
# weekly/monthly compounded simple returns
weekly <- D2 %>%
filter(!is.na(ret_d)) %>%
mutate(week = isoweek(Date), year = isoyear(Date)) %>%
group_by(year, week) %>%
summarize(
strat = prod(1 + coalesce(strat_ret_d,0)) - 1,
bh = prod(1 + coalesce(bh_ret_d,0)) - 1,
.groups = "drop"
)
monthly <- D2 %>%
filter(!is.na(ret_d)) %>%
mutate(month = month(Date), year = year(Date)) %>%
group_by(year, month) %>%
summarize(
strat = prod(1 + coalesce(strat_ret_d,0)) - 1,
bh = prod(1 + coalesce(bh_ret_d,0)) - 1,
.groups = "drop"
)
list(D = D2, weekly = weekly, monthly = monthly)
}
save_plot <- function(p, filename, w = 7, h = 4.5, dpi = 300){
ggsave(file.path(plot_dir, filename), p, width = w, height = h, dpi = dpi)
}
clt_block_means <- function(series, k){
series <- na.omit(series)
n <- length(series)
m <- floor(n / k)
if(m < 5) return(tibble(block_mean = numeric(0)))
x <- series[seq_len(m*k)]
M <- matrix(x, nrow = k, byrow = TRUE)
tibble(block_mean = colMeans(M))
}
results <- list()
for(sym in names(files)){
cat("Processing", sym, "...\n")
D0 <- read_prices(files[[sym]])
D <- compute_signals(D0, win = win, buy_thr = buy_thr, sell_thr = sell_thr)
res <- agg_returns(D)
results[[sym]] <- res
}
## Processing AMD ...
## Processing AAPL ...
## Processing NVDA ...
sym <- "AMD"
DD <- results[[sym]]$D
bind_rows(
head(DD, 5),
tail(DD, 5)
) %>%
select(Date, Price, MA20, SD20, z, ret_d, signal, position, strat_ret_d)
## Date Price MA20 SD20 z ret_d signal position
## 1 2024-12-01 226.84 NA NA NA NA 0 0
## 2 2024-12-02 227.18 NA NA NA 0.0014988538 0 0
## 3 2024-12-03 228.03 NA NA NA 0.0037415265 0 0
## 4 2024-12-04 226.49 NA NA NA -0.0067534973 0 0
## 5 2024-12-05 229.79 NA NA NA 0.0145701797 0 0
## 6 2025-09-22 269.00 256.4660 6.793766 1.8449267 0.0007068189 0 1
## 7 2025-09-23 269.70 257.1785 7.401647 1.6917180 0.0026022305 0 1
## 8 2025-09-24 271.40 257.8920 8.055630 1.6768397 0.0063033000 0 1
## 9 2025-09-25 270.37 258.5095 8.525591 1.3911646 -0.0037951363 0 1
## 10 2025-09-26 267.35 259.0425 8.736464 0.9508995 -0.0111698783 0 1
## strat_ret_d
## 1 NA
## 2 0.0000000000
## 3 0.0000000000
## 4 0.0000000000
## 5 0.0000000000
## 6 0.0007068189
## 7 0.0026022305
## 8 0.0063033000
## 9 -0.0037951363
## 10 -0.0111698783
p_price <- ggplot(DD, aes(Date, Price)) +
geom_line() +
geom_line(aes(y = MA20), linetype = "dashed") +
labs(title = paste(sym, "Price with 20-Day MA"), y = "Price", x = "")
p_price
save_plot(p_price, sprintf("price_ma20_%s.pdf", sym))
p_z <- ggplot(DD, aes(Date, z)) +
geom_line() +
geom_hline(yintercept = buy_thr, linetype = "dotted") +
geom_hline(yintercept = sell_thr, linetype = "dotted") +
labs(title = paste(sym, "z-Score vs Thresholds"), y = "z", x = "")
p_z
save_plot(p_z, sprintf("zseries_%s.pdf", sym))
p_hist <- ggplot(DD, aes(z)) +
geom_histogram(aes(y = after_stat(density)), bins = 40) +
geom_density(alpha = 0.2) +
stat_function(fun = dnorm,
args = list(mean = mean(DD$z, na.rm = TRUE),
sd = sd(DD$z, na.rm = TRUE))) +
labs(title = paste(sym, "z-Score Distribution"), x = "z", y = "Density")
p_hist
save_plot(p_hist, sprintf("zdist_%s.pdf", sym))
p_qq <- ggplot(DD, aes(sample = z)) +
stat_qq() + stat_qq_line() +
labs(title = paste(sym, "QQ-plot of z-Scores"), x = "Theoretical", y = "Sample")
p_qq
save_plot(p_qq, sprintf("zqq_%s.pdf", sym))
trades <- DD %>% filter(signal != 0) %>% select(Date, Price, z, signal, position)
trades
## Date Price z signal position
## 1 2024-12-31 221.69 -0.7261892 1 0
## 2 2025-07-02 212.44 2.8152788 -1 1
## 3 2025-07-20 211.27 -0.1539818 1 0
## 4 2025-07-28 229.35 2.9520373 -1 1
## 5 2025-08-19 226.79 -1.1270807 1 0
## 6 2025-08-27 256.08 2.9583465 -1 1
## 7 2025-09-10 245.27 -0.7715485 1 0
GG <- results[[sym]]$D
p_cum <- ggplot(GG, aes(Date, cum_strat)) +
geom_line() +
geom_line(aes(y = cum_bh), linetype = "dashed") +
labs(title = paste(sym, "Cumulative Growth of $1: Strategy vs Buy&Hold"), y = "Growth", x = "")
p_cum
save_plot(p_cum, sprintf("cum_%s.pdf", sym))
wk <- results[[sym]]$weekly
mo <- results[[sym]]$monthly
summ <- tibble(
freq = c("weekly","weekly","monthly","monthly"),
series = c("strategy","buy_hold","strategy","buy_hold"),
mean = c(mean(wk$strat, na.rm=TRUE), mean(wk$bh, na.rm=TRUE),
mean(mo$strat, na.rm=TRUE), mean(mo$bh, na.rm=TRUE)),
sd = c(sd(wk$strat, na.rm=TRUE), sd(wk$bh, na.rm=TRUE),
sd(mo$strat, na.rm=TRUE), sd(mo$bh, na.rm=TRUE))
)
summ
## # A tibble: 4 × 4
## freq series mean sd
## <chr> <chr> <dbl> <dbl>
## 1 weekly strategy 0.00715 0.0520
## 2 weekly buy_hold 0.00521 0.0520
## 3 monthly strategy 0.0277 0.0779
## 4 monthly buy_hold 0.0190 0.0738
r_d <- na.omit(GG$strat_ret_d)
b5 <- clt_block_means(r_d, k = 5) # ~ weekly
b20 <- clt_block_means(r_d, k = 20) # ~ monthly
p_b5 <- ggplot(b5, aes(block_mean)) + geom_histogram(bins = 30) + labs(title = "Block Means (k=5)")
p_q5 <- ggplot(b5, aes(sample = block_mean)) + stat_qq() + stat_qq_line() + labs(title = "QQ (k=5)")
p_b20 <- ggplot(b20, aes(block_mean)) + geom_histogram(bins = 30) + labs(title = "Block Means (k=20)")
p_q20 <- ggplot(b20, aes(sample = block_mean)) + stat_qq() + stat_qq_line() + labs(title = "QQ (k=20)")
p_b5; p_q5; p_b20; p_q20
save_plot(p_b5, "blockmeans_k5_hist.pdf")
save_plot(p_q5, "blockmeans_k5_qq.pdf", w = 6, h = 4)
save_plot(p_b20, "blockmeans_k20_hist.pdf")
save_plot(p_q20, "blockmeans_k20_qq.pdf", w = 6, h = 4)
cmp <- lapply(names(results), function(sym){
mo <- results[[sym]]$monthly
tibble(symbol = sym,
mean_monthly_strat = mean(mo$strat, na.rm=TRUE),
sd_monthly_strat = sd(mo$strat, na.rm=TRUE),
mean_monthly_bh = mean(mo$bh, na.rm=TRUE),
sd_monthly_bh = sd(mo$bh, na.rm=TRUE))
}) %>% bind_rows()
cmp
## # A tibble: 3 × 5
## symbol mean_monthly_strat sd_monthly_strat mean_monthly_bh sd_monthly_bh
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AMD 0.0277 0.0779 0.0190 0.0738
## 2 AAPL 0.00390 0.102 0.0766 0.253
## 3 NVDA 0.0506 0.128 0.0592 0.140