Submission checklist
1) Knit this .Rmd to HTML or PDF.
2) Export plots (saved automatically into plots/) and paste into the blue boxes of the Project 3 Worksheet (PDF).
3) Upload this .Rmd, the knitted output, the filled worksheet PDF, and the three CSV data files to Canvas.

0) Setup

# install.packages(c("tidyverse","lubridate","zoo"))
library(tidyverse)
library(lubridate)
library(zoo)
# === FILES ===
files <- c(AMD = "C:\\Users\\bella\\Downloads\\AAPL.csv",
           AAPL = "C:\\Users\\bella\\Downloads\\AMD.csv",
           NVDA = "C:\\Users\\bella\\Downloads\\NVDA.csv")

# === TRADING RULE PARAMETERS ===
win      <- 20        # 20-day lookback
buy_thr  <- 0.001     # buy when z <= 0.001
sell_thr <- 2.5       # sell when z >= 2.5

# === OUTPUT PLOT DIR ===
plot_dir <- "plots"
if(!dir.exists(plot_dir)) dir.create(plot_dir, recursive = TRUE)

1) Helper functions

read_prices <- function(path){
  stopifnot(file.exists(path))
  D <- read.csv(path, stringsAsFactors = FALSE)
  # normalize names
  names(D) <- gsub("\\\\.", "", names(D))
  names(D) <- tolower(names(D))
  # prefer adjusted close
  price_col <- dplyr::case_when(
    "adjclose" %in% names(D) ~ "adjclose",
    "adj_close" %in% names(D) ~ "adj_close",
    "close" %in% names(D) ~ "close",
    TRUE ~ NA_character_
  )
  if(is.na(price_col)) stop("No 'Close' or 'Adj Close' column found.")
  if(!"date" %in% names(D)) stop("No 'Date' column found.")
  D$date <- as.Date(D$date)
  D <- D %>% arrange(date) %>%
    transmute(Date = date, Price = as.numeric(.data[[price_col]])) %>%
    distinct(Date, .keep_all = TRUE)
  return(D)
}

compute_signals <- function(D, win = 20, buy_thr = 0.001, sell_thr = 2.5){
  D <- D %>% arrange(Date) %>%
    mutate(
      MA20 = rollapply(Price, win, mean, align = "right", fill = NA),
      SD20 = rollapply(Price, win, sd,   align = "right", fill = NA),
      z    = (Price - MA20) / SD20,
      ret_d = Price/lag(Price) - 1
    )
  # one-long-position rule with next-day execution
  n <- nrow(D)
  sig <- integer(n)
  pos <- integer(n)
  in_pos <- FALSE
  for(i in seq_len(n)){
    zt <- D$z[i]
    if(!is.na(zt)){
      if(!in_pos && zt <= buy_thr){ sig[i] <- 1L;  in_pos <- TRUE }
      else if(in_pos && zt >= sell_thr){ sig[i] <- -1L; in_pos <- FALSE }
      else sig[i] <- 0L
    } else sig[i] <- 0L
    # position takes effect next day
    if(i == 1) pos[i] <- 0L
    else {
      if(sig[i-1] == 1L) pos[i] <- 1L
      else if(sig[i-1] == -1L) pos[i] <- 0L
      else pos[i] <- pos[i-1]
    }
  }
  D$signal <- sig
  D$position <- pos
  D$strat_ret_d <- D$position * D$ret_d
  return(D)
}

agg_returns <- function(D){
  D2 <- D %>%
    mutate(
      cum_strat = cumprod(1 + coalesce(strat_ret_d, 0)),
      bh_ret_d  = ret_d,
      cum_bh    = cumprod(1 + coalesce(bh_ret_d, 0))
    )
  # weekly/monthly compounded simple returns
  weekly <- D2 %>%
    filter(!is.na(ret_d)) %>%
    mutate(week = isoweek(Date), year = isoyear(Date)) %>%
    group_by(year, week) %>%
    summarize(
      strat = prod(1 + coalesce(strat_ret_d,0)) - 1,
      bh    = prod(1 + coalesce(bh_ret_d,0)) - 1,
      .groups = "drop"
    )
  monthly <- D2 %>%
    filter(!is.na(ret_d)) %>%
    mutate(month = month(Date), year = year(Date)) %>%
    group_by(year, month) %>%
    summarize(
      strat = prod(1 + coalesce(strat_ret_d,0)) - 1,
      bh    = prod(1 + coalesce(bh_ret_d,0)) - 1,
      .groups = "drop"
    )
  list(D = D2, weekly = weekly, monthly = monthly)
}

save_plot <- function(p, filename, w = 7, h = 4.5, dpi = 300){
  ggsave(file.path(plot_dir, filename), p, width = w, height = h, dpi = dpi)
}

clt_block_means <- function(series, k){
  series <- na.omit(series)
  n <- length(series)
  m <- floor(n / k)
  if(m < 5) return(tibble(block_mean = numeric(0)))
  x <- series[seq_len(m*k)]
  M <- matrix(x, nrow = k, byrow = TRUE)
  tibble(block_mean = colMeans(M))
}

2) Load data and compute signals

results <- list()
for(sym in names(files)){
  cat("Processing", sym, "...\n")
  D0 <- read_prices(files[[sym]])
  D  <- compute_signals(D0, win = win, buy_thr = buy_thr, sell_thr = sell_thr)
  res <- agg_returns(D)
  results[[sym]] <- res
}
## Processing AMD ...
## Processing AAPL ...
## Processing NVDA ...

Quick peek (head/tail for AMD)

sym <- "AMD"
DD <- results[[sym]]$D
bind_rows(
  head(DD, 5),
  tail(DD, 5)
) %>%
  select(Date, Price, MA20, SD20, z, ret_d, signal, position, strat_ret_d)
##          Date  Price     MA20     SD20         z         ret_d signal position
## 1  2024-12-01 226.84       NA       NA        NA            NA      0        0
## 2  2024-12-02 227.18       NA       NA        NA  0.0014988538      0        0
## 3  2024-12-03 228.03       NA       NA        NA  0.0037415265      0        0
## 4  2024-12-04 226.49       NA       NA        NA -0.0067534973      0        0
## 5  2024-12-05 229.79       NA       NA        NA  0.0145701797      0        0
## 6  2025-09-22 269.00 256.4660 6.793766 1.8449267  0.0007068189      0        1
## 7  2025-09-23 269.70 257.1785 7.401647 1.6917180  0.0026022305      0        1
## 8  2025-09-24 271.40 257.8920 8.055630 1.6768397  0.0063033000      0        1
## 9  2025-09-25 270.37 258.5095 8.525591 1.3911646 -0.0037951363      0        1
## 10 2025-09-26 267.35 259.0425 8.736464 0.9508995 -0.0111698783      0        1
##      strat_ret_d
## 1             NA
## 2   0.0000000000
## 3   0.0000000000
## 4   0.0000000000
## 5   0.0000000000
## 6   0.0007068189
## 7   0.0026022305
## 8   0.0063033000
## 9  -0.0037951363
## 10 -0.0111698783

3) Plots — Price + MA20 and z-Series (example: AMD)

p_price <- ggplot(DD, aes(Date, Price)) +
  geom_line() +
  geom_line(aes(y = MA20), linetype = "dashed") +
  labs(title = paste(sym, "Price with 20-Day MA"), y = "Price", x = "")
p_price

save_plot(p_price, sprintf("price_ma20_%s.pdf", sym))
p_z <- ggplot(DD, aes(Date, z)) +
  geom_line() +
  geom_hline(yintercept = buy_thr, linetype = "dotted") +
  geom_hline(yintercept = sell_thr, linetype = "dotted") +
  labs(title = paste(sym, "z-Score vs Thresholds"), y = "z", x = "")
p_z

save_plot(p_z, sprintf("zseries_%s.pdf", sym))

4) Plots — z-Score Distribution & QQ (example: AMD)

p_hist <- ggplot(DD, aes(z)) +
  geom_histogram(aes(y = after_stat(density)), bins = 40) +
  geom_density(alpha = 0.2) +
  stat_function(fun = dnorm,
                args = list(mean = mean(DD$z, na.rm = TRUE),
                            sd   = sd(DD$z,   na.rm = TRUE))) +
  labs(title = paste(sym, "z-Score Distribution"), x = "z", y = "Density")
p_hist

save_plot(p_hist, sprintf("zdist_%s.pdf", sym))
p_qq <- ggplot(DD, aes(sample = z)) +
  stat_qq() + stat_qq_line() +
  labs(title = paste(sym, "QQ-plot of z-Scores"), x = "Theoretical", y = "Sample")
p_qq

save_plot(p_qq, sprintf("zqq_%s.pdf", sym))

5) Signals & Trades (example: AMD)

trades <- DD %>% filter(signal != 0) %>% select(Date, Price, z, signal, position)
trades
##         Date  Price          z signal position
## 1 2024-12-31 221.69 -0.7261892      1        0
## 2 2025-07-02 212.44  2.8152788     -1        1
## 3 2025-07-20 211.27 -0.1539818      1        0
## 4 2025-07-28 229.35  2.9520373     -1        1
## 5 2025-08-19 226.79 -1.1270807      1        0
## 6 2025-08-27 256.08  2.9583465     -1        1
## 7 2025-09-10 245.27 -0.7715485      1        0

6) Performance — Cumulative and Summaries (example: AMD)

GG <- results[[sym]]$D
p_cum <- ggplot(GG, aes(Date, cum_strat)) +
  geom_line() +
  geom_line(aes(y = cum_bh), linetype = "dashed") +
  labs(title = paste(sym, "Cumulative Growth of $1: Strategy vs Buy&Hold"), y = "Growth", x = "")
p_cum

save_plot(p_cum, sprintf("cum_%s.pdf", sym))
wk  <- results[[sym]]$weekly
mo  <- results[[sym]]$monthly

summ <- tibble(
  freq = c("weekly","weekly","monthly","monthly"),
  series = c("strategy","buy_hold","strategy","buy_hold"),
  mean = c(mean(wk$strat, na.rm=TRUE), mean(wk$bh, na.rm=TRUE),
           mean(mo$strat, na.rm=TRUE), mean(mo$bh, na.rm=TRUE)),
  sd   = c(sd(wk$strat, na.rm=TRUE), sd(wk$bh, na.rm=TRUE),
           sd(mo$strat, na.rm=TRUE), sd(mo$bh, na.rm=TRUE))
)
summ
## # A tibble: 4 × 4
##   freq    series      mean     sd
##   <chr>   <chr>      <dbl>  <dbl>
## 1 weekly  strategy 0.00715 0.0520
## 2 weekly  buy_hold 0.00521 0.0520
## 3 monthly strategy 0.0277  0.0779
## 4 monthly buy_hold 0.0190  0.0738

7) CLT demonstration (example: AMD)

r_d <- na.omit(GG$strat_ret_d)
b5  <- clt_block_means(r_d, k = 5)    # ~ weekly
b20 <- clt_block_means(r_d, k = 20)   # ~ monthly

p_b5  <- ggplot(b5,  aes(block_mean)) + geom_histogram(bins = 30) + labs(title = "Block Means (k=5)")
p_q5  <- ggplot(b5,  aes(sample = block_mean)) + stat_qq() + stat_qq_line() + labs(title = "QQ (k=5)")
p_b20 <- ggplot(b20, aes(block_mean)) + geom_histogram(bins = 30) + labs(title = "Block Means (k=20)")
p_q20 <- ggplot(b20, aes(sample = block_mean)) + stat_qq() + stat_qq_line() + labs(title = "QQ (k=20)")

p_b5;  p_q5;  p_b20;  p_q20

save_plot(p_b5,  "blockmeans_k5_hist.pdf")
save_plot(p_q5,  "blockmeans_k5_qq.pdf",  w = 6, h = 4)
save_plot(p_b20, "blockmeans_k20_hist.pdf")
save_plot(p_q20, "blockmeans_k20_qq.pdf", w = 6, h = 4)

8) Cross-stock comparison (monthly)

cmp <- lapply(names(results), function(sym){
  mo <- results[[sym]]$monthly
  tibble(symbol = sym,
         mean_monthly_strat = mean(mo$strat, na.rm=TRUE),
         sd_monthly_strat   = sd(mo$strat,   na.rm=TRUE),
         mean_monthly_bh    = mean(mo$bh,    na.rm=TRUE),
         sd_monthly_bh      = sd(mo$bh,      na.rm=TRUE))
}) %>% bind_rows()

cmp
## # A tibble: 3 × 5
##   symbol mean_monthly_strat sd_monthly_strat mean_monthly_bh sd_monthly_bh
##   <chr>               <dbl>            <dbl>           <dbl>         <dbl>
## 1 AMD               0.0277            0.0779          0.0190        0.0738
## 2 AAPL              0.00390           0.102           0.0766        0.253 
## 3 NVDA              0.0506            0.128           0.0592        0.140