This is a markdown file for DATA607 Assignment_3B. 

Assignment requirements:

Find (or ask an LLM to generate!) a dataset that includes time series for two or more separate items. For example, you could use end of day stock or cryptocurrency prices since Jan 1, 2022 for several instruments. Use window functions (in SQL or dplyr) to calculate the year-to-date average and the six-day moving averages for each item.

Step 1: Load required packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(slider)      # For moving averages
library(tidyquant)   # To fetch stock data
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.28     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)    # For data visualization
library(tidyr)

Step 2: Get stock data (example with Apple and Microsoft):

stock_data <- tq_get(c("AAPL", "MSFT"), 
                     from = "2022-01-01", 
                     to = Sys.Date()) %>%
  dplyr::select(symbol, date, close)

Print stock data

print(stock_data %>% filter(symbol == "AAPL") %>% head(20))
## # A tibble: 20 × 3
##    symbol date       close
##    <chr>  <date>     <dbl>
##  1 AAPL   2022-01-03  182.
##  2 AAPL   2022-01-04  180.
##  3 AAPL   2022-01-05  175.
##  4 AAPL   2022-01-06  172 
##  5 AAPL   2022-01-07  172.
##  6 AAPL   2022-01-10  172.
##  7 AAPL   2022-01-11  175.
##  8 AAPL   2022-01-12  176.
##  9 AAPL   2022-01-13  172.
## 10 AAPL   2022-01-14  173.
## 11 AAPL   2022-01-18  170.
## 12 AAPL   2022-01-19  166.
## 13 AAPL   2022-01-20  165.
## 14 AAPL   2022-01-21  162.
## 15 AAPL   2022-01-24  162.
## 16 AAPL   2022-01-25  160.
## 17 AAPL   2022-01-26  160.
## 18 AAPL   2022-01-27  159.
## 19 AAPL   2022-01-28  170.
## 20 AAPL   2022-01-31  175.
print(stock_data %>% filter(symbol == "MSFT") %>% head(20))
## # A tibble: 20 × 3
##    symbol date       close
##    <chr>  <date>     <dbl>
##  1 MSFT   2022-01-03  335.
##  2 MSFT   2022-01-04  329.
##  3 MSFT   2022-01-05  316.
##  4 MSFT   2022-01-06  314.
##  5 MSFT   2022-01-07  314.
##  6 MSFT   2022-01-10  314.
##  7 MSFT   2022-01-11  315.
##  8 MSFT   2022-01-12  318.
##  9 MSFT   2022-01-13  305.
## 10 MSFT   2022-01-14  310.
## 11 MSFT   2022-01-18  303.
## 12 MSFT   2022-01-19  303.
## 13 MSFT   2022-01-20  302.
## 14 MSFT   2022-01-21  296.
## 15 MSFT   2022-01-24  296.
## 16 MSFT   2022-01-25  288.
## 17 MSFT   2022-01-26  297.
## 18 MSFT   2022-01-27  300.
## 19 MSFT   2022-01-28  308.
## 20 MSFT   2022-01-31  311.

Step 3: Calculate year-to-date (YTD) average using cummean():

ytd_data <- stock_data %>%
  group_by(symbol, year = format(date, "%Y")) %>%  # Group by symbol and year
  arrange(date) %>%                                # Ensure dates are in order
  mutate(ytd_avg = cummean(close)) %>%             # Cumulative mean within each year
  
  ungroup()
  1. Calculate 6-day moving average using slide_dbl():
final_data <- ytd_data %>%
  group_by(symbol) %>%
  mutate(moving_avg_6d = slider::slide_dbl(
    close, 
    mean, 
    .before = 5,   # Look back 5 periods (current + 5 previous = 6 days)
    .complete = FALSE  # Allow partial windows
  )) %>%
  ungroup()
  1. View results for Apple and Microsoft
cat("AAPL Data:\n")
## AAPL Data:
print(final_data %>% filter(symbol == "AAPL") %>% head(20))
## # A tibble: 20 × 6
##    symbol date       close year  ytd_avg moving_avg_6d
##    <chr>  <date>     <dbl> <chr>   <dbl>         <dbl>
##  1 AAPL   2022-01-03  182. 2022     182.          182.
##  2 AAPL   2022-01-04  180. 2022     181.          181.
##  3 AAPL   2022-01-05  175. 2022     179.          179.
##  4 AAPL   2022-01-06  172  2022     177.          177.
##  5 AAPL   2022-01-07  172. 2022     176.          176.
##  6 AAPL   2022-01-10  172. 2022     175.          175.
##  7 AAPL   2022-01-11  175. 2022     175.          174.
##  8 AAPL   2022-01-12  176. 2022     175.          174.
##  9 AAPL   2022-01-13  172. 2022     175.          173.
## 10 AAPL   2022-01-14  173. 2022     175.          173.
## 11 AAPL   2022-01-18  170. 2022     174.          173.
## 12 AAPL   2022-01-19  166. 2022     174.          172.
## 13 AAPL   2022-01-20  165. 2022     173.          170.
## 14 AAPL   2022-01-21  162. 2022     172.          168.
## 15 AAPL   2022-01-24  162. 2022     172.          166.
## 16 AAPL   2022-01-25  160. 2022     171.          164.
## 17 AAPL   2022-01-26  160. 2022     170.          162.
## 18 AAPL   2022-01-27  159. 2022     170.          161.
## 19 AAPL   2022-01-28  170. 2022     170.          162.
## 20 AAPL   2022-01-31  175. 2022     170.          164.
cat("\nMSFT Data:\n")
## 
## MSFT Data:
print(final_data %>% filter(symbol == "MSFT") %>% head(20))
## # A tibble: 20 × 6
##    symbol date       close year  ytd_avg moving_avg_6d
##    <chr>  <date>     <dbl> <chr>   <dbl>         <dbl>
##  1 MSFT   2022-01-03  335. 2022     335.          335.
##  2 MSFT   2022-01-04  329. 2022     332.          332.
##  3 MSFT   2022-01-05  316. 2022     327.          327.
##  4 MSFT   2022-01-06  314. 2022     324.          324.
##  5 MSFT   2022-01-07  314. 2022     322.          322.
##  6 MSFT   2022-01-10  314. 2022     320.          320.
##  7 MSFT   2022-01-11  315. 2022     320.          317.
##  8 MSFT   2022-01-12  318. 2022     319.          315.
##  9 MSFT   2022-01-13  305. 2022     318.          313.
## 10 MSFT   2022-01-14  310. 2022     317.          313.
## 11 MSFT   2022-01-18  303. 2022     316.          311.
## 12 MSFT   2022-01-19  303. 2022     315.          309.
## 13 MSFT   2022-01-20  302. 2022     314.          307.
## 14 MSFT   2022-01-21  296. 2022     312.          303.
## 15 MSFT   2022-01-24  296. 2022     311.          302.
## 16 MSFT   2022-01-25  288. 2022     310.          298.
## 17 MSFT   2022-01-26  297. 2022     309.          297.
## 18 MSFT   2022-01-27  300. 2022     309.          297.
## 19 MSFT   2022-01-28  308. 2022     309.          298.
## 20 MSFT   2022-01-31  311. 2022     309.          300.

6.1 Plot for Apple stock price, YTD and moving average

# Create plots for each Apple
aapl_plot <- final_data %>%
  filter(symbol == "AAPL") %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = close, color = "Daily Close"), linewidth = 0.7) +
  geom_line(aes(y = ytd_avg, color = "YTD Average"), linewidth = 1.2) +
  geom_line(aes(y = moving_avg_6d, color = "6-Day Moving Avg"), linewidth = 1.2) +
  labs(title = "AAPL Stock Price with Averages",
       x = "Date", y = "Price ($)",
       color = "Metric") +
  theme_minimal()
print(aapl_plot)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

6.2 Plot for Microsoft stock price, YTD average and 6-Days moving average

msft_plot <- final_data %>%
  filter(symbol == "MSFT") %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = close, color = "Daily Close"), linewidth = 0.7) +
  geom_line(aes(y = ytd_avg, color = "YTD Average"), linewidth = 1.2) +
  geom_line(aes(y = moving_avg_6d, color = "6-Day Moving Avg"), linewidth = 1.2) +
  labs(title = "MSFT Stock Price with Averages",
       x = "Date", y = "Price ($)",
       color = "Metric") +
  theme_minimal()
print(msft_plot)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

Conclusion:

The plots revealed how different averaging techniques smooth data differently - YTD averages show longer-term trends while 6-day moving averages capture shorter-term momentum. Consistent alignment suggests stable trends.

This assignment provided hands-on experience with essential techniques used in financial analysis, data science, and quantitative trading strategies.