# ---- Load and parse Apple stock prices ----

# The parsing below uses stringr, tidyr, tibble, dplyr and lubridate, so they
# must be attached before the first statement that needs them.
library(tidyverse)
library(lubridate)

# Read the CSV as raw lines; rows are recovered by pattern matching further
# down instead of with a CSV reader.
apple_raw <- readLines("C:/Users/shafi/OneDrive/Desktop/timeseries/apple_stock.csv")

# Locate the header row. It is informational only: the column names used for
# parsing are fixed in the separate() call below.
header_line <- grep("^Date,", apple_raw, value = TRUE)
if (length(header_line) == 0) {
  # No explicit "Date,..." line: look at the first comma-separated line among
  # the first 20 lines. head() avoids NA padding when the file is shorter.
  sample_lines <- head(apple_raw, 20)
  header_line <- grep(",", sample_lines, value = TRUE)[1]
  # If nothing was found, or the candidate is itself a data row (it starts
  # with a YYYY-MM-DD date), the file has no header: use the default names.
  if (is.na(header_line) || grepl("^\\d{4}-\\d{2}-\\d{2}", header_line)) {
    header_line <- "Date,Open,High,Low,Close,Adj.Close,Volume"
  }
}

# Join the lines with a newline separator. Joining with "" would glue each
# row's trailing volume digits onto the next row's year, so the greedy `\\d+`
# would swallow that year and the following row would no longer match.
raw_text <- paste(apple_raw, collapse = "\n")

# Extract data rows: a date, five decimal fields and an integer volume.
data_rows <- str_extract_all(
  raw_text,
  "\\d{4}-\\d{2}-\\d{2},[\\d\\.]+,[\\d\\.]+,[\\d\\.]+,[\\d\\.]+,[\\d\\.]+,\\d+"
)[[1]]

# One matched row per tibble row, split into typed columns.
apple_df <- tibble(value = data_rows) %>%
  separate(
    value,
    into = c("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume"),
    sep = ","
  ) %>%
  mutate(
    Date = ymd(Date),
    across(c(Open, High, Low, Close, Adj.Close, Volume), as.numeric)
  )
# ---- Load and parse Google Trends export ----

# Read multiTimeline.csv as raw lines; data rows are expected to look like
# "YYYY-MM-DD,##".
trend_raw <- readLines("C:/Users/shafi/OneDrive/Desktop/timeseries/multiTimeline.csv")

# Join with a newline separator so rows stay delimited. With "" as separator a
# two-digit interest value would be followed directly by the next row's year,
# `\\d{1,3}` would absorb one of its digits, and the next date would be lost.
trend_text <- paste(trend_raw, collapse = "\n")
trend_rows <- str_extract_all(trend_text, "\\d{4}-\\d{2}-\\d{2},\\d{1,3}")[[1]]

# Split each matched row into a parsed date and a numeric interest score.
trend_df <- tibble(value = trend_rows) %>%
  separate(value, into = c("Date", "Interest"), sep = ",") %>%
  mutate(
    Date = ymd(Date),
    Interest = as.numeric(Interest)
  )
# Analysis window shared by both series; both end points are inclusive.
start_date <- ymd("2022-07-13")
end_date   <- ymd("2022-12-31")

# Restrict prices and search interest to that window.
apple_6m <- filter(apple_df, Date >= start_date, Date <= end_date)
trend_6m <- filter(trend_df, Date >= start_date, Date <= end_date)
# Panel (b): search interest over the six-month window. Two event dates are
# marked with dashed vertical lines and labelled three days to their right.
# Line width is set with `linewidth` (the `size` aesthetic for lines is
# deprecated in current ggplot2), matching the forecast plot later in the file.
p_trend <- ggplot(trend_6m, aes(Date, Interest)) +
  geom_line(color = "steelblue", linewidth = 0.9) +
  geom_vline(
    xintercept = ymd(c("2022-07-28", "2022-09-07")),
    linetype = "dashed",
    color = "gray50"
  ) +
  annotate("text", x = ymd("2022-07-28") + days(3), y = 85, label = "Q3 Earnings ↑", hjust = 0) +
  annotate("text", x = ymd("2022-09-07") + days(3), y = 75, label = "iPhone 14 Launch", hjust = 0) +
  labs(
    title = "(b) Google Search Interest for 'Apple Stock'\n(Jul–Dec 2022)",
    x = "Date", y = "Relative Interest (0–100)",
    caption = "Source: Google Trends"
  ) +
  theme_minimal(base_size = 9)
p_trend

# Panel (c): adjusted close over the six-month window with four annotated
# event dates. Label heights (y) are hard-coded price levels chosen per label.
# Line width is set with `linewidth` (the `size` aesthetic for lines is
# deprecated in current ggplot2); `size` on the text annotations is the font
# size and is unaffected.
p_price <- ggplot(apple_6m, aes(Date, `Adj.Close`)) +
  geom_line(color = "darkred", linewidth = 0.9) +
  geom_vline(
    xintercept = ymd(c("2022-07-28", "2022-09-07", "2022-10-13", "2022-12-16")),
    linetype = "dashed",
    color = "gray50"
  ) +
  annotate("text", x = ymd("2022-07-28"), y = 165, label = "Q3 Earnings Beat", vjust = -0.5, size = 2.5) +
  annotate("text", x = ymd("2022-09-07"), y = 155, label = "iPhone 14 Launch", vjust = -0.5, size = 2.5) +
  annotate("text", x = ymd("2022-10-13"), y = 140, label = "Fed 75bps hike → ↑ volatility", vjust = 1.2, size = 2.5) +
  annotate("text", x = ymd("2022-12-16"), y = 133, label = "2022 Low: Recession Fears", vjust = -0.5, size = 2.5) +
  labs(
    title = "(c) Apple Adjusted Close Price (Jul–Dec 2022)",
    x = "Date", y = "Price (USD)",
    caption = "Source: Yahoo Finance"
  ) +
  theme_minimal(base_size = 9)
p_price

# ---- STL decomposition of five calendar years of adjusted closes ----

# ggplot2's autoplot() has no built-in method for "stl" objects; the forecast
# package registers one. Loading its namespace is sufficient and leaves the
# search path (and therefore function masking) unchanged.
stopifnot(requireNamespace("forecast", quietly = TRUE))

# Use 5 years of data, sorted by date so the ts() below is chronological.
apple_5y <- apple_df %>%
  filter(year(Date) >= 2018 & year(Date) <= 2022) %>%
  arrange(Date)

# Create time series: one observation per row, with 252 observations treated
# as one seasonal period (approximate trading days per year).
apple_ts <- ts(apple_5y$`Adj.Close`, frequency = 252, start = c(2018, 1))

# STL decomposition with a fixed (periodic) seasonal pattern and robust fitting.
decomp <- stl(apple_ts, s.window = "periodic", robust = TRUE)

p_season <- autoplot(decomp) +
  labs(title = "(d) STL Decomposition (2018–2022):\nNo Strong Daily Seasonality", caption = "Trend dominates; weak Q4 uplift") +
  theme_minimal(base_size = 9) +
  theme(plot.title = element_text(size = 9))
p_season

# Load required packages
library(fable)
library(tsibble)
library(dplyr)
library(ggplot2)
library(lubridate)

# Convert the six-month window to a tsibble indexed by calendar Date.
# Price rows exist only for trading days, so a daily Date index has implicit
# gaps (weekends, holidays). ETS() rejects series with implicit gaps and does
# not accept missing values, so the gaps are made explicit with fill_gaps()
# and the last observed close is carried forward over them.
apple_tsib <- apple_df %>%
  filter(Date >= ymd("2022-07-13") & Date <= ymd("2022-12-31")) %>%
  as_tsibble(index = Date) %>%
  fill_gaps() %>%
  tidyr::fill(`Adj.Close`, .direction = "down")

# Fit ETS with additive trend and no seasonality; the error form is left for
# fable to select.
fit_fable <- apple_tsib %>%
  model(ETS(`Adj.Close` ~ trend("A") + season("N")))

# Forecast the next 14 calendar days after the last date in apple_tsib.
# NOTE(review): if the series ends on 2022-12-30 this horizon stops at
# 2023-01-13, short of the "Jan 3–20" range named in the plot title — confirm
# the intended horizon.
fc_fable <- fit_fable %>%
  forecast(h = "14 days")

# Observed rows for Jan 3–20, 2023 (inclusive), kept as a data frame so they
# can be drawn on top of the forecast.
actual_jan <- filter(
  apple_df,
  Date >= ymd("2023-01-03"),
  Date <= ymd("2023-01-20")
)

# Forecast fan plotted against the fitted history, on the Date axis.
fc_base <- autoplot(fc_fable, apple_tsib)

# Black overlay of the observed adjusted closes.
actual_layer <- geom_line(
  data = actual_jan,
  mapping = aes(x = Date, y = `Adj.Close`),
  colour = "black",
  linewidth = 0.8,
  na.rm = TRUE
)

p_fc <- fc_base +
  actual_layer +
  labs(
    title = "(e) 14-Day Forecast (ETS) for Apple (Jan 3–20, 2023)",
    y = "Adj. Close (USD)",
    caption = "ETS(A,A,N): additive trend, no seasonality. Black = actual."
  ) +
  theme_minimal(base_size = 9)

p_fc

library(tidyverse)
library(forecast)
library(Metrics)

# ---- Hold-out evaluation: test window (Dec 19–30) and target window (Jan 3–20) ----

# Locale-independent weekday check. weekdays() returns localized day names, so
# comparing against English names selects nothing on a non-English system.
# POSIXlt's wday is numeric: 0 = Sunday, 1–5 = Monday–Friday, 6 = Saturday.
is_weekday <- function(d) as.POSIXlt(d)$wday %in% 1:5

# Define key dates
train_end   <- as.Date("2022-12-16")
test_dates  <- seq(train_end + 1, by = "day", length.out = 20)
test_dates  <- head(test_dates[is_weekday(test_dates)], 10)
# → the 10 weekdays Dec 19–30, 2022 (a market holiday among them simply has no
#   row in the data and drops out of the comparison below)

target_dates <- seq(as.Date("2023-01-03"), as.Date("2023-01-20"), by = "day")
target_dates <- target_dates[is_weekday(target_dates)]
# → every weekday Jan 3–20, 2023. Not truncated to 10 entries: the first ten
#   weekdays from Jan 3 end on Jan 16, short of the Jan 20 used in the labels.

# Training series: every adjusted close up to and including train_end, in
# date order.
train_data <- apple_df %>%
  filter(Date <= train_end) %>%
  arrange(Date) %>%
  pull(`Adj.Close`)

# Every observed row after train_end up to the end of the target window, in
# date order. Row k is matched with forecast step k, which assumes apple_df
# holds one row per trading day.
holdout <- apple_df %>%
  filter(Date > train_end, Date <= max(target_dates)) %>%
  arrange(Date)
stopifnot(length(train_data) > 0, nrow(holdout) > 0)

in_test   <- holdout$Date %in% test_dates
in_target <- holdout$Date %in% target_dates

# Observed values per window (numeric vectors).
# Note: actual_jan is also the name of the data frame built for the forecast
# plot earlier in the file; from here on it is a numeric vector.
test_actual <- holdout$`Adj.Close`[in_test]
actual_jan  <- holdout$`Adj.Close`[in_target]

# Fit ETS. No `model =` is passed, so ets() chooses the error/trend form
# itself; the result is not guaranteed to be ETS(A,A,N).
# NOTE(review): pass model = "AAN", damped = FALSE if that exact form is
# required. ets() ignores seasonality (with a warning) at frequency 252.
fit_ets <- ets(ts(train_data, frequency = 252))

# One forecast step per held-out row, so both windows index into one path.
fc_ets  <- forecast(fit_ets, h = nrow(holdout))
fc_path <- as.numeric(fc_ets$mean)

# Extract forecasted values aligned with the observed rows of each window
fc_test_vals   <- fc_path[in_test]     # for Dec 19–30
fc_target_vals <- fc_path[in_target]   # for Jan 3–20

# Compute metrics (Metrics::mape returns a fraction, scaled to % when printed)
mape_test   <- mape(test_actual, fc_test_vals)
rmse_test   <- rmse(test_actual, fc_test_vals)
mape_target <- mape(actual_jan, fc_target_vals)
rmse_target <- rmse(actual_jan, fc_target_vals)

# Print results
# NOTE(review): an earlier run recorded MAPE = 3.32 % | RMSE = 4.51 for the
# test set and MAPE = 4.82 % | RMSE = 7.25 for the target window; those
# figures predate the date-window fix above and should be refreshed.
cat("Test Set (Dec 19–30): MAPE =", round(mape_test * 100, 2), "% | RMSE =", round(rmse_test, 2), "\n")
cat("Target (Jan 3–20):   MAPE =", round(mape_target * 100, 2), "% | RMSE =", round(rmse_target, 2), "\n")

# View PDF