# import required packages
library(readr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(hts)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyr)
library(tsibble)
##
## Attaching package: 'tsibble'
## The following object is masked from 'package:lubridate':
##
## interval
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(fable)
## Loading required package: fabletools
library(fabletools)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tsibbledata)
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ feasts 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
# import csv files
apple_df <- read_csv("/Users/laykhoonyu/Documents/MSAA_AE/predictive_analytics/week_5/Apple.csv")
## Rows: 1258 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Date, Close/Last, Open, High, Low
## dbl (1): Volume
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
GM_df <- read_csv("/Users/laykhoonyu/Documents/MSAA_AE/predictive_analytics/week_5/GM.csv")
## Rows: 1258 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Date, Close/Last, Open, High, Low
## dbl (1): Volume
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
microsoft_df <- read_csv("/Users/laykhoonyu/Documents/MSAA_AE/predictive_analytics/week_5/microsoft.csv")
## Rows: 1258 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Date, Close/Last, Open, High, Low
## dbl (1): Volume
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ford_df <- read_csv("/Users/laykhoonyu/Documents/MSAA_AE/predictive_analytics/week_5/ford.csv")
## Rows: 1258 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Date, Close/Last, Open, High, Low
## dbl (1): Volume
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# joining all closing prices into a single dataframe
stocks_df <- data.frame(apple_df$Date, apple_df$`Close/Last`, microsoft_df$`Close/Last`, ford_df$`Close/Last`, GM_df$`Close/Last`)
# renaming columns of closing prices to their respective names
names(stocks_df)[1] <- "date"
names(stocks_df)[2] <- "apple"
names(stocks_df)[3] <- "microsoft"
names(stocks_df)[4] <- "ford"
names(stocks_df)[5] <- "GM"
# format date and converting it to ascending order
stocks_df$date <- as.Date(stocks_df$date, format = "%m/%d/%Y")
stocks_df <- stocks_df[order(as.Date(stocks_df$date, format="%m/%d/%Y")),]
head(stocks_df)
## date apple microsoft ford GM
## 1258 2019-04-10 $50.155 $120.19 $9.33 $39.25
## 1257 2019-04-11 $49.7375 $120.33 $9.39 $39.33
## 1256 2019-04-12 $49.7175 $120.95 $9.45 $39.71
## 1255 2019-04-15 $49.8075 $121.05 $9.33 $39.57
## 1254 2019-04-16 $49.8125 $120.77 $9.36 $39.66
## 1253 2019-04-17 $50.7825 $121.77 $9.50 $39.99
# converting prices to numeric columns and removing dollar signs
columns <-c("apple", "microsoft", "ford", "GM")
stocks_df[, columns] <- lapply(columns, function(x) as.numeric(gsub("\\$", "", stocks_df[[x]])))
# Extract year and month from the Date column
stocks_df$Year <- format(stocks_df$date, "%Y")
stocks_df$Month <- format(stocks_df$date, "%m")
# calculating average closing prices by month
monthly_stocks_df <- stocks_df %>%
group_by(Month, Year) %>%
summarise(apple = mean(apple), microsoft = mean(microsoft), ford = mean(ford), GM = mean(GM))
# Convert Year and Month back to factors
monthly_stocks_df$Year <- factor(monthly_stocks_df$Year)
monthly_stocks_df$Month <- factor(monthly_stocks_df$Month)
# Combine Year and Month columns into a single Date column
monthly_stocks_df$date <- as.Date(paste(monthly_stocks_df$Year, monthly_stocks_df$Month, "01", sep = "-"))
# Convert Date variable to Date format
monthly_stocks_df$date <- as.Date(monthly_stocks_df$date, format = "%Y/%m/%d")
monthly_stocks_df <- monthly_stocks_df[order(as.Date(monthly_stocks_df$date, format="%Y/%m/%d")),]
# removing all columns except closing prices to calculate rate and value
monthly_stocks_df <- monthly_stocks_df %>%
ungroup() %>%
select(-Month, -Year, -date)
head(monthly_stocks_df)
## # A tibble: 6 × 4
## apple microsoft ford GM
## <dbl> <dbl> <dbl> <dbl>
## 1 50.7 124. 9.65 39.6
## 2 47.8 126. 10.1 36.8
## 3 48.2 132. 9.94 36.4
## 4 51.3 138. 10.1 39.4
## 5 51.2 136. 9.13 37.9
## 6 54.5 138. 9.26 38.1
# full set of the time series data
monthly_stocks_df=monthly_stocks_df[1:60,]
# accumulation rate
monthly_stocks_df[2:60,]=1+(monthly_stocks_df[2:60,]-monthly_stocks_df[1:59,])/monthly_stocks_df[1:59,]
# value per month
for (i in 2:60) monthly_stocks_df[i,]=monthly_stocks_df[i-1, ]*monthly_stocks_df[i,]
# adding date back into the dataframe
monthly_stocks_df$date=yearmonth(seq(as.Date("2019-04-01"), as.Date("2024/03/31"), by="months"))
# train test split
train=monthly_stocks_df[1:48,]
test=monthly_stocks_df[49:60,]
# Time Series
full_ts=monthly_stocks_df%>%as_tsibble(index=date)
train_ts=train%>%as_tsibble(index=date)
test_ts=test%>%as_tsibble(index=date)
# convert to time series
full=full_ts%>%pivot_longer(!c(date), names_to='Stock', values_to='Value')
train=train_ts%>%pivot_longer(!c(date), names_to='Stock', values_to='Value')
test=test_ts%>%pivot_longer(!c(date), names_to='Stock', values_to='Value')
# assigning sectors
full$Sector=rep('Tech', nrow(full))
train$Sector=rep('Tech', nrow(train))
test$Sector=rep('Tech', nrow(test))
full$Sector[full$Stock=="GM"|full$Stock=="ford"]="Auto"
train$Sector[train$Stock=="GM"|train$Stock=="ford"]="Auto"
test$Sector[test$Stock=="GM"|test$Stock=="ford"]="Auto"
full %>% autoplot() + ggtitle("Monthly Closing Stock Prices") + xlab("Date (Year Month)") + ylab("Dollars")
## Plot variable not specified, automatically selected `.vars = Value`
# aggregate time series for plotting
myagg <- full |>
aggregate_key(Stock/Sector,Value=sum(Value))
myagg
## # A tsibble: 540 x 4 [1M]
## # Key: Stock, Sector [9]
## date Stock Sector Value
## <mth> <chr*> <chr*> <dbl>
## 1 2019 Apr <aggregated> <aggregated> 224.
## 2 2019 May <aggregated> <aggregated> 221.
## 3 2019 Jun <aggregated> <aggregated> 227.
## 4 2019 Jul <aggregated> <aggregated> 239.
## 5 2019 Aug <aggregated> <aggregated> 235.
## 6 2019 Sep <aggregated> <aggregated> 240.
## 7 2019 Oct <aggregated> <aggregated> 243.
## 8 2019 Nov <aggregated> <aggregated> 260.
## 9 2019 Dec <aggregated> <aggregated> 269.
## 10 2020 Jan <aggregated> <aggregated> 286.
## # ℹ 530 more rows
# plot aggregated time series
myagg%>%filter(is_aggregated(Sector))%>%
autoplot(Value) +
labs(y = "Value",
title = "Value of Investment") +
facet_wrap(vars(Stock), scales = "free_y", ncol = 3) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1))
myagg2 <- train |>aggregate_key(Stock,Value=mean(Value))
m1=myagg2|> model(ets = ETS(Value))|>reconcile(bu = bottom_up(ets))
aug1=m1%>%augment()
f1=m1%>%forecast(h=12)
f1%>%autoplot(full)
acc1=f1%>%accuracy(test |>aggregate_key(Stock,Value=mean(Value)))
print(acc1)
## # A tibble: 10 × 11
## .model Stock .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 bu GM Test -1.28 3.86 3.16 -4.82 9.50 NaN NaN
## 2 bu apple Test 10.1 14.0 12.1 5.45 6.58 NaN NaN
## 3 bu ford Test 0.170 1.06 0.772 0.676 6.22 NaN NaN
## 4 bu microsoft Test 58.0 63.3 58.0 15.9 15.9 NaN NaN
## 5 bu <aggregated> Test -368. 368. 368. -255. 255. NaN NaN
## 6 ets GM Test -1.28 3.86 3.16 -4.82 9.50 NaN NaN
## 7 ets apple Test 10.1 14.0 12.1 5.45 6.58 NaN NaN
## 8 ets ford Test 0.170 1.06 0.772 0.676 6.22 NaN NaN
## 9 ets microsoft Test 58.0 63.3 58.0 15.9 15.9 NaN NaN
## 10 ets <aggregated> Test 15.7 16.9 15.7 10.6 10.6 NaN NaN
## # ℹ 1 more variable: ACF1 <dbl>
myagg3 <- train |>aggregate_key(Sector/Stock,Value=mean(Value))
m2=myagg3|> model(ets2 = ETS(Value))|>reconcile(td = top_down(ets2))
f2=m2%>%forecast(h=12)
f2%>%autoplot(full%>%aggregate_key(Sector/Stock,Value=mean(Value)))+facet_wrap(~Stock+Sector) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
acc2=f2%>%accuracy(test|>aggregate_key(Sector/Stock,Value=mean(Value)))
print(acc2)
## # A tibble: 14 × 12
## .model Sector Stock .type ME RMSE MAE MPE MAPE MASE
## <chr> <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets2 Auto GM Test -1.28 3.86 3.16 -4.82 9.50 NaN
## 2 ets2 Auto ford Test 0.170 1.06 0.772 0.676 6.22 NaN
## 3 ets2 Auto <aggregat… Test -0.557 2.28 1.90 -3.29 8.38 NaN
## 4 ets2 Tech apple Test 10.1 14.0 12.1 5.45 6.58 NaN
## 5 ets2 Tech microsoft Test 58.0 63.3 58.0 15.9 15.9 NaN
## 6 ets2 Tech <aggregat… Test 32.0 34.0 32.0 11.8 11.8 NaN
## 7 ets2 <aggregate… <aggregat… Test 15.7 16.9 15.7 10.6 10.6 NaN
## 8 td Auto GM Test 25.9 26.2 25.9 73.8 73.8 NaN
## 9 td Auto ford Test 9.22 9.28 9.22 75.2 75.2 NaN
## 10 td Auto <aggregat… Test 11.5 11.7 11.5 48.3 48.3 NaN
## 11 td Tech apple Test 138. 138. 138. 76.1 76.1 NaN
## 12 td Tech microsoft Test 277. 279. 277. 78.8 78.8 NaN
## 13 td Tech <aggregat… Test 149. 150. 149. 55.9 55.9 NaN
## 14 td <aggregate… <aggregat… Test 15.7 16.9 15.7 10.6 10.6 NaN
## # ℹ 2 more variables: RMSSE <dbl>, ACF1 <dbl>
#Middle Out Forecast
myagg4 <-train |>aggregate_key(Sector/Stock,Value=mean(Value))
m3=myagg4|> model(ets3 = ETS(Value)) |> reconcile(md = middle_out(ets3))
f3=m3%>%forecast(h=12)
f3%>%autoplot(full%>%aggregate_key(Sector/Stock,Value=mean(Value)))+facet_wrap(~Stock+Sector) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
acc3=f3%>%accuracy(test|>aggregate_key(Sector/Stock,Value=mean(Value)))
print(acc3)
## # A tibble: 14 × 12
## .model Sector Stock .type ME RMSE MAE MPE MAPE
## <chr> <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets3 Auto GM Test -1.28 3.86 3.16 -4.82 9.50
## 2 ets3 Auto ford Test 0.170 1.06 0.772 0.676 6.22
## 3 ets3 Auto <aggregated> Test -0.557 2.28 1.90 -3.29 8.38
## 4 ets3 Tech apple Test 10.1 14.0 12.1 5.45 6.58
## 5 ets3 Tech microsoft Test 58.0 63.3 58.0 15.9 15.9
## 6 ets3 Tech <aggregated> Test 32.0 34.0 32.0 11.8 11.8
## 7 ets3 <aggregated> <aggregated> Test 15.7 16.9 15.7 10.6 10.6
## 8 md Auto GM Test 16.9 17.2 16.9 47.6 47.6
## 9 md Auto ford Test 6.20 6.29 6.20 50.3 50.3
## 10 md Auto <aggregated> Test -0.557 2.28 1.90 -3.29 8.38
## 11 md Tech apple Test 94.8 95.2 94.8 52.3 52.3
## 12 md Tech microsoft Test 203. 206. 203. 57.6 57.6
## 13 md Tech <aggregated> Test 32.0 34.0 32.0 11.8 11.8
## 14 md <aggregated> <aggregated> Test -113. 114. 113. -78.7 78.7
## # ℹ 3 more variables: MASE <dbl>, RMSSE <dbl>, ACF1 <dbl>
glance(m1)
## # A tibble: 10 × 10
## Stock .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GM ets 14.6 -156. 319. 319. 324. 14.0 36.7 3.03
## 2 GM bu 14.6 -156. 319. 319. 324. 14.0 36.7 3.03
## 3 apple ets 0.00534 -190. 391. 392. 400. 69.2 145. 0.0546
## 4 apple bu 0.00534 -190. 391. 392. 400. 69.2 145. 0.0546
## 5 ford ets 0.0139 -104. 214. 214. 219. 2.16 5.06 0.0880
## 6 ford bu 0.0139 -104. 214. 214. 219. 2.16 5.06 0.0880
## 7 microsoft ets 0.00305 -210. 431. 432. 440. 169. 357. 0.0413
## 8 microsoft bu 0.00305 -210. 431. 432. 440. 169. 357. 0.0413
## 9 <aggregated> ets 0.00314 -171. 352. 354. 362. 30.9 68.5 0.0414
## 10 <aggregated> bu 0.00314 -171. 352. 354. 362. 30.9 68.5 0.0414
glance(m2)
## # A tibble: 14 × 11
## Sector Stock .model sigma2 log_lik AIC AICc BIC MSE AMSE
## <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Auto GM ets2 1.46e+1 -156. 319. 319. 324. 14.0 36.7
## 2 Auto GM td 1.46e+1 -156. 319. 319. 324. 14.0 36.7
## 3 Auto ford ets2 1.39e-2 -104. 214. 214. 219. 2.16 5.06
## 4 Auto ford td 1.39e-2 -104. 214. 214. 219. 2.16 5.06
## 5 Auto <aggregat… ets2 9.86e-3 -136. 279. 279. 284. 6.14 15.8
## 6 Auto <aggregat… td 9.86e-3 -136. 279. 279. 284. 6.14 15.8
## 7 Tech apple ets2 5.34e-3 -190. 391. 392. 400. 69.2 145.
## 8 Tech apple td 5.34e-3 -190. 391. 392. 400. 69.2 145.
## 9 Tech microsoft ets2 3.05e-3 -210. 431. 432. 440. 169. 357.
## 10 Tech microsoft td 3.05e-3 -210. 431. 432. 440. 169. 357.
## 11 Tech <aggregat… ets2 3.16e-3 -197. 405. 406. 414. 96.6 203.
## 12 Tech <aggregat… td 3.16e-3 -197. 405. 406. 414. 96.6 203.
## 13 <aggregate… <aggregat… ets2 3.14e-3 -171. 352. 354. 362. 30.9 68.5
## 14 <aggregate… <aggregat… td 3.14e-3 -171. 352. 354. 362. 30.9 68.5
## # ℹ 1 more variable: MAE <dbl>
glance(m3)
## # A tibble: 14 × 11
## Sector Stock .model sigma2 log_lik AIC AICc BIC MSE AMSE
## <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Auto GM ets3 1.46e+1 -156. 319. 319. 324. 14.0 36.7
## 2 Auto GM md 1.46e+1 -156. 319. 319. 324. 14.0 36.7
## 3 Auto ford ets3 1.39e-2 -104. 214. 214. 219. 2.16 5.06
## 4 Auto ford md 1.39e-2 -104. 214. 214. 219. 2.16 5.06
## 5 Auto <aggregat… ets3 9.86e-3 -136. 279. 279. 284. 6.14 15.8
## 6 Auto <aggregat… md 9.86e-3 -136. 279. 279. 284. 6.14 15.8
## 7 Tech apple ets3 5.34e-3 -190. 391. 392. 400. 69.2 145.
## 8 Tech apple md 5.34e-3 -190. 391. 392. 400. 69.2 145.
## 9 Tech microsoft ets3 3.05e-3 -210. 431. 432. 440. 169. 357.
## 10 Tech microsoft md 3.05e-3 -210. 431. 432. 440. 169. 357.
## 11 Tech <aggregat… ets3 3.16e-3 -197. 405. 406. 414. 96.6 203.
## 12 Tech <aggregat… md 3.16e-3 -197. 405. 406. 414. 96.6 203.
## 13 <aggregate… <aggregat… ets3 3.14e-3 -171. 352. 354. 362. 30.9 68.5
## 14 <aggregate… <aggregat… md 3.14e-3 -171. 352. 354. 362. 30.9 68.5
## # ℹ 1 more variable: MAE <dbl>
For the training set, the middle-out and top-down approach produced identical model statistics where they outperformed the bottom-up model in terms of AIC, MSE and MAE. When it comes to training accuracy, the bottom-up aggregate model performed the worst while the middle-out aggregate model had the lowest RMSE. Although the middle-out and top-down model have similar training results, the middle-out model will be the best model because of its better performance on the test set.