Heirarchal time series project to explore stock price values and returns for Apple, Tesla, Johnson & Johnson, and Pfizer. Sectors: Tech and Healthcare
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.2 ✔ fabletools 0.3.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(kableExtra)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(latex2exp)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.1 ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
#tech
apple <- read_csv("/Users/adrianjones/Documents/Forecasting/Week5/apple_stocks.csv", show_col_types = F)
tesla <- read_csv("/Users/adrianjones/Documents/Forecasting/Week5/tesla_stocks.csv", show_col_types = F)
#health
jnj <- read_csv('/Users/adrianjones/Documents/Forecasting/Week5/jnj_stocks.csv', show_col_types = F)
pfizer <- read_csv("/Users/adrianjones/Documents/Forecasting/Week5/pfizer_stocks.csv", show_col_types = F)
# Convert the "Date" column to Date format
apple$Date <- as.Date(apple$Date, format = "%m/%d/%Y")
# Extract year and month
apple$Year <- format(apple$Date, "%Y")
apple$Month <- format(apple$Date, "%m")
# Convert the "Date" column to Date format
apple$Date <- as.Date(apple$Date, format = "%m/%d/%Y")
# Extract year and month
apple$Year <- format(apple$Date, "%Y")
apple$Month <- format(apple$Date, "%m")
# Combine Year and Month into a single column
apple$YearMonth <- paste(apple$Year, apple$Month, sep = "-")
# Aggregate by YearMonth
apple <- apple %>%
group_by(YearMonth) %>%
summarise(
Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
) %>%
arrange(YearMonth) # Sort by YearMonth in ascending order
# Print the aggregated and sorted data
print(apple)
## # A tibble: 61 × 2
## YearMonth Average_Close
## <chr> <dbl>
## 1 2018-11 45.2
## 2 2018-12 41.1
## 3 2019-01 38.5
## 4 2019-02 42.9
## 5 2019-03 45.8
## 6 2019-04 50.1
## 7 2019-05 47.8
## 8 2019-06 48.2
## 9 2019-07 51.3
## 10 2019-08 51.2
## # ℹ 51 more rows
# Convert the "Date" column to Date format
tesla$Date <- as.Date(tesla$Date, format = "%m/%d/%Y")
# Extract year and month
tesla$Year <- format(tesla$Date, "%Y")
tesla$Month <- format(tesla$Date, "%m")
# Convert the "Date" column to Date format
tesla$Date <- as.Date(tesla$Date, format = "%m/%d/%Y")
# Extract year and month
tesla$Year <- format(tesla$Date, "%Y")
tesla$Month <- format(tesla$Date, "%m")
# Combine Year and Month into a single column
tesla$YearMonth <- paste(tesla$Year, tesla$Month, sep = "-")
# Aggregate by YearMonth
tesla <- tesla %>%
group_by(YearMonth) %>%
summarise(
Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
) %>%
arrange(YearMonth) # Sort by YearMonth in ascending order
# Print the aggregated and sorted data
print(tesla)
## # A tibble: 61 × 2
## YearMonth Average_Close
## <chr> <dbl>
## 1 2018-11 23.0
## 2 2018-12 22.9
## 3 2019-01 21.2
## 4 2019-02 20.5
## 5 2019-03 18.5
## 6 2019-04 17.8
## 7 2019-05 14.6
## 8 2019-06 14.2
## 9 2019-07 16.2
## 10 2019-08 15.0
## # ℹ 51 more rows
jnj$Date <- as.Date(jnj$Date, format = "%m/%d/%Y")
# Extract year and month
jnj$Year <- format(jnj$Date, "%Y")
jnj$Month <- format(jnj$Date, "%m")
# Convert the "Date" column to Date format
jnj$Date <- as.Date(jnj$Date, format = "%m/%d/%Y")
# Extract year and month
jnj$Year <- format(jnj$Date, "%Y")
jnj$Month <- format(jnj$Date, "%m")
# Combine Year and Month into a single column
jnj$YearMonth <- paste(jnj$Year, jnj$Month, sep = "-")
# Aggregate by YearMonth
jnj <- jnj %>%
group_by(YearMonth) %>%
summarise(
Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
) %>%
arrange(YearMonth) # Sort by YearMonth in ascending order
# Print the aggregated and sorted data
print(jnj)
## # A tibble: 61 × 2
## YearMonth Average_Close
## <chr> <dbl>
## 1 2018-11 145.
## 2 2018-12 136.
## 3 2019-01 129.
## 4 2019-02 135.
## 5 2019-03 138.
## 6 2019-04 138.
## 7 2019-05 138.
## 8 2019-06 139.
## 9 2019-07 135.
## 10 2019-08 130.
## # ℹ 51 more rows
pfizer$Date <- as.Date(pfizer$Date, format = "%m/%d/%Y")
# Extract year and month
pfizer$Year <- format(pfizer$Date, "%Y")
pfizer$Month <- format(pfizer$Date, "%m")
# Convert the "Date" column to Date format
pfizer$Date <- as.Date(pfizer$Date, format = "%m/%d/%Y")
# Extract year and month
pfizer$Year <- format(pfizer$Date, "%Y")
pfizer$Month <- format(pfizer$Date, "%m")
# Combine Year and Month into a single column
pfizer$YearMonth <- paste(pfizer$Year, pfizer$Month, sep = "-")
# Aggregate by YearMonth
pfizer <- pfizer %>%
group_by(YearMonth) %>%
summarise(
Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
) %>%
arrange(YearMonth) # Sort by YearMonth in ascending order
# Print the aggregated and sorted data
print(pfizer)
## # A tibble: 61 × 2
## YearMonth Average_Close
## <chr> <dbl>
## 1 2018-11 43.0
## 2 2018-12 42.4
## 3 2019-01 41.2
## 4 2019-02 41.4
## 5 2019-03 41.1
## 6 2019-04 40.3
## 7 2019-05 40.4
## 8 2019-06 42.0
## 9 2019-07 41.8
## 10 2019-08 34.8
## # ℹ 51 more rows
#create identifiers for each stock:
apple$label <- "apple"
tesla$label <- "tesla"
jnj$label <- "jnj"
pfizer$label <- "pfizer"
#create sectors
apple$sector <- "tech"
tesla$sector <- "tech"
jnj$sector <- "healthcare"
pfizer$sector <- "healthcare"
mydata <-
bind_rows(apple, tesla, jnj, pfizer) |>
mutate(YearMonth = yearmonth(YearMonth)) |>
as_tsibble(index = YearMonth,
key = Average_Close) |>
arrange(YearMonth)
myhts <- mydata%>%
as_tsibble(index = YearMonth) %>%
arrange(YearMonth) %>%
aggregate_key(sector / label, Average_Close = sum(Average_Close))
#Plot stock prices individually
ggplot(mydata, aes(x = YearMonth, y = Average_Close, color = label)) +
geom_line(aes(y = Average_Close)) +
labs(title = "Stock Prices Individually",
subtitle = "2018-2023",
x = "Month",
y = "Price ($)")
#Then stock prices by sector:
myhts |>
filter(is_aggregated(label)) |>
autoplot(Average_Close) +
facet_wrap(vars(sector), scales = "free_y") +
labs(y = "Price ($)",
title = "Stock Prices by Sector") +
theme(legend.position = "none")
fit <- myhts |>
filter(year(YearMonth) <= 2022) |>
model(base = ETS(Average_Close)) |>
reconcile(
bu = bottom_up(base),
td = top_down(base),
mo = middle_out(base)
)
fit
## # A mable: 7 x 6
## # Key: sector, label [7]
## sector label base bu td mo
## <chr*> <chr*> <model> <model> <model> <model>
## 1 healthcare jnj <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)>
## 2 healthcare pfizer <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)>
## 3 healthcare <aggregated> <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)>
## 4 tech apple <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)>
## 5 tech tesla <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)>
## 6 tech <aggregated> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)>
## 7 <aggregated> <aggregated> <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)>
# None of our models contain any seasonal component, there is either no trend or an additive trend,
# and errors are mixed between additive and multiplicative
fit |> forecast(h = '11 months') |>
autoplot(
myhts |> filter(year(YearMonth) >= 2018),
level = NULL
) +
labs(y = "Price ($)") +
facet_wrap(vars(label, sector), scales = "free_y")
#Generating the forecast table (fable)
myforecast <- fit |> forecast(h = "11 months")
#Generating a point forecast accuracy table
myforecast |>
filter(is_aggregated(sector), is_aggregated(label)) |>
accuracy(
data = myhts,
measures = list(rmse = RMSE, mape = MAPE)
) |>
group_by(.model) |>
summarise(rmse = mean(rmse), mape = mean(mape)) |>
arrange(mape)
## # A tibble: 4 × 3
## .model rmse mape
## <chr> <dbl> <dbl>
## 1 base 80.2 11.1
## 2 td 80.2 11.1
## 3 mo 246. 35.8
## 4 bu 267. 38.8
#top-down is the optimal model given lowest root mean squared error and mean absolute percentage error.
# we will use top-down to predict stock performance
# Filter data for the selected stocks
selected_stocks <- c("apple", "tesla", "jnj", "pfizer")
filtered_data <- mydata %>% filter(label %in% selected_stocks)
#Calculate investment returns
returns <- filtered_data %>%
group_by(label) %>%
mutate(shares_purchased = 100 / first(Average_Close),
portfolio_value = shares_purchased * Average_Close,
profit = portfolio_value - 100) %>%
select(label, portfolio_value, profit)
#Print the results
print(returns)
## # A tsibble: 244 x 5 [1M]
## # Key: Average_Close [244]
## # Groups: label [4]
## label portfolio_value profit YearMonth Average_Close
## <chr> <dbl> <dbl> <mth> <dbl>
## 1 tesla 100 0 2018 Nov 23.0
## 2 pfizer 100 1.42e-14 2018 Nov 43.0
## 3 apple 100 0 2018 Nov 45.2
## 4 jnj 100 0 2018 Nov 145.
## 5 tesla 99.7 -2.86e- 1 2018 Dec 22.9
## 6 apple 90.8 -9.23e+ 0 2018 Dec 41.1
## 7 pfizer 98.7 -1.30e+ 0 2018 Dec 42.4
## 8 jnj 93.8 -6.18e+ 0 2018 Dec 136.
## 9 tesla 92.3 -7.71e+ 0 2019 Jan 21.2
## 10 apple 85.2 -1.48e+ 1 2019 Jan 38.5
## # ℹ 234 more rows
#total value of portfolio
sum(returns$portfolio_value)
## [1] 72119.69
#total profit at end of 5 years
sum(returns$profit)
## [1] 47719.69
We conclude that we make good returns from our investments, especially from the tech sector. Top-bottom approach was the optimal approach, as it had the lowest error calculations. The returns we would make is approximately 1.5 tiimes less than that of the actual value of our portfolio.