Heirarchal time series project to explore stock price values and returns for Apple, Tesla, Johnson & Johnson, and Pfizer. Sectors: Tech and Healthcare

Load in liabilities

library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.2     ✔ fabletools  0.3.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(kableExtra)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(latex2exp)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.1     ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()          masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Read in Data

#tech
apple <- read_csv("/Users/adrianjones/Documents/Forecasting/Week5/apple_stocks.csv", show_col_types = F)
tesla <- read_csv("/Users/adrianjones/Documents/Forecasting/Week5/tesla_stocks.csv", show_col_types = F)

#health
jnj <- read_csv('/Users/adrianjones/Documents/Forecasting/Week5/jnj_stocks.csv', show_col_types = F)
pfizer <- read_csv("/Users/adrianjones/Documents/Forecasting/Week5/pfizer_stocks.csv", show_col_types = F)

Adjustments to Apple Data:

# Convert the "Date" column to Date format
apple$Date <- as.Date(apple$Date, format = "%m/%d/%Y")

# Extract year and month
apple$Year <- format(apple$Date, "%Y")
apple$Month <- format(apple$Date, "%m")

# Convert the "Date" column to Date format
apple$Date <- as.Date(apple$Date, format = "%m/%d/%Y")

# Extract year and month
apple$Year <- format(apple$Date, "%Y")
apple$Month <- format(apple$Date, "%m")

# Combine Year and Month into a single column
apple$YearMonth <- paste(apple$Year, apple$Month, sep = "-")

# Aggregate by YearMonth
apple <- apple %>%
  group_by(YearMonth) %>%
  summarise(
    Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
  ) %>%
  arrange(YearMonth)  # Sort by YearMonth in ascending order

# Print the aggregated and sorted data
print(apple)
## # A tibble: 61 × 2
##    YearMonth Average_Close
##    <chr>             <dbl>
##  1 2018-11            45.2
##  2 2018-12            41.1
##  3 2019-01            38.5
##  4 2019-02            42.9
##  5 2019-03            45.8
##  6 2019-04            50.1
##  7 2019-05            47.8
##  8 2019-06            48.2
##  9 2019-07            51.3
## 10 2019-08            51.2
## # ℹ 51 more rows

Adjustments to Tesla data

# Convert the "Date" column to Date format
tesla$Date <- as.Date(tesla$Date, format = "%m/%d/%Y")

# Extract year and month
tesla$Year <- format(tesla$Date, "%Y")
tesla$Month <- format(tesla$Date, "%m")

# Convert the "Date" column to Date format
tesla$Date <- as.Date(tesla$Date, format = "%m/%d/%Y")

# Extract year and month
tesla$Year <- format(tesla$Date, "%Y")
tesla$Month <- format(tesla$Date, "%m")

# Combine Year and Month into a single column
tesla$YearMonth <- paste(tesla$Year, tesla$Month, sep = "-")

# Aggregate by YearMonth
tesla <- tesla %>%
  group_by(YearMonth) %>%
  summarise(
    Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
  ) %>%
  arrange(YearMonth)  # Sort by YearMonth in ascending order

# Print the aggregated and sorted data
print(tesla)
## # A tibble: 61 × 2
##    YearMonth Average_Close
##    <chr>             <dbl>
##  1 2018-11            23.0
##  2 2018-12            22.9
##  3 2019-01            21.2
##  4 2019-02            20.5
##  5 2019-03            18.5
##  6 2019-04            17.8
##  7 2019-05            14.6
##  8 2019-06            14.2
##  9 2019-07            16.2
## 10 2019-08            15.0
## # ℹ 51 more rows

Adjustments to johnson & johnson data

jnj$Date <- as.Date(jnj$Date, format = "%m/%d/%Y")

# Extract year and month
jnj$Year <- format(jnj$Date, "%Y")
jnj$Month <- format(jnj$Date, "%m")

# Convert the "Date" column to Date format
jnj$Date <- as.Date(jnj$Date, format = "%m/%d/%Y")

# Extract year and month
jnj$Year <- format(jnj$Date, "%Y")
jnj$Month <- format(jnj$Date, "%m")

# Combine Year and Month into a single column
jnj$YearMonth <- paste(jnj$Year, jnj$Month, sep = "-")

# Aggregate by YearMonth
jnj <- jnj %>%
  group_by(YearMonth) %>%
  summarise(
    Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
  ) %>%
  arrange(YearMonth)  # Sort by YearMonth in ascending order

# Print the aggregated and sorted data
print(jnj)
## # A tibble: 61 × 2
##    YearMonth Average_Close
##    <chr>             <dbl>
##  1 2018-11            145.
##  2 2018-12            136.
##  3 2019-01            129.
##  4 2019-02            135.
##  5 2019-03            138.
##  6 2019-04            138.
##  7 2019-05            138.
##  8 2019-06            139.
##  9 2019-07            135.
## 10 2019-08            130.
## # ℹ 51 more rows

Adjustments to pfizer data

pfizer$Date <- as.Date(pfizer$Date, format = "%m/%d/%Y")

# Extract year and month
pfizer$Year <- format(pfizer$Date, "%Y")
pfizer$Month <- format(pfizer$Date, "%m")

# Convert the "Date" column to Date format
pfizer$Date <- as.Date(pfizer$Date, format = "%m/%d/%Y")

# Extract year and month
pfizer$Year <- format(pfizer$Date, "%Y")
pfizer$Month <- format(pfizer$Date, "%m")

# Combine Year and Month into a single column
pfizer$YearMonth <- paste(pfizer$Year, pfizer$Month, sep = "-")

# Aggregate by YearMonth
pfizer <- pfizer %>%
  group_by(YearMonth) %>%
  summarise(
    Average_Close = mean(as.numeric(gsub("\\$", "", `Close/Last`)))
  ) %>%
  arrange(YearMonth)  # Sort by YearMonth in ascending order

# Print the aggregated and sorted data
print(pfizer)
## # A tibble: 61 × 2
##    YearMonth Average_Close
##    <chr>             <dbl>
##  1 2018-11            43.0
##  2 2018-12            42.4
##  3 2019-01            41.2
##  4 2019-02            41.4
##  5 2019-03            41.1
##  6 2019-04            40.3
##  7 2019-05            40.4
##  8 2019-06            42.0
##  9 2019-07            41.8
## 10 2019-08            34.8
## # ℹ 51 more rows
#create identifiers for each stock:
apple$label <- "apple"
tesla$label <- "tesla"
jnj$label <- "jnj"
pfizer$label <- "pfizer"

#create sectors
apple$sector <- "tech"
tesla$sector <- "tech"
jnj$sector <- "healthcare"
pfizer$sector <- "healthcare"

mydata <- 
  bind_rows(apple, tesla, jnj, pfizer) |>
  mutate(YearMonth = yearmonth(YearMonth)) |>
  as_tsibble(index = YearMonth,
             key = Average_Close) |>
  arrange(YearMonth)

Time series:

myhts <- mydata%>%
  as_tsibble(index = YearMonth) %>%
  arrange(YearMonth) %>%
  aggregate_key(sector / label, Average_Close = sum(Average_Close))

More Plots

#Plot stock prices individually
ggplot(mydata, aes(x = YearMonth, y = Average_Close, color = label)) + 
  geom_line(aes(y = Average_Close)) +
  labs(title = "Stock Prices Individually",
       subtitle = "2018-2023",
       x = "Month",
       y = "Price ($)") 

#Then stock prices by sector:
myhts |> 
  filter(is_aggregated(label)) |>
  autoplot(Average_Close) +
  facet_wrap(vars(sector), scales = "free_y") +
  labs(y = "Price ($)",
       title = "Stock Prices by Sector") +
  theme(legend.position = "none")

Models

fit <- myhts |>
  filter(year(YearMonth) <= 2022) |>
  model(base = ETS(Average_Close)) |>
  reconcile(
    bu = bottom_up(base),
    td = top_down(base),
    mo = middle_out(base)
  )
fit
## # A mable: 7 x 6
## # Key:     sector, label [7]
##   sector       label                base bu           td           mo          
##   <chr*>       <chr*>            <model> <model>      <model>      <model>     
## 1 healthcare   jnj          <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)>
## 2 healthcare   pfizer       <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)>
## 3 healthcare   <aggregated> <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)> <ETS(A,N,N)>
## 4 tech         apple        <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)>
## 5 tech         tesla        <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)>
## 6 tech         <aggregated> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)> <ETS(M,A,N)>
## 7 <aggregated> <aggregated> <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)> <ETS(M,N,N)>
# None of our models contain any seasonal component, there is either no trend or an additive trend, 
# and errors are mixed between additive and multiplicative

Forecasting

fit |> forecast(h = '11 months') |>
  autoplot(
    myhts |> filter(year(YearMonth) >= 2018),
    level = NULL
  ) +
  labs(y = "Price ($)") +
  facet_wrap(vars(label, sector), scales = "free_y")

Accuracy Metrics

#Generating the forecast table (fable)
myforecast <- fit |> forecast(h = "11 months")

#Generating a point forecast accuracy table
myforecast |>
  filter(is_aggregated(sector), is_aggregated(label)) |>
  accuracy(
    data = myhts,
    measures = list(rmse = RMSE, mape = MAPE)
  ) |>
  group_by(.model) |>
  summarise(rmse = mean(rmse), mape = mean(mape)) |>
  arrange(mape)
## # A tibble: 4 × 3
##   .model  rmse  mape
##   <chr>  <dbl> <dbl>
## 1 base    80.2  11.1
## 2 td      80.2  11.1
## 3 mo     246.   35.8
## 4 bu     267.   38.8
#top-down is the optimal model given lowest root mean squared error and mean absolute percentage error. 
# we will use top-down to predict stock performance

Stock projections:

# Filter data for the selected stocks
selected_stocks <- c("apple", "tesla", "jnj", "pfizer")
filtered_data <- mydata %>% filter(label %in% selected_stocks)

#Calculate investment returns
returns <- filtered_data %>%
  group_by(label) %>%
  mutate(shares_purchased = 100 / first(Average_Close),
         portfolio_value = shares_purchased * Average_Close,
         profit = portfolio_value - 100) %>%
  select(label, portfolio_value, profit)

#Print the results
print(returns)
## # A tsibble: 244 x 5 [1M]
## # Key:       Average_Close [244]
## # Groups:    label [4]
##    label  portfolio_value    profit YearMonth Average_Close
##    <chr>            <dbl>     <dbl>     <mth>         <dbl>
##  1 tesla            100    0         2018 Nov          23.0
##  2 pfizer           100    1.42e-14  2018 Nov          43.0
##  3 apple            100    0         2018 Nov          45.2
##  4 jnj              100    0         2018 Nov         145. 
##  5 tesla             99.7 -2.86e- 1  2018 Dec          22.9
##  6 apple             90.8 -9.23e+ 0  2018 Dec          41.1
##  7 pfizer            98.7 -1.30e+ 0  2018 Dec          42.4
##  8 jnj               93.8 -6.18e+ 0  2018 Dec         136. 
##  9 tesla             92.3 -7.71e+ 0  2019 Jan          21.2
## 10 apple             85.2 -1.48e+ 1  2019 Jan          38.5
## # ℹ 234 more rows
#total value of portfolio
sum(returns$portfolio_value)
## [1] 72119.69
#total profit at end of 5 years
sum(returns$profit)
## [1] 47719.69

We conclude that we make good returns from our investments, especially from the tech sector. Top-bottom approach was the optimal approach, as it had the lowest error calculations. The returns we would make is approximately 1.5 tiimes less than that of the actual value of our portfolio.