Library

library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tsibble)
library(tsibbledata)
library(fable)
library(feasts)
library(dplyr)
library(lubridate)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:tsibble':
## 
##     index
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readr)
library(ggplot2)
library(imputeTS)
## 
## Attaching package: 'imputeTS'
## 
## The following object is masked from 'package:zoo':
## 
##     na.locf

Accessing Data

gdppc <- global_economy

##Australia Population

pop_fit <- global_economy |> filter(Country == "Australia") |> model(RW(Population ~ drift()))
  pop_fc <- pop_fit |> forecast(h = 10)
autoplot(pop_fc)

## Bricks

glimpse(aus_production)
## Rows: 218
## Columns: 7
## $ Quarter     <qtr> 1956 Q1, 1956 Q2, 1956 Q3, 1956 Q4, 1957 Q1, 1957 Q2, 1957…
## $ Beer        <dbl> 284, 213, 227, 308, 262, 228, 236, 320, 272, 233, 237, 313…
## $ Tobacco     <dbl> 5225, 5178, 5297, 5681, 5577, 5651, 5317, 6152, 5758, 5641…
## $ Bricks      <dbl> 189, 204, 208, 197, 187, 214, 227, 222, 199, 229, 249, 234…
## $ Cement      <dbl> 465, 532, 561, 570, 529, 604, 603, 582, 554, 620, 646, 637…
## $ Electricity <dbl> 3923, 4436, 4806, 4418, 4339, 4811, 5259, 4735, 4608, 5196…
## $ Gas         <dbl> 5, 6, 7, 6, 5, 7, 7, 6, 5, 7, 8, 6, 5, 7, 8, 6, 6, 8, 8, 7…
aus_production <- aus_production |> drop_na()

bricks_fit <- aus_production |> model(NAIVE(Bricks))
bricks_fc <- bricks_fit |> forecast(h = 10)
autoplot(bricks_fc)

NWS Lambs

Unable to compute the “lamb” I pull the data but unable to use “lambs_fit <- aus_livestock |> filter(State ==”New South Wales”) |> model(SNAIVE(Sheep)) ”

Since I encounter issue with not able to find “Lambs”,I alternate for “Sheep”

data("aus_livestock")

unique(aus_livestock$Animal)
## [1] Bulls, bullocks and steers Calves                    
## [3] Cattle (excl. calves)      Cows and heifers          
## [5] Lambs                      Pigs                      
## [7] Sheep                     
## 7 Levels: Bulls, bullocks and steers Calves ... Sheep
nsw_livestock <- aus_livestock |> 
  filter(State == "New South Wales")
head(nsw_livestock)
## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##      Month Animal                     State            Count
##      <mth> <fct>                      <fct>            <dbl>
## 1 1976 Jul Bulls, bullocks and steers New South Wales 107500
## 2 1976 Aug Bulls, bullocks and steers New South Wales 107400
## 3 1976 Sep Bulls, bullocks and steers New South Wales 123400
## 4 1976 Oct Bulls, bullocks and steers New South Wales 115400
## 5 1976 Nov Bulls, bullocks and steers New South Wales 106400
## 6 1976 Dec Bulls, bullocks and steers New South Wales 115900
nsw_sheep <- aus_livestock |> 
  filter(State == "New South Wales", Animal == "Sheep")

head(nsw_sheep)
## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##      Month Animal State            Count
##      <mth> <fct>  <fct>            <dbl>
## 1 1972 Jul Sheep  New South Wales 669400
## 2 1972 Aug Sheep  New South Wales 581100
## 3 1972 Sep Sheep  New South Wales 468100
## 4 1972 Oct Sheep  New South Wales 515300
## 5 1972 Nov Sheep  New South Wales 564500
## 6 1972 Dec Sheep  New South Wales 565400
nsw_sheep |> 
  autoplot(Count) +
  labs(title = "Sheep Population in NSW", y = "Count", x = "Year")

nsw_livestock |> 
  autoplot(Count) +
  labs(title = "Livestock in NSW", y = "Count", x = "Year")

## Household Wealth

glimpse(hh_budget)
## Rows: 88
## Columns: 8
## Key: Country [4]
## $ Country      <chr> "Australia", "Australia", "Australia", "Australia", "Aust…
## $ Year         <dbl> 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 200…
## $ Debt         <dbl> 95.68999, 99.53078, 107.54020, 114.63320, 121.09980, 126.…
## $ DI           <dbl> 3.7195453, 3.9844784, 2.5163448, 4.0237543, 3.8401975, 3.…
## $ Expenditure  <dbl> 3.4043113, 2.9717413, 4.9491246, 5.7315408, 4.2578288, 3.…
## $ Savings      <dbl> 5.2389216, 6.4716693, 3.7399359, 1.2875994, 0.6377422, 1.…
## $ Wealth       <dbl> 314.9344, 314.5559, 323.2357, 339.3139, 354.4382, 350.279…
## $ Unemployment <dbl> 8.472281, 8.506114, 8.362488, 7.677429, 6.873791, 6.28554…
wealth_fit <- hh_budget |> model(RW(Wealth ~ drift())) 
wealth_fc <- wealth_fit |> forecast(h = 10)
autoplot(wealth_fc)

## Australian Takeaway Food Turnover

glimpse(aus_retail)
## Rows: 64,532
## Columns: 5
## Key: State, Industry [152]
## $ State       <chr> "Australian Capital Territory", "Australian Capital Territ…
## $ Industry    <chr> "Cafes, restaurants and catering services", "Cafes, restau…
## $ `Series ID` <chr> "A3349849A", "A3349849A", "A3349849A", "A3349849A", "A3349…
## $ Month       <mth> 1982 Apr, 1982 May, 1982 Jun, 1982 Jul, 1982 Aug, 1982 Sep…
## $ Turnover    <dbl> 4.4, 3.4, 3.6, 4.0, 3.6, 4.2, 4.8, 5.4, 6.9, 3.8, 4.2, 4.0…
head(aus_retail)
## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State                        Industry            `Series ID`    Month Turnover
##   <chr>                        <chr>               <chr>          <mth>    <dbl>
## 1 Australian Capital Territory Cafes, restaurants… A3349849A   1982 Apr      4.4
## 2 Australian Capital Territory Cafes, restaurants… A3349849A   1982 May      3.4
## 3 Australian Capital Territory Cafes, restaurants… A3349849A   1982 Jun      3.6
## 4 Australian Capital Territory Cafes, restaurants… A3349849A   1982 Jul      4  
## 5 Australian Capital Territory Cafes, restaurants… A3349849A   1982 Aug      3.6
## 6 Australian Capital Territory Cafes, restaurants… A3349849A   1982 Sep      4.2
food_data <- aus_retail |>
  filter(Industry == "Takeaway food service", State == "Australia")

glimpse(food_data)
## Rows: 0
## Columns: 5
## Key: State, Industry [0]
## $ State       <chr> 
## $ Industry    <chr> 
## $ `Series ID` <chr> 
## $ Month       <mth> 
## $ Turnover    <dbl>
unique(aus_retail$Industry)
##  [1] "Cafes, restaurants and catering services"                         
##  [2] "Cafes, restaurants and takeaway food services"                    
##  [3] "Clothing retailing"                                               
##  [4] "Clothing, footwear and personal accessory retailing"              
##  [5] "Department stores"                                                
##  [6] "Electrical and electronic goods retailing"                        
##  [7] "Food retailing"                                                   
##  [8] "Footwear and other personal accessory retailing"                  
##  [9] "Furniture, floor coverings, houseware and textile goods retailing"
## [10] "Hardware, building and garden supplies retailing"                 
## [11] "Household goods retailing"                                        
## [12] "Liquor retailing"                                                 
## [13] "Newspaper and book retailing"                                     
## [14] "Other recreational goods retailing"                               
## [15] "Other retailing"                                                  
## [16] "Other retailing n.e.c."                                           
## [17] "Other specialised food retailing"                                 
## [18] "Pharmaceutical, cosmetic and toiletry goods retailing"            
## [19] "Supermarket and grocery stores"                                   
## [20] "Takeaway food services"
unique(aus_retail$State)
## [1] "Australian Capital Territory" "New South Wales"             
## [3] "Northern Territory"           "Queensland"                  
## [5] "South Australia"              "Tasmania"                    
## [7] "Victoria"                     "Western Australia"

Australia was sub for “Australian Capital Territory” base on the finding of the data.

is_tsibble(food_data)
## [1] TRUE
food_fit <- aus_retail |> filter(Industry == "Takeaway food services", State == "Australian Capital Territory") |> model(NAIVE(Turnover))
food_fc <- food_fit |> forecast(h = 10)
autoplot(food_fc)

#Facebook Stock Price

glimpse(gafa_stock )
## Rows: 5,032
## Columns: 8
## Key: Symbol [4]
## $ Symbol    <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAP…
## $ Date      <date> 2014-01-02, 2014-01-03, 2014-01-06, 2014-01-07, 2014-01-08,…
## $ Open      <dbl> 79.38286, 78.98000, 76.77857, 77.76000, 76.97285, 78.11429, …
## $ High      <dbl> 79.57571, 79.10000, 78.11429, 77.99429, 77.93714, 78.12286, …
## $ Low       <dbl> 78.86000, 77.20428, 76.22857, 76.84571, 76.95571, 76.47857, …
## $ Close     <dbl> 79.01857, 77.28286, 77.70428, 77.14857, 77.63715, 76.64571, …
## $ Adj_Close <dbl> 66.96433, 65.49342, 65.85053, 65.37959, 65.79363, 64.95345, …
## $ Volume    <dbl> 58671200, 98116900, 103152700, 79302300, 64632400, 69787200,…
head(gafa_stock)
## # A tsibble: 6 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close    Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
## 1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
## 2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
## 3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
## 4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
## 5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
## 6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200
fb_stock <- gafa_stock |> filter(Symbol =="FB")

has_gaps(fb_stock)
## # A tibble: 1 × 2
##   Symbol .gaps
##   <chr>  <lgl>
## 1 FB     FALSE
autoplot(fb_stock, Close)

Drift method forecast

fb_stock_monthly <- fb_stock %>%
  index_by(Month = ~ yearmonth(.)) %>%
  summarize(Adj_Close = mean(Adj_Close, na.rm = TRUE))


fb_stock_monthly <- fb_stock_monthly %>%
  mutate(manual_drift = first(Adj_Close) + 
           (row_number() - 1) * ((last(Adj_Close) - first(Adj_Close)) / (n() - 1)))


autoplot(fb_stock_monthly, Adj_Close) +
  geom_line(aes(y = manual_drift), color = "purple", linetype = "dashed") +
  labs(title = "Facebook Stock Price Forecast using Monthly Mean",
       y = "Adjusted Close Price",
       x = "Year")

# Extended Identical Line

fb_stock_ts <- fb_stock |> 
  mutate(Date = as.Date(Date)) |> 
  as_tsibble(index = Date)


first_last_line <- tsibble(
  Date = c(min(fb_stock_ts$Date), max(fb_stock_ts$Date)),
  Close = c(first(fb_stock_ts$Close), last(fb_stock_ts$Close))
) |> as_tsibble(index = Date)
## Using `Date` as index variable.
autoplot(fb_stock_ts, Close) + 
  autolayer(first_last_line, Close, color = "orange") +
  labs(title = "Facebook Stock Price with First-Last Line",
       y = "Close Price", x = "Date") +
  theme_minimal()

fb_stock <- fb_stock %>%
  mutate(Close = na_interpolation(Close)) %>%
  as_tsibble(index = Date)

fb_stock <- fb_stock %>%
  fill_gaps() %>%
  update_tsibble(regular = TRUE)

naive_fc <- fb_stock %>%
  model(NAIVE(Close)) %>%
  forecast(h = "3 months")
## Warning: 1 error encountered for NAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
snaive_fc <- fb_stock %>%
  model(SNAIVE(Close ~ season("year"))) %>%
  forecast(h = "3 months")
## Warning: 1 error encountered for SNAIVE(Close ~ season("year"))
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
fb_fc <- fb_stock %>%
  model(ETS(Close ~ error("A") + trend("A") + season("N"))) %>%
  forecast(h = "3 months")
## Warning: 1 error encountered for ETS(Close ~ error("A") + trend("A") + season("N"))
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
autoplot(fb_stock, Close) +
  autolayer(naive_fc, series = "Naïve Forecast", color = "blue") + 
  autolayer(snaive_fc, series = "Seasonal Naïve Forecast", color = "green") +
  autolayer(fb_fc, series = "Other Forecast", color = "red") +
  labs(title = "Facebook Stock Forecast",
       y = "Close Price",
       x = "Date") +
  guides(color = guide_legend(title = "Forecast Type")) + 
  theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), : Ignoring unknown parameters: `series`
## Ignoring unknown parameters: `series`
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).

fb_stock <- fb_stock %>%
  select(Date, Close) %>%  # Keep only the required columns
  mutate(Close = na_interpolation(Close)) %>%
  as_tsibble(index = Date)  # Ensure Date is the time index

snaive_fit <- fb_stock %>%
  model(SNAIVE(Close))
## Warning: 1 error encountered for SNAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
snaive_fc <- snaive_fit %>% forecast(h = "12 months")  # Adjust horizon as needed

# Plot actual data with forecasts
autoplot(fb_stock, Close) +
  autolayer(snaive_fc, Close, name = "SNAIVE Forecast", color = "green") +
  labs(title = "Facebook Stock Forecast",
       y = "Closing Price",
       x = "Date") +
  theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `name`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters: `name`
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 365 rows containing missing values or values outside the scale range
## (`geom_line()`).

Extract data of interest

recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)

fit <- recent_production |> model(SNAIVE(Beer))

fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

fit |> forecast() |> autoplot(recent_production)

recent_production <- aus_production |> filter(year(Quarter) >= 1992)
fit <- recent_production |> model(SNAIVE(Beer))
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

fit |> forecast(h = 8) |> autoplot(recent_production)

# Australian Exports and Bricks

exports_fit <- global_economy |> filter(Country == "Australia") |> model(NAIVE(Exports))
exports_fc <- exports_fit |> forecast(h = 10)
autoplot(exports_fc)

bricks_fit <- aus_production |> model(NAIVE(Bricks))
bricks_fc <- bricks_fit |> forecast(h = 10)
autoplot(bricks_fc)

# Victorian livestock series using SNAIVE()

vic_livestock <- aus_livestock |> filter(State == "Victoria")
vic_fit <- vic_livestock |> model(SNAIVE(Count))
vic_fc <- vic_fit |> forecast(h = 10)
autoplot(vic_fc)

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

Create training dataset before 2011

myseries_train <- myseries |>
  filter(year(Month) < 2011)

Plot training data

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

# Fit seasonal naïve model

fit <- myseries_train |> model(SNAIVE(Turnover))

Check residuals

fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

Produce forecasts for test data

fc <- fit |> forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
autoplot(myseries, Turnover) + autolayer(fc, color = "navy blue")

Compare accuracy

fit |> accuracy()
## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768
fc |> accuracy(myseries)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601