Load Libraries

library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.1     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.1     ✔ fabletools  0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode

Read Data and Plot

#Natural Gas Consumption data from EIA

mydata=read.csv("C:/Users/lfult/Documents/_Courses/Boston College/Predictive analytics/naturalgas.csv")
mydata$Year=seq(as.Date("1973/1/1"), as.Date("2020/12/1"), "months")
myts=mydata%>%
  mutate(YearMonth = yearmonth(as.character(Year))) %>%
  as_tsibble(index = YearMonth)
myts%>%autoplot(Amount)

Build Training & Test Sets

train=myts[1:468,]
test=myts[469:nrow(myts),]

Conduct Box Cox on the Training Set

lambda <- train %>%features(Amount, features = guerrero) %>%pull(lambda_guerrero)
train$Amount=train$Amount^lambda

Apply Transformation to Test Set

test$Amount=test$Amount^lambda

Mean, Drift. Naive, SNAIVE, Trend

fit=train |>model(trend_model = TSLM(Amount ~ trend()), 
                 mean_model=MEAN(Amount), 
                 drift_model=RW(Amount~drift()),
                 naive_model=NAIVE(Amount),
                 snaive_model=SNAIVE(Amount))
fit|>forecast(h=108)
## # A fable: 540 x 4 [1M]
## # Key:     .model [5]
##    .model      YearMonth           Amount .mean
##    <chr>           <mth>           <dist> <dbl>
##  1 trend_model  2012 Jan N(0.19, 0.00031) 0.186
##  2 trend_model  2012 Feb N(0.19, 0.00031) 0.186
##  3 trend_model  2012 Mar N(0.19, 0.00031) 0.186
##  4 trend_model  2012 Apr N(0.19, 0.00031) 0.186
##  5 trend_model  2012 May N(0.19, 0.00031) 0.186
##  6 trend_model  2012 Jun N(0.19, 0.00031) 0.186
##  7 trend_model  2012 Jul N(0.19, 0.00031) 0.186
##  8 trend_model  2012 Aug N(0.19, 0.00031) 0.186
##  9 trend_model  2012 Sep N(0.19, 0.00031) 0.186
## 10 trend_model  2012 Oct N(0.19, 0.00031) 0.186
## # ℹ 530 more rows
fit|>forecast(h=108)|>autoplot(train)

myf=fit%>%forecast(h=108)
(tmp=accuracy(myf, test))
## # A tibble: 5 × 10
##   .model       .type       ME    RMSE     MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>        <chr>    <dbl>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift_model  Test  0.0211   0.0291  0.0230  10.3    11.4    NaN   NaN 0.835
## 2 mean_model   Test  0.00208  0.0200  0.0183  -0.0360  9.85   NaN   NaN 0.835
## 3 naive_model  Test  0.0216   0.0294  0.0232  10.6    11.6    NaN   NaN 0.835
## 4 snaive_model Test  0.000670 0.00294 0.00240  0.304   1.32   NaN   NaN 0.427
## 5 trend_model  Test  0.000252 0.0199  0.0183  -1.02    9.91   NaN   NaN 0.835

STL Decomposition

dcmp <- train%>%
  model(stl = STL(Amount))
components(dcmp)
## # A dable: 468 x 7 [1M]
## # Key:     .model [1]
## # :        Amount = trend + season_year + remainder
##    .model YearMonth Amount trend season_year remainder season_adjust
##    <chr>      <mth>  <dbl> <dbl>       <dbl>     <dbl>         <dbl>
##  1 stl     1973 Jan  0.161 0.182    -0.0214   0.000125         0.182
##  2 stl     1973 Feb  0.163 0.182    -0.0197   0.000949         0.183
##  3 stl     1973 Mar  0.166 0.182    -0.0153  -0.000397         0.182
##  4 stl     1973 Apr  0.174 0.182    -0.00868  0.000471         0.183
##  5 stl     1973 May  0.182 0.182     0.00159 -0.00137          0.181
##  6 stl     1973 Jun  0.194 0.182     0.0122  -0.000610         0.182
##  7 stl     1973 Jul  0.202 0.182     0.0188   0.00121          0.184
##  8 stl     1973 Aug  0.204 0.182     0.0222  -0.000831         0.182
##  9 stl     1973 Sep  0.201 0.183     0.0202  -0.00192          0.181
## 10 stl     1973 Oct  0.194 0.183     0.00949  0.00210          0.185
## # ℹ 458 more rows
components(dcmp) %>% as_tsibble() %>%
  autoplot(Amount, colour="black") +
  geom_line(aes(y=season_year+trend), colour = "#D55E00", line_type='dotted') +
  labs(y = "Amount", title = "NG Consumption")
## Warning in geom_line(aes(y = season_year + trend), colour = "#D55E00",
## line_type = "dotted"): Ignoring unknown parameters: `line_type`

autoplot(components(dcmp))

# Forecasting with STL

fit_dcmp <- train |>
  model(stlf_model = decomposition_model(
    STL(Amount ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  ))

fit_dcmp |>forecast(h=108) |>autoplot(train)

myf2=fit_dcmp%>%forecast(h=108)

(tmp=rbind(tmp,accuracy(myf2, test)))
## # A tibble: 6 × 10
##   .model       .type       ME    RMSE     MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>        <chr>    <dbl>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift_model  Test  0.0211   0.0291  0.0230  10.3    11.4    NaN   NaN 0.835
## 2 mean_model   Test  0.00208  0.0200  0.0183  -0.0360  9.85   NaN   NaN 0.835
## 3 naive_model  Test  0.0216   0.0294  0.0232  10.6    11.6    NaN   NaN 0.835
## 4 snaive_model Test  0.000670 0.00294 0.00240  0.304   1.32   NaN   NaN 0.427
## 5 trend_model  Test  0.000252 0.0199  0.0183  -1.02    9.91   NaN   NaN 0.835
## 6 stlf_model   Test  0.000906 0.00337 0.00275  0.399   1.49   NaN   NaN 0.555

Seasonal Adjustment

components(dcmp) %>% as_tsibble() %>%
  autoplot(Amount, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(y = "Amount", title = "NG Consumption")

Moving Average

ma <- myts%>%filter(year(Year) >= 2000)%>%
  mutate(`5-MA` = slider::slide_dbl(Amount, mean,
                .before = 2, .after = 2, .complete = TRUE))

ma%>% autoplot(Amount) +
  geom_line(aes(y = `5-MA`), colour = "#D55E00") +
  labs(y = "Amount", title = "NG Consumption") +
  guides(colour = guide_legend(title = "series"))
## Warning: Removed 4 rows containing missing values (`geom_line()`).

# Additive & Multiplicative Decomposition (Classical)

dcmp <- train%>%
  model(add=classical_decomposition(Amount, type = "additive"), 
        mult=classical_decomposition(Amount, type="multiplicative"), 
        stl=STL(Amount))
components(dcmp)
## # A dable: 1,404 x 9 [1M]
## # Key:     .model [3]
## # :        Amount = trend + seasonal + random
##    .model YearMonth Amount  trend seasonal   random season_adjust season_year
##    <chr>      <mth>  <dbl>  <dbl>    <dbl>    <dbl>         <dbl>       <dbl>
##  1 add     1973 Jan  0.161 NA     -0.0240  NA               0.185          NA
##  2 add     1973 Feb  0.163 NA     -0.0216  NA               0.185          NA
##  3 add     1973 Mar  0.166 NA     -0.0171  NA               0.183          NA
##  4 add     1973 Apr  0.174 NA     -0.00776 NA               0.182          NA
##  5 add     1973 May  0.182 NA      0.00485 NA               0.178          NA
##  6 add     1973 Jun  0.194 NA      0.0161  NA               0.178          NA
##  7 add     1973 Jul  0.202  0.182  0.0217  -0.00165         0.181          NA
##  8 add     1973 Aug  0.204  0.182  0.0240  -0.00261         0.180          NA
##  9 add     1973 Sep  0.201  0.183  0.0215  -0.00332         0.179          NA
## 10 add     1973 Oct  0.194  0.183  0.00799  0.00348         0.186          NA
## # ℹ 1,394 more rows
## # ℹ 1 more variable: remainder <dbl>
autoplot(components(dcmp))
## Warning: Removed 948 rows containing missing values (`geom_line()`).

One Method for Forecasting with Classical Decomposition

add=components(dcmp)%>%filter(.model=='add')
mult=components(dcmp)%>%filter(.model=='mult')
stl=components(dcmp)%>%filter(.model=='stl')

trend1=mean(add$trend[457:462])
trend2=mean(mult$trend[457:462])
trend3=mean(stl$trend[457:462])

Initialize Forecasts & Month Counter

f1=rep(0, nrow(test))
f2=rep(0,nrow(test))
f3=rep(0,nrow(test))
j=0 #Month Counter

Loop Over Test Set & Generate Forecasts

for (i in 1:nrow(test)){
  j=j+1
  if(j==13){j=1}
  f1[i]=add$seasonal[j]+trend1
  f2[i]=mult$seasonal[j]*trend2
  f3[i]=stl$season_year[j]+trend3
}

Plot Observed & Forecasts for Test Set

test%>%autoplot()+geom_line(aes(y=f1, x=Year, col='red'))+
  geom_line(aes(y=f2, x=Year, col='blue'))+
  geom_line(aes(y=f3, x=Year, col='orange'))
## Plot variable not specified, automatically selected `.vars = Amount`

Metric Function

metrics = function(res, x){
  RMSE=mean(sqrt(res^2)/length(res))
  MAE=mean(abs(res)/length(res))
  MAPE=mean(abs(res)/x)
  ME=mean(res)
  MPE=mean(res/x)
  mylist=list(RMSE, MAE, MAPE, ME, MPE)
  names(mylist)=c('RMSE','MAE', 'MAPE', 'ME', 'MPE')
  return(mylist)
}

Get Residuals & Metrics

#residuals
res1=test$Amount-f1
res2=test$Amount-f2
res3=test$Amount-f3

#Generate the Metrics
tmp=cbind(metrics(f1,test$Amount), metrics(f2,test$Amount), metrics(f3,test$Amount))
colnames(tmp)=c('Add', 'Mult', 'STL')
tmp
##      Add         Mult        STL        
## RMSE 0.001719719 0.001719719 0.001720351
## MAE  0.001719719 0.001719719 0.001720351
## MAPE 0.9971803   0.9971163   0.998778   
## ME   0.1857297   0.1857297   0.1857979  
## MPE  0.9971803   0.9971163   0.998778