Load Libraries

library(fpp3)

## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.1     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.1     ✔ fabletools  0.3.2

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

Read Data and Plot

#Natural Gas Consumption data from EIA

mydata=read.csv("C:/Users/lfult/Documents/_Courses/Boston College/Predictive analytics/naturalgas.csv")
mydata$Year=seq(as.Date("1973/1/1"), as.Date("2020/12/1"), "months")
myts=mydata%>%
  mutate(YearMonth = yearmonth(as.character(Year))) %>%
  as_tsibble(index = YearMonth)
myts%>%autoplot(Amount)

Build Training & Test Sets

train=myts[1:468,]
test=myts[469:nrow(myts),]

Conduct Box Cox on the Training Set

lambda <- train %>%features(Amount, features = guerrero) %>%pull(lambda_guerrero)
train$Amount=train$Amount^lambda

Apply Transformation to Test Set

test$Amount=test$Amount^lambda

Mean, Drift. Naive, SNAIVE, Trend

fit=train |>model(trend_model = TSLM(Amount ~ trend()), 
                 mean_model=MEAN(Amount), 
                 drift_model=RW(Amount~drift()),
                 naive_model=NAIVE(Amount),
                 snaive_model=SNAIVE(Amount))
fit|>forecast(h=108)

## # A fable: 540 x 4 [1M]
## # Key:     .model [5]
##    .model      YearMonth           Amount .mean
##    <chr>           <mth>           <dist> <dbl>
##  1 trend_model  2012 Jan N(0.19, 0.00031) 0.186
##  2 trend_model  2012 Feb N(0.19, 0.00031) 0.186
##  3 trend_model  2012 Mar N(0.19, 0.00031) 0.186
##  4 trend_model  2012 Apr N(0.19, 0.00031) 0.186
##  5 trend_model  2012 May N(0.19, 0.00031) 0.186
##  6 trend_model  2012 Jun N(0.19, 0.00031) 0.186
##  7 trend_model  2012 Jul N(0.19, 0.00031) 0.186
##  8 trend_model  2012 Aug N(0.19, 0.00031) 0.186
##  9 trend_model  2012 Sep N(0.19, 0.00031) 0.186
## 10 trend_model  2012 Oct N(0.19, 0.00031) 0.186
## # ℹ 530 more rows

fit|>forecast(h=108)|>autoplot(train)

myf=fit%>%forecast(h=108)
(tmp=accuracy(myf, test))

## # A tibble: 5 × 10
##   .model       .type       ME    RMSE     MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>        <chr>    <dbl>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift_model  Test  0.0211   0.0291  0.0230  10.3    11.4    NaN   NaN 0.835
## 2 mean_model   Test  0.00208  0.0200  0.0183  -0.0360  9.85   NaN   NaN 0.835
## 3 naive_model  Test  0.0216   0.0294  0.0232  10.6    11.6    NaN   NaN 0.835
## 4 snaive_model Test  0.000670 0.00294 0.00240  0.304   1.32   NaN   NaN 0.427
## 5 trend_model  Test  0.000252 0.0199  0.0183  -1.02    9.91   NaN   NaN 0.835

STL Decomposition

dcmp <- train%>%
  model(stl = STL(Amount))
components(dcmp)

## # A dable: 468 x 7 [1M]
## # Key:     .model [1]
## # :        Amount = trend + season_year + remainder
##    .model YearMonth Amount trend season_year remainder season_adjust
##    <chr>      <mth>  <dbl> <dbl>       <dbl>     <dbl>         <dbl>
##  1 stl     1973 Jan  0.161 0.182    -0.0214   0.000125         0.182
##  2 stl     1973 Feb  0.163 0.182    -0.0197   0.000949         0.183
##  3 stl     1973 Mar  0.166 0.182    -0.0153  -0.000397         0.182
##  4 stl     1973 Apr  0.174 0.182    -0.00868  0.000471         0.183
##  5 stl     1973 May  0.182 0.182     0.00159 -0.00137          0.181
##  6 stl     1973 Jun  0.194 0.182     0.0122  -0.000610         0.182
##  7 stl     1973 Jul  0.202 0.182     0.0188   0.00121          0.184
##  8 stl     1973 Aug  0.204 0.182     0.0222  -0.000831         0.182
##  9 stl     1973 Sep  0.201 0.183     0.0202  -0.00192          0.181
## 10 stl     1973 Oct  0.194 0.183     0.00949  0.00210          0.185
## # ℹ 458 more rows

components(dcmp) %>% as_tsibble() %>%
  autoplot(Amount, colour="black") +
  geom_line(aes(y=season_year+trend), colour = "#D55E00", line_type='dotted') +
  labs(y = "Amount", title = "NG Consumption")

## Warning in geom_line(aes(y = season_year + trend), colour = "#D55E00",
## line_type = "dotted"): Ignoring unknown parameters: `line_type`

autoplot(components(dcmp))

# Forecasting with STL

fit_dcmp <- train |>
  model(stlf_model = decomposition_model(
    STL(Amount ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  ))

fit_dcmp |>forecast(h=108) |>autoplot(train)

myf2=fit_dcmp%>%forecast(h=108)

(tmp=rbind(tmp,accuracy(myf2, test)))

## # A tibble: 6 × 10
##   .model       .type       ME    RMSE     MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>        <chr>    <dbl>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift_model  Test  0.0211   0.0291  0.0230  10.3    11.4    NaN   NaN 0.835
## 2 mean_model   Test  0.00208  0.0200  0.0183  -0.0360  9.85   NaN   NaN 0.835
## 3 naive_model  Test  0.0216   0.0294  0.0232  10.6    11.6    NaN   NaN 0.835
## 4 snaive_model Test  0.000670 0.00294 0.00240  0.304   1.32   NaN   NaN 0.427
## 5 trend_model  Test  0.000252 0.0199  0.0183  -1.02    9.91   NaN   NaN 0.835
## 6 stlf_model   Test  0.000906 0.00337 0.00275  0.399   1.49   NaN   NaN 0.555

Seasonal Adjustment

components(dcmp) %>% as_tsibble() %>%
  autoplot(Amount, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(y = "Amount", title = "NG Consumption")

Moving Average

ma <- myts%>%filter(year(Year) >= 2000)%>%
  mutate(`5-MA` = slider::slide_dbl(Amount, mean,
                .before = 2, .after = 2, .complete = TRUE))

ma%>% autoplot(Amount) +
  geom_line(aes(y = `5-MA`), colour = "#D55E00") +
  labs(y = "Amount", title = "NG Consumption") +
  guides(colour = guide_legend(title = "series"))

## Warning: Removed 4 rows containing missing values (`geom_line()`).

# Additive & Multiplicative Decomposition (Classical)

dcmp <- train%>%
  model(add=classical_decomposition(Amount, type = "additive"), 
        mult=classical_decomposition(Amount, type="multiplicative"), 
        stl=STL(Amount))
components(dcmp)

## # A dable: 1,404 x 9 [1M]
## # Key:     .model [3]
## # :        Amount = trend + seasonal + random
##    .model YearMonth Amount  trend seasonal   random season_adjust season_year
##    <chr>      <mth>  <dbl>  <dbl>    <dbl>    <dbl>         <dbl>       <dbl>
##  1 add     1973 Jan  0.161 NA     -0.0240  NA               0.185          NA
##  2 add     1973 Feb  0.163 NA     -0.0216  NA               0.185          NA
##  3 add     1973 Mar  0.166 NA     -0.0171  NA               0.183          NA
##  4 add     1973 Apr  0.174 NA     -0.00776 NA               0.182          NA
##  5 add     1973 May  0.182 NA      0.00485 NA               0.178          NA
##  6 add     1973 Jun  0.194 NA      0.0161  NA               0.178          NA
##  7 add     1973 Jul  0.202  0.182  0.0217  -0.00165         0.181          NA
##  8 add     1973 Aug  0.204  0.182  0.0240  -0.00261         0.180          NA
##  9 add     1973 Sep  0.201  0.183  0.0215  -0.00332         0.179          NA
## 10 add     1973 Oct  0.194  0.183  0.00799  0.00348         0.186          NA
## # ℹ 1,394 more rows
## # ℹ 1 more variable: remainder <dbl>

autoplot(components(dcmp))

## Warning: Removed 948 rows containing missing values (`geom_line()`).

One Method for Forecasting with Classical Decomposition

add=components(dcmp)%>%filter(.model=='add')
mult=components(dcmp)%>%filter(.model=='mult')
stl=components(dcmp)%>%filter(.model=='stl')

trend1=mean(add$trend[457:462])
trend2=mean(mult$trend[457:462])
trend3=mean(stl$trend[457:462])

Initialize Forecasts & Month Counter

f1=rep(0, nrow(test))
f2=rep(0,nrow(test))
f3=rep(0,nrow(test))
j=0 #Month Counter

Loop Over Test Set & Generate Forecasts

for (i in 1:nrow(test)){
  j=j+1
  if(j==13){j=1}
  f1[i]=add$seasonal[j]+trend1
  f2[i]=mult$seasonal[j]*trend2
  f3[i]=stl$season_year[j]+trend3
}

Plot Observed & Forecasts for Test Set

test%>%autoplot()+geom_line(aes(y=f1, x=Year, col='red'))+
  geom_line(aes(y=f2, x=Year, col='blue'))+
  geom_line(aes(y=f3, x=Year, col='orange'))

## Plot variable not specified, automatically selected `.vars = Amount`

Metric Function

metrics = function(res, x){
  RMSE=mean(sqrt(res^2)/length(res))
  MAE=mean(abs(res)/length(res))
  MAPE=mean(abs(res)/x)
  ME=mean(res)
  MPE=mean(res/x)
  mylist=list(RMSE, MAE, MAPE, ME, MPE)
  names(mylist)=c('RMSE','MAE', 'MAPE', 'ME', 'MPE')
  return(mylist)
}

Get Residuals & Metrics

#residuals
res1=test$Amount-f1
res2=test$Amount-f2
res3=test$Amount-f3

#Generate the Metrics
tmp=cbind(metrics(f1,test$Amount), metrics(f2,test$Amount), metrics(f3,test$Amount))
colnames(tmp)=c('Add', 'Mult', 'STL')
tmp

##      Add         Mult        STL        
## RMSE 0.001719719 0.001719719 0.001720351
## MAE  0.001719719 0.001719719 0.001720351
## MAPE 0.9971803   0.9971163   0.998778   
## ME   0.1857297   0.1857297   0.1857979  
## MPE  0.9971803   0.9971163   0.998778

Week 1

Fulton

3/27/23

Load Libraries

Read Data and Plot

Build Training & Test Sets

Conduct Box Cox on the Training Set

Apply Transformation to Test Set

Mean, Drift. Naive, SNAIVE, Trend

STL Decomposition

Seasonal Adjustment

Moving Average

One Method for Forecasting with Classical Decomposition

Initialize Forecasts & Month Counter

Loop Over Test Set & Generate Forecasts

Plot Observed & Forecasts for Test Set

Metric Function

Get Residuals & Metrics