Libraries and Data

require(fpp3)
## Loading required package: fpp3
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.1     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.1     ✔ fabletools  0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
require(ggplot2)
require(kableExtra)
## Loading required package: kableExtra
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
require(latex2exp)
## Loading required package: latex2exp
require(readxl)
## Loading required package: readxl
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.1     ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()          masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
mydata=read.csv('C:/Users/lfult/Documents/_Courses/Boston College/Predictive analytics/ng.csv')
mydata$Year=seq(as.Date("2001/1/1"), as.Date("2023/1/1"), "months")
p1=c(rep(0,48), rep(1, 13))
p1=c(rep(p1, 4), rep(0, 21))
mydata$Pres=p1

Time Series and Train / Test Set

myts=mydata%>%mutate(YearMonth = yearmonth(as.character(Year))) %>% as_tsibble(index = YearMonth)
myts%>%autoplot(Amount)

train=myts[1:216,]
test=myts[217:nrow(mydata),]

Time Series Plots

myts%>%gg_season(Amount)

myts%>%gg_subseries(Amount)

myts%>%gg_lag(Amount)

myts%>%ACF(Amount)%>%autoplot()

Transformation

lambda <- myts |> features(Amount, features = guerrero) |> pull(lambda_guerrero)
myts |> autoplot(box_cox(Amount, lambda)) +
  labs(y = "",title = latex2exp::TeX(paste0(
         "Transformed NT Consumption with $\\lambda$ = ", round(lambda,2))))

# Decomposition

comp=myts%>%model(stl=STL(Amount))%>%components()
comp|>as_tsibble() |>autoplot(Amount)+  geom_line(aes(y=trend), colour = "red")+
  geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

Models

m1=train |>model(Trend_Model = TSLM(Amount ~ trend()))
m2=train |>model(Mean_Model=MEAN(Amount))
m3=train |>model(Drift_Model=RW(Amount~drift()))
m4=train |>model(Naive_Model=NAIVE(Amount))
m5=train |>model(Seasonal_Naive_Model=SNAIVE(Amount))
m6=train |>model(ETS_Model=ETS(Amount))
m7=train |>model(ARIMA_Model=ARIMA(Amount))
m8=train |>model(Trend_Seasonality_Model=TSLM(Amount~trend()+season()))
m9=train |>model(Fourier_Model=TSLM(Amount~trend()+fourier(K=6)))
m10=train |>model(Exponential_Model=TSLM(log(Amount)~trend()+season()))
m11=train|>model(Splines_Model=TSLM(Amount ~ trend(knots = c(1990))))
m12=train|>model(Ensemble_Model=(ETS(Amount)+ARIMA(Amount))/2)
m13=train|>model(Dynamic_Model=ARIMA(Amount~Pres)) 
m14=train|>model(Ensemble_Model_2=(ETS(Amount)+TSLM(Amount~trend()+fourier(K=3))+ARIMA(Amount~Pres))/3)
m15=train|>model(Ensemble_Model_3=(ARIMA(Amount~trend()+season()+Pres)+ETS(Amount~trend()+season()))/2)

Reports

for (i in 1:15){report(eval(parse(text=paste0('m',i))))}
## Series: Amount 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -541195 -306119 -162841  360071 1027306 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1743454.8    54004.0  32.284  < 2e-16 ***
## trend()        2781.3      431.5   6.445 7.54e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 395500 on 214 degrees of freedom
## Multiple R-squared: 0.1626,  Adjusted R-squared: 0.1586
## F-statistic: 41.54 on 1 and 214 DF, p-value: 7.5394e-10
## Series: Amount 
## Model: MEAN 
## 
## Mean: 2045230.1806 
## sigma^2: 185885140083.553 
## Series: Amount 
## Model: RW w/ drift 
## 
## Drift: 1500.0558 (se: 19654.0583)
## sigma^2: 83050631329.0436 
## Series: Amount 
## Model: NAIVE 
## 
## sigma^2: 83050631329.0436 
## Series: Amount 
## Model: SNAIVE 
## 
## sigma^2: 20865051838.0542 
## Series: Amount 
## Model: ETS(M,N,M) 
##   Smoothing parameters:
##     alpha = 0.5126996 
##     gamma = 0.1232869 
## 
##   Initial states:
##     l[0]    s[0]     s[-1]     s[-2]     s[-3]     s[-4]     s[-5]     s[-6]
##  1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
##      s[-7]     s[-8]    s[-9]   s[-10]  s[-11]
##  0.8400235 0.9456213 1.148872 1.226745 1.36431
## 
##   sigma^2:  0.0024
## 
##      AIC     AICc      BIC 
## 6137.055 6139.455 6187.684 
## Series: Amount 
## Model: ARIMA(1,0,2)(0,1,2)[12] w/ drift 
## 
## Coefficients:
##          ar1      ma1      ma2     sma1     sma2   constant
##       0.8928  -0.3767  -0.2161  -0.6236  -0.2143  3782.1540
## s.e.  0.1005   0.1305   0.1191   0.0877   0.0919   752.6637
## 
## sigma^2 estimated as 1.092e+10:  log likelihood=-2650.6
## AIC=5315.2   AICc=5315.77   BIC=5338.43
## Series: Amount 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -501823  -71632  -17370   81118  397694 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2489678.5    33503.0  74.312  < 2e-16 ***
## trend()            2908.4      140.2  20.748  < 2e-16 ***
## season()year2   -307463.2    42754.9  -7.191 1.21e-11 ***
## season()year3   -486080.8    42755.5 -11.369  < 2e-16 ***
## season()year4   -924006.5    42756.7 -21.611  < 2e-16 ***
## season()year5  -1111869.8    42758.3 -26.004  < 2e-16 ***
## season()year6  -1126133.3    42760.4 -26.336  < 2e-16 ***
## season()year7   -954255.1    42762.9 -22.315  < 2e-16 ***
## season()year8   -937634.4    42765.9 -21.925  < 2e-16 ***
## season()year9  -1142501.2    42769.3 -26.713  < 2e-16 ***
## season()year10 -1052758.7    42773.2 -24.613  < 2e-16 ***
## season()year11  -796734.8    42777.6 -18.625  < 2e-16 ***
## season()year12  -280672.1    42782.4  -6.560 4.37e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 128300 on 203 degrees of freedom
## Multiple R-squared: 0.9164,  Adjusted R-squared: 0.9115
## F-statistic: 185.5 on 12 and 203 DF, p-value: < 2.22e-16
## Series: Amount 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -501823  -71632  -17370   81118  397694 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1729669.3    17535.5  98.638  < 2e-16 ***
## trend()                2908.4      140.2  20.748  < 2e-16 ***
## fourier(K = 6)C1_12  452994.6    12343.0  36.701  < 2e-16 ***
## fourier(K = 6)S1_12   52778.2    12353.3   4.272 2.97e-05 ***
## fourier(K = 6)C2_12  244192.0    12343.0  19.784  < 2e-16 ***
## fourier(K = 6)S2_12   63758.3    12344.6   5.165 5.73e-07 ***
## fourier(K = 6)C3_12   -2883.4    12343.0  -0.234 0.815528    
## fourier(K = 6)S3_12  -57340.4    12343.0  -4.646 6.09e-06 ***
## fourier(K = 6)C4_12   27254.1    12343.0   2.208 0.028360 *  
## fourier(K = 6)S4_12  -17077.3    12342.5  -1.384 0.167994    
## fourier(K = 6)C5_12   27016.3    12343.0   2.189 0.029753 *  
## fourier(K = 6)S5_12  -45742.4    12342.3  -3.706 0.000271 ***
## fourier(K = 6)C6_12   11435.5     8727.5   1.310 0.191581    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 128300 on 203 degrees of freedom
## Multiple R-squared: 0.9164,  Adjusted R-squared: 0.9115
## F-statistic: 185.5 on 12 and 203 DF, p-value: < 2.22e-16
## Series: Amount 
## Model: TSLM 
## Transformation: log(Amount) 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.188353 -0.035196 -0.004048  0.036566  0.159249 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.469e+01  1.559e-02 942.357  < 2e-16 ***
## trend()         1.425e-03  6.522e-05  21.853  < 2e-16 ***
## season()year2  -1.150e-01  1.989e-02  -5.781 2.77e-08 ***
## season()year3  -1.903e-01  1.989e-02  -9.565  < 2e-16 ***
## season()year4  -4.005e-01  1.989e-02 -20.132  < 2e-16 ***
## season()year5  -5.073e-01  1.989e-02 -25.502  < 2e-16 ***
## season()year6  -5.170e-01  1.990e-02 -25.988  < 2e-16 ***
## season()year7  -4.197e-01  1.990e-02 -21.096  < 2e-16 ***
## season()year8  -4.093e-01  1.990e-02 -20.568  < 2e-16 ***
## season()year9  -5.268e-01  1.990e-02 -26.475  < 2e-16 ***
## season()year10 -4.733e-01  1.990e-02 -23.785  < 2e-16 ***
## season()year11 -3.405e-01  1.990e-02 -17.108  < 2e-16 ***
## season()year12 -1.100e-01  1.991e-02  -5.525 1.00e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05968 on 203 degrees of freedom
## Multiple R-squared: 0.9189,  Adjusted R-squared: 0.9141
## F-statistic: 191.6 on 12 and 203 DF, p-value: < 2.22e-16
## Series: Amount 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -541195 -306119 -162841  360071 1027306 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1743454.8    54004.0  32.284  < 2e-16 ***
## trend(knots = c(1990))trend         2781.3      431.5   6.445 7.54e-10 ***
## trend(knots = c(1990))trend_1619        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 395500 on 214 degrees of freedom
## Multiple R-squared: 0.1626,  Adjusted R-squared: 0.1586
## F-statistic: 41.54 on 1 and 214 DF, p-value: 7.5394e-10
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount * 0.5
## 
## =========================
## 
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount + Amount
## 
## ============================
## 
## Series: Amount 
## Model: ETS(M,N,M) 
##   Smoothing parameters:
##     alpha = 0.5126996 
##     gamma = 0.1232869 
## 
##   Initial states:
##     l[0]    s[0]     s[-1]     s[-2]     s[-3]     s[-4]     s[-5]     s[-6]
##  1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
##      s[-7]     s[-8]    s[-9]   s[-10]  s[-11]
##  0.8400235 0.9456213 1.148872 1.226745 1.36431
## 
##   sigma^2:  0.0024
## 
##      AIC     AICc      BIC 
## 6137.055 6139.455 6187.684 
## 
## Series: Amount 
## Model: ARIMA(1,0,2)(0,1,2)[12] w/ drift 
## 
## Coefficients:
##          ar1      ma1      ma2     sma1     sma2   constant
##       0.8928  -0.3767  -0.2161  -0.6236  -0.2143  3782.1540
## s.e.  0.1005   0.1305   0.1191   0.0877   0.0919   752.6637
## 
## sigma^2 estimated as 1.092e+10:  log likelihood=-2650.6
## AIC=5315.2   AICc=5315.77   BIC=5338.43
## 
## 
## Series: Amount 
## Model: LM w/ ARIMA(1,0,2)(0,1,2)[12] errors 
## 
## Coefficients:
##          ar1      ma1      ma2     sma1     sma2       Pres  intercept
##       0.8529  -0.3283  -0.1771  -0.6444  -0.1823  -46517.55  35193.307
## s.e.  0.0947   0.1233   0.1048   0.0907   0.0994   34148.50   6358.389
## 
## sigma^2 estimated as 1.09e+10:  log likelihood=-2649.63
## AIC=5315.27   AICc=5316   BIC=5341.81
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount * 0.333333333333333
## 
## =======================================
## 
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount + Amount
## 
## ============================
## 
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount + Amount
## 
## ============================
## 
## Series: Amount 
## Model: ETS(M,N,M) 
##   Smoothing parameters:
##     alpha = 0.5126996 
##     gamma = 0.1232869 
## 
##   Initial states:
##     l[0]    s[0]     s[-1]     s[-2]     s[-3]     s[-4]     s[-5]     s[-6]
##  1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
##      s[-7]     s[-8]    s[-9]   s[-10]  s[-11]
##  0.8400235 0.9456213 1.148872 1.226745 1.36431
## 
##   sigma^2:  0.0024
## 
##      AIC     AICc      BIC 
## 6137.055 6139.455 6187.684 
## 
## Series: Amount 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -491834  -84737  -13169   86918  355173 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1730279.2    18438.3  93.842  < 2e-16 ***
## trend()                2902.8      147.4  19.695  < 2e-16 ***
## fourier(K = 3)C1_12  452989.0    12980.3  34.898  < 2e-16 ***
## fourier(K = 3)S1_12   52757.2    12991.1   4.061 6.92e-05 ***
## fourier(K = 3)C2_12  244186.4    12980.3  18.812  < 2e-16 ***
## fourier(K = 3)S2_12   63748.6    12982.0   4.911 1.83e-06 ***
## fourier(K = 3)C3_12   -2889.0    12980.3  -0.223    0.824    
## fourier(K = 3)S3_12  -57346.0    12980.3  -4.418 1.60e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 134900 on 208 degrees of freedom
## Multiple R-squared: 0.9053,  Adjusted R-squared: 0.9021
## F-statistic: 284.1 on 7 and 208 DF, p-value: < 2.22e-16
## 
## 
## Series: Amount 
## Model: LM w/ ARIMA(1,0,2)(0,1,2)[12] errors 
## 
## Coefficients:
##          ar1      ma1      ma2     sma1     sma2       Pres  intercept
##       0.8529  -0.3283  -0.1771  -0.6444  -0.1823  -46517.55  35193.307
## s.e.  0.0947   0.1233   0.1048   0.0907   0.0994   34148.50   6358.389
## 
## sigma^2 estimated as 1.09e+10:  log likelihood=-2649.63
## AIC=5315.27   AICc=5316   BIC=5341.81
## 
## 
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount * 0.5
## 
## =========================
## 
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount + Amount
## 
## ============================
## 
## Series: Amount 
## Model: LM w/ ARIMA(1,0,2)(0,0,2)[12] errors 
## 
## Coefficients:
##          ar1      ma1      ma2    sma1     sma2    trend()  season()year2
##       0.8699  -0.3554  -0.1588  0.1753  -0.1470  2965.3055     -306599.40
## s.e.  0.0878   0.1205   0.0991  0.0737   0.0705   377.2247       26335.99
##       season()year3  season()year4  season()year5  season()year6  season()year7
##          -489996.92     -930241.08    -1119339.27    -1134195.12     -962357.44
## s.e.       31281.13       32485.72       33273.79       33747.36       33897.52
##       season()year8  season()year9  season()year10  season()year11
##          -944515.09    -1149786.15     -1059939.28      -804072.65
## s.e.       33770.71       33348.26        32618.54        31535.92
##       season()year12       Pres   intercept
##           -287517.37  -51163.38  2506813.74
## s.e.        26746.91   33936.01    52092.11
## 
## sigma^2 estimated as 9.855e+09:  log likelihood=-2782.57
## AIC=5605.15   AICc=5609.46   BIC=5672.65
## 
## Series: Amount 
## Model: ETS(M,N,M) 
##   Smoothing parameters:
##     alpha = 0.5126996 
##     gamma = 0.1232869 
## 
##   Initial states:
##     l[0]    s[0]     s[-1]     s[-2]     s[-3]     s[-4]     s[-5]     s[-6]
##  1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
##      s[-7]     s[-8]    s[-9]   s[-10]  s[-11]
##  0.8400235 0.9456213 1.148872 1.226745 1.36431
## 
##   sigma^2:  0.0024
## 
##      AIC     AICc      BIC 
## 6137.055 6139.455 6187.684

Plots

myplot=function(model){
  model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted, col=.model),data=augment(model))+ggtitle(names(model))}

for (i in 1:15){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot, avoiding that model

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 12 rows containing missing values (`geom_line()`).

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Splines_Model = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading

Accuracy Metrics

tmp3=c(rep(0,10))
mymat=matrix(rep(0, 15*nrow(train)), ncol=15)

for (i in 1:15){
  tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
  tmp2=tmp%>%accuracy(test,measures = list(point_accuracy_measures))
  tmp3=rbind(tmp2, tmp3)
  #mymat[,i]=abs(augment(eval(parse(text=paste0('m',i))))$.resid)
}
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Splines_Model = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(RMSE)
## # A tibble: 15 × 7
##    .model                        ME    RMSE     MAE     MPE  MAPE    ACF1
##    <chr>                      <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>
##  1 Ensemble_Model            56287. 123475. 103177.   2.10   3.93  0.238 
##  2 ETS_Model                 15588. 131442. 109248.   0.663  4.20  0.380 
##  3 Ensemble_Model_3          73577. 134409. 108622.   2.78   4.16  0.283 
##  4 ARIMA_Model               96985. 139334. 113014.   3.53   4.21  0.130 
##  5 Ensemble_Model_2          99142. 142784. 117612.   3.71   4.46  0.199 
##  6 Dynamic_Model            114926. 149261. 122712.   4.27   4.59  0.0270
##  7 Seasonal_Naive_Model      82424. 161589. 120905.   2.94   4.51 -0.0513
##  8 Exponential_Model        129594. 179201. 154308.   5.23   6.07  0.320 
##  9 Fourier_Model            164824. 198187. 167727.   6.15   6.26  0.239 
## 10 Trend_Seasonality_Model  164824. 198187. 167727.   6.15   6.26  0.239 
## 11 Splines_Model            197169. 469121. 351128.   5.25  12.3   0.657 
## 12 Trend_Model              197169. 469121. 351128.   5.25  12.3   0.657 
## 13 Naive_Model             -388583. 577232. 510162. -17.8   21.4   0.661 
## 14 Drift_Model             -426085. 602348. 529650. -19.2   22.3   0.658 
## 15 Mean_Model               565697. 708670. 565697.  19.7   19.7   0.661