Libraries and Data

require(fpp3)
## Loading required package: fpp3
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.4.4     ✔ fabletools  0.3.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
require(ggplot2)
require(kableExtra)
## Loading required package: kableExtra
## Warning: package 'kableExtra' was built under R version 4.3.2
## Error: package or namespace load failed for 'kableExtra':
##  .onLoad failed in loadNamespace() for 'kableExtra', details:
##   call: !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output %in% 
##   error: 'length = 2' in coercion to 'logical(1)'
require(latex2exp)
## Loading required package: latex2exp
## Warning: package 'latex2exp' was built under R version 4.3.2
require(readxl)
## Loading required package: readxl
## Warning: package 'readxl' was built under R version 4.3.2
require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 4.3.2
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Read & Edit Data

mydata=read.csv('ndata.csv')
mydata$Month=seq(as.Date("2000/5/1"), as.Date("2023/11/1"), by="1 month")
mydata$Month=yearmonth(mydata$Month)
str(mydata)
## 'data.frame':    283 obs. of  9 variables:
##  $ Month             : mth [1:283] 2000 May, 2000 Jun, 2000 Jul, 2000 Aug, 2000 Sep, 2000 Oct...
##  $ GDP               : num  10.2 10.3 10.2 10.3 10.4 ...
##  $ Inflation.Rate    : num  0.0319 0.0366 0.0366 0.0341 0.0345 0.0345 0.0345 0.0339 0.0373 0.0114 ...
##  $ US.Oil.Production : int  181242 174686 177920 179451 172731 180080 174980 181508 179767 182076 ...
##  $ US.Coal.Production: num  1.88 1.92 1.8 2.05 1.89 1.97 1.92 1.78 2.03 2.12 ...
##  $ US.gas.Production : num  1.69 1.65 1.71 1.72 1.66 ...
##  $ Real.Oil.Price    : num  25.6 27.6 26.8 27.9 29.7 ...
##  $ Real.Coal.Price   : num  86.1 85.1 85.6 83.3 83.5 83.6 84.1 85.1 84.3 95.3 ...
##  $ Real.Gas.Price    : num  1.67 1.59 1.51 1.59 1.57 ...

Time Series and Train / Test Set

myts=mydata%>%as_tsibble(index = Month)
train=myts[1:191,]
test=myts[192:nrow(mydata),]

myts%>%autoplot(Inflation.Rate)

myts%>%autoplot(GDP)

myts%>%autoplot(Real.Oil.Price)

Time Series Plots

myts%>%gg_season(GDP)

myts%>%gg_lag(GDP)

myts%>%ACF(GDP)%>%autoplot()

Transformation

lambda=myts%>%features(GDP, features = guerrero) |> pull(lambda_guerrero)
myts%>%autoplot(box_cox(GDP, lambda[1])) +
  labs(y = "",title = latex2exp::TeX(paste0(
         "Transformed GDP with $\\lambda$ = ", round(lambda[1],2))))

# Decomposition

comp=myts%>%model(stl=STL(GDP))%>%components()
comp%>%as_tsibble() %>%autoplot(GDP)+  geom_line(aes(y=trend), colour = "red")+
  geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

Models for New England

#Naive Models
m1=train |>model(Mean_Model=MEAN(GDP))
m2=train |>model(Drift_Model=RW(GDP~drift()))
m3=train |>model(Naive_Model=NAIVE(GDP))
m4=train |>model(Seasonal_Naive_Model=SNAIVE(GDP))

#Basic Models
m5=train%>%model(TS_Model = TSLM(GDP))
m6=train |>model(ETS_Model=ETS(GDP))
m7=train |>model(ARIMA_Model=ARIMA(GDP))

#Dynamic Models
m8=train |>model(Fourier_Model=TSLM(GDP~Inflation.Rate+fourier(K=2)))
m9=train |>model(Exponential_Model=TSLM(log(GDP)~Inflation.Rate+season()))

#Dynamic Ensemble Models
m10=train|>model(Ensemble_Model=(ETS(GDP)+ARIMA(GDP~Inflation.Rate)))
m11=train|>model(Ensemble_Model_2=(ETS(GDP)+TSLM(GDP~Inflation.Rate+fourier(K=2))+ARIMA(GDP))/3)

#Dynamic Neural Network Models
#m12=train|>model(NeuralNet_Model=NNETAR(box_cox(GDP, .9)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))

Reports

for (i in 1:11){report(eval(parse(text=paste0('m',i))))}
## Series: GDP 
## Model: MEAN 
## 
## Mean: 16.656 
## sigma^2: 20.4733 
## Series: GDP 
## Model: RW w/ drift 
## 
## Drift: 0.0837 (se: 0.2404)
## sigma^2: 10.981 
## Series: GDP 
## Model: NAIVE 
## 
## sigma^2: 10.981 
## Series: GDP 
## Model: SNAIVE 
## 
## sigma^2: 53.5088 
## Series: GDP 
## Model: TSLM 
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.426 -3.321 -0.936  2.909 10.724 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16.6560     0.3274   50.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.525 on 190 degrees of freedom
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998986 
##     beta  = 0.0001001601 
## 
##   Initial states:
##      l[0]     b[0]
##  9.383544 0.292614
## 
##   sigma^2:  0.0158
## 
##      AIC     AICc      BIC 
## 1282.937 1283.261 1299.198 
## Series: GDP 
## Model: ARIMA(1,0,0) w/ mean 
## 
## Coefficients:
##          ar1  constant
##       0.7371    4.3898
## s.e.  0.0499    0.2205
## 
## sigma^2 estimated as 9.656:  log likelihood=-486.96
## AIC=979.92   AICc=980.05   BIC=989.67
## Series: GDP 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7089 -3.9126 -0.1127  3.9991  9.6714 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          14.8747     0.5386  27.619  < 2e-16 ***
## Inflation.Rate       69.6078    17.0623   4.080  6.7e-05 ***
## fourier(K = 2)C1_12  -0.4663     0.4445  -1.049   0.2955    
## fourier(K = 2)S1_12   0.7448     0.4448   1.674   0.0957 .  
## fourier(K = 2)C2_12  -0.0864     0.4449  -0.194   0.8463    
## fourier(K = 2)S2_12   0.1807     0.4425   0.408   0.6835    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.334 on 185 degrees of freedom
## Multiple R-squared: 0.1068,  Adjusted R-squared: 0.08268
## F-statistic: 4.425 on 5 and 185 DF, p-value: 0.00078704
## Series: GDP 
## Model: TSLM 
## Transformation: log(GDP) 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.523227 -0.220493  0.008203  0.239682  0.475351 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.66704    0.07111  37.507   <2e-16 ***
## Inflation.Rate  3.08295    1.05419   2.924   0.0039 ** 
## season()year2   0.04315    0.09458   0.456   0.6488    
## season()year3   0.07760    0.09460   0.820   0.4131    
## season()year4   0.07955    0.09626   0.826   0.4097    
## season()year5   0.08316    0.09477   0.878   0.3814    
## season()year6   0.06889    0.09475   0.727   0.4681    
## season()year7   0.05010    0.09470   0.529   0.5974    
## season()year8   0.02823    0.09469   0.298   0.7660    
## season()year9   0.01368    0.09459   0.145   0.8852    
## season()year10 -0.01346    0.09469  -0.142   0.8871    
## season()year11 -0.02455    0.09461  -0.260   0.7955    
## season()year12 -0.03583    0.09458  -0.379   0.7053    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2675 on 178 degrees of freedom
## Multiple R-squared: 0.07242, Adjusted R-squared: 0.009882
## F-statistic: 1.158 on 12 and 178 DF, p-value: 0.31684
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP + GDP
## 
## ======================
## 
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998986 
##     beta  = 0.0001001601 
## 
##   Initial states:
##      l[0]     b[0]
##  9.383544 0.292614
## 
##   sigma^2:  0.0158
## 
##      AIC     AICc      BIC 
## 1282.937 1283.261 1299.198 
## 
## Series: GDP 
## Model: LM w/ ARIMA(2,0,0)(0,0,1)[12] errors 
## 
## Coefficients:
##          ar1     ar2     sma1  Inflation.Rate  intercept
##       0.5574  0.2016  -0.1337         63.3685    15.0317
## s.e.  0.0941  0.1013   0.0741         15.6763     0.8668
## 
## sigma^2 estimated as 8.948:  log likelihood=-478.24
## AIC=968.49   AICc=968.94   BIC=988
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP * 0.333333333333333
## 
## ====================================
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP + GDP
## 
## ======================
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP + GDP
## 
## ======================
## 
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998986 
##     beta  = 0.0001001601 
## 
##   Initial states:
##      l[0]     b[0]
##  9.383544 0.292614
## 
##   sigma^2:  0.0158
## 
##      AIC     AICc      BIC 
## 1282.937 1283.261 1299.198 
## 
## Series: GDP 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7089 -3.9126 -0.1127  3.9991  9.6714 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          14.8747     0.5386  27.619  < 2e-16 ***
## Inflation.Rate       69.6078    17.0623   4.080  6.7e-05 ***
## fourier(K = 2)C1_12  -0.4663     0.4445  -1.049   0.2955    
## fourier(K = 2)S1_12   0.7448     0.4448   1.674   0.0957 .  
## fourier(K = 2)C2_12  -0.0864     0.4449  -0.194   0.8463    
## fourier(K = 2)S2_12   0.1807     0.4425   0.408   0.6835    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.334 on 185 degrees of freedom
## Multiple R-squared: 0.1068,  Adjusted R-squared: 0.08268
## F-statistic: 4.425 on 5 and 185 DF, p-value: 0.00078704
## 
## 
## Series: GDP 
## Model: ARIMA(1,0,0) w/ mean 
## 
## Coefficients:
##          ar1  constant
##       0.7371    4.3898
## s.e.  0.0499    0.2205
## 
## sigma^2 estimated as 9.656:  log likelihood=-486.96
## AIC=979.92   AICc=980.05   BIC=989.67

Plots

myplot=function(model){
  model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted,col=.model),
                                                   data=augment(model))+ggtitle(names(model))}

for (i in 1:11){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 12 rows containing missing values (`geom_line()`).

Accuracy Metrics

tmp3=c(rep(0,11))
mymat=matrix(rep(0, 12*11), ncol=11)

for (i in 1:11){
  tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
  tmp2=tmp%>%accuracy(test)
  tmp3=rbind(tmp2, tmp3)
}
## Warning in rbind(deparse.level, ...): number of columns of result, 10, is not a
## multiple of vector length 11 of arg 2
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(MAPE)
## # A tibble: 11 × 7
##    .model                    ME  RMSE   MAE     MPE  MAPE  ACF1
##    <chr>                  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>
##  1 TS_Model               0.758  4.74  3.82   -2.53  22.2 0.642
##  2 Mean_Model             0.758  4.74  3.82   -2.53  22.2 0.642
##  3 Exponential_Model      0.717  4.67  3.78   -2.74  22.3 0.643
##  4 Fourier_Model          0.776  4.62  3.81   -2.24  22.6 0.630
##  5 ARIMA_Model            0.431  4.88  3.95   -4.82  23.7 0.699
##  6 Seasonal_Naive_Model  -2.26   5.52  4.64  -20.3   30.7 0.536
##  7 Ensemble_Model_2      -6.98   8.48  7.44  -49.4   51.1 0.679
##  8 Naive_Model           -8.72   9.89  8.88  -60.8   61.4 0.642
##  9 Drift_Model          -12.6   13.5  12.6   -84.1   84.2 0.653
## 10 ETS_Model            -22.1   23.6  22.2  -141.   141.  0.854
## 11 Ensemble_Model       -38.9   39.7  38.9  -244.   244.  0.864