Libraries and Data

require(fpp3)
## Loading required package: fpp3
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.4.4     ✔ fabletools  0.3.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
require(ggplot2)
require(kableExtra)
## Loading required package: kableExtra
## Warning: package 'kableExtra' was built under R version 4.3.2
## Error: package or namespace load failed for 'kableExtra':
##  .onLoad failed in loadNamespace() for 'kableExtra', details:
##   call: !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output %in% 
##   error: 'length = 2' in coercion to 'logical(1)'
require(latex2exp)
## Loading required package: latex2exp
## Warning: package 'latex2exp' was built under R version 4.3.2
require(readxl)
## Loading required package: readxl
## Warning: package 'readxl' was built under R version 4.3.2
require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 4.3.2
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Read & Edit Data

mydata=read.csv('gdpstate.csv')
mydata$Quarter=seq(as.Date("2005/1/1"), as.Date("2022/10/1"), by="1 quarter")
mydata$Quarter=yearquarter(mydata$Quarter)

Time Series and Train / Test Set

myts=mydata%>%as_tsibble(index = Quarter)
temp=myts%>%pivot_longer(!c(Quarter,Dem0Rep1,ElectionSpline), names_to='Region', values_to='GDP')

newts=myts%>%pivot_longer(!c(Quarter,Dem0Rep1,ElectionSpline), names_to='Region', values_to='GDP')%>%filter(Region=='New.England')

train=newts[1:64,]
test=newts[65:nrow(mydata),]
temp%>%autoplot(GDP)

Time Series Plots

temp%>%gg_season(GDP)

newts%>%gg_lag(GDP)

temp%>%ACF(GDP)%>%autoplot()

Transformation

lambda=temp%>%features(GDP, features = guerrero) |> pull(lambda_guerrero)
temp%>%autoplot(box_cox(GDP, lambda[1])) +
  labs(y = "",title = latex2exp::TeX(paste0(
         "Transformed NT Consumption with $\\lambda$ = ", round(lambda[1],2))))

# Decomposition

comp=temp%>%model(stl=STL(GDP))%>%components()
comp%>%as_tsibble() %>%autoplot(GDP)+  geom_line(aes(y=trend), colour = "red")+
  geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

Models for New England

#Naive Models
m1=train |>model(Mean_Model=MEAN(GDP))
m2=train |>model(Drift_Model=RW(GDP~drift()))
m3=train |>model(Naive_Model=NAIVE(GDP))
m4=train |>model(Seasonal_Naive_Model=SNAIVE(GDP))

#Basic Models
m5=train%>%model(TS_Model = TSLM(GDP))
m6=train |>model(ETS_Model=ETS(GDP))
m7=train |>model(ARIMA_Model=ARIMA(GDP))

#Dynamic Models
m8=train |>model(Fourier_Model=TSLM(GDP~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+fourier(K=2)))
m9=train |>model(Exponential_Model=TSLM(log(GDP)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))
m10=train |>model(ARIMA(GDP ~ ElectionSpline))

#Dynamic Ensemble Models
m11=train|>model(Ensemble_Model=(ETS(GDP)+ARIMA(GDP~ElectionSpline+trend(knots = yearquarter('2020 Q2'))))/2)
m12=train|>model(Ensemble_Model_2=(ETS(GDP)+TSLM(GDP~trend(knots=yearquarter('2020 Q2'))+fourier(K=2))+ARIMA(GDP))/3)

#Dynamic Neural Network Models
m13=train|>model(NeuralNet_Model=NNETAR(box_cox(GDP, .9)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))

Example ARIMA + Regressor

fit <- train |>
     model(ARIMA(GDP ~ ElectionSpline))
 report(fit)
## Series: GDP 
## Model: LM w/ ARIMA(0,1,2) errors 
## 
## Coefficients:
##           ma1      ma2  ElectionSpline  intercept
##       -0.3731  -0.2539        605.2746  2205.9575
## s.e.   0.1284   0.1379        531.0135   733.0774
## 
## sigma^2 estimated as 236268805:  log likelihood=-694.87
## AIC=1399.74   AICc=1400.79   BIC=1410.46
 bind_rows(
    `Regression residuals` =
        as_tibble(residuals(fit, type = "regression")),
    `ARIMA residuals` =
        as_tibble(residuals(fit, type = "innovation")),
    .id = "type"
  ) |>
  mutate(
    type = factor(type, levels=c(
      "Regression residuals", "ARIMA residuals"))
  ) |>
  ggplot(aes(x = Quarter, y = .resid)) +
  geom_line() +
  facet_grid(vars(type))

 fit |> gg_tsresiduals()

 augment(fit) |>
  features(.innov, ljung_box, dof = 4, lag = 12)
## # A tibble: 1 × 4
##   Region      .model                      lb_stat lb_pvalue
##   <chr>       <chr>                         <dbl>     <dbl>
## 1 New.England ARIMA(GDP ~ ElectionSpline)    1.52     0.992
 forecast(fit, new_data = test) |>
  autoplot(train) +
  labs(y = "Percentage change")

Reports

for (i in 1:13){report(eval(parse(text=paste0('m',i))))}
## Series: GDP 
## Model: MEAN 
## 
## Mean: 897355.6938 
## sigma^2: 2221866475.2888 
## Series: GDP 
## Model: RW w/ drift 
## 
## Drift: 2572.6032 (se: 2039.4553)
## sigma^2: 262040811.6726 
## Series: GDP 
## Model: NAIVE 
## 
## sigma^2: 262040811.6726 
## Series: GDP 
## Model: SNAIVE 
## 
## sigma^2: 463407580.2306 
## Series: GDP 
## Model: TSLM 
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -80991 -34649 -11460  34541  92783 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   897356       5892   152.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47140 on 63 degrees of freedom
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.6128825 
##     beta  = 0.0001000659 
## 
##   Initial states:
##      l[0]     b[0]
##  825584.7 2399.832
## 
##   sigma^2:  3e-04
## 
##      AIC     AICc      BIC 
## 1503.211 1504.246 1514.006 
## Series: GDP 
## Model: ARIMA(0,1,2) w/ drift 
## 
## Coefficients:
##           ma1      ma2  constant
##       -0.3379  -0.2639  2312.988
## s.e.   0.1285   0.1439   781.541
## 
## sigma^2 estimated as 237186164:  log likelihood=-695.5
## AIC=1399   AICc=1399.69   BIC=1407.58
## Series: GDP 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -76036.0 -11078.1   -965.6  12112.2  29662.0 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                   815633.7     5969.1 136.643
## ElectionSpline                                   829.9      536.4   1.547
## trend(knots = yearquarter("2020 Q2"))trend      2301.7      131.9  17.454
## trend(knots = yearquarter("2020 Q2"))trend_62  -2963.5     8962.1  -0.331
## fourier(K = 2)C1_4                               311.1     3283.4   0.095
## fourier(K = 2)S1_4                             -2014.1     3308.4  -0.609
## fourier(K = 2)C2_4                               853.5     2306.3   0.370
##                                               Pr(>|t|)    
## (Intercept)                                     <2e-16 ***
## ElectionSpline                                   0.127    
## trend(knots = yearquarter("2020 Q2"))trend      <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend_62    0.742    
## fourier(K = 2)C1_4                               0.925    
## fourier(K = 2)S1_4                               0.545    
## fourier(K = 2)C2_4                               0.713    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18330 on 57 degrees of freedom
## Multiple R-squared: 0.8632,  Adjusted R-squared: 0.8488
## F-statistic: 59.97 on 6 and 57 DF, p-value: < 2.22e-16
## Series: GDP 
## Model: TSLM 
## Transformation: log(GDP) 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.083644 -0.011813 -0.001242  0.013177  0.033398 
## 
## Coefficients:
##                                                 Estimate Std. Error  t value
## (Intercept)                                   13.6159006  0.0073138 1861.681
## ElectionSpline                                 0.0010023  0.0005786    1.732
## trend(knots = yearquarter("2020 Q2"))trend     0.0025505  0.0001422   17.931
## trend(knots = yearquarter("2020 Q2"))trend_62 -0.0045089  0.0096669   -0.466
## season()year2                                 -0.0042597  0.0070118   -0.608
## season()year3                                 -0.0006295  0.0070833   -0.089
## season()year4                                 -0.0000553  0.0072270   -0.008
##                                               Pr(>|t|)    
## (Intercept)                                     <2e-16 ***
## ElectionSpline                                  0.0886 .  
## trend(knots = yearquarter("2020 Q2"))trend      <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend_62   0.6427    
## season()year2                                   0.5459    
## season()year3                                   0.9295    
## season()year4                                   0.9939    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01977 on 57 degrees of freedom
## Multiple R-squared: 0.8696,  Adjusted R-squared: 0.8559
## F-statistic: 63.35 on 6 and 57 DF, p-value: < 2.22e-16
## Series: GDP 
## Model: LM w/ ARIMA(0,1,2) errors 
## 
## Coefficients:
##           ma1      ma2  ElectionSpline  intercept
##       -0.3731  -0.2539        605.2746  2205.9575
## s.e.   0.1284   0.1379        531.0135   733.0774
## 
## sigma^2 estimated as 236268805:  log likelihood=-694.87
## AIC=1399.74   AICc=1400.79   BIC=1410.46
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP * 0.5
## 
## ======================
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP + GDP
## 
## ======================
## 
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.6128825 
##     beta  = 0.0001000659 
## 
##   Initial states:
##      l[0]     b[0]
##  825584.7 2399.832
## 
##   sigma^2:  3e-04
## 
##      AIC     AICc      BIC 
## 1503.211 1504.246 1514.006 
## 
## Series: GDP 
## Model: LM w/ ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1  ElectionSpline  trend(knots = yearquarter("2020 Q2"))trend
##       0.7153        163.5517                                   2119.8435
## s.e.  0.1120        542.0752                                    320.0627
##       trend(knots = yearquarter("2020 Q2"))trend_62  intercept
##                                            25698.59  824086.28
## s.e.                                       10939.24   11715.09
## 
## sigma^2 estimated as 197738794:  log likelihood=-699.85
## AIC=1411.69   AICc=1413.17   BIC=1424.65
## 
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP * 0.333333333333333
## 
## ====================================
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP + GDP
## 
## ======================
## 
## Series: GDP 
## Model: COMBINATION 
## Combination: GDP + GDP
## 
## ======================
## 
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.6128825 
##     beta  = 0.0001000659 
## 
##   Initial states:
##      l[0]     b[0]
##  825584.7 2399.832
## 
##   sigma^2:  3e-04
## 
##      AIC     AICc      BIC 
## 1503.211 1504.246 1514.006 
## 
## Series: GDP 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -72217.7 -12581.2   -667.5  11782.3  35338.6 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                   821315.5     4762.1 172.468
## trend(knots = yearquarter("2020 Q2"))trend      2340.4      131.0  17.862
## trend(knots = yearquarter("2020 Q2"))trend_62   -519.8     8927.3  -0.058
## fourier(K = 2)C1_4                              -403.8     3289.6  -0.123
## fourier(K = 2)S1_4                             -2652.6     3321.8  -0.799
## fourier(K = 2)C2_4                               496.0     2322.1   0.214
##                                               Pr(>|t|)    
## (Intercept)                                     <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend      <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend_62    0.954    
## fourier(K = 2)C1_4                               0.903    
## fourier(K = 2)S1_4                               0.428    
## fourier(K = 2)C2_4                               0.832    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18540 on 58 degrees of freedom
## Multiple R-squared: 0.8575,  Adjusted R-squared: 0.8452
## F-statistic:  69.8 on 5 and 58 DF, p-value: < 2.22e-16
## 
## 
## Series: GDP 
## Model: ARIMA(0,1,2) w/ drift 
## 
## Coefficients:
##           ma1      ma2  constant
##       -0.3379  -0.2639  2312.988
## s.e.   0.1285   0.1439   781.541
## 
## sigma^2 estimated as 237186164:  log likelihood=-695.5
## AIC=1399   AICc=1399.69   BIC=1407.58
## 
## 
## Series: GDP 
## Model: NNAR(4,1,6)[4] 
## Transformation: box_cox(GDP, 0.9) 
## 
## Average of 20 networks, each of which is
## a 10-6-1 network with 73 weights
## options were - linear output units 
## 
## sigma^2 estimated as 33274

Plots

myplot=function(model){
  model|>forecast(test)|>autoplot(newts)+geom_line(aes(y=.fitted,col=.model),
                                                   data=augment(model))+ggtitle(names(model))}

for (i in 1:12){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 4 rows containing missing values (`geom_line()`).

Accuracy Metrics

tmp3=c(rep(0,12))
mymat=matrix(rep(0, 12*12), ncol=12)

for (i in 1:13){
  tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
  tmp2=tmp%>%accuracy(test)
  tmp3=rbind(tmp2, tmp3)
}
## Warning in rbind(deparse.level, ...): number of columns of result, 11, is not a
## multiple of vector length 12 of arg 2
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(MAPE)
## # A tibble: 13 × 8
##    .model                      Region        ME   RMSE    MAE   MPE  MAPE   ACF1
##    <chr>                       <chr>      <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>
##  1 Ensemble_Model              New.Eng… -31154. 4.08e4 3.12e4 -3.03  3.03  0.680
##  2 Drift_Model                 New.Eng…  31118. 3.32e4 3.11e4  3.03  3.03  0.278
##  3 ARIMA_Model                 New.Eng…  42614. 4.43e4 4.26e4  4.16  4.16  0.282
##  4 Naive_Model                 New.Eng…  42695. 4.56e4 4.27e4  4.16  4.16  0.384
##  5 Ensemble_Model_2            New.Eng…  42929. 4.46e4 4.29e4  4.19  4.19  0.278
##  6 ETS_Model                   New.Eng…  43295. 4.49e4 4.33e4  4.22  4.22  0.286
##  7 ARIMA(GDP ~ ElectionSpline) New.Eng…  48440. 4.98e4 4.84e4  4.73  4.73  0.262
##  8 NeuralNet_Model             New.Eng…  51928. 5.55e4 5.19e4  5.06  5.06  0.470
##  9 Fourier_Model               New.Eng…  63359. 6.52e4 6.34e4  6.18  6.18  0.338
## 10 Exponential_Model           New.Eng…  68549. 7.05e4 6.85e4  6.69  6.69  0.359
## 11 Seasonal_Naive_Model        New.Eng…  69532. 7.91e4 6.95e4  6.79  6.79 -0.355
## 12 TS_Model                    New.Eng… 123778. 1.25e5 1.24e5 12.1  12.1   0.384
## 13 Mean_Model                  New.Eng… 123778. 1.25e5 1.24e5 12.1  12.1   0.384