require(fpp3)
## Loading required package: fpp3
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.1 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.1 ✔ fabletools 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
require(ggplot2)
require(kableExtra)
## Loading required package: kableExtra
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
require(latex2exp)
## Loading required package: latex2exp
require(readxl)
## Loading required package: readxl
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.1 ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
mydata=read.csv('C:/Users/lfult/Documents/_Courses/Boston College/Predictive analytics/ng.csv')
mydata$Year=seq(as.Date("2001/1/1"), as.Date("2023/1/1"), "months")
p1=c(rep(0,48), rep(1, 13))
p1=c(rep(p1, 4), rep(0, 21))
mydata$Pres=p1
myts=mydata%>%mutate(Year_Month = yearmonth(as.character(Year))) %>% as_tsibble(index = Year_Month)
myts%>%autoplot(Amount)
train=myts[1:216,]
test=myts[217:nrow(mydata),]
myts%>%gg_season(Amount)
myts%>%gg_subseries(Amount)
myts%>%gg_lag(Amount)
myts%>%ACF(Amount)%>%autoplot()
lambda <- myts |> features(Amount, features = guerrero) |> pull(lambda_guerrero)
myts |> autoplot(box_cox(Amount, lambda)) +
labs(y = "",title = latex2exp::TeX(paste0(
"Transformed NT Consumption with $\\lambda$ = ", round(lambda,2))))
# Decomposition
comp=myts%>%model(stl=STL(Amount))%>%components()
comp|>as_tsibble() |>autoplot(Amount)+ geom_line(aes(y=trend), colour = "red")+
geom_line(aes(y=season_adjust), colour = "blue")
comp%>%autoplot()
m1=train |>model(Trend_Model = TSLM(Amount ~ trend()))
m2=train |>model(Mean_Model=MEAN(Amount))
m3=train |>model(Drift_Model=RW(Amount~drift()))
m4=train |>model(Naive_Model=NAIVE(Amount))
m5=train |>model(Seasonal_Naive_Model=SNAIVE(Amount))
m6=train |>model(ETS_Model=ETS(Amount))
m7=train |>model(ARIMA_Model=ARIMA(Amount))
m8=train |>model(Trend_Seasonality_Model=TSLM(Amount~trend()+season()))
m9=train |>model(Fourier_Model=TSLM(Amount~trend()+fourier(K=6)))
m10=train |>model(Exponential_Model=TSLM(log(Amount)~trend()+season()))
m11=train|>model(Splines_Model=TSLM(Amount ~ trend(knots = c(1990))))
m12=train|>model(Ensemble_Model=(ETS(Amount)+ARIMA(Amount))/2)
m13=train|>model(Dynamic_Model=ARIMA(Amount~Pres))
m14=train|>model(Ensemble_Model_2=(ETS(Amount)+TSLM(Amount~trend()+fourier(K=3))+ARIMA(Amount~Pres))/3)
m15=train|>model(Ensemble_Model_3=(ARIMA(Amount~trend()+season()+Pres)+ETS(Amount~trend()+season()))/2)
m16=train|>model(nnetar=NNETAR(box_cox(Amount, .65)~Pres+trend()+season()))
for (i in 1:16){report(eval(parse(text=paste0('m',i))))}
## Series: Amount
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -541195 -306119 -162841 360071 1027306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1743454.8 54004.0 32.284 < 2e-16 ***
## trend() 2781.3 431.5 6.445 7.54e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 395500 on 214 degrees of freedom
## Multiple R-squared: 0.1626, Adjusted R-squared: 0.1586
## F-statistic: 41.54 on 1 and 214 DF, p-value: 7.5394e-10
## Series: Amount
## Model: MEAN
##
## Mean: 2045230.1806
## sigma^2: 185885140083.553
## Series: Amount
## Model: RW w/ drift
##
## Drift: 1500.0558 (se: 19654.0583)
## sigma^2: 83050631329.0436
## Series: Amount
## Model: NAIVE
##
## sigma^2: 83050631329.0436
## Series: Amount
## Model: SNAIVE
##
## sigma^2: 20865051838.0542
## Series: Amount
## Model: ETS(M,N,M)
## Smoothing parameters:
## alpha = 0.5126996
## gamma = 0.1232869
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.8400235 0.9456213 1.148872 1.226745 1.36431
##
## sigma^2: 0.0024
##
## AIC AICc BIC
## 6137.055 6139.455 6187.684
## Series: Amount
## Model: ARIMA(1,0,2)(0,1,2)[12] w/ drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 constant
## 0.8928 -0.3767 -0.2161 -0.6236 -0.2143 3782.1540
## s.e. 0.1005 0.1305 0.1191 0.0877 0.0919 752.6637
##
## sigma^2 estimated as 1.092e+10: log likelihood=-2650.6
## AIC=5315.2 AICc=5315.77 BIC=5338.43
## Series: Amount
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -501823 -71632 -17370 81118 397694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2489678.5 33503.0 74.312 < 2e-16 ***
## trend() 2908.4 140.2 20.748 < 2e-16 ***
## season()year2 -307463.2 42754.9 -7.191 1.21e-11 ***
## season()year3 -486080.8 42755.5 -11.369 < 2e-16 ***
## season()year4 -924006.5 42756.7 -21.611 < 2e-16 ***
## season()year5 -1111869.8 42758.3 -26.004 < 2e-16 ***
## season()year6 -1126133.3 42760.4 -26.336 < 2e-16 ***
## season()year7 -954255.1 42762.9 -22.315 < 2e-16 ***
## season()year8 -937634.4 42765.9 -21.925 < 2e-16 ***
## season()year9 -1142501.2 42769.3 -26.713 < 2e-16 ***
## season()year10 -1052758.7 42773.2 -24.613 < 2e-16 ***
## season()year11 -796734.8 42777.6 -18.625 < 2e-16 ***
## season()year12 -280672.1 42782.4 -6.560 4.37e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 128300 on 203 degrees of freedom
## Multiple R-squared: 0.9164, Adjusted R-squared: 0.9115
## F-statistic: 185.5 on 12 and 203 DF, p-value: < 2.22e-16
## Series: Amount
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -501823 -71632 -17370 81118 397694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1729669.3 17535.5 98.638 < 2e-16 ***
## trend() 2908.4 140.2 20.748 < 2e-16 ***
## fourier(K = 6)C1_12 452994.6 12343.0 36.701 < 2e-16 ***
## fourier(K = 6)S1_12 52778.2 12353.3 4.272 2.97e-05 ***
## fourier(K = 6)C2_12 244192.0 12343.0 19.784 < 2e-16 ***
## fourier(K = 6)S2_12 63758.3 12344.6 5.165 5.73e-07 ***
## fourier(K = 6)C3_12 -2883.4 12343.0 -0.234 0.815528
## fourier(K = 6)S3_12 -57340.4 12343.0 -4.646 6.09e-06 ***
## fourier(K = 6)C4_12 27254.1 12343.0 2.208 0.028360 *
## fourier(K = 6)S4_12 -17077.3 12342.5 -1.384 0.167994
## fourier(K = 6)C5_12 27016.3 12343.0 2.189 0.029753 *
## fourier(K = 6)S5_12 -45742.4 12342.3 -3.706 0.000271 ***
## fourier(K = 6)C6_12 11435.5 8727.5 1.310 0.191581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 128300 on 203 degrees of freedom
## Multiple R-squared: 0.9164, Adjusted R-squared: 0.9115
## F-statistic: 185.5 on 12 and 203 DF, p-value: < 2.22e-16
## Series: Amount
## Model: TSLM
## Transformation: log(Amount)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.188353 -0.035196 -0.004048 0.036566 0.159249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.469e+01 1.559e-02 942.357 < 2e-16 ***
## trend() 1.425e-03 6.522e-05 21.853 < 2e-16 ***
## season()year2 -1.150e-01 1.989e-02 -5.781 2.77e-08 ***
## season()year3 -1.903e-01 1.989e-02 -9.565 < 2e-16 ***
## season()year4 -4.005e-01 1.989e-02 -20.132 < 2e-16 ***
## season()year5 -5.073e-01 1.989e-02 -25.502 < 2e-16 ***
## season()year6 -5.170e-01 1.990e-02 -25.988 < 2e-16 ***
## season()year7 -4.197e-01 1.990e-02 -21.096 < 2e-16 ***
## season()year8 -4.093e-01 1.990e-02 -20.568 < 2e-16 ***
## season()year9 -5.268e-01 1.990e-02 -26.475 < 2e-16 ***
## season()year10 -4.733e-01 1.990e-02 -23.785 < 2e-16 ***
## season()year11 -3.405e-01 1.990e-02 -17.108 < 2e-16 ***
## season()year12 -1.100e-01 1.991e-02 -5.525 1.00e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05968 on 203 degrees of freedom
## Multiple R-squared: 0.9189, Adjusted R-squared: 0.9141
## F-statistic: 191.6 on 12 and 203 DF, p-value: < 2.22e-16
## Series: Amount
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -541195 -306119 -162841 360071 1027306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1743454.8 54004.0 32.284 < 2e-16 ***
## trend(knots = c(1990))trend 2781.3 431.5 6.445 7.54e-10 ***
## trend(knots = c(1990))trend_1619 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 395500 on 214 degrees of freedom
## Multiple R-squared: 0.1626, Adjusted R-squared: 0.1586
## F-statistic: 41.54 on 1 and 214 DF, p-value: 7.5394e-10
## Series: Amount
## Model: COMBINATION
## Combination: Amount * 0.5
##
## =========================
##
## Series: Amount
## Model: COMBINATION
## Combination: Amount + Amount
##
## ============================
##
## Series: Amount
## Model: ETS(M,N,M)
## Smoothing parameters:
## alpha = 0.5126996
## gamma = 0.1232869
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.8400235 0.9456213 1.148872 1.226745 1.36431
##
## sigma^2: 0.0024
##
## AIC AICc BIC
## 6137.055 6139.455 6187.684
##
## Series: Amount
## Model: ARIMA(1,0,2)(0,1,2)[12] w/ drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 constant
## 0.8928 -0.3767 -0.2161 -0.6236 -0.2143 3782.1540
## s.e. 0.1005 0.1305 0.1191 0.0877 0.0919 752.6637
##
## sigma^2 estimated as 1.092e+10: log likelihood=-2650.6
## AIC=5315.2 AICc=5315.77 BIC=5338.43
##
##
## Series: Amount
## Model: LM w/ ARIMA(1,0,2)(0,1,2)[12] errors
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 Pres intercept
## 0.8529 -0.3283 -0.1771 -0.6444 -0.1823 -46517.55 35193.307
## s.e. 0.0947 0.1233 0.1048 0.0907 0.0994 34148.50 6358.389
##
## sigma^2 estimated as 1.09e+10: log likelihood=-2649.63
## AIC=5315.27 AICc=5316 BIC=5341.81
## Series: Amount
## Model: COMBINATION
## Combination: Amount * 0.333333333333333
##
## =======================================
##
## Series: Amount
## Model: COMBINATION
## Combination: Amount + Amount
##
## ============================
##
## Series: Amount
## Model: COMBINATION
## Combination: Amount + Amount
##
## ============================
##
## Series: Amount
## Model: ETS(M,N,M)
## Smoothing parameters:
## alpha = 0.5126996
## gamma = 0.1232869
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.8400235 0.9456213 1.148872 1.226745 1.36431
##
## sigma^2: 0.0024
##
## AIC AICc BIC
## 6137.055 6139.455 6187.684
##
## Series: Amount
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -491834 -84737 -13169 86918 355173
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1730279.2 18438.3 93.842 < 2e-16 ***
## trend() 2902.8 147.4 19.695 < 2e-16 ***
## fourier(K = 3)C1_12 452989.0 12980.3 34.898 < 2e-16 ***
## fourier(K = 3)S1_12 52757.2 12991.1 4.061 6.92e-05 ***
## fourier(K = 3)C2_12 244186.4 12980.3 18.812 < 2e-16 ***
## fourier(K = 3)S2_12 63748.6 12982.0 4.911 1.83e-06 ***
## fourier(K = 3)C3_12 -2889.0 12980.3 -0.223 0.824
## fourier(K = 3)S3_12 -57346.0 12980.3 -4.418 1.60e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 134900 on 208 degrees of freedom
## Multiple R-squared: 0.9053, Adjusted R-squared: 0.9021
## F-statistic: 284.1 on 7 and 208 DF, p-value: < 2.22e-16
##
##
## Series: Amount
## Model: LM w/ ARIMA(1,0,2)(0,1,2)[12] errors
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 Pres intercept
## 0.8529 -0.3283 -0.1771 -0.6444 -0.1823 -46517.55 35193.307
## s.e. 0.0947 0.1233 0.1048 0.0907 0.0994 34148.50 6358.389
##
## sigma^2 estimated as 1.09e+10: log likelihood=-2649.63
## AIC=5315.27 AICc=5316 BIC=5341.81
##
##
## Series: Amount
## Model: COMBINATION
## Combination: Amount * 0.5
##
## =========================
##
## Series: Amount
## Model: COMBINATION
## Combination: Amount + Amount
##
## ============================
##
## Series: Amount
## Model: LM w/ ARIMA(1,0,2)(0,0,2)[12] errors
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 trend() season()year2
## 0.8699 -0.3554 -0.1588 0.1753 -0.1470 2965.3055 -306599.40
## s.e. 0.0878 0.1205 0.0991 0.0737 0.0705 377.2247 26335.99
## season()year3 season()year4 season()year5 season()year6 season()year7
## -489996.92 -930241.08 -1119339.27 -1134195.12 -962357.44
## s.e. 31281.13 32485.72 33273.79 33747.36 33897.52
## season()year8 season()year9 season()year10 season()year11
## -944515.09 -1149786.15 -1059939.28 -804072.65
## s.e. 33770.71 33348.26 32618.54 31535.92
## season()year12 Pres intercept
## -287517.37 -51163.38 2506813.74
## s.e. 26746.91 33936.01 52092.11
##
## sigma^2 estimated as 9.855e+09: log likelihood=-2782.57
## AIC=5605.15 AICc=5609.46 BIC=5672.65
##
## Series: Amount
## Model: ETS(M,N,M)
## Smoothing parameters:
## alpha = 0.5126996
## gamma = 0.1232869
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 1887142 1.22196 0.9649296 0.8591212 0.7954175 0.9131088 0.9034724 0.8164187
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.8400235 0.9456213 1.148872 1.226745 1.36431
##
## sigma^2: 0.0024
##
## AIC AICc BIC
## 6137.055 6139.455 6187.684
##
##
## Series: Amount
## Model: NNAR(3,1,9)[12]
## Transformation: box_cox(Amount, 0.65)
##
## Average of 20 networks, each of which is
## a 17-9-1 network with 172 weights
## options were - linear output units
##
## sigma^2 estimated as 35607
myplot=function(model){
model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted, col=.model),data=augment(model))+ggtitle(names(model))}
for (i in 1:15){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot, avoiding that model
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Splines_Model = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
tmp3=c(rep(0,10))
mymat=matrix(rep(0, 15*nrow(train)), ncol=15)
for (i in 1:16){
tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
tmp2=tmp%>%accuracy(test,measures = list(point_accuracy_measures))
tmp3=rbind(tmp2, tmp3)
#mymat[,i]=abs(augment(eval(parse(text=paste0('m',i))))$.resid)
}
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Splines_Model = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(RMSE)
## # A tibble: 16 × 7
## .model ME RMSE MAE MPE MAPE ACF1
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ensemble_Model 56287. 123475. 103177. 2.10 3.93 0.238
## 2 ETS_Model 15588. 131442. 109248. 0.663 4.20 0.380
## 3 Ensemble_Model_3 73577. 134409. 108622. 2.78 4.16 0.283
## 4 ARIMA_Model 96985. 139334. 113014. 3.53 4.21 0.130
## 5 nnetar -21432. 141820. 108853. -1.11 4.16 -0.0115
## 6 Ensemble_Model_2 99142. 142784. 117612. 3.71 4.46 0.199
## 7 Dynamic_Model 114926. 149261. 122712. 4.27 4.59 0.0270
## 8 Seasonal_Naive_Model 82424. 161589. 120905. 2.94 4.51 -0.0513
## 9 Exponential_Model 129594. 179201. 154308. 5.23 6.07 0.320
## 10 Fourier_Model 164824. 198187. 167727. 6.15 6.26 0.239
## 11 Trend_Seasonality_Model 164824. 198187. 167727. 6.15 6.26 0.239
## 12 Splines_Model 197169. 469121. 351128. 5.25 12.3 0.657
## 13 Trend_Model 197169. 469121. 351128. 5.25 12.3 0.657
## 14 Naive_Model -388583. 577232. 510162. -17.8 21.4 0.661
## 15 Drift_Model -426085. 602348. 529650. -19.2 22.3 0.658
## 16 Mean_Model 565697. 708670. 565697. 19.7 19.7 0.661