This file provides the code needed to reproduce the results in Tables 3 to 6 of Sbrana and Silvestrini (IJF, 2024).
If you have any questions, please email: giacomo.sbrana@neoma-bs.fr
Below we report the code that reproduces the results in Table 3 using the Structural THETA.
This code generates two sets of processes (DGP1 and DGP2); you can select the one you wish to use.
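As background (this is a reading of the filter recursions in the code below, not a statement taken from the paper), THETAstructural fits a local level model with drift by Kalman filtering, with the measurement variance concentrated out of the Gaussian likelihood:

$$v_t = y_t - \ell_{t|t-1}, \qquad k_t = \frac{p_t}{p_t+1}, \qquad \ell_{t+1|t} = c + \ell_{t|t-1} + k_t v_t, \qquad p_{t+1} = p_t(1-k_t) + q,$$

where $q$ is the signal-to-noise ratio and $c$ the drift. The parameters $(q,c)$ minimise the concentrated criterion $\sum_t \log(p_t+1) + n\log\big(\tfrac{1}{n}\sum_t v_t^2/(p_t+1)\big)$, and the $h$-step-ahead point forecast is $\ell_{n+1|n} + (h-1)\,c$.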
library("forecast")
THETAstructural<-function(y,steps){
likelihood<-function(parameters){q=abs(parameters[1]);co=abs(parameters[2])
state<-p<-rep(0,length(y)+1)
state[1]=y[1]
p[1]=10000
k<-inn<-rep(0,length(y))
k[1]=p[1]/(p[1]+1);
sigmae=0
p[2]=p[1]-p[1]*k[1]+q;
state[2]=co+state[1]+k[1]*inn[1]
for(t in (2):length(y)){
k[t]=p[t]/(p[t]+1);
inn[t]=y[t]-state[t]
sigmae=sigmae+inn[t]^2/(p[t]+1)
p[t+1]=p[t]-p[t]*k[t]+q;
state[t+1]=co+state[t]+k[t]*inn[t]
}
sum(log(p+1))+length(y)*log(sigmae/length(y))}
results<-optim(c(.01,.001),likelihood)
q=abs(results[[1]][1]);co=abs(results[[1]][2])
state<-p<-rep(0,length(y)+1)
state[1]=y[1]
p[1]=10000
k<-inn<-rep(0,length(y))
k[1]=p[1]/(p[1]+1);
sigmae=0
p[2]=p[1]-p[1]*k[1]+q;
state[2]=co+state[1]+k[1]*inn[1]
for(t in (2):length(y)){
k[t]=p[t]/(p[t]+1);
inn[t]=y[t]-state[t]
sigmae=sigmae+inn[t]^2/(p[t]+1)
p[t+1]=p[t]-p[t]*k[t]+q;
state[t+1]=co+state[t]+k[t]*inn[t]
}
PointF<-rep(0,steps);PointF[1]=state[length(y)+1];
for( t in 2:steps){PointF[t]=co*(t-1)+state[length(y)+1]}
H<-sigmae/(length(y)-1);P<-tail(p,1)*H;Eta<-q*H;
Inter<-c();Inter[1]=P;
for(j in 2:steps){Inter[j]=Inter[j-1]+Eta};
Interv<-c();
for(j in 1:steps){Interv[j]=Inter[j]+H};
prob1=.85;prob2=.95;
lower1<-PointF-qnorm((1+prob1)/2)*sqrt(Interv);
#for(d in 1:steps){if(lower1[d]<0){lower1[d]=0}};
upper1<-PointF+qnorm((1+prob1)/2)*sqrt(Interv);
lower2<-PointF-qnorm((1+prob2)/2)*sqrt(Interv);
#for(d in 1:steps){if(lower2[d]<0){lower2[d]=0}};
upper2<-PointF+qnorm((1+prob2)/2)*sqrt(Interv);
list(mean=PointF,lower=cbind(lower1,lower2),upper=cbind(upper1,upper2))
}
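As an optional illustration (left commented out so the script below is unchanged), THETAstructural can be called on any univariate series; the simulated series is arbitrary.
# y_demo <- cumsum(rnorm(60)) + 0.05*(1:60)   # arbitrary series with drift
# f_demo <- THETAstructural(y_demo, steps = 6)
# f_demo$mean                                 # six point forecasts
# f_demo$lower; f_demo$upper                  # 85% and 95% bounds (columns 1 and 2)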
AUTO<-function(y,steps){forecast::forecast(auto.arima(y),h=steps,level = c(.85,.95))}
THETA<-function(y,steps){forecast::thetaf(y,h=steps,level = c(.85,.95))}
ETS<-function(y,steps){forecast::forecast(ets(y),h=steps)}
competitors<-list(THETAstructural,ETS,AUTO)
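Any function with the same (y, steps) interface, returning an object with mean, lower and upper components, can be appended to the competitors list; the random-walk-with-drift wrapper below is only a hypothetical example and is left commented out.
# RWD<-function(y,steps){forecast::rwf(y,h=steps,drift=TRUE,level=c(85,95))}
# competitors<-list(THETAstructural,ETS,AUTO,RWD)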
####################### data Monte Carlo ##################################################
steps<-6
freq<-frequ<-1
times=10000
set.seed(1)
DGP1=list()
for(u in 1:times){
n=60+steps
eta=runif(1,.1,.5)*rnorm(n)
mu=y=trend=c()
alpha=runif(1,.1,5);beta=runif(1,.01,.1)
ar=runif(1,.7,.99)
delta=runif(1,-.7,-.1)
trend[1]=alpha+beta
mu[1]=eta[1];y[1]=trend[1]+mu[1]
for(t in 2:n){mu[t]=ar*mu[t-1]+eta[t]+delta*eta[t-1]
trend[t]=alpha+beta*t
y[t]=trend[t]+mu[t]
}
DGP1[[u]]=y
}
DGP2=list()
for(u in 1:times){
n=60+steps
eta=runif(1,.1,.5)*rnorm(n)
mu=y=c()
ar=runif(1,.1,.7)
delta=runif(1,-.7,0)
co=runif(1,.01,.1);
trend=somma=c();trend[1]=co
mu[1]=eta[1];y[1]=co;somma[1]=mu[1]
for(t in 2:n){mu[t]=ar*mu[t-1]+eta[t]+delta*eta[t-1]
trend[t]=trend[t-1]+co
somma[t]=mu[t]+somma[t-1]
y[t]=somma[t]+trend[t]
}
DGP2[[u]]=y
}
######################## Select the dataset to use! ##############################
DATA=DGP1
goods=length(DATA)
#############################################################################################
# Scaled accuracy measures: RMSSE and MASE, both scaled by in-sample naive (lag = frequ) differences
rmsse<-function(y,yf,steps){OutSample=tail(y,length(yf));sqrt(mean((OutSample[1:steps]-yf[1:steps])^2)/mean(diff(head(y,(length(y)-length(yf))),frequ)^2))}
mase<-function(y,yf,steps){OutSample=tail(y,length(yf));(mean(abs(OutSample[1:steps]-yf[1:steps])))/mean(abs(diff(head(y,(length(y)-length(yf))),frequ)))}
# Scaled Mean Interval Score (MIS) for interval forecasts at nominal coverage "prob"
mis<-function(yout,lower,upper,prob,yin){
a<-b<-d<-rep(0,length(yout));
for(t in 1:length(yout)){
if(yout[t]<lower[t]){a[t]=(2/(1-prob))*(lower[t]-yout[t])}
if(yout[t]>upper[t]){b[t]=(2/(1-prob))*(yout[t]-upper[t])}
d[t]=upper[t]-lower[t]}
mean(a+b+d)/mean(abs(diff(yin,freq)))
}
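An optional, commented illustration of how the three measures are used on a single simulated series (the first series and the Structural THETA are arbitrary choices):
# y1 <- ts(DATA[[1]], frequency = freq)
# fc1 <- THETAstructural(head(y1, length(y1)-steps), steps)
# rmsse(y1, fc1$mean, steps); mase(y1, fc1$mean, steps)
# mis(tail(y1, steps), fc1$lower[,1], fc1$upper[,1], .85, head(y1, length(y1)-steps))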
par(mfrow=c(1,3))
resMASE<-resRMSSE<-resSMAPE<-matrix(NA,steps,length(competitors))
resCOVER85<-resCOVER95<-resMIS85<-resMIS95<-matrix(NA,1,length(competitors))
results1MASE<-resultsMASE<-results1RMSSE<-resultsRMSSE<-results1SMAPE<-resultsSMAPE<-forec<-list()
resultsCOVER85<-resultsMIS85<-resultsCOVER95<-resultsMIS95<-matrix(NA,goods,length(competitors))
results1COVER85<-results1MIS85<-results1COVER95<-results1MIS95<-list()
for(h in 1:length(DATA)){
y=ts(DATA[[h]],frequency = freq);
for(j in 1:length(competitors)){
forec[[j]]<-competitors[[j]](head(y,(length(y)-steps)),steps)
for(s in 1:steps){
resRMSSE[s,j]<-rmsse(y,forec[[j]]$mean,s)
resMASE[s,j]<-mase(y,forec[[j]]$mean,s)
}
resMIS85[1,j]<-mis(tail(y,steps),forec[[j]]$lower[,1],forec[[j]]$upper[,1],.85,head(y,length(y)-steps));
resMIS95[1,j]<-mis(tail(y,steps),forec[[j]]$lower[,2],forec[[j]]$upper[,2],.95,head(y,length(y)-steps));
}
results1RMSSE[[1]]=resRMSSE
results1MASE[[1]]=resMASE
resultsRMSSE[[h]]<-Reduce("+", results1RMSSE)
resultsMASE[[h]]<-Reduce("+", results1MASE)
resultsMIS85[h,]<-resMIS85
resultsMIS95[h,]<-resMIS95
data_frame <- data.frame(col1 = 1:steps)
}
# Average RMSSE by forecast horizon; columns follow the order of the competitors list
# (Structural THETA, ETS, auto.arima)
print(round(Reduce("+", resultsRMSSE)/h,3))
## [,1] [,2] [,3]
## [1,] 0.716 0.728 0.729
## [2,] 0.840 0.856 0.857
## [3,] 0.914 0.935 0.933
## [4,] 0.972 0.999 0.994
## [5,] 1.020 1.054 1.046
## [6,] 1.062 1.103 1.090
# Scaled MIS at the 85% (first row) and 95% (second row) levels; columns in the same order as above
print(rbind(round(colMeans(resultsMIS85,na.rm = TRUE),3),round(colMeans(resultsMIS95,na.rm = TRUE),3)))
## [,1] [,2] [,3]
## [1,] 5.554 5.957 5.729
## [2,] 7.115 7.569 7.294
This code reproduces the results in Tables 4, 5 and 6 of Sbrana & Silvestrini (IJF, 2024).
The results refer to the Yearly data used in the M4 competition, comparing:
the Structural (MSOE) THETA, SES, Holt, Damped, Comb, THETA classic, AutoTheta, DOTM, RWDAR, naive, seasonal naive and naive2.
Additional methods can be tested by adding them among the competitors in the Benchmarks function below (a commented sketch follows that function).
The frequency of the data can also be changed; for example, we provide the code to use all M4 data, which reproduces Table 6, and a commented Quarterly example follows the Yearly block.
We reused part of the code already available on GitHub, adapting it to the specific M4 competition setting.
library("Mcomp")
library("M4comp2018")
library("forecTheta")
############################# All M4 data #####################################
##### This code allows testing the performance over all M4 data ###############
###############################################################################
# Mdata<-M4
# In<-Out<-list();for(i in 1:length(Mdata)){In[[i]]=Mdata[[i]]$x;Out[[i]]=Mdata[[i]]$xx}
# replic<-length(In)
# data_train = In
# data_test=Out
############################# Yearly data #####################################
##### This code allows testing the performance over a specific frequency ######
###############################################################################
############## You may replace "Yearly" with any other frequency you wish #####
Mdata<-Filter(function(l) l$period=="Yearly",M4)
In<-Out<-list();for(i in 1:length(Mdata)){In[[i]]=Mdata[[i]]$x;Out[[i]]=Mdata[[i]]$xx}
replic<-length(In)
data_train = In
data_test=Out
freq<-1   # seasonal period of the selected frequency (1 for Yearly); used by DOTM, THETAstructural and rwdar below
fh=6      # forecast horizon used in the M4 competition for Yearly series
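As an optional, commented example of changing the frequency (horizon and seasonal period taken from the M4 rules for Quarterly data):
# Mdata<-Filter(function(l) l$period=="Quarterly",M4)
# In<-Out<-list();for(i in 1:length(Mdata)){In[[i]]=Mdata[[i]]$x;Out[[i]]=Mdata[[i]]$xx}
# replic<-length(In); data_train=In; data_test=Out
# freq<-4; fh<-8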
###### Dynamic Optimized Theta Model ##########################################
#(forecTheta library)
######## https://www.sciencedirect.com/science/article/pii/S0169207016300243#!
DOTM<-function(y,steps){s=freq
dotm(ts(y,frequency = s),h=steps,level=c(80,95))
#dotm(y,h=steps)$mean
}
################################# Structural THETA code ###############################
THETAstructural=function(y,steps){frq=freq
prob1=.80;prob2=.95;
if(frq!=1){
s=freq;
for(t in 1:length(y)){if(y[t]!=0){y<-y[t:length(y)];break}}
w<-rep(1/(2*s),s+1);w[2:s]<-1/s
cma<-matrix(NA,length(y),1);
for(g in 1:(length(y)-s)){cma[g+s/2]<-sum(w*y[g:(g+s)])};
residuals<-y/cma
sfactors<-c();for(seas in 1:s){
sfactors[seas]<-mean(na.omit(residuals[seq(seas,length(y)-s+seas,by=s)]))}
sfactors<-sfactors*s/sum(sfactors)
sfactout<-rep(sfactors,length(y)+steps)[(length(y)+1):(length(y)+steps)]
y<-y/rep(sfactors,ceiling(length(y)/s))[1:length(y)]}
likelihood<-function(parameters){q=abs(parameters[1]);co=abs(parameters[2])
state<-p<-rep(0,length(y)+1)
state[1]=y[1]
p[1]=10000
k<-inn<-rep(0,length(y))
k[1]=p[1]/(p[1]+1);
sigmae=0
p[2]=p[1]-p[1]*k[1]+q;
state[2]=co+state[1]+k[1]*inn[1]
for(t in (2):length(y)){
k[t]=p[t]/(p[t]+1);
inn[t]=y[t]-state[t]
sigmae=sigmae+inn[t]^2/(p[t]+1)
p[t+1]=p[t]-p[t]*k[t]+q;
state[t+1]=co+state[t]+k[t]*inn[t]
}
sum(log(p+1))+length(y)*log(sigmae/length(y))}
results<-optim(c(.01,1),likelihood)
q=abs(results[[1]][1]);co=abs(results[[1]][2])
state<-p<-rep(0,length(y)+1)
state[1]=y[1]
p[1]=10000
k<-inn<-rep(0,length(y))
k[1]=p[1]/(p[1]+1);
p[2]=p[1]-p[1]*k[1]+q;
state[2]=co+state[1]+k[1]*inn[1]
for(t in (2):length(y)){
k[t]=p[t]/(p[t]+1);
inn[t]=y[t]-state[t]
p[t+1]=p[t]-p[t]*k[t]+q;
state[t+1]=co+state[t]+k[t]*inn[t]
}
PointF<-rep(0,steps);PointF[1]=state[length(y)+1];
for( t in 2:steps){PointF[t]=co*(t-1)+state[length(y)+1]}
if(frq!=1){PointF=PointF*sfactout}
PointF
}
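An optional, commented usage sketch (the first Yearly training series is an arbitrary choice):
# THETAstructural(data_train[[1]], fh)   # fh point forecasts for the first Yearly series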
# This code forecasts with the RWDAR model as in Sbrana & Silvestrini (IJF, 2023)
rwdar<-function(y,h){s=freq
if(s!=1){w<-rep(1/(2*s),s+1);w[2:s]<-1/s
cma<-matrix(NA,length(y),1);
for(g in 1:(length(y)-s)){cma[g+s/2]<-sum(w*y[g:(g+s)])};
residuals<-y/cma
sfactors<-c();for(seas in 1:s){
sfactors[seas]<-mean(na.omit(residuals[seq(seas,length(y)-s+seas,by=s)]))}
sfactors<-sfactors*s/sum(sfactors)
sfactout<-rep(sfactors,length(y)+h)[(length(y)+1):(length(y)+h)]
y<-y/rep(sfactors,ceiling(length(y)/s))[1:length(y)]}else{sfactout=rep(1,h)}
obs<-length(y); lambda <- beta <- c(); v <- rep(0,obs)
lambda[1]=y[1]
beta[1]<-0
ConcLogLikelihood <- function(Kg){q<-1-exp(-abs(Kg[1]));p<-1-exp(-abs(Kg[2]));co<-abs(Kg[3])
k1<- -(2*sqrt(q))/(sqrt(q)*(-1+p)-sqrt(4+q* (1+p)^2))
k2<-(p*(2+q+q*p-sqrt(q)*sqrt(4+q *(1+p)^2)))/(2+2 *q*p)
for (t in 2:(obs)) {
v[t]<-y[t]-lambda[t-1]-beta[t-1]
lambda[t]<-co+lambda[t-1]+k1*v[t]
beta[t]<-p*beta[t-1]+k2*v[t]
}
sum(v^2)
}
result<-optim(c(.8,.4,2),ConcLogLikelihood)
q<-1-exp(-abs(result[[1]][[1]])); p<-1-exp(-abs(result[[1]][[2]]));co<-abs(result[[1]][[3]])
if(p==1){p=.99}
k1<- -(2*sqrt(q))/(sqrt(q)*(-1+p)-sqrt(4+q* (1+p)^2))
k2<-(p*(2+q+q*p-sqrt(q)*sqrt(4+q *(1+p)^2)))/(2+2 *q*p)
for (t in 2:(obs)) {
v[t]<-y[t]-lambda[t-1]-beta[t-1]
lambda[t]<-co+lambda[t-1]+k1*v[t]
beta[t]<-p*beta[t-1]+k2*v[t]
}
T<-Tmatp<-diag(1,2);T[2,2]<-p;Z<-c(1,1)
Forecast<-c()
for(i in 1:h){
Forecast[i]<-Z%*%Tmatp%*%rbind(lambda[obs],beta[obs]);
Forecast[i]<-Forecast[i]+(i-1)*co
Tmatp<-T%*%Tmatp
}
Forecast*sfactout
}
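rwdar returns a plain vector of reseasonalised point forecasts; an optional, commented usage sketch:
# rwdar(data_train[[1]], fh)   # fh point forecasts for the first Yearly series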
##################################################################################
SeasonalityTest <- function(input, ppy){
tcrit <- 1.645
if (length(input)<3*ppy){
test_seasonal <- FALSE
}else{
xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1]
clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2)))
test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] )
if (is.na(test_seasonal)==TRUE){ test_seasonal <- FALSE }
}
return(test_seasonal)
}
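An optional, commented check on a built-in monthly series (AirPassengers ships with base R and is used here only as an illustration):
# SeasonalityTest(AirPassengers, frequency(AirPassengers))   # expected TRUE: strong seasonality at lag 12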
Theta.models.fit <- function(input, fh, theta, curve, model, seasonality , plot=FALSE, positive=TRUE){
#Check if the inputs are valid
if (theta<1){ theta <- 1}
if (fh<1){ fh <- 1}
#Estimate theta line weights
outtest <- naive(input, h=fh)$mean
wses <- (1/theta) ; wlrl <- (1-wses)
#Estimate seasonally adjusted time series
ppy <- frequency(input)
if (seasonality=="N"){
des_input <- input ; SIout <- rep(1, fh) ; SIin <- rep(1, length(input))
}else if (seasonality=="A"){
Dec <- decompose(input, type="additive")
des_input <- input-Dec$seasonal
SIin <- Dec$seasonal
SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh)
}else{
Dec <- decompose(input, type="multiplicative")
des_input <- input/Dec$seasonal
SIin <- Dec$seasonal
SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh)
}
#Estimate theta line zero
observations <- length(des_input)
xs <- c(1:observations)
xf = xff <- c((observations+1):(observations+fh))
dat=data.frame(des_input=des_input, xs=xs)
newdf <- data.frame(xs = xff)
if (curve=="Exp"){
estimate <- lm(log(des_input)~xs)
thetaline0In <- exp(predict(estimate))+input-input
thetaline0Out <- exp(predict(estimate, newdf))+outtest-outtest
}else{
estimate <- lm(des_input ~ poly(xs, 1, raw=TRUE))
thetaline0In <- predict(estimate)+des_input-des_input
thetaline0Out <- predict(estimate, newdf)+outtest-outtest
}
#Estimate Theta line (theta)
if (model=="A"){
thetalineT <- theta*des_input+(1-theta)*thetaline0In
}else{
thetalineT <- (des_input^theta)*(thetaline0In^(1-theta))
}
#forecasting TL2
sesmodel <- ses(thetalineT, h=fh)
thetaline2In <- sesmodel$fitted
thetaline2Out <- sesmodel$mean
#Theta forecasts
if (model=="A"){
forecastsIn <- as.numeric(thetaline2In*wses)+as.numeric(thetaline0In*wlrl)+des_input-des_input
forecastsOut <- as.numeric(thetaline2Out*wses)+as.numeric(thetaline0Out*wlrl)+outtest-outtest
}else{
forecastsIn <- ((as.numeric(thetaline2In)^(1/theta))*(as.numeric(thetaline0In)^(1-(1/theta))))+des_input-des_input
forecastsOut <- ((as.numeric(thetaline2Out)^(1/theta))*(as.numeric(thetaline0Out)^(1-(1/theta))))+outtest-outtest
}
#Seasonal adjustments
if (seasonality=="A"){
forecastsIn <- forecastsIn+SIin
forecastsOut <- forecastsOut+SIout
}else{
forecastsIn <- forecastsIn*SIin
forecastsOut <- forecastsOut*SIout
}
#Negative forecasts are truncated at zero
if (positive==T){
for (i in 1:length(forecastsOut)){
if (forecastsOut[i]<0){ forecastsOut[i] <- 0 }
}
}
if (plot==TRUE){
united <- cbind(input,forecastsOut)
for (ik in 1:(observations+fh)){ united[ik,1] = sum(united[ik,2],united[ik,1], na.rm = TRUE) }
plot(united[,1],col="black",type="l",main=paste("Model:",model,",Curve:",curve,",Theta:",theta),xlab="Time",ylab="Values",
ylim=c(min(united[,1])*0.85,max(united[,1])*1.15))
lines(forecastsIn, col="green") ; lines(forecastsOut, col="green")
lines(thetaline2In, col="blue") ; lines(thetaline2Out, col="blue")
lines(thetaline0In, col="red") ; lines(thetaline0Out, col="red")
}
output=list(fitted=forecastsIn,mean=forecastsOut,
fitted0=thetaline0In,mean0=thetaline0Out,
fitted2=thetaline2In,mean2=thetaline2Out,
model=paste(seasonality,model,curve,
c(round(theta,2)),
round(sesmodel$model$par[1],3),
round(sesmodel$model$par[2],3)))
return(output)
}
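An optional, commented usage sketch of Theta.models.fit; the series (AirPassengers), theta = 2, multiplicative settings and linear curve are arbitrary illustrative choices:
# fit_demo <- Theta.models.fit(AirPassengers, fh=18, theta=2, curve="Lrl", model="M", seasonality="M")
# fit_demo$mean    # 18 reseasonalised point forecasts
# fit_demo$model   # seasonality/model/curve, theta and SES parameters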
AutoTheta<- function(input, fh, positive=TRUE){
if (min(input)>0){
molist <- c("M","A") ; trlist <- c("Lrl","Exp")
}else{
molist <- c("A") ; trlist <- c("Lrl")
}
#Scale
base <- mean(input) ; input <- input/base
#Check seasonality & Create list of models
ppy <- frequency(input) ; ST <- F
if (ppy>1){ ST <- SeasonalityTest(input, ppy) }
if (ST==T){
selist <- c("M","A")
listnames <- c()
for (i in 1:length(selist)){
for (ii in 1:length(molist)){
for (iii in 1:length(trlist)){
listnames <- c(listnames,paste(selist[i], molist[ii], trlist[iii]))
}
}
}
}else{
listnames <- c()
for (ii in 1:length(molist)){
for (iii in 1:length(trlist)){
listnames <- c(listnames, paste("N", molist[ii], trlist[iii]))
}
}
}
#Exclude unstable models
excluded <- c("N M Lrl", "A M Lrl", "A M Exp", "M M Lrl")
listnames <- listnames[!(listnames %in% excluded)]
modellist <- NULL
for (i in 1:length(listnames)){
modellist[length(modellist)+1] <- list(c(substr(listnames,1,1)[i], substr(listnames,3,3)[i],
substr(listnames,5,7)[i]))
}
#Start validation
errorsin <- c() ; models <- NULL
#With this function we determine the optimal theta per case
optfun <- function(x, input, fh, curve, model, seasonality){
mean(abs(Theta.models.fit(input=input, fh, theta=x, curve, model, seasonality , plot=FALSE)$fitted-input))
}
for (j in 1:length(listnames)){
optTheta <- suppressWarnings(optimize(optfun, c(1:3),
input=input, fh=fh, curve=modellist[[j]][3], model=modellist[[j]][2],
seasonality=modellist[[j]][1])$minimum)
fortheta <- Theta.models.fit(input=input, fh=fh, theta=optTheta, curve=modellist[[j]][3], model=modellist[[j]][2],
seasonality=modellist[[j]][1], plot=F)
models[length(models)+1] <- list(fortheta)
errorsin <- c(errorsin, mean(abs(input-fortheta$fitted)))
}
#Select model and export
selected.model <- models[[which.min(errorsin)]]
description <- selected.model$model
#Estimate Prediction Intervals
frc <- selected.model$mean*base
fitted <- selected.model$fitted*base
residuals_t <- as.numeric(input*base-fitted)
if (frequency(input)==1){
m <- 12
}else if (frequency(input)==4){
m <- 4
}else{
m <- 1
}
pisl <- frc-1.960*sd(residuals_t)*sqrt(1+m*(c(1:fh)-1))
pisu <- frc+1.960*sd(residuals_t)*sqrt(1+m*(c(1:fh)-1))
if (positive==T){
pisl[pisl<0] <- 0 ; pisu[pisu<0] <- 0
}
output <- list(fitted=fitted, mean=frc, description=description, piu=pisu, pil=pisl)
return(output)
}
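An optional, commented usage sketch (first Yearly series, chosen arbitrarily):
# at_demo <- AutoTheta(data_train[[1]], fh)
# at_demo$description                             # selected seasonality/model/curve and parameters
# cbind(at_demo$mean, at_demo$pil, at_demo$piu)   # point forecasts with approximate 95% bounds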
#################################################################################
smape_cal <- function(outsample, forecasts){
#Used to estimate sMAPE
outsample <- as.numeric(outsample) ; forecasts<-as.numeric(forecasts)
smape <- (abs(outsample-forecasts)*200)/(abs(outsample)+abs(forecasts))
return(smape)
}
mase_cal <- function(insample, outsample, forecasts){
#Used to estimate MASE
frq <- frequency(insample)
forecastsNaiveSD <- rep(NA,frq)
for (j in (frq+1):length(insample)){
forecastsNaiveSD <- c(forecastsNaiveSD, insample[j-frq])
}
masep<-mean(abs(insample-forecastsNaiveSD),na.rm = TRUE)
outsample <- as.numeric(outsample) ; forecasts <- as.numeric(forecasts)
mase <- (abs(outsample-forecasts))/masep
return(mase)
}
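An optional, commented sanity check of the two accuracy measures against the naive forecast (naive comes from the forecast package, attached earlier):
# fnaive <- naive(data_train[[1]], h=fh)$mean
# mean(smape_cal(data_test[[1]], fnaive))                    # average sMAPE over the horizon
# mean(mase_cal(data_train[[1]], data_test[[1]], fnaive))    # average MASE over the horizon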
naive_seasonal <- function(input, fh){
#Used to estimate Seasonal Naive
frcy <- frequency(input)
frcst <- naive(input, h=fh)$mean
if (frcy>1){
frcst <- head(rep(as.numeric(tail(input,frcy)), fh), fh) + frcst - frcst
}
return(frcst)
}
Theta.classic <- function(input, fh){
#Used to estimate Theta classic
#Set parameters
wses <- wlrl<-0.5 ; theta <- 2
#Estimate theta line (0)
observations <- length(input)
xt <- c(1:observations)
xf <- c((observations+1):(observations+fh))
train <- data.frame(input=input, xt=xt)
test <- data.frame(xt = xf)
estimate <- lm(input ~ poly(xt, 1, raw=TRUE))
thetaline0In <- as.numeric(predict(estimate))
thetaline0Out <- as.numeric(predict(estimate,test))
#Estimate theta line (2)
thetalineT <- theta*input+(1-theta)*thetaline0In
sesmodel <- ses(thetalineT, h=fh)
thetaline2In <- sesmodel$fitted
thetaline2Out <- sesmodel$mean
#Theta forecasts
forecastsIn <- (thetaline2In*wses)+(thetaline0In*wlrl)
forecastsOut <- (thetaline2Out*wses)+(thetaline0Out*wlrl)
#Negative forecasts are truncated at zero
for (i in 1:length(forecastsOut)){
if (forecastsOut[i]<0){ forecastsOut[i]<-0 }
}
output=list(fitted = forecastsIn, mean = forecastsOut,
fitted0 = thetaline0In, mean0 = thetaline0Out,
fitted2 = thetaline2In, mean2 = thetaline2Out)
return(output)
}
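An optional, commented usage sketch:
# Theta.classic(data_train[[1]], fh)$mean   # classical Theta (theta = 2) point forecasts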
Benchmarks <- function(input, fh){
#Used to estimate the statistical benchmarks of the M4 competition
#Estimate seasonally adjusted time series
ppy <- frequency(input) ; ST <- F
if (ppy>1){ ST <- SeasonalityTest(input,ppy) }
if (ST==TRUE){
Dec <- decompose(input,type="multiplicative")
des_input <- input/Dec$seasonal
SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], fh), fh)
}else{
des_input <- input ; SIout <- rep(1, fh)
}
#f1 <- naive(input, h=fh)$mean #Naive
#f2 <- naive_seasonal(input, fh=fh) #Seasonal Naive
#f3 <- naive(des_input, h=fh)$mean*SIout #Naive2
f4 <- ses(des_input, h=fh)$mean*SIout #Ses
f5 <- holt(des_input, h=fh, damped=F)$mean*SIout #Holt
f6 <- holt(des_input, h=fh, damped=T)$mean*SIout #Damped
f7 <- Theta.classic(input=des_input, fh=fh)$mean*SIout #Theta
f8 <- (f4+f5+f6)/3 #Comb
f9 <- THETAstructural(input,fh)
f10 <-DOTM(input,fh)$mean
f11 <-AutoTheta(input,fh)$mean
f12 <-rwdar(input,fh)
return(list(f4,f5,f6,f7,f8,f9,f10,f11,f12))
}
Names_benchmarks <- c("SES", "Holt", "Damped", "Theta", "Comb", "MSOE Theta","DOTM","AutoTheta","RWDAR")
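To add a further method, as mentioned in the introduction to this part, one extra forecast and one extra name are enough; the ARIMA lines below are only a hypothetical, commented example.
# Inside Benchmarks(), before the return statement, add for instance
#   f13 <- forecast::forecast(auto.arima(input), h=fh)$mean
# return list(f4,f5,f6,f7,f8,f9,f10,f11,f12,f13), and extend the names:
# Names_benchmarks <- c(Names_benchmarks, "ARIMA")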
Total_smape=Total_mase <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
# # PARALLEL (requires the parallel package for mclapply)
# system.time(expr =
# forecasts <- mclapply(X=data_train, FUN = Benchmarks, fh=fh, mc.cores=20)
# )
forecasts <- lapply(X = data_train, FUN = Benchmarks, fh=fh)
for (i in 1:length(forecasts)) {
insample <- data_train[[i]]
outsample <- data_test[[i]]
forecast = forecasts[[i]]
#sMAPE
for (j in 1:length(Names_benchmarks)){
Total_smape[j,,i] <- smape_cal(outsample, forecast[[j]]) #j the # of the benchmark
}
#MASE
for (j in 1:length(Names_benchmarks)){
Total_mase[j,,i] <- mase_cal(insample, outsample, forecast[[j]]) #j the # of the benchmark
}
}
print("########### sMAPE ###############")
## [1] "########### sMAPE ###############"
for (i in 1:length(Names_benchmarks)){
print(paste(Names_benchmarks[i], round(mean(Total_smape[i,,]), 3)))
}
## [1] "SES 16.398"
## [1] "Holt 16.535"
## [1] "Damped 15.163"
## [1] "Theta 14.603"
## [1] "Comb 14.874"
## [1] "MSOE Theta 13.507"
## [1] "DOTM 13.677"
## [1] "AutoTheta 13.797"
## [1] "RWDAR 13.69"
print("########### MASE ################")
## [1] "########### MASE ################"
for (i in 1:length(Names_benchmarks)){
print(paste(Names_benchmarks[i], round(mean(Total_mase[i,,]), 3)))
}
## [1] "SES 3.981"
## [1] "Holt 3.576"
## [1] "Damped 3.372"
## [1] "Theta 3.382"
## [1] "Comb 3.282"
## [1] "MSOE Theta 3.054"
## [1] "DOTM 3.075"
## [1] "AutoTheta 3.184"
## [1] "RWDAR 3.059"
####
Total_mase_m=matrix(NA, length(Names_benchmarks), fh)
for (i in 1:length(Names_benchmarks)){
for (j in 1:fh){
Total_mase_m[i,j] = mean(Total_mase[i,j,])
}
}
Total_mase_m=t(Total_mase_m)
colnames(Total_mase_m)=Names_benchmarks
overallResults <- matrix(c(colMeans(Total_mase_m),apply(Total_mase_m,2,median)),
ncol(Total_mase_m), 2, dimnames=list(colnames(Total_mase_m),c("Mean","Median")))
round(overallResults,5)
## Mean Median
## SES 3.98107 4.04776
## Holt 3.57645 3.56633
## Damped 3.37207 3.38714
## Theta 3.38249 3.43749
## Comb 3.28233 3.30335
## MSOE Theta 3.05447 3.09244
## DOTM 3.07510 3.12251
## AutoTheta 3.18403 3.17059
## RWDAR 3.05862 3.09934
boxplot(Total_mase_m)
points(colMeans(Total_mase_m),col="red",pch=16)
#options(download.file.method="wininet")
#install.packages("tsutils")
#help(print.nemenyi)
library(tsutils)
nemenyi(Total_mase_m, plottype="mcb")
## Friedman and Nemenyi Tests
## The confidence level is 5%
## Number of observations is 6 and number of methods is 9
## Friedman test p-value: 0.0000 - Ha: Different
## Critical distance: 4.9043