Structural Theta: codes and Results (Part II)

The R codes

This file provides the code needed to reproduce the results in Tables 7 to 9 of Sbrana and Silvestrini (IJF, 2024).

If you have any questions, please email: giacomo.sbrana@neoma-bs.fr

Table 7: M4 competition: Prediction interval results

Below we report the code that reproduces the results in Table 7 using the Structural THETA only.

You may also change the method and obtain the results for the method of your choice.

library("M4comp2018")

####################### Yearly data ##################################################

 # Select the yearly M4 series and split each one into its in-sample part
 # (`x`) and its out-of-sample part (`xx`). `In[[i]]` and `Out[[i]]` belong to
 # the same series. lapply() replaces the index loop, so an empty selection
 # yields empty lists instead of iterating over c(1, 0).
 Mdata <- Filter(function(series) series$period == "Yearly", M4)
 In <- unname(lapply(Mdata, function(series) series$x))
 Out <- unname(lapply(Mdata, function(series) series$xx))
 replic <- length(In)  # number of series
 steps <- 6            # forecast horizon used for the yearly data
 freq <- frequ <- 1    # seasonal period (1 = no seasonal adjustment)
 
 ####################### Quarterly data ###############################################
 
 # Mdata<-Filter(function(l) l$period=="Quarterly",M4)
 # In<-Out<-list();for(i in 1:length(Mdata)){In[[i]]=Mdata[[i]]$x;Out[[i]]=Mdata[[i]]$xx}
 # replic<-length(In)
 # steps<-fh<-8
 # freq<-frequ<-frq<-4
 
##########################################
 # Structural (MSOE) Theta forecasts with 85% and 95% prediction intervals.
 #
 # Arguments:
 #   y      in-sample observations (numeric vector or ts).
 #   steps  number of horizons to forecast; must be at least 1.
 #   freq   seasonal period (1 = no seasonal adjustment). Optional: when it is
 #          not supplied, the variable `freq` visible from the environment in
 #          which this function was defined is used, exactly as the original
 #          two-argument version did.
 #
 # Value: a list with
 #   mean   point forecasts (length `steps`);
 #   lower  steps x 2 matrix with columns `lower1` (85%) and `lower2` (95%),
 #          truncated at zero;
 #   upper  steps x 2 matrix with columns `upper1` (85%) and `upper2` (95%).
 THETAstructural <- function(y, steps, freq) {
   if (missing(freq)) {
     # Same lookup the original free variable `freq` performed.
     freq <- get("freq", envir = parent.env(environment()))
   }

   if (freq != 1) {
     s <- freq
     # Start the series at its first non-zero observation.
     for (t in seq_along(y)) {
       if (y[t] != 0) {
         y <- y[t:length(y)]
         break
       }
     }
     # Centred moving average over s + 1 points (half weight on both ends).
     w <- rep(1 / (2 * s), s + 1)
     w[2:s] <- 1 / s
     cma <- matrix(NA, length(y), 1)
     for (g in 1:(length(y) - s)) {
       cma[g + s / 2] <- sum(w * y[g:(g + s)])
     }
     # Multiplicative seasonal factors: average ratio to the moving average for
     # each position in the cycle, normalised so that they sum to s.
     ratios <- y / cma
     sfactors <- vapply(
       seq_len(s),
       function(seas) {
         mean(na.omit(ratios[seq(seas, length(y) - s + seas, by = s)]))
       },
       numeric(1)
     )
     sfactors <- sfactors * s / sum(sfactors)
     # Factors for the forecast horizons, then deseasonalise the sample.
     sfactout <- rep(sfactors, length(y) + steps)[(length(y) + 1):(length(y) + steps)]
     y <- y / rep(sfactors, ceiling(length(y) / s))[1:length(y)]
   }

   n <- length(y)

   # One pass of the filter for state-noise ratio `q` and drift `co`.
   # Returns the predicted states, the scaled state variances `p` and the
   # accumulated sum of squared standardised innovations `sigmae`.
   run_filter <- function(q, co) {
     state <- p <- rep(0, n + 1)
     state[1] <- y[1]
     p[1] <- 10000
     # t = 1: the state is initialised at y[1], so the innovation is zero.
     p[2] <- p[1] - p[1] * (p[1] / (p[1] + 1)) + q
     state[2] <- co + state[1]
     sigmae <- 0
     for (t in seq_len(n)[-1]) {
       gain <- p[t] / (p[t] + 1)
       innovation <- y[t] - state[t]
       sigmae <- sigmae + innovation^2 / (p[t] + 1)
       p[t + 1] <- p[t] - p[t] * gain + q
       state[t + 1] <- co + state[t] + gain * innovation
     }
     list(state = state, p = p, sigmae = sigmae)
   }

   # Objective minimised by optim(); both parameters enter through abs().
   likelihood <- function(parameters) {
     fit <- run_filter(abs(parameters[1]), abs(parameters[2]))
     sum(log(fit$p + 1)) + n * log(fit$sigmae / n)
   }

   results <- optim(c(.01, 1), likelihood)
   q <- abs(results$par[1])
   co <- abs(results$par[2])
   fit <- run_filter(q, co)

   # Point forecasts: last predicted state plus the drift for each extra step.
   # Vectorised so that steps == 1 returns exactly one value.
   PointF <- co * (seq_len(steps) - 1) + fit$state[n + 1]
   if (freq != 1) {
     PointF <- PointF * sfactout
   }

   # Forecast variances: the state variance grows by Eta per step and the
   # observation variance H is added at every horizon.
   H <- fit$sigmae / (n - 1)
   P <- tail(fit$p, 1) * H
   Eta <- q * H
   Inter <- numeric(steps)
   Inter[1] <- P
   for (j in seq_len(steps)[-1]) {
     Inter[j] <- Inter[j - 1] + Eta
   }
   Interv <- Inter + H

   prob1 <- .85
   prob2 <- .95
   half1 <- qnorm((1 + prob1) / 2) * sqrt(Interv)
   half2 <- qnorm((1 + prob2) / 2) * sqrt(Interv)
   # Lower bounds are truncated at zero.
   lower1 <- pmax(PointF - half1, 0)
   lower2 <- pmax(PointF - half2, 0)
   upper1 <- PointF + half1
   upper2 <- PointF + half2

   list(
     mean = PointF,
     lower = cbind(lower1, lower2),
     upper = cbind(upper1, upper2)
   )
 }
 
 #########################################


# Mean Scaled Interval Score of one prediction interval.
#
# Arguments:
#   yout   realised out-of-sample values.
#   lower  lower interval bounds, one per element of `yout`.
#   upper  upper interval bounds, one per element of `yout`.
#   prob   nominal coverage of the interval (e.g. .95).
#   yin    in-sample values used for scaling.
#   freq   lag of the in-sample differences used for scaling. Optional: when
#          it is not supplied, the variable `freq` visible from the
#          environment in which this function was defined is used, exactly as
#          the original five-argument version did.
#
# Value: mean of (interval width + penalties for observations outside the
# interval) divided by the mean absolute lag-`freq` difference of `yin`.
# NA inputs give an NA score instead of an error.
msis <- function(yout, lower, upper, prob, yin, freq) {
  if (missing(freq)) {
    # Same lookup the original free variable `freq` performed.
    freq <- get("freq", envir = parent.env(environment()))
  }

  horizons <- seq_along(yout)
  actual <- as.numeric(yout)
  lo <- as.numeric(lower)[horizons]
  up <- as.numeric(upper)[horizons]

  penalty <- 2 / (1 - prob)
  # Penalties apply only where the observation falls outside the interval.
  miss_below <- ifelse(actual < lo, penalty * (lo - actual), 0)
  miss_above <- ifelse(actual > up, penalty * (actual - up), 0)
  width <- up - lo

  mean(miss_below + miss_above + width) / mean(abs(diff(yin, lag = freq)))
}


# MSIS of the 95% prediction interval for every series: the last `steps`
# observations are held out, the model is fitted on the rest, and the interval
# is scored against the held-out values. Row h of `resultsMIS95` is series h.
resultsMIS95 <- matrix(NA, length(In), 1)

for (h in seq_along(In)) {
  y <- ts(c(In[[h]], Out[[h]]), frequency = freq)
  train <- head(y, length(y) - steps)
  forec <- THETAstructural(train, steps)
  # Column 2 of `lower` / `upper` holds the 95% bounds.
  resultsMIS95[h, ] <- msis(
    tail(y, steps), forec$lower[, 2], forec$upper[, 2], .95, train
  )
}

# Average over series, ignoring any series whose score is NA.
print(colMeans(resultsMIS95, na.rm = TRUE))

## [1] 46.77735

Table 8: M3 competition: SMAPE results

Below we report the code that reproduces the results in Table 8 using the Structural THETA.

You may also change the method and obtain the corresponding results.

library("Mcomp")

## Loading required package: forecast

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

################################### MSOE (Structural) THETA #############################

## Warning : This code provides only point forecasts (not prediction intervals) #########
######## This is done since the M3 was focusing only on point forecasts #################
###### The full code can be found using this link: https://rpubs.com/giac76/1167511 #####

# Structural (MSOE) Theta point forecasts.
#
# Arguments:
#   y      in-sample observations (numeric vector or ts).
#   steps  number of horizons to forecast; must be at least 1.
#   freq   seasonal period (1 = no seasonal adjustment).
#
# Value: numeric vector of `steps` point forecasts. This version returns no
# prediction intervals, so the interval computation has been removed.
THETAstructural <- function(y, steps, freq) {
  if (freq != 1) {
    s <- freq
    # Start the series at its first non-zero observation.
    for (t in seq_along(y)) {
      if (y[t] != 0) {
        y <- y[t:length(y)]
        break
      }
    }
    # Centred moving average over s + 1 points (half weight on both ends).
    w <- rep(1 / (2 * s), s + 1)
    w[2:s] <- 1 / s
    cma <- matrix(NA, length(y), 1)
    for (g in 1:(length(y) - s)) {
      cma[g + s / 2] <- sum(w * y[g:(g + s)])
    }
    # Multiplicative seasonal factors: average ratio to the moving average for
    # each position in the cycle, normalised so that they sum to s.
    ratios <- y / cma
    sfactors <- vapply(
      seq_len(s),
      function(seas) {
        mean(na.omit(ratios[seq(seas, length(y) - s + seas, by = s)]))
      },
      numeric(1)
    )
    sfactors <- sfactors * s / sum(sfactors)
    # Factors for the forecast horizons, then deseasonalise the sample.
    sfactout <- rep(sfactors, length(y) + steps)[(length(y) + 1):(length(y) + steps)]
    y <- y / rep(sfactors, ceiling(length(y) / s))[1:length(y)]
  }

  n <- length(y)

  # One pass of the filter for state-noise ratio `q` and drift `co`.
  # Returns the predicted states, the scaled state variances `p` and the
  # accumulated sum of squared standardised innovations `sigmae`.
  run_filter <- function(q, co) {
    state <- p <- rep(0, n + 1)
    state[1] <- y[1]
    p[1] <- 10000
    # t = 1: the state is initialised at y[1], so the innovation is zero.
    p[2] <- p[1] - p[1] * (p[1] / (p[1] + 1)) + q
    state[2] <- co + state[1]
    sigmae <- 0
    for (t in seq_len(n)[-1]) {
      gain <- p[t] / (p[t] + 1)
      innovation <- y[t] - state[t]
      sigmae <- sigmae + innovation^2 / (p[t] + 1)
      p[t + 1] <- p[t] - p[t] * gain + q
      state[t + 1] <- co + state[t] + gain * innovation
    }
    list(state = state, p = p, sigmae = sigmae)
  }

  # Objective minimised by optim(); both parameters enter through abs().
  likelihood <- function(parameters) {
    fit <- run_filter(abs(parameters[1]), abs(parameters[2]))
    sum(log(fit$p + 1)) + n * log(fit$sigmae / n)
  }

  results <- optim(c(.01, 1), likelihood)
  q <- abs(results$par[1])
  co <- abs(results$par[2])
  fit <- run_filter(q, co)

  # Point forecasts: last predicted state plus the drift for each extra step.
  # Vectorised so that steps == 1 returns exactly one value.
  PointF <- co * (seq_len(steps) - 1) + fit$state[n + 1]
  if (freq != 1) {
    PointF <- PointF * sfactout
  }

  PointF
}

######################### For M3 Data  ##########################

# Split M3 by series type and collect the in-sample (`x`) and out-of-sample
# (`xx`) parts of every series. `In` and `Out` are ordered yearly, quarterly,
# monthly, other, so position length(M3y) + i is the i-th quarterly series,
# and so on. Concatenating per-type lists replaces the hand-written offset
# arithmetic of the index loops.
M3y <- subset(M3, "Yearly")
M3q <- subset(M3, "Quarterly")
M3m <- subset(M3, "Monthly")
M3o <- subset(M3, "Other")

In <- unname(c(
  lapply(M3y, function(series) series$x),
  lapply(M3q, function(series) series$x),
  lapply(M3m, function(series) series$x),
  lapply(M3o, function(series) series$x)
))
Out <- unname(c(
  lapply(M3y, function(series) series$xx),
  lapply(M3q, function(series) series$xx),
  lapply(M3m, function(series) series$xx),
  lapply(M3o, function(series) series$xx)
))

# This is to check the results against the Naive method (uncomment to use it)
#naiV=function(y,step, freq){naive(y,steps)$mean}

# Symmetric absolute percentage error at position(s) `h`.
#
# Arguments:
#   DataOut  realised out-of-sample values.
#   Forc     forecasts aligned with `DataOut`.
#   h        index (or indices) of the horizon(s) to evaluate.
#
# Value: 200 * |actual - forecast| / (actual + forecast) at each `h`.
smape1 <- function(DataOut, Forc, h) {
  actual <- DataOut[h]
  predicted <- Forc[h]
  abs_error <- abs(actual - predicted)
  200 * abs_error / (actual + predicted)
}

################# Yearly data ###########################

steps <- 6
freq <- 1

# sMAPE of each yearly series (rows) at each horizon 1..steps (columns).
# The column count follows `steps` instead of a separate hard-coded 6.
SMAPE1y <- matrix(NA, length(M3y), steps)
for (j in seq_len(length(M3y))) {
  FO <- THETAstructural(In[[j]], steps, freq)
  # smape1() is elementwise in its index argument, so all horizons are
  # evaluated in one call.
  SMAPE1y[j, ] <- smape1(Out[[j]], FO, seq_len(steps))
}

################# Quarterly data###########################

steps <- 8
freq <- 4

# sMAPE of each quarterly series (rows) at each horizon 1..steps (columns).
# Quarterly series follow the yearly ones in `In` / `Out`.
SMAPE1q <- matrix(NA, length(M3q), steps)
for (j in seq_len(length(M3q))) {
  pos <- length(M3y) + j
  FO <- THETAstructural(In[[pos]], steps, freq)
  # smape1() is elementwise in its index argument, so all horizons are
  # evaluated in one call.
  SMAPE1q[j, ] <- smape1(Out[[pos]], FO, seq_len(steps))
}

################# Monthly data###########################

steps <- 18
freq <- 12

# sMAPE of each monthly series (rows) at each horizon 1..steps (columns).
# Monthly series follow the yearly and quarterly ones in `In` / `Out`.
SMAPE1m <- matrix(NA, length(M3m), steps)
for (j in seq_len(length(M3m))) {
  pos <- length(M3y) + length(M3q) + j
  FO <- THETAstructural(In[[pos]], steps, freq)
  # smape1() is elementwise in its index argument, so all horizons are
  # evaluated in one call.
  SMAPE1m[j, ] <- smape1(Out[[pos]], FO, seq_len(steps))
}

################# Other data###########################

steps <- 8
freq <- 1

# sMAPE of each "Other" series (rows) at each horizon 1..steps (columns).
# These series come last in `In` / `Out`.
SMAPE1o <- matrix(NA, length(M3o), steps)
for (j in seq_len(length(M3o))) {
  pos <- length(M3y) + length(M3q) + length(M3m) + j
  FO <- THETAstructural(In[[pos]], steps, freq)
  # smape1() is elementwise in its index argument, so all horizons are
  # evaluated in one call.
  SMAPE1o[j, ] <- smape1(Out[[pos]], FO, seq_len(steps))
}

################# All data###########################

# sMAPE of every M3 series (rows) at every horizon (columns). Series with a
# horizon shorter than 18 keep NA in the unused columns.
SMAPE <- matrix(NA, length(In), 18)
for (j in seq_along(In)) {
  # Horizon and seasonal period depend on which type the series belongs to
  # (`In` is ordered yearly, quarterly, monthly, other). `steps` and `freq`
  # are left holding the values of the last series processed.
  if (j <= length(M3y)) {
    steps <- 6
    freq <- 1
  } else if (j <= length(M3y) + length(M3q)) {
    steps <- 8
    freq <- 4
  } else if (j <= length(M3y) + length(M3q) + length(M3m)) {
    steps <- 18
    freq <- 12
  } else {
    steps <- 8
    freq <- 1
  }
  FO <- THETAstructural(In[[j]], steps, freq)
  # smape1() is elementwise in its index argument, so all horizons are
  # evaluated in one call.
  SMAPE[j, seq_len(steps)] <- smape1(Out[[j]], FO, seq_len(steps))
}

# Average sMAPE over horizons 1..i for each reported cut-off i, ignoring the
# NA cells of series whose horizon is shorter than i.
resAve <- vapply(
  c(4, 6, 8, 12, 15, 18),
  function(i) mean(SMAPE[, 1:i], na.rm = TRUE),
  numeric(1)
)

# Mean of all sMAPE values in columns 1..h of `smape_matrix`, for each h in
# `horizons`.
avg_upto <- function(smape_matrix, horizons) {
  vapply(horizons, function(h) mean(smape_matrix[, 1:h]), numeric(1))
}

# Yearly series: horizons 1-4 and 1-6.
round(avg_upto(SMAPE1y, c(4, 6)), 2)

## [1] 13.17 15.80

# Quarterly series: horizons 1-4, 1-6 and 1-8.
round(avg_upto(SMAPE1q, c(4, 6, 8)), 2)

## [1] 7.11 8.11 8.94

# Monthly series: horizons 1-4 up to 1-18.
round(avg_upto(SMAPE1m, c(4, 6, 8, 12, 15, 18)), 2)

## [1] 12.49 12.38 12.74 13.11 13.72 14.42

# Other series: horizons 1-4, 1-6 and 1-8.
round(avg_upto(SMAPE1o, c(4, 6, 8)), 2)

## [1] 4.44 5.58 6.43

# All series together.
print(round(resAve, 2))

## [1] 10.82 11.65 11.86 12.26 12.77 13.37

Table 9: M5 competition:

Below we report the code that reproduces the results in Table 9 using the Structural THETA.