printing the basic features of each series and producing a plot:
# Our referenced series from M4 competition: Y500-Y509
count <- 500
for (series in c("Y500","Y501","Y502","Y503","Y504","Y505","Y506","Y507","Y508","Y509")){
assign(paste("ts", series, sep = "_"), ts(c(M4[[count]]$x, M4[[count]]$xx),start = start(M4[[count]]$x), frequency = frequency(M4[[count]]$x)))
cat("\nthe name of the series is:", paste("ts", series, sep = "_"))
cat("\nthe length of the series is:" ,length(get(paste("ts", series, sep = "_"))))
cat("\nthe frequancy of the series is:" ,frequency(get(paste("ts", series, sep = "_")))," (yearly)")
plot(get(paste("ts", series, sep = "_")),main=paste("Plotting Time Series", series, sep = " "), ylab = "Value", type = "l")
count <- count + 1
cat("\n")
}
##
## the name of the series is: ts_Y500
## the length of the series is: 55
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y501
## the length of the series is: 49
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y502
## the length of the series is: 35
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y503
## the length of the series is: 35
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y504
## the length of the series is: 55
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y505
## the length of the series is: 40
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y506
## the length of the series is: 40
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y507
## the length of the series is: 40
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y508
## the length of the series is: 35
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y509
## the length of the series is: 50
## the frequency of the series is: 1 (yearly)
Produce a forecast for each of the series (Y500-Y509), based on 11 of the benchmark methods from the M4 competition:
# Models function creation
fh <- 6 #The forecasting horizon examined
frq <- 1 #The frequency of the data
smape_cal <- function(outsample, forecasts){
#Used to estimate sMAPE
outsample <- as.numeric(outsample) ; forecasts<-as.numeric(forecasts)
smape <- (abs(outsample-forecasts)*200)/(abs(outsample)+abs(forecasts))
return(smape)
}
mase_cal <- function(insample, outsample, forecasts){
#Used to estimate MASE
frq <- frequency(insample)
forecastsNaiveSD <- rep(NA,frq)
for (j in (frq+1):length(insample))
{
forecastsNaiveSD <- c(forecastsNaiveSD, insample[j-frq])
}
masep<-mean(abs(insample-forecastsNaiveSD),na.rm = TRUE)
outsample <- as.numeric(outsample)
forecasts <- as.numeric(forecasts)
mase <- (abs(outsample-forecasts))/masep
return(mase)
}
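# For reference, the two accuracy measures computed above are:
#   sMAPE_t = 200 * |y_t - f_t| / (|y_t| + |f_t|)
#   MASE_t  = |y_t - f_t| / MAE_naive,
# where MAE_naive is the in-sample mean absolute error of the lag-frq (seasonal) naive forecast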
naive_seasonal <- function(input, fh){
#Used to estimate Seasonal Naive
frcy <- frequency(input)
frcst <- naive(input, h=fh)$mean
if (frcy>1)
{
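# repeat the last full seasonal cycle over the horizon; adding and subtracting frcst
# keeps the time-series attributes (start, frequency) of the naive forecast object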
frcst <- head(rep(as.numeric(tail(input,frcy)), fh), fh) + frcst - frcst
}
return(frcst)
}
Theta.classic <- function(input, fh){
#Used to estimate Theta classic
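# Theta line (0) is a linear trend fitted to the data; theta line (2) is an SES fit of
# 2*input - trend. The classic Theta forecast is the 50/50 average of the two lines.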
#Set parameters
wses <- wlrl<-0.5 ; theta <- 2
#Estimate theta line (0)
observations <- length(input)
xt <- c(1:observations)
xf <- c((observations+1):(observations+fh))
train <- data.frame(input=input, xt=xt)
test <- data.frame(xt = xf)
estimate <- lm(input ~ poly(xt, 1, raw=TRUE))
thetaline0In <- as.numeric(predict(estimate))
thetaline0Out <- as.numeric(predict(estimate,test))
#Estimate theta line (2)
thetalineT <- theta*input+(1-theta)*thetaline0In
sesmodel <- ses(thetalineT, h=fh)
thetaline2In <- sesmodel$fitted
thetaline2Out <- sesmodel$mean
#Theta forecasts
forecastsIn <- (thetaline2In*wses)+(thetaline0In*wlrl)
forecastsOut <- (thetaline2Out*wses)+(thetaline0Out*wlrl)
#Zero forecasts become positive
for (i in 1:length(forecastsOut))
{
if (forecastsOut[i]<0)
{
forecastsOut[i]<-0
}
}
output=list(fitted = forecastsIn, mean = forecastsOut,
fitted0 = thetaline0In, mean0 = thetaline0Out,
fitted2 = thetaline2In, mean2 = thetaline2Out)
return(output)
}
SeasonalityTest <- function(input, ppy){
#Used to determine whether a time series is seasonal
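# tcrit = 1.645 is the 95% one-sided (90% two-sided) standard normal quantile, used to
# build the significance bound for the autocorrelation at lag ppy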
tcrit <- 1.645
if (length(input)<3*ppy)
{
test_seasonal <- FALSE
}
else
{
xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1]
clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2)))
test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] )
if (is.na(test_seasonal)==TRUE)
{
test_seasonal <- FALSE
}
}
return(test_seasonal)
}
#Models Benchmarks Function
Benchmarks <- function(input, h){
#Used to estimate the statistical benchmarks of the M4 competition
#Estimate seasonally adjusted time series
ppy <- frequency(input)
ST <- F
if (ppy>1)
{
ST <- SeasonalityTest(input,ppy)
}
if (ST==T)
{
Dec <- decompose(input,type="multiplicative")
des_input <- input/Dec$seasonal
SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], h), h)
}
else
{
des_input <- input
SIout <- rep(1, h)
}
naive <- naive(input, h=h)$mean #Naive
naive_s <- naive_seasonal(input, fh=h) #Seasonal Naive
naive2 <- naive(des_input, h=h)$mean*SIout #Naive2
ses <- ses(des_input, h=h)$mean*SIout #Ses
holt <- holt(des_input, h=h, damped=F)$mean*SIout #Holt
damp <- holt(des_input, h=h, damped=T)$mean*SIout #Damped
theta <- Theta.classic(input=des_input, fh=h)$mean*SIout #Theta
comb <- (ses+holt+damp)/3 #Comb
ets <- forecast(ets(des_input), h=h, level =0)$mean*SIout #ets
arima <- forecast(auto.arima(des_input), h=h, level =0)$mean*SIout #arima
mlp <- forecast(nnetar(des_input), h=h, level =0)$mean*SIout #mlp
return(list(naive,naive_s,naive2,ses,holt,damp,theta,comb,ets,arima,mlp))
}
Names_benchmarks <- c("Naive", "sNaive", "Naive2", "SES", "Holt", "Damped", "Theta", "Comb", "ets","arima","mlp")
model_eval_func <- function(ts_count)
{
cat("\nthe name of the series is:", paste("ts", ts_count, sep = "_"))
Names_benchmarks <- c("Naive", "sNaive", "Naive2", "SES", "Holt", "Damped", "Theta", "Comb", "ets","arima","mlp")
data_train <- NULL
data_test <- NULL
data_all <- assign(paste("ts", ts_count, sep = "_"), ts(c(M4[[ts_count]]$x, M4[[ts_count]]$xx),start = start(M4[[ts_count]]$x), frequency = frequency(M4[[ts_count]]$x)))
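# hold out the last fh observations of each series as the test set; the rest is the training set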
data_train[length(data_train)+1] <- list(ts(head(data_all,length(data_all)-fh),frequency = frq))
data_test[length(data_test)+1] <- list(tail(data_all,fh))
SMAPE_df <- data.frame(Names_benchmarks)
MASE_df <- data.frame(Names_benchmarks)
OWA_df <- data.frame(Names_benchmarks)
Total_smape <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
Total_mase <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
#Methods, Horizon, time-series
for (i in 1:length(data_train)){
insample <- data_train[[i]]
outsample <- data_test[[i]]
forecasts <- Benchmarks(input=insample, h=fh)
#SMAPE
for (j in 1:length(Names_benchmarks)){
Total_smape[j,,i] <- smape_cal(outsample, forecasts[[j]])
}
#MASE
for (j in 1:length(Names_benchmarks)){
Total_mase[j,,i] <- mase_cal(insample, outsample, forecasts[[j]])
}
}
print("-------------------- sMAPE")
for (i in 1:length(Names_benchmarks)){
SMAPE <- round(mean(Total_smape[i,,]), 3)
SMAPE_df[i,2] <- SMAPE
print(paste(Names_benchmarks[i], SMAPE))
}
print("\n-------------------- MASE")
for (i in 1:length(Names_benchmarks)){
MASE <- round(mean(Total_mase[i,,]), 3)
MASE_df[i,2] <- MASE
print(paste(Names_benchmarks[i], MASE))
}
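# OWA (overall weighted average) is measured relative to the Naive2 benchmark
# (Names_benchmarks[3], hence index 3 below):
#   OWA = 0.5 * (MASE / MASE_Naive2 + sMAPE / sMAPE_Naive2)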
print("-------------------- OWA")
for (i in 1:length(Names_benchmarks)){
OWA <- round(((mean(Total_mase[i,,])/mean(Total_mase[3,,]))+(mean(Total_smape[i,,])/mean(Total_smape[3,,])))/2, 3)
OWA_df[i,2] <- OWA
print(paste(Names_benchmarks[i],OWA))
}
grouped_df <- data.frame(MASE_df$V2,SMAPE_df$V2,OWA_df$V2)
barplot(t(as.matrix(grouped_df)), names.arg = Names_benchmarks , cex.names = 0.6 ,beside=TRUE, main=paste("Visual Comparison Y", ts_count, " Series", sep = " "), legend = c("MASE", "SMAPE", "OWA"))
return()
}
for (series in 500:509){
x <- model_eval_func(series)
}
##
## the name of the series is: ts_500[1] "-------------------- sMAPE"
## [1] "Naive 0.84"
## [1] "sNaive 0.84"
## [1] "Naive2 0.84"
## [1] "SES 0.84"
## [1] "Holt 0.805"
## [1] "Damped 0.722"
## [1] "Theta 1.079"
## [1] "Comb 0.242"
## [1] "ets 0.805"
## [1] "arima 2.782"
## [1] "mlp 0.463"
## [1] "\n-------------------- MASE"
## [1] "Naive 0.798"
## [1] "sNaive 0.798"
## [1] "Naive2 0.798"
## [1] "SES 0.798"
## [1] "Holt 0.773"
## [1] "Damped 0.693"
## [1] "Theta 1.037"
## [1] "Comb 0.231"
## [1] "ets 0.773"
## [1] "arima 2.702"
## [1] "mlp 0.443"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.964"
## [1] "Damped 0.864"
## [1] "Theta 1.292"
## [1] "Comb 0.289"
## [1] "ets 0.964"
## [1] "arima 3.35"
## [1] "mlp 0.553"
##
## the name of the series is: ts_501[1] "-------------------- sMAPE"
## [1] "Naive 2.376"
## [1] "sNaive 2.376"
## [1] "Naive2 2.376"
## [1] "SES 2.418"
## [1] "Holt 3.308"
## [1] "Damped 2.08"
## [1] "Theta 2.12"
## [1] "Comb 2.115"
## [1] "ets 3.311"
## [1] "arima 3.301"
## [1] "mlp 2.098"
## [1] "\n-------------------- MASE"
## [1] "Naive 0.881"
## [1] "sNaive 0.881"
## [1] "Naive2 0.881"
## [1] "SES 0.897"
## [1] "Holt 1.245"
## [1] "Damped 0.771"
## [1] "Theta 0.786"
## [1] "Comb 0.785"
## [1] "ets 1.246"
## [1] "arima 1.242"
## [1] "mlp 0.779"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1.018"
## [1] "Holt 1.403"
## [1] "Damped 0.875"
## [1] "Theta 0.892"
## [1] "Comb 0.89"
## [1] "ets 1.404"
## [1] "arima 1.4"
## [1] "mlp 0.883"
##
## the name of the series is: ts_502[1] "-------------------- sMAPE"
## [1] "Naive 15.965"
## [1] "sNaive 15.965"
## [1] "Naive2 15.965"
## [1] "SES 15.965"
## [1] "Holt 6.563"
## [1] "Damped 9.478"
## [1] "Theta 11.21"
## [1] "Comb 10.58"
## [1] "ets 6.563"
## [1] "arima 6.607"
## [1] "mlp 13.519"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.855"
## [1] "sNaive 4.855"
## [1] "Naive2 4.855"
## [1] "SES 4.855"
## [1] "Holt 2.078"
## [1] "Damped 2.972"
## [1] "Theta 3.488"
## [1] "Comb 3.302"
## [1] "ets 2.078"
## [1] "arima 2.092"
## [1] "mlp 4.162"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.42"
## [1] "Damped 0.603"
## [1] "Theta 0.71"
## [1] "Comb 0.671"
## [1] "ets 0.42"
## [1] "arima 0.422"
## [1] "mlp 0.852"
##
## the name of the series is: ts_503[1] "-------------------- sMAPE"
## [1] "Naive 15.216"
## [1] "sNaive 15.216"
## [1] "Naive2 15.216"
## [1] "SES 15.216"
## [1] "Holt 7.444"
## [1] "Damped 9.728"
## [1] "Theta 11.331"
## [1] "Comb 10.734"
## [1] "ets 7.445"
## [1] "arima 7.314"
## [1] "mlp 11.702"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.812"
## [1] "sNaive 4.812"
## [1] "Naive2 4.812"
## [1] "SES 4.812"
## [1] "Holt 2.438"
## [1] "Damped 3.159"
## [1] "Theta 3.652"
## [1] "Comb 3.47"
## [1] "ets 2.439"
## [1] "arima 2.397"
## [1] "mlp 3.767"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.498"
## [1] "Damped 0.648"
## [1] "Theta 0.752"
## [1] "Comb 0.713"
## [1] "ets 0.498"
## [1] "arima 0.489"
## [1] "mlp 0.776"
##
## the name of the series is: ts_504[1] "-------------------- sMAPE"
## [1] "Naive 13.912"
## [1] "sNaive 13.912"
## [1] "Naive2 13.912"
## [1] "SES 13.913"
## [1] "Holt 21.76"
## [1] "Damped 14.396"
## [1] "Theta 12.039"
## [1] "Comb 9.834"
## [1] "ets 19.372"
## [1] "arima 6.758"
## [1] "mlp 35.797"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.532"
## [1] "sNaive 4.532"
## [1] "Naive2 4.532"
## [1] "SES 4.532"
## [1] "Holt 8.389"
## [1] "Damped 5.17"
## [1] "Theta 3.951"
## [1] "Comb 3.394"
## [1] "ets 7.33"
## [1] "arima 2.187"
## [1] "mlp 15.517"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 1.708"
## [1] "Damped 1.088"
## [1] "Theta 0.869"
## [1] "Comb 0.728"
## [1] "ets 1.505"
## [1] "arima 0.484"
## [1] "mlp 2.999"
##
## the name of the series is: ts_505[1] "-------------------- sMAPE"
## [1] "Naive 23.832"
## [1] "sNaive 23.832"
## [1] "Naive2 23.832"
## [1] "SES 23.831"
## [1] "Holt 15.007"
## [1] "Damped 7.051"
## [1] "Theta 19.461"
## [1] "Comb 9.855"
## [1] "ets 10.142"
## [1] "arima 14.7"
## [1] "mlp 19.163"
## [1] "\n-------------------- MASE"
## [1] "Naive 7.295"
## [1] "sNaive 7.295"
## [1] "Naive2 7.295"
## [1] "SES 7.295"
## [1] "Holt 4.786"
## [1] "Damped 2.551"
## [1] "Theta 6.083"
## [1] "Comb 3.225"
## [1] "ets 3.294"
## [1] "arima 4.694"
## [1] "mlp 6.005"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.643"
## [1] "Damped 0.323"
## [1] "Theta 0.825"
## [1] "Comb 0.428"
## [1] "ets 0.439"
## [1] "arima 0.63"
## [1] "mlp 0.814"
##
## the name of the series is: ts_506[1] "-------------------- sMAPE"
## [1] "Naive 24.053"
## [1] "sNaive 24.053"
## [1] "Naive2 24.053"
## [1] "SES 24.052"
## [1] "Holt 14.765"
## [1] "Damped 12.461"
## [1] "Theta 19.623"
## [1] "Comb 16.956"
## [1] "ets 13.729"
## [1] "arima 14.928"
## [1] "mlp 18.773"
## [1] "\n-------------------- MASE"
## [1] "Naive 7.177"
## [1] "sNaive 7.177"
## [1] "Naive2 7.177"
## [1] "SES 7.177"
## [1] "Holt 4.592"
## [1] "Damped 3.91"
## [1] "Theta 5.977"
## [1] "Comb 5.227"
## [1] "ets 4.287"
## [1] "arima 4.64"
## [1] "mlp 5.75"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.627"
## [1] "Damped 0.531"
## [1] "Theta 0.824"
## [1] "Comb 0.717"
## [1] "ets 0.584"
## [1] "arima 0.634"
## [1] "mlp 0.791"
##
## the name of the series is: ts_507[1] "-------------------- sMAPE"
## [1] "Naive 23.096"
## [1] "sNaive 23.096"
## [1] "Naive2 23.096"
## [1] "SES 23.095"
## [1] "Holt 14.18"
## [1] "Damped 12.519"
## [1] "Theta 18.861"
## [1] "Comb 16.48"
## [1] "ets 23.095"
## [1] "arima 14.231"
## [1] "mlp 18.574"
## [1] "\n-------------------- MASE"
## [1] "Naive 6.772"
## [1] "sNaive 6.772"
## [1] "Naive2 6.772"
## [1] "SES 6.772"
## [1] "Holt 4.333"
## [1] "Damped 3.853"
## [1] "Theta 5.644"
## [1] "Comb 4.986"
## [1] "ets 6.772"
## [1] "arima 4.348"
## [1] "mlp 5.571"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.627"
## [1] "Damped 0.556"
## [1] "Theta 0.825"
## [1] "Comb 0.725"
## [1] "ets 1"
## [1] "arima 0.629"
## [1] "mlp 0.813"
##
## the name of the series is: ts_508[1] "-------------------- sMAPE"
## [1] "Naive 77.788"
## [1] "sNaive 77.788"
## [1] "Naive2 77.788"
## [1] "SES 77.786"
## [1] "Holt 72.665"
## [1] "Damped 74.1"
## [1] "Theta 77.037"
## [1] "Comb 74.831"
## [1] "ets 77.786"
## [1] "arima 117.61"
## [1] "mlp 121.166"
## [1] "\n-------------------- MASE"
## [1] "Naive 14.459"
## [1] "sNaive 14.459"
## [1] "Naive2 14.459"
## [1] "SES 14.459"
## [1] "Holt 13.766"
## [1] "Damped 13.966"
## [1] "Theta 14.359"
## [1] "Comb 14.063"
## [1] "ets 14.459"
## [1] "arima 19.017"
## [1] "mlp 19.356"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.943"
## [1] "Damped 0.959"
## [1] "Theta 0.992"
## [1] "Comb 0.967"
## [1] "ets 1"
## [1] "arima 1.414"
## [1] "mlp 1.448"
##
## the name of the series is: ts_509[1] "-------------------- sMAPE"
## [1] "Naive 9.066"
## [1] "sNaive 9.066"
## [1] "Naive2 9.066"
## [1] "SES 9.066"
## [1] "Holt 1.984"
## [1] "Damped 1.435"
## [1] "Theta 6.971"
## [1] "Comb 1.731"
## [1] "ets 4.824"
## [1] "arima 2.209"
## [1] "mlp 1.011"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.129"
## [1] "sNaive 4.129"
## [1] "Naive2 4.129"
## [1] "SES 4.129"
## [1] "Holt 0.97"
## [1] "Damped 0.698"
## [1] "Theta 3.212"
## [1] "Comb 0.82"
## [1] "ets 2.246"
## [1] "arima 1.08"
## [1] "mlp 0.486"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.227"
## [1] "Damped 0.164"
## [1] "Theta 0.773"
## [1] "Comb 0.195"
## [1] "ets 0.538"
## [1] "arima 0.253"
## [1] "mlp 0.115"
A brief summary of the article - The article we chose, ‘Fast and accurate yearly time series forecasting with forecast combinations’ by David Shaub, discusses different aspects of the author’s submission to the M4 competition, which ranked third on the yearly time series and outperformed many other competitors. His submission was based on a simple ensemble model that combined three of the models provided by the ‘forecast’ package (Hyndman & Khandakar, 2008): auto.arima, thetaf, and tbats, with equal weights for each model. The article is divided into a few sections and mainly discusses the forecasting methodology and its advantages, also providing an analysis of forecast performance against other models as benchmarks. Shaub explains that the relatively high accuracy of the ensemble model (compared with each model individually, and with more complex methods in general) comes mainly from reducing the specification error incurred by any single model. He also emphasises the simplicity of the approach, which can be implemented easily by any business or user interested in forecasting a large number of time series automatically, without intervention, and without a global model that needs retraining.
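To make the combination concrete, a minimal sketch of the article’s equal-weight ensemble using the forecast package is shown below (our illustration, not the author’s submission code; ts_Y500 and the horizon fh = 6 are taken from the earlier chunks):
y <- ts_Y500                                 # any one of our yearly series
fc_arima <- forecast(auto.arima(y), h = fh)  # ARIMA forecast
fc_theta <- thetaf(y, h = fh)                # Theta forecast
fc_tbats <- forecast(tbats(y), h = fh)       # TBATS forecast
# simple average (equal weights) of the three point forecasts
fc_comb <- (fc_arima$mean + fc_theta$mean + fc_tbats$mean) / 3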
Aspect to be explored in the article - We decided to explore the specific forecasting method described in the article, based on its attached code file (see https://github.com/dashaub/m4): 1. We will implement Shaub’s methodology to forecast our 10 yearly time series, and plot the forecasting results together with the performance measures (RMSE, MASE, sMAPE). 2. We will briefly discuss the obtained results. 3. We will summarise the main advantages of the approach.
# Assumes the data.table, forecast, forecastHybrid, thief and tibble packages are loaded (earlier in the report)
args <- commandArgs(trailingOnly = TRUE)
inputs <- c("Yearly")
paths <- paste0("~/", inputs, "-train.csv")
combineForecasts <- function(forecastPI, forecastsPoint){
results <- forecastPI
for(ind in seq_along(results)){
extracted <- lapply(forecastsPoint, function(x) x[[ind]]$mean)
extracted <- extracted[!sapply(extracted, is.null)]
if(length(extracted) > 0){
combined <- ts(rowMeans(data.frame(extracted), na.rm = TRUE))
tsp(combined) <- tsp(forecastPI[[ind]]$mean)
results[[ind]]$mean <- combined
}
}
return(results)
}
# Extract the dataframe to a list of msts objects
extractList <- function(x, seriesName){
tab <- c( "Yearly" = 1)
seriesFrequency <- tab[seriesName]
tab <- list("Yearly" = c(1))
multSeason <- tab[[seriesName]]
cleaned <- as.numeric(x[!is.na(as.numeric(x))])
series <- msts(cleaned, seasonal.periods = multSeason, ts.frequency = seriesFrequency)
return(series)
}
thiefForecast <- function(x, horizon, model){
if(length(x) < 2 * frequency(x)){
return(NULL)
}
return(thief(y = x, h = horizon, usemodel = model))
}
writeResults <- function(forecastList, seriesName, floorZero = TRUE){
components <- c("mean", "lower", "upper")
for(component in components){
baseDir <- paste0("~/", component)
dir.create(file.path(baseDir), showWarnings = FALSE)
forecasts <- data.frame(t(data.frame(lapply(forecastList, FUN = function(x) x[[component]]))))
forecasts <- round(forecasts, 4)
if(floorZero){
forecasts[forecasts < 0] <- 0
}
rownames(forecasts) <- names(forecastList)
filename <- paste0(baseDir, "/", seriesName, "-", component, ".csv")
write.table(x = forecasts, file = filename, quote = FALSE, sep = ",", col.names = FALSE)
}
}
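# Note: combineForecasts(), thiefForecast() and writeResults() above come from the original
# M4 submission code and are not called in the forecasting loop below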
inputPath <- "Yearly-train.csv"
dat <- fread(inputPath, header = TRUE, data.table = FALSE)
range <- c("Y500","Y501","Y502","Y503","Y504","Y505","Y506","Y507","Y508","Y509")
for (row in range) {
# Train Set
inputPath <- "Yearly-train.csv"
train_set <- fread(inputPath, header = TRUE, data.table = FALSE)
train_set <- train_set[train_set$V1 %in% row,]
seriesNames <- train_set[, 1]
train_set <- train_set[, -1]
# Validation Set
inputPath <- "Yearly-test.csv"
validation_set <- fread(inputPath, header = TRUE, data.table = FALSE)
validation_set <- validation_set[validation_set$V1 %in% row,]
seriesNames <- validation_set[, 1]
validation_set <- validation_set[, -1]
validation_set <- as_tibble(validation_set)
# Extract series names and remove from dataframe
currentSeries <- "Yearly"
h <- 6
# Transform to list
train_set <- apply(train_set, MARGIN = 1, FUN = function(x) extractList(x, currentSeries))
names(train_set) <- seriesNames
models <- "aft"
model <- hybridModel(train_set, models = models, verbose = FALSE)
forecasts <-forecast(model, h = h, level = 95,PI.combination = "mean")
#Model Errors
accuracy_model <- accuracy(forecasts, t(validation_set))
RMSE <- round(accuracy_model[2,2],2)
sMAPE <- round(mean(smape_cal(validation_set, forecasts$mean)),2)
MASE <- round(mean(mase_cal(train_set, validation_set,forecasts$mean)),2)
# Plot
plot(train_set, main = c(paste0("Series ",row)), sub =c(paste0("RMSE ",RMSE," MASE ",MASE, " SMAPE ",sMAPE,'%')), xlab="Year", ylab="Number", ylim = range(min(train_set),max(forecasts$mean)),xlim = range(0,length(forecasts$mean)+length(train_set)))
#lines(model_pred$mean, lwd = 2, lty=2, col = "green")
lines(forecasts$mean, lwd = 2, lty=2, col = "red")
lines(forecasts$fitted, lwd = 2, lty=2, col = "green")
legend("bottomright", c("train real", "Test Prediction", "Train Prediction"), lty=c(1,2,2), lwd=c(1,2,2),col = c("black","red","green"), bty = "n",cex = 0.8)
}
Conclusions: After modelling the yearly series, printing the results and inspecting the MASE and sMAPE, we can see that:
* The only series in which the method outperformed all 11 models from Q2 is Y504.
* In Y506 and Y507, the Damped model was the only one that outperformed the method.
* In Y508 and Y509 it ranked 8th, which is not great.
* In the remaining series it ranked 3rd-4th.
In general, the hybrid model of question 3 performed better than most of the models from question 2, even though the gaps in MASE and sMAPE were minor. In summary, the ‘hybridModel’ method can protect against model-selection risk and prevent some large forecasting errors. It is easy to implement and automate, and it provides flexibility in model selection and model weighting according to the data and the business case at hand. That said, we need to keep in mind that user-supplied weights may cause overfitting, and for that reason cross-validation is recommended to fine-tune the model parameters.
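For example, such weight tuning can be done with time-series cross-validation via forecast::tsCV (a rough sketch of our own, not from the article; the two-model combination and the weight grid below are purely illustrative):
y <- ts_Y500   # any single yearly series from above
# forecast function returning a weighted ARIMA/TBATS combination for a given weight w
weighted_fc <- function(y, h, w){
  fc1 <- forecast(auto.arima(y), h = h)
  fc2 <- forecast(tbats(y), h = h)
  fc1$mean <- w * fc1$mean + (1 - w) * fc2$mean
  fc1
}
# one-step-ahead cross-validated RMSE for a few candidate weights
cv_rmse <- sapply(c(0.25, 0.5, 0.75), function(w){
  e <- tsCV(y, function(y, h) weighted_fc(y, h, w), h = 1)
  sqrt(mean(e^2, na.rm = TRUE))
})
cv_rmse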