printing the basic features of each series and producing a plot:
# Our referenced series from M4 competition: Y500-Y509
count <- 500
for (series in c("Y500","Y501","Y502","Y503","Y504","Y505","Y506","Y507","Y508","Y509")){
assign(paste("ts", series, sep = "_"), ts(c(M4[[count]]$x, M4[[count]]$xx),start = start(M4[[count]]$x), frequency = frequency(M4[[count]]$x)))
cat("\nthe name of the series is:", paste("ts", series, sep = "_"))
cat("\nthe length of the series is:" ,length(get(paste("ts", series, sep = "_"))))
cat("\nthe frequancy of the series is:" ,frequency(get(paste("ts", series, sep = "_")))," (yearly)")
plot(get(paste("ts", series, sep = "_")),main=paste("Plotting Time Series", series, sep = " "), ylab = "Value", type = "l")
count <- count + 1
cat("\n")
}
##
## the name of the series is: ts_Y500
## the length of the series is: 55
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y501
## the length of the series is: 49
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y502
## the length of the series is: 35
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y503
## the length of the series is: 35
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y504
## the length of the series is: 55
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y505
## the length of the series is: 40
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y506
## the length of the series is: 40
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y507
## the length of the series is: 40
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y508
## the length of the series is: 35
## the frequency of the series is: 1 (yearly)
##
##
## the name of the series is: ts_Y509
## the length of the series is: 50
## the frequency of the series is: 1 (yearly)
Produce a forecast for each of the series (Y500-Y509), based on 11 of the benchmark methods from the M4 competition:
# Models function creation
fh <- 6 #The forecasting horizon examined
frq <- 1 #The frequency of the data
smape_cal <- function(outsample, forecasts){
#Used to estimate sMAPE
outsample <- as.numeric(outsample) ; forecasts<-as.numeric(forecasts)
smape <- (abs(outsample-forecasts)*200)/(abs(outsample)+abs(forecasts))
return(smape)
}
mase_cal <- function(insample, outsample, forecasts){
#Used to estimate MASE
frq <- frequency(insample)
forecastsNaiveSD <- rep(NA,frq)
for (j in (frq+1):length(insample))
{
forecastsNaiveSD <- c(forecastsNaiveSD, insample[j-frq])
}
masep<-mean(abs(insample-forecastsNaiveSD),na.rm = TRUE)
outsample <- as.numeric(outsample)
forecasts <- as.numeric(forecasts)
mase <- (abs(outsample-forecasts))/masep
return(mase)
}
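# For reference, the two accuracy measures computed above are:
#   sMAPE_t = 200 * |y_t - f_t| / (|y_t| + |f_t|)
#   MASE_t  = |y_t - f_t| / MAE_naive,
# where MAE_naive is the in-sample mean absolute error of the lag-frq (seasonal) naive forecast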
naive_seasonal <- function(input, fh){
#Used to estimate Seasonal Naive
frcy <- frequency(input)
frcst <- naive(input, h=fh)$mean
if (frcy>1)
{
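# repeat the last full seasonal cycle over the horizon; adding and subtracting frcst
# keeps the time-series attributes (start, frequency) of the naive forecast object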
frcst <- head(rep(as.numeric(tail(input,frcy)), fh), fh) + frcst - frcst
}
return(frcst)
}
Theta.classic <- function(input, fh){
#Used to estimate Theta classic
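# Theta line (0) is a linear trend fitted to the data; theta line (2) is an SES fit of
# 2*input - trend. The classic Theta forecast is the 50/50 average of the two lines.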
#Set parameters
wses <- wlrl<-0.5 ; theta <- 2
#Estimate theta line (0)
observations <- length(input)
xt <- c(1:observations)
xf <- c((observations+1):(observations+fh))
train <- data.frame(input=input, xt=xt)
test <- data.frame(xt = xf)
estimate <- lm(input ~ poly(xt, 1, raw=TRUE))
thetaline0In <- as.numeric(predict(estimate))
thetaline0Out <- as.numeric(predict(estimate,test))
#Estimate theta line (2)
thetalineT <- theta*input+(1-theta)*thetaline0In
sesmodel <- ses(thetalineT, h=fh)
thetaline2In <- sesmodel$fitted
thetaline2Out <- sesmodel$mean
#Theta forecasts
forecastsIn <- (thetaline2In*wses)+(thetaline0In*wlrl)
forecastsOut <- (thetaline2Out*wses)+(thetaline0Out*wlrl)
#Zero forecasts become positive
for (i in 1:length(forecastsOut))
{
if (forecastsOut[i]<0)
{
forecastsOut[i]<-0
}
}
output=list(fitted = forecastsIn, mean = forecastsOut,
fitted0 = thetaline0In, mean0 = thetaline0Out,
fitted2 = thetaline2In, mean2 = thetaline2Out)
return(output)
}
SeasonalityTest <- function(input, ppy){
#Used to determine whether a time series is seasonal
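# tcrit = 1.645 is the 95% one-sided (90% two-sided) standard normal quantile, used to
# build the significance bound for the autocorrelation at lag ppy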
tcrit <- 1.645
if (length(input)<3*ppy)
{
test_seasonal <- FALSE
}
else
{
xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1]
clim <- tcrit/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2)))
test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] )
if (is.na(test_seasonal)==TRUE)
{
test_seasonal <- FALSE
}
}
return(test_seasonal)
}
#Models Benchmarks Function
Benchmarks <- function(input, h){
#Used to estimate the statistical benchmarks of the M4 competition
#Estimate seasonally adjusted time series
ppy <- frequency(input)
ST <- F
if (ppy>1)
{
ST <- SeasonalityTest(input,ppy)
}
if (ST==T)
{
Dec <- decompose(input,type="multiplicative")
des_input <- input/Dec$seasonal
SIout <- head(rep(Dec$seasonal[(length(Dec$seasonal)-ppy+1):length(Dec$seasonal)], h), h)
}
else
{
des_input <- input
SIout <- rep(1, h)
}
naive <- naive(input, h=h)$mean #Naive
naive_s <- naive_seasonal(input, fh=h) #Seasonal Naive
naive2 <- naive(des_input, h=h)$mean*SIout #Naive2
ses <- ses(des_input, h=h)$mean*SIout #Ses
holt <- holt(des_input, h=h, damped=F)$mean*SIout #Holt
damp <- holt(des_input, h=h, damped=T)$mean*SIout #Damped
theta <- Theta.classic(input=des_input, fh=h)$mean*SIout #Theta
comb <- (ses+holt+damp)/3 #Comb
ets <- forecast(ets(des_input), h=h, level =0)$mean*SIout #ets
arima <- forecast(auto.arima(des_input), h=h, level =0)$mean*SIout #arima
mlp <- forecast(nnetar(des_input), h=h, level =0)$mean*SIout #mlp
return(list(naive,naive_s,naive2,ses,holt,damp,theta,comb,ets,arima,mlp))
}
Names_benchmarks <- c("Naive", "sNaive", "Naive2", "SES", "Holt", "Damped", "Theta", "Comb", "ets","arima","mlp")
model_eval_func <- function(ts_count)
{
cat("\nthe name of the series is:", paste("ts", ts_count, sep = "_"))
Names_benchmarks <- c("Naive", "sNaive", "Naive2", "SES", "Holt", "Damped", "Theta", "Comb", "ets","arima","mlp")
data_train <- NULL
data_test <- NULL
data_all <- assign(paste("ts", ts_count, sep = "_"), ts(c(M4[[ts_count]]$x, M4[[ts_count]]$xx),start = start(M4[[ts_count]]$x), frequency = frequency(M4[[ts_count]]$x)))
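# hold out the last fh observations of each series as the test set; the rest is the training set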
data_train[length(data_train)+1] <- list(ts(head(data_all,length(data_all)-fh),frequency = frq))
data_test[length(data_test)+1] <- list(tail(data_all,fh))
SMAPE_df <- data.frame(Names_benchmarks)
MASE_df <- data.frame(Names_benchmarks)
OWA_df <- data.frame(Names_benchmarks)
Total_smape <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
Total_mase <- array(NA,dim = c(length(Names_benchmarks), fh, length(data_train)))
#Methods, Horizon, time-series
for (i in 1:length(data_train)){
insample <- data_train[[i]]
outsample <- data_test[[i]]
forecasts <- Benchmarks(input=insample, h=fh)
#SMAPE
for (j in 1:length(Names_benchmarks)){
Total_smape[j,,i] <- smape_cal(outsample, forecasts[[j]])
}
#MASE
for (j in 1:length(Names_benchmarks)){
Total_mase[j,,i] <- mase_cal(insample, outsample, forecasts[[j]])
}
}
print("-------------------- sMAPE")
for (i in 1:length(Names_benchmarks)){
SMAPE <- round(mean(Total_smape[i,,]), 3)
SMAPE_df[i,2] <- SMAPE
print(paste(Names_benchmarks[i], SMAPE))
}
print("\n-------------------- MASE")
for (i in 1:length(Names_benchmarks)){
MASE <- round(mean(Total_mase[i,,]), 3)
MASE_df[i,2] <- MASE
print(paste(Names_benchmarks[i], MASE))
}
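# OWA (overall weighted average) is measured relative to the Naive2 benchmark
# (Names_benchmarks[3], hence index 3 below):
#   OWA = 0.5 * (MASE / MASE_Naive2 + sMAPE / sMAPE_Naive2)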
print("-------------------- OWA")
for (i in 1:length(Names_benchmarks)){
OWA <- round(((mean(Total_mase[i,,])/mean(Total_mase[3,,]))+(mean(Total_smape[i,,])/mean(Total_smape[3,,])))/2, 3)
OWA_df[i,2] <- OWA
print(paste(Names_benchmarks[i],OWA))
}
grouped_df <- data.frame(MASE_df$V2,SMAPE_df$V2,OWA_df$V2)
barplot(t(as.matrix(grouped_df)), names.arg = Names_benchmarks , cex.names = 0.6 ,beside=TRUE, main=paste("Visual Comparison Y", ts_count, " Series", sep = " "), legend = c("MASE", "SMAPE", "OWA"))
return()
}
for (series in 500:509){
x <- model_eval_func(series)
}
##
## the name of the series is: ts_500[1] "-------------------- sMAPE"
## [1] "Naive 0.84"
## [1] "sNaive 0.84"
## [1] "Naive2 0.84"
## [1] "SES 0.84"
## [1] "Holt 0.805"
## [1] "Damped 0.722"
## [1] "Theta 1.079"
## [1] "Comb 0.242"
## [1] "ets 0.805"
## [1] "arima 2.782"
## [1] "mlp 0.463"
## [1] "\n-------------------- MASE"
## [1] "Naive 0.798"
## [1] "sNaive 0.798"
## [1] "Naive2 0.798"
## [1] "SES 0.798"
## [1] "Holt 0.773"
## [1] "Damped 0.693"
## [1] "Theta 1.037"
## [1] "Comb 0.231"
## [1] "ets 0.773"
## [1] "arima 2.702"
## [1] "mlp 0.443"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.964"
## [1] "Damped 0.864"
## [1] "Theta 1.292"
## [1] "Comb 0.289"
## [1] "ets 0.964"
## [1] "arima 3.35"
## [1] "mlp 0.553"
##
## the name of the series is: ts_501[1] "-------------------- sMAPE"
## [1] "Naive 2.376"
## [1] "sNaive 2.376"
## [1] "Naive2 2.376"
## [1] "SES 2.418"
## [1] "Holt 3.308"
## [1] "Damped 2.08"
## [1] "Theta 2.12"
## [1] "Comb 2.115"
## [1] "ets 3.311"
## [1] "arima 3.301"
## [1] "mlp 2.098"
## [1] "\n-------------------- MASE"
## [1] "Naive 0.881"
## [1] "sNaive 0.881"
## [1] "Naive2 0.881"
## [1] "SES 0.897"
## [1] "Holt 1.245"
## [1] "Damped 0.771"
## [1] "Theta 0.786"
## [1] "Comb 0.785"
## [1] "ets 1.246"
## [1] "arima 1.242"
## [1] "mlp 0.779"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1.018"
## [1] "Holt 1.403"
## [1] "Damped 0.875"
## [1] "Theta 0.892"
## [1] "Comb 0.89"
## [1] "ets 1.404"
## [1] "arima 1.4"
## [1] "mlp 0.883"
##
## the name of the series is: ts_502[1] "-------------------- sMAPE"
## [1] "Naive 15.965"
## [1] "sNaive 15.965"
## [1] "Naive2 15.965"
## [1] "SES 15.965"
## [1] "Holt 6.563"
## [1] "Damped 9.478"
## [1] "Theta 11.21"
## [1] "Comb 10.58"
## [1] "ets 6.563"
## [1] "arima 6.607"
## [1] "mlp 13.519"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.855"
## [1] "sNaive 4.855"
## [1] "Naive2 4.855"
## [1] "SES 4.855"
## [1] "Holt 2.078"
## [1] "Damped 2.972"
## [1] "Theta 3.488"
## [1] "Comb 3.302"
## [1] "ets 2.078"
## [1] "arima 2.092"
## [1] "mlp 4.162"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.42"
## [1] "Damped 0.603"
## [1] "Theta 0.71"
## [1] "Comb 0.671"
## [1] "ets 0.42"
## [1] "arima 0.422"
## [1] "mlp 0.852"
##
## the name of the series is: ts_503[1] "-------------------- sMAPE"
## [1] "Naive 15.216"
## [1] "sNaive 15.216"
## [1] "Naive2 15.216"
## [1] "SES 15.216"
## [1] "Holt 7.444"
## [1] "Damped 9.728"
## [1] "Theta 11.331"
## [1] "Comb 10.734"
## [1] "ets 7.445"
## [1] "arima 7.314"
## [1] "mlp 11.702"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.812"
## [1] "sNaive 4.812"
## [1] "Naive2 4.812"
## [1] "SES 4.812"
## [1] "Holt 2.438"
## [1] "Damped 3.159"
## [1] "Theta 3.652"
## [1] "Comb 3.47"
## [1] "ets 2.439"
## [1] "arima 2.397"
## [1] "mlp 3.767"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.498"
## [1] "Damped 0.648"
## [1] "Theta 0.752"
## [1] "Comb 0.713"
## [1] "ets 0.498"
## [1] "arima 0.489"
## [1] "mlp 0.776"
##
## the name of the series is: ts_504[1] "-------------------- sMAPE"
## [1] "Naive 13.912"
## [1] "sNaive 13.912"
## [1] "Naive2 13.912"
## [1] "SES 13.913"
## [1] "Holt 21.76"
## [1] "Damped 14.396"
## [1] "Theta 12.039"
## [1] "Comb 9.834"
## [1] "ets 19.372"
## [1] "arima 6.758"
## [1] "mlp 35.797"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.532"
## [1] "sNaive 4.532"
## [1] "Naive2 4.532"
## [1] "SES 4.532"
## [1] "Holt 8.389"
## [1] "Damped 5.17"
## [1] "Theta 3.951"
## [1] "Comb 3.394"
## [1] "ets 7.33"
## [1] "arima 2.187"
## [1] "mlp 15.517"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 1.708"
## [1] "Damped 1.088"
## [1] "Theta 0.869"
## [1] "Comb 0.728"
## [1] "ets 1.505"
## [1] "arima 0.484"
## [1] "mlp 2.999"
##
## the name of the series is: ts_505[1] "-------------------- sMAPE"
## [1] "Naive 23.832"
## [1] "sNaive 23.832"
## [1] "Naive2 23.832"
## [1] "SES 23.831"
## [1] "Holt 15.007"
## [1] "Damped 7.051"
## [1] "Theta 19.461"
## [1] "Comb 9.855"
## [1] "ets 10.142"
## [1] "arima 14.7"
## [1] "mlp 19.163"
## [1] "\n-------------------- MASE"
## [1] "Naive 7.295"
## [1] "sNaive 7.295"
## [1] "Naive2 7.295"
## [1] "SES 7.295"
## [1] "Holt 4.786"
## [1] "Damped 2.551"
## [1] "Theta 6.083"
## [1] "Comb 3.225"
## [1] "ets 3.294"
## [1] "arima 4.694"
## [1] "mlp 6.005"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.643"
## [1] "Damped 0.323"
## [1] "Theta 0.825"
## [1] "Comb 0.428"
## [1] "ets 0.439"
## [1] "arima 0.63"
## [1] "mlp 0.814"
##
## the name of the series is: ts_506[1] "-------------------- sMAPE"
## [1] "Naive 24.053"
## [1] "sNaive 24.053"
## [1] "Naive2 24.053"
## [1] "SES 24.052"
## [1] "Holt 14.765"
## [1] "Damped 12.461"
## [1] "Theta 19.623"
## [1] "Comb 16.956"
## [1] "ets 13.729"
## [1] "arima 14.928"
## [1] "mlp 18.773"
## [1] "\n-------------------- MASE"
## [1] "Naive 7.177"
## [1] "sNaive 7.177"
## [1] "Naive2 7.177"
## [1] "SES 7.177"
## [1] "Holt 4.592"
## [1] "Damped 3.91"
## [1] "Theta 5.977"
## [1] "Comb 5.227"
## [1] "ets 4.287"
## [1] "arima 4.64"
## [1] "mlp 5.75"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.627"
## [1] "Damped 0.531"
## [1] "Theta 0.824"
## [1] "Comb 0.717"
## [1] "ets 0.584"
## [1] "arima 0.634"
## [1] "mlp 0.791"
##
## the name of the series is: ts_507[1] "-------------------- sMAPE"
## [1] "Naive 23.096"
## [1] "sNaive 23.096"
## [1] "Naive2 23.096"
## [1] "SES 23.095"
## [1] "Holt 14.18"
## [1] "Damped 12.519"
## [1] "Theta 18.861"
## [1] "Comb 16.48"
## [1] "ets 23.095"
## [1] "arima 14.231"
## [1] "mlp 18.574"
## [1] "\n-------------------- MASE"
## [1] "Naive 6.772"
## [1] "sNaive 6.772"
## [1] "Naive2 6.772"
## [1] "SES 6.772"
## [1] "Holt 4.333"
## [1] "Damped 3.853"
## [1] "Theta 5.644"
## [1] "Comb 4.986"
## [1] "ets 6.772"
## [1] "arima 4.348"
## [1] "mlp 5.571"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.627"
## [1] "Damped 0.556"
## [1] "Theta 0.825"
## [1] "Comb 0.725"
## [1] "ets 1"
## [1] "arima 0.629"
## [1] "mlp 0.813"
##
## the name of the series is: ts_508[1] "-------------------- sMAPE"
## [1] "Naive 77.788"
## [1] "sNaive 77.788"
## [1] "Naive2 77.788"
## [1] "SES 77.786"
## [1] "Holt 72.665"
## [1] "Damped 74.1"
## [1] "Theta 77.037"
## [1] "Comb 74.831"
## [1] "ets 77.786"
## [1] "arima 117.61"
## [1] "mlp 121.166"
## [1] "\n-------------------- MASE"
## [1] "Naive 14.459"
## [1] "sNaive 14.459"
## [1] "Naive2 14.459"
## [1] "SES 14.459"
## [1] "Holt 13.766"
## [1] "Damped 13.966"
## [1] "Theta 14.359"
## [1] "Comb 14.063"
## [1] "ets 14.459"
## [1] "arima 19.017"
## [1] "mlp 19.356"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.943"
## [1] "Damped 0.959"
## [1] "Theta 0.992"
## [1] "Comb 0.967"
## [1] "ets 1"
## [1] "arima 1.414"
## [1] "mlp 1.448"
##
## the name of the series is: ts_509[1] "-------------------- sMAPE"
## [1] "Naive 9.066"
## [1] "sNaive 9.066"
## [1] "Naive2 9.066"
## [1] "SES 9.066"
## [1] "Holt 1.984"
## [1] "Damped 1.435"
## [1] "Theta 6.971"
## [1] "Comb 1.731"
## [1] "ets 4.824"
## [1] "arima 2.209"
## [1] "mlp 1.011"
## [1] "\n-------------------- MASE"
## [1] "Naive 4.129"
## [1] "sNaive 4.129"
## [1] "Naive2 4.129"
## [1] "SES 4.129"
## [1] "Holt 0.97"
## [1] "Damped 0.698"
## [1] "Theta 3.212"
## [1] "Comb 0.82"
## [1] "ets 2.246"
## [1] "arima 1.08"
## [1] "mlp 0.486"
## [1] "-------------------- OWA"
## [1] "Naive 1"
## [1] "sNaive 1"
## [1] "Naive2 1"
## [1] "SES 1"
## [1] "Holt 0.227"
## [1] "Damped 0.164"
## [1] "Theta 0.773"
## [1] "Comb 0.195"
## [1] "ets 0.538"
## [1] "arima 0.253"
## [1] "mlp 0.115"
A brief summary of the article - The article we chose, ‘Fast and accurate yearly time series forecasting with forecast combinations’ by David Shaub, discusses different aspects of the author’s submission to the M4 competition, which ranked third on the yearly time series and outperformed many other competitors. His submission was based on a simple ensemble model that combined three of the models provided by the ‘forecast’ package (Hyndman & Khandakar, 2008): auto.arima, thetaf, and tbats, with equal weights for each model. The article is divided into a few sections and mainly discusses the forecasting methodology and its advantages, also providing an analysis of forecast performance against other models as benchmarks. Shaub explains that the relatively high accuracy of the ensemble model (compared with each model individually, and with more complex methods in general) comes mainly from reducing the specification error incurred by any single model. He also emphasises the simplicity of the approach, which can be implemented easily by any business or user interested in forecasting a large number of time series automatically, without intervention, and without a global model that needs retraining.
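To make the combination concrete, a minimal sketch of the article’s equal-weight ensemble using the forecast package is shown below (our illustration, not the author’s submission code; ts_Y500 and the horizon fh = 6 are taken from the earlier chunks):
y <- ts_Y500                                 # any one of our yearly series
fc_arima <- forecast(auto.arima(y), h = fh)  # ARIMA forecast
fc_theta <- thetaf(y, h = fh)                # Theta forecast
fc_tbats <- forecast(tbats(y), h = fh)       # TBATS forecast
# simple average (equal weights) of the three point forecasts
fc_comb <- (fc_arima$mean + fc_theta$mean + fc_tbats$mean) / 3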
Aspect to be explored in the article - We decided to explore the specific forecasting method described in the article, based on its attached code file (see https://github.com/dashaub/m4): 1. We will implement Shaub’s methodology to forecast our 10 yearly time series, and plot the forecasting results together with the performance measures (RMSE, MASE, sMAPE). 2. We will briefly discuss the obtained results. 3. We will summarise the main advantages of the approach.
# Assumes the data.table, forecast, forecastHybrid, thief and tibble packages are loaded (earlier in the report)
args <- commandArgs(trailingOnly = TRUE)
inputs <- c("Yearly")
paths <- paste0("~/", inputs, "-train.csv")
combineForecasts <- function(forecastPI, forecastsPoint){
results <- forecastPI
for(ind in seq_along(results)){
extracted <- lapply(forecastsPoint, function(x) x[[ind]]$mean)
extracted <- extracted[!sapply(extracted, is.null)]
if(length(extracted) > 0){
combined <- ts(rowMeans(data.frame(extracted), na.rm = TRUE))
tsp(combined) <- tsp(forecastPI[[ind]]$mean)
results[[ind]]$mean <- combined
}
}
return(results)
}
# Extract the dataframe to a list of msts objects
extractList <- function(x, seriesName){
tab <- c( "Yearly" = 1)
seriesFrequency <- tab[seriesName]
tab <- list("Yearly" = c(1))
multSeason <- tab[[seriesName]]
cleaned <- as.numeric(x[!is.na(as.numeric(x))])
series <- msts(cleaned, seasonal.periods = multSeason, ts.frequency = seriesFrequency)
return(series)
}
thiefForecast <- function(x, horizon, model){
if(length(x) < 2 * frequency(x)){
return(NULL)
}
return(thief(y = x, h = horizon, usemodel = model))
}
writeResults <- function(forecastList, seriesName, floorZero = TRUE){
components <- c("mean", "lower", "upper")
for(component in components){
baseDir <- paste0("~/", component)
dir.create(file.path(baseDir), showWarnings = FALSE)
forecasts <- data.frame(t(data.frame(lapply(forecastList, FUN = function(x) x[[component]]))))
forecasts <- round(forecasts, 4)
if(floorZero){
forecasts[forecasts < 0] <- 0
}
rownames(forecasts) <- names(forecastList)
filename <- paste0(baseDir, "/", seriesName, "-", component, ".csv")
write.table(x = forecasts, file = filename, quote = FALSE, sep = ",", col.names = FALSE)
}
}
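# Note: combineForecasts(), thiefForecast() and writeResults() above come from the original
# M4 submission code and are not called in the forecasting loop below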
inputPath <- "Yearly-train.csv"
dat <- fread(inputPath, header = TRUE, data.table = FALSE)
range <- c("Y500","Y501","Y502","Y503","Y504","Y505","Y506","Y507","Y508","Y509")
for (row in range) {
# Train Set
inputPath <- "Yearly-train.csv"
train_set <- fread(inputPath, header = TRUE, data.table = FALSE)
train_set <- train_set[train_set$V1 %in% row,]
seriesNames <- train_set[, 1]
train_set <- train_set[, -1]
# Validation Set
inputPath <- "Yearly-test.csv"
validation_set <- fread(inputPath, header = TRUE, data.table = FALSE)
validation_set <- validation_set[validation_set$V1 %in% row,]
seriesNames <- validation_set[, 1]
validation_set <- validation_set[, -1]
validation_set <- as_tibble(validation_set)
# Extract series names and remove from dataframe
currentSeries <- "Yearly"
h <- 6
# Transform to list
train_set <- apply(train_set, MARGIN = 1, FUN = function(x) extractList(x, currentSeries))
names(train_set) <- seriesNames
models <- "aft"
model <- hybridModel(train_set, models = models, verbose = FALSE)
forecasts <-forecast(model, h = h, level = 95,PI.combination = "mean")
#Model Errors
accuracy_model <- accuracy(forecasts, t(validation_set))
RMSE <- round(accuracy_model[2,2],2)
sMAPE <- round(mean(smape_cal(validation_set, forecasts$mean)),2)
MASE <- round(mean(mase_cal(train_set, validation_set,forecasts$mean)),2)
# Plot
plot(train_set, main = c(paste0("Series ",row)), sub =c(paste0("RMSE ",RMSE," MASE ",MASE, " SMAPE ",sMAPE,'%')), xlab="Year", ylab="Number", ylim = range(min(train_set),max(forecasts$mean)),xlim = range(0,length(forecasts$mean)+length(train_set)))
#lines(model_pred$mean, lwd = 2, lty=2, col = "green")
lines(forecasts$mean, lwd = 2, lty=2, col = "red")
lines(forecasts$fitted, lwd = 2, lty=2, col = "green")
legend("bottomright", c("train real", "Test Prediction", "Train Prediction"), lty=c(1,2,2), lwd=c(1,2,2),col = c("black","red","green"), bty = "n",cex = 0.8)
}
Conclusions: After modelling the yearly series, printing the results and inspecting the MASE and sMAPE, we can see that:
* The only series in which the method outperformed all 11 models from Q2 is Y504.
* In Y506 and Y507, the Damped model was the only one that outperformed the method.
* In Y508 and Y509 it ranked 8th, which is not great.
* In the remaining series it ranked 3rd-4th.
In general, the hybrid model of question 3 performed better than most of the models from question 2, even though the gaps in MASE and sMAPE were minor. In summary, the ‘hybridModel’ method can protect against model-selection risk and prevent some large forecasting errors. It is easy to implement and automate, and it provides flexibility in model selection and model weighting according to the data and the business case at hand. That said, we need to keep in mind that user-supplied weights may cause overfitting, and for that reason cross-validation is recommended to fine-tune the model parameters.
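For example, such weight tuning can be done with time-series cross-validation via forecast::tsCV (a rough sketch of our own, not from the article; the two-model combination and the weight grid below are purely illustrative):
y <- ts_Y500   # any single yearly series from above
# forecast function returning a weighted ARIMA/TBATS combination for a given weight w
weighted_fc <- function(y, h, w){
  fc1 <- forecast(auto.arima(y), h = h)
  fc2 <- forecast(tbats(y), h = h)
  fc1$mean <- w * fc1$mean + (1 - w) * fc2$mean
  fc1
}
# one-step-ahead cross-validated RMSE for a few candidate weights
cv_rmse <- sapply(c(0.25, 0.5, 0.75), function(w){
  e <- tsCV(y, function(y, h) weighted_fc(y, h, w), h = 1)
  sqrt(mean(e^2, na.rm = TRUE))
})
cv_rmse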