Develop algorithm TBATS Model for time series analysis in R programming

That’s an algorithm developed

By

Makarovskikh Tatyana Anatolyevna“Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

south ural state university, Russian federation

# Imports
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages ------------------------------------------------------------------------------ fpp2 2.4 --
## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'forecast' was built under R version 4.0.3
## 
library(forecast)
library(ggplot2)
dataset_path<-"F:/Phd/Private Project/train_1-slice0.csv" #path of your time series data in your pc 
y_lab<- "Number Vistors Apple page in wikipedia"   # input name of Time series data
Actual_date_interval <- c("2015/07/01","2016/12/31") # Write your actual interval date of your time series
Forecast_date_interval <- c("2017/01/01","2017/01/7") # Write your Forecasting  interval of the date  your time series
validation_data_days <-7 # how many rows from actual data that you want to test the TBATS model and calculate MAPE for that perioud 
frequency<-"days" # Type of your data ("years","months","weeks","days") that should you choose type of your data 
# Data Preparation
original_data<-read.csv(dataset_path) 
# Apple Page : original_data$Apple_II_zh.wikipedia.org_all.access_spider
# Facebook Page : original_data$Facebook_zh.wikipedia.org_all.access_spider
# Android Page : original_data$Android_zh.wikipedia.org_all.access_spider
# Energy Page : original_data$Energy_zh.wikipedia.org_all.access_spider
input_data<-cumsum(original_data$Apple_II_zh.wikipedia.org_all.access_spider) # convert data to cumulative
rows <- NROW(input_data)
training_data<-input_data[1:(rows-validation_data_days)]
testing_data<-input_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) 
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)
# Data Modeling
data_series<-ts(training_data) 
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.08187205 5.010726 3.299648 0.8127762 1.243233 0.3717333
##                     ACF1
## Training set 0.002301955
# Print Model Parameters
model_TBATS
## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 1.039657
##   Beta: 0.01173392
##   Damping Parameter: 1
##   Gamma-1 Values: -0.003746758
##   Gamma-2 Values: 0.0009470691
## 
## Seed States:
##              [,1]
## [1,] -22.32833575
## [2,]   8.19750852
## [3,]   0.31662003
## [4,]  -0.06067068
## [5,]  -0.31820908
## [6,]   0.25838291
## 
## Sigma: 5.010726
## AIC: 5189.507
plot(model_TBATS,main =paste(y_lab),xlab = paste ("Time in  ", frequency , sep=" "),ylab = y_lab)

# Testing Data Evaluation 
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE % For  7 days by using TBATS Model for  ==>  Number Vistors Apple page in wikipedia"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Number Vistors Apple page in wikipedia"
paste(MAPE_Mean_All,"%")
## [1] "0.34 % MAPE  7 days Number Vistors Apple page in wikipedia %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Number Vistors Apple page in wikipedia"
data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)
##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2016-12-25             воскресенье        4826          4823.971
## 2 2016-12-26             понедельник        4834          4833.684
## 3 2016-12-27                 вторник        4858          4842.021
## 4 2016-12-28                   среда        4868          4850.823
## 5 2016-12-29                 четверг        4883          4859.679
## 6 2016-12-30                 пятница        4895          4867.321
## 7 2016-12-31                 суббота        4906          4876.286
##   MAPE_TBATS_Model
## 1          0.042 %
## 2          0.007 %
## 3          0.329 %
## 4          0.353 %
## 5          0.478 %
## 6          0.565 %
## 7          0.606 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))
##           FD forecating_date forecasting_by_TBATS
## 1 2017-01-01     воскресенье             4885.999
## 2 2017-01-02     понедельник             4894.336
## 3 2017-01-03         вторник             4903.138
## 4 2017-01-04           среда             4911.994
## 5 2017-01-05         четверг             4919.636
## 6 2017-01-06         пятница             4928.601
## 7 2017-01-07         суббота             4938.314
plot(forecasting_tbats,xlab = paste ("Time in  ", frequency , sep=" "),ylab = y_lab)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency , sep=" "),ylab = y_lab)
graph