Develop algorithm bats Model for time series analysis in R programming

That’s an algorithm developed

By

Makarovskikh Tatyana Anatolyevna“Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

south ural state university, Russian federation

# Imports
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages ------------------------------------------------------------------------------ fpp2 2.4 --
## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'forecast' was built under R version 4.0.3
## 
library(forecast)
library(ggplot2)
dataset_path<-"F:/Phd/Private Project/train_1-slice0.csv" #path of your time series data in your pc 
y_lab<- "Number Vistors Apple page in wikipedia"   # input name of Time series data
Actual_date_interval <- c("2015/07/01","2016/12/31") # Write your actual interval date of your time series
Forecast_date_interval <- c("2017/01/01","2017/01/7") # Write your Forecasting  interval of the date  your time series
validation_data_days <-7 # how many rows from actual data that you want to test the TBATS model and calculate MAPE for that perioud 
frequency<-"days" # Type of your data ("years","months","weeks","days") that should you choose type of your data 
# Data Preparation
original_data<-read.csv(dataset_path) 
# Apple Page : original_data$Apple_II_zh.wikipedia.org_all.access_spider
# Facebook Page : original_data$Facebook_zh.wikipedia.org_all.access_spider
# Android Page : original_data$Android_zh.wikipedia.org_all.access_spider
# Energy Page : original_data$Energy_zh.wikipedia.org_all.access_spider
input_data<-cumsum(original_data$Apple_II_zh.wikipedia.org_all.access_spider) # convert data to cumulative
rows <- NROW(input_data)
training_data<-input_data[1:(rows-validation_data_days)]
testing_data<-input_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) 
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)
# Data Modeling
data_series<-ts(training_data)
model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data
##                       ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -0.02153136 4.976799 3.269713 0.6647574 1.135035 0.368361
##                     ACF1
## Training set -0.06509243
# Print Model Parameters
model_bats
## BATS(0.972, {1,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Lambda: 0.971881
##   Alpha: 0.3868964
##   Beta: -0.004016503
##   Damping Parameter: 0.999781
##   AR coefficients: 0.705365
## 
## Seed States:
##            [,1]
## [1,] -19.354452
## [2,]   7.540656
## [3,]   0.000000
## attr(,"lambda")
## [1] 0.9718814
## 
## Sigma: 4.047286
## AIC: 5181.524
plot(model_bats,main =paste(y_lab),xlab = paste ("Time in  ", frequency , sep=" "),ylab = y_lab)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE % For  7 days by using bats Model for  ==>  Number Vistors Apple page in wikipedia"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Number Vistors Apple page in wikipedia"
paste(MAPE_Mean_All,"%")
## [1] "0.357 % MAPE  7 days Number Vistors Apple page in wikipedia %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Number Vistors Apple page in wikipedia"
data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)
##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2016-12-25             воскресенье        4826         4823.733
## 2 2016-12-26             понедельник        4834         4832.427
## 3 2016-12-27                 вторник        4858         4841.091
## 4 2016-12-28                   среда        4868         4849.735
## 5 2016-12-29                 четверг        4883         4858.364
## 6 2016-12-30                 пятница        4895         4866.981
## 7 2016-12-31                 суббота        4906         4875.591
##   MAPE_bats_Model
## 1         0.047 %
## 2         0.033 %
## 3         0.348 %
## 4         0.375 %
## 5         0.505 %
## 6         0.572 %
## 7          0.62 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))
##           FD forecating_date forecasting_by_bats
## 1 2017-01-01     воскресенье            4884.194
## 2 2017-01-02     понедельник            4892.793
## 3 2017-01-03         вторник            4901.387
## 4 2017-01-04           среда            4909.979
## 5 2017-01-05         четверг            4918.567
## 6 2017-01-06         пятница            4927.154
## 7 2017-01-07         суббота            4935.738
plot(forecasting_bats,xlab = paste ("Time in  ", frequency , sep=" "),ylab = y_lab)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency , sep=" "),ylab = y_lab)
graph