south ural state university, Russian federation
# Imports
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ------------------------------------------------------------------------------ fpp2 2.4 --
## v ggplot2 3.3.2 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'forecast' was built under R version 4.0.3
##
library(forecast)
library(ggplot2)
dataset_path<-"F:/Phd/Private Project/train_1-slice0.csv" #path of your time series data in your pc
y_lab<- "Number Vistors Apple page in wikipedia" # input name of Time series data
Actual_date_interval <- c("2015/07/01","2016/12/31") # Write your actual interval date of your time series
Forecast_date_interval <- c("2017/01/01","2017/01/7") # Write your Forecasting interval of the date your time series
validation_data_days <-7 # how many rows from actual data that you want to test the TBATS model and calculate MAPE for that perioud
frequency<-"days" # Type of your data ("years","months","weeks","days") that should you choose type of your data
# Data Preparation
original_data<-read.csv(dataset_path)
# Apple Page : original_data$Apple_II_zh.wikipedia.org_all.access_spider
# Facebook Page : original_data$Facebook_zh.wikipedia.org_all.access_spider
# Android Page : original_data$Android_zh.wikipedia.org_all.access_spider
# Energy Page : original_data$Energy_zh.wikipedia.org_all.access_spider
input_data<-cumsum(original_data$Apple_II_zh.wikipedia.org_all.access_spider) # convert data to cumulative
rows <- NROW(input_data)
training_data<-input_data[1:(rows-validation_data_days)]
testing_data<-input_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency) #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency) #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD))
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1331319 5.00807 3.324313 0.1261135 0.6548812 0.3745121
## ACF1
## Training set 0.02402595
# Print Model Parameters
plot(model_holt,main =paste(y_lab),xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 days by using holt Model for ==> Number Vistors Apple page in wikipedia"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in holt Model for ==> Number Vistors Apple page in wikipedia"
paste(MAPE_Mean_All,"%")
## [1] "0.363 % MAPE 7 days Number Vistors Apple page in wikipedia %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in holt Model for ==> Number Vistors Apple page in wikipedia"
data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)
## date_holt validation_data_by_name actual_data forecasting_holt
## 1 2016-12-25 воскресенье 4826 4823.601
## 2 2016-12-26 понедельник 4834 4832.202
## 3 2016-12-27 вторник 4858 4840.803
## 4 2016-12-28 среда 4868 4849.405
## 5 2016-12-29 четверг 4883 4858.007
## 6 2016-12-30 пятница 4895 4866.610
## 7 2016-12-31 суббота 4906 4875.214
## MAPE_holt_Model
## 1 0.05 %
## 2 0.037 %
## 3 0.354 %
## 4 0.382 %
## 5 0.512 %
## 6 0.58 %
## 7 0.628 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))
## FD forecating_date forecasting_by_holt
## 1 2017-01-01 воскресенье 4883.818
## 2 2017-01-02 понедельник 4892.422
## 3 2017-01-03 вторник 4901.027
## 4 2017-01-04 среда 4909.633
## 5 2017-01-05 четверг 4918.239
## 6 2017-01-06 пятница 4926.845
## 7 2017-01-07 суббота 4935.452
plot(forecasting_holt,xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph<-autoplot(forecasting_holt,xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)
graph

# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE, seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE
## Training set 0.08187205 5.010726 3.299648 0.8127762 1.243233 0.3717333
## ACF1
## Training set 0.002301955
# Print Model Parameters
model_TBATS
## TBATS(1, {0,0}, 1, {<6,2>})
##
## Call: NULL
##
## Parameters
## Alpha: 1.039657
## Beta: 0.01173392
## Damping Parameter: 1
## Gamma-1 Values: -0.003746758
## Gamma-2 Values: 0.0009470691
##
## Seed States:
## [,1]
## [1,] -22.32833575
## [2,] 8.19750852
## [3,] 0.31662003
## [4,] -0.06067068
## [5,] -0.31820908
## [6,] 0.25838291
##
## Sigma: 5.010726
## AIC: 5189.507
plot(model_TBATS,main =paste(y_lab),xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 days by using TBATS Model for ==> Number Vistors Apple page in wikipedia"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in TBATS Model for ==> Number Vistors Apple page in wikipedia"
paste(MAPE_Mean_All,"%")
## [1] "0.34 % MAPE 7 days Number Vistors Apple page in wikipedia %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in TBATS Model for ==> Number Vistors Apple page in wikipedia"
data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)
## date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2016-12-25 воскресенье 4826 4823.971
## 2 2016-12-26 понедельник 4834 4833.684
## 3 2016-12-27 вторник 4858 4842.021
## 4 2016-12-28 среда 4868 4850.823
## 5 2016-12-29 четверг 4883 4859.679
## 6 2016-12-30 пятница 4895 4867.321
## 7 2016-12-31 суббота 4906 4876.286
## MAPE_TBATS_Model
## 1 0.042 %
## 2 0.007 %
## 3 0.329 %
## 4 0.353 %
## 5 0.478 %
## 6 0.565 %
## 7 0.606 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))
## FD forecating_date forecasting_by_TBATS
## 1 2017-01-01 воскресенье 4885.999
## 2 2017-01-02 понедельник 4894.336
## 3 2017-01-03 вторник 4903.138
## 4 2017-01-04 среда 4911.994
## 5 2017-01-05 четверг 4919.636
## 6 2017-01-06 пятница 4928.601
## 7 2017-01-07 суббота 4938.314
plot(forecasting_tbats,xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)
graph2

# Data Modeling
data_series<-ts(training_data)
model_bats<-bats(data_series)
accuracy(model_bats) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02153136 4.976799 3.269713 0.6647574 1.135035 0.368361
## ACF1
## Training set -0.06509243
# Print Model Parameters
model_bats
## BATS(0.972, {1,0}, 1, -)
##
## Call: bats(y = data_series)
##
## Parameters
## Lambda: 0.971881
## Alpha: 0.3868964
## Beta: -0.004016503
## Damping Parameter: 0.999781
## AR coefficients: 0.705365
##
## Seed States:
## [,1]
## [1,] -19.354452
## [2,] 7.540656
## [3,] 0.000000
## attr(,"lambda")
## [1] 0.9718814
##
## Sigma: 4.047286
## AIC: 5181.524
plot(model_bats,main =paste(y_lab),xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 days by using bats Model for ==> Number Vistors Apple page in wikipedia"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in bats Model for ==> Number Vistors Apple page in wikipedia"
paste(MAPE_Mean_All,"%")
## [1] "0.357 % MAPE 7 days Number Vistors Apple page in wikipedia %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in bats Model for ==> Number Vistors Apple page in wikipedia"
data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)
## date_bats validation_data_by_name actual_data forecasting_bats
## 1 2016-12-25 воскресенье 4826 4823.733
## 2 2016-12-26 понедельник 4834 4832.427
## 3 2016-12-27 вторник 4858 4841.091
## 4 2016-12-28 среда 4868 4849.735
## 5 2016-12-29 четверг 4883 4858.364
## 6 2016-12-30 пятница 4895 4866.981
## 7 2016-12-31 суббота 4906 4875.591
## MAPE_bats_Model
## 1 0.047 %
## 2 0.033 %
## 3 0.348 %
## 4 0.375 %
## 5 0.505 %
## 6 0.572 %
## 7 0.62 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))
## FD forecating_date forecasting_by_bats
## 1 2017-01-01 воскресенье 4884.194
## 2 2017-01-02 понедельник 4892.793
## 3 2017-01-03 вторник 4901.387
## 4 2017-01-04 среда 4909.979
## 5 2017-01-05 четверг 4918.567
## 6 2017-01-06 пятница 4927.154
## 7 2017-01-07 суббота 4935.738
plot(forecasting_bats,xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_bats,xlab = paste ("Time in ", frequency , sep=" "),ylab = y_lab)
graph3
