Forecasting Covid-19 vaccination cases in England by using BATS, TBATS, Holt’s Linear trend, and ARIMA model

Covid 19 deaths cases in india

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

Pradeep Mishra

Department of Mathematics & Statistics

Jawaharlal Nehru Krishi Vishwavidyalaya, India

#Import
library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

##

library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

require(markovchain)

## Loading required package: markovchain

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

##Global vriable##
Full_original_data <- read_excel("F:/Phd/vaccination/Vaccine data.xlsx", sheet = "England_vaccine") # path of your data ( time series data)
original_data<-Full_original_data$cum_vaccine
y_lab <- "Covid 19 vaccination cases in England"   # input name of data
Actual_date_interval <- c("2020/12/14","2021/03/10")
Forecast_date_interval <- c("2021/03/11","2021/03/17")
validation_data_days <-7
frequency<-"days"
country.name <- "England"
# Data Preparation & calculate some of statistics measures
summary(original_data) # Summary your time series

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    74179  1317380  5963830  7591661 13327317 19933433

# calculate standard deviation 
data.frame(skewness=skewness(original_data))  # calculate Cofficient of skewness

##    skewness
## 1 0.4542546

data.frame(kurtosis=kurtosis(original_data))   # calculate Cofficient of kurtosis

##   kurtosis
## 1 1.755602

data.frame(Standard.deviation =sd(original_data))

##   Standard.deviation
## 1            6485606

#processing on data (input data)
rows <- NROW(original_data) # calculate number of rows in time series (number of days)
training_data<-original_data[1:(rows-validation_data_days)] # Training data
testing_data<-original_data[(rows-validation_data_days+1):rows] #testing data
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD))  #calculate number of days that you want to forecasting
validation_dates<-tail(AD,validation_data_days) # select validation_dates
validation_data_by_name<-weekdays(validation_dates) # put names of validation dates
forecasting_data_by_name<-weekdays(FD)  # put names of Forecasting dates

##bats model
# Data Modeling
data_series<-ts(training_data) # make your data to time series
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data training data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME     RMSE      MAE        MPE     MAPE       MASE
## Training set 4522.336 24345.38 13229.49 -0.3022656 3.709728 0.05862319
##                     ACF1
## Training set -0.05448596

# Print Model Parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Lambda: 1
##   Alpha: 0.9112004
##   Beta: 0.8168303
##   Damping Parameter: 1
## 
## Seed States:
##           [,1]
## [1,] 174476.79
## [2,]  12239.83
## attr(,"lambda")
## [1] 0.9999999
## 
## Sigma: 24345.35
## AIC: 1976.578

#ploting BATS Model
plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  Covid 19 vaccination cases in England"

MAPE_Mean_All.bats_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.bats<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 vaccination cases in England"

paste(MAPE_Mean_All.bats,"%")

## [1] "0.388 % MAPE  7 days Covid 19 vaccination cases in England %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 vaccination cases in England"

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-03-04                Thursday    18194890         18211909
## 2 2021-03-05                  Friday    18481094         18519667
## 3 2021-03-06                Saturday    18769920         18827426
## 4 2021-03-07                  Sunday    19055646         19135184
## 5 2021-03-08                  Monday    19346275         19442943
## 6 2021-03-09                 Tuesday    19639663         19750701
## 7 2021-03-10               Wednesday    19933433         20058460
##   MAPE_bats_Model
## 1         0.094 %
## 2         0.209 %
## 3         0.306 %
## 4         0.417 %
## 5           0.5 %
## 6         0.565 %
## 7         0.627 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2021-03-11        Thursday            20366218
## 2 2021-03-12          Friday            20673977
## 3 2021-03-13        Saturday            20981735
## 4 2021-03-14          Sunday            21289493
## 5 2021-03-15          Monday            21597252
## 6 2021-03-16         Tuesday            21905010
## 7 2021-03-17       Wednesday            22212769

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MSE_bats<-(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,MSE_bats,RMSE_bats,MAPE_Mean_All.bats_Model,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER

##   correlation_bats   MSE_bats RMSE_bats MAPE_Mean_All.bats_Model MAD_bats
## 1        0.9999861 6959512255  83423.69                    0.388  75052.7

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2021-03-04            Thursday  17018.93    0.094 %    0.093 %
## 2       2021-03-05              Friday  38573.38    0.209 %    0.208 %
## 3       2021-03-06            Saturday  57505.82    0.306 %    0.305 %
## 4       2021-03-07              Sunday  79538.27    0.417 %    0.416 %
## 5       2021-03-08              Monday  96667.72      0.5 %    0.497 %
## 6       2021-03-09             Tuesday 111038.17    0.565 %    0.562 %
## 7       2021-03-10           Wednesday 125026.62    0.627 %    0.623 %

## TBATS Model
# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                    ME     RMSE      MAE       MPE     MAPE       MASE
## Training set 3747.694 25234.83 14759.43 -1.587306 4.155822 0.06540274
##                     ACF1
## Training set -0.02629988

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 0.9017306
##   Beta: 0.7336273
##   Damping Parameter: 1
##   Gamma-1 Values: 0.002167977
##   Gamma-2 Values: -0.001746456
## 
## Seed States:
##             [,1]
## [1,] 143088.4275
## [2,]  92635.2779
## [3,]   1774.7409
## [4,]  -1362.1766
## [5,]  -3809.2027
## [6,]    102.8208
## 
## Sigma: 25234.83
## AIC: 1992.319

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using TBATS Model for  ==>  Covid 19 vaccination cases in England"

MAPE_Mean_All.TBATS_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.TBATS<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Covid 19 vaccination cases in England"

paste(MAPE_Mean_All.TBATS,"%")

## [1] "0.496 % MAPE  7 days Covid 19 vaccination cases in England %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Covid 19 vaccination cases in England"

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-03-04                Thursday    18194890          18215364
## 2 2021-03-05                  Friday    18481094          18528409
## 3 2021-03-06                Saturday    18769920          18846701
## 4 2021-03-07                  Sunday    19055646          19161018
## 5 2021-03-08                  Monday    19346275          19469496
## 6 2021-03-09                 Tuesday    19639663          19779898
## 7 2021-03-10               Wednesday    19933433          20090893
##   MAPE_TBATS_Model
## 1          0.113 %
## 2          0.256 %
## 3          0.409 %
## 4          0.553 %
## 5          0.637 %
## 6          0.714 %
## 7           0.79 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2021-03-11        Thursday             20403938
## 2 2021-03-12          Friday             20722230
## 3 2021-03-13        Saturday             21036547
## 4 2021-03-14          Sunday             21345025
## 5 2021-03-15          Monday             21655427
## 6 2021-03-16         Tuesday             21966422
## 7 2021-03-17       Wednesday             22279467

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MSE_tbats<-(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,MSE_tbats,RMSE_tbats,MAPE_Mean_All.TBATS_Model,MAD_tbats) # analysis of Error  by using TBATS model shows result of correlation ,MSE ,MPER

##   correlation_tbats   MSE_tbats RMSE_tbats MAPE_Mean_All.TBATS_Model MAD_tbats
## 1         0.9999546 11328499304   106435.4                     0.496  95836.96

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2021-03-04            Thursday   20474.23      0.113 %      0.112 %
## 2       2021-03-05              Friday   47315.07      0.256 %      0.255 %
## 3       2021-03-06            Saturday   76781.17      0.409 %      0.407 %
## 4       2021-03-07              Sunday  105372.49      0.553 %       0.55 %
## 5       2021-03-08              Monday  123220.64      0.637 %      0.633 %
## 6       2021-03-09             Tuesday  140234.95      0.714 %      0.709 %
## 7       2021-03-10           Wednesday  157460.18       0.79 %      0.784 %

## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                   ME     RMSE      MAE        MPE      MAPE       MASE
## Training set -1931.5 10180.71 8446.923 -0.5699167 0.8149895 0.03743044
##                   ACF1
## Training set 0.6569311

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.5838 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.9999 
## 
##   Initial states:
##     l = 542.8926 
##     b = 793.8615 
## 
##   sigma:  30.249
## 
##      AIC     AICc      BIC 
## 901.9726 902.7834 913.8827 
## 
## Training set error measures:
##                   ME     RMSE      MAE        MPE      MAPE       MASE
## Training set -1931.5 10180.71 8446.923 -0.5699167 0.8149895 0.03743044
##                   ACF1
## Training set 0.6569311

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using holt Model for  ==>  Covid 19 vaccination cases in England"

MAPE_Mean_All.Holt_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.Holt<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Covid 19 vaccination cases in England"

paste(MAPE_Mean_All.Holt,"%")

## [1] "0.446 % MAPE  7 days Covid 19 vaccination cases in England %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Covid 19 vaccination cases in England"

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-03-04                Thursday    18194890         18208901
## 2 2021-03-05                  Friday    18481094         18517864
## 3 2021-03-06                Saturday    18769920         18828988
## 4 2021-03-07                  Sunday    19055646         19142266
## 5 2021-03-08                  Monday    19346275         19457693
## 6 2021-03-09                 Tuesday    19639663         19775262
## 7 2021-03-10               Wednesday    19933433         20094969
##   MAPE_holt_Model
## 1         0.077 %
## 2         0.199 %
## 3         0.315 %
## 4         0.455 %
## 5         0.576 %
## 6          0.69 %
## 7          0.81 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2021-03-11        Thursday            20416806
## 2 2021-03-12          Friday            20740769
## 3 2021-03-13        Saturday            21066852
## 4 2021-03-14          Sunday            21395049
## 5 2021-03-15          Monday            21725355
## 6 2021-03-16         Tuesday            22057764
## 7 2021-03-17       Wednesday            22392272

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MSE_Holt<-(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,MSE_Holt,RMSE_Holt,MAPE_Mean_All.Holt_Model,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_Holt   MSE_Holt RMSE_Holt MAPE_Mean_All.Holt_Model MAD_Holt
## 1        0.9999982 9919318864  99595.78                    0.446 86431.66

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2021-03-04            Thursday  14010.92     0.077 %     0.077 %
## 2       2021-03-05              Friday  36770.12     0.199 %     0.199 %
## 3       2021-03-06            Saturday  59067.84     0.315 %     0.314 %
## 4       2021-03-07              Sunday  86620.06     0.455 %     0.453 %
## 5       2021-03-08              Monday 111417.83     0.576 %     0.573 %
## 6       2021-03-09             Tuesday 135599.27      0.69 %     0.686 %
## 7       2021-03-10           Wednesday 161535.58      0.81 %     0.804 %

#Auto arima model
##################
require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  Covid 19 vaccination cases in England"

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 2.0197, Truncation lag parameter = 3, p-value = 0.01

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -1.6581, Truncation lag parameter = 3, p-value
## = 0.9756
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -4.014, Lag order = 4, p-value = 0.01331
## alternative hypothesis: stationary

ndiffs(data_series)    # Doing first diffrencing on data

## [1] 2

#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black",  ylab=y_lab,main = "1nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Covid 19 vaccination cases in England"

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 1.7751, Truncation lag parameter = 3, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -1.1942, Truncation lag parameter = 3, p-value
## = 0.9826
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -0.483, Lag order = 4, p-value = 0.9803
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 vaccination cases in England"

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.33363, Truncation lag parameter = 3, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -23.91, Truncation lag parameter = 3, p-value
## = 0.02138
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -2.8579, Lag order = 4, p-value = 0.225
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp"))  #applaying auto arima

## 
##  ARIMA(0,2,0)                    : 1664.114
##  ARIMA(0,2,1)                    : 1620.83
##  ARIMA(0,2,2)                    : 1611.25
##  ARIMA(0,2,3)                    : 1610.347
##  ARIMA(0,2,4)                    : 1611.768
##  ARIMA(0,2,5)                    : 1613.91
##  ARIMA(1,2,0)                    : 1609.102
##  ARIMA(1,2,1)                    : 1607.286
##  ARIMA(1,2,2)                    : 1609.495
##  ARIMA(1,2,3)                    : 1610.997
##  ARIMA(1,2,4)                    : 1612.741
##  ARIMA(2,2,0)                    : 1607.566
##  ARIMA(2,2,1)                    : 1609.497
##  ARIMA(2,2,2)                    : Inf
##  ARIMA(2,2,3)                    : 1613.989
##  ARIMA(3,2,0)                    : 1609.458
##  ARIMA(3,2,1)                    : 1610.023
##  ARIMA(3,2,2)                    : 1612.847
##  ARIMA(4,2,0)                    : 1611.721
##  ARIMA(4,2,1)                    : 1614.089
##  ARIMA(5,2,0)                    : 1613.843
## 
## 
## 
##  Best model: ARIMA(1,2,1)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(1,2,1) 
## 
## Coefficients:
##          ar1     ma1
##       0.5943  0.3038
## s.e.  0.1206  0.1391
## 
## sigma^2 estimated as 48747711:  log likelihood=-800.48
## AIC=1606.96   AICc=1607.29   BIC=1614.03

#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE) 
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", 
                  order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], 
                    ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    }
    else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", 
                                                                 names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    }
    else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}


bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel

## [1] 1 2 1

strtoi(bestmodel[3])

## [1] 1

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
acf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))    # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))   # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast)   # install library forecast             
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima  for forecasting
x1_model1  # Show result of best model of auto arima

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##          ar1     ma1
##       0.5943  0.3038
## s.e.  0.1206  0.1391
## 
## sigma^2 estimated as 47497699:  log likelihood = -800.48,  aic = 1606.96

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  Covid 19 vaccination cases in England"

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME     RMSE      MAE        MPE      MAPE       MASE
## Training set 762.0841 6805.169 5108.247 0.03485609 0.1456201 0.02263592
##                       ACF1
## Training set -0.0004192353

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,2,1)
## Q* = 11.909, df = 8, p-value = 0.1553
## 
## Model df: 2.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Covid 19 vaccination cases in England"

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 32.506, df = 20, p-value = 0.03819

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 0.24177, df = 2, p-value = 0.8861

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='black')

#Test data
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  Covid 19 vaccination cases in England"

MAPE_Mean_All.ARIMA_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.ARIMA<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 vaccination cases in England"

paste(MAPE_Mean_All.ARIMA,"%")

## [1] "0.196 % MAPE  7 days Covid 19 vaccination cases in England %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 vaccination cases in England"

data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2021-03-04                Thursday    18194890               18192590
## 2      2021-03-05                  Friday    18481094               18474678
## 3      2021-03-06                Saturday    18769920               18751774
## 4      2021-03-07                  Sunday    19055646               19025903
## 5      2021-03-08                  Monday    19346275               19298269
## 6      2021-03-09                 Tuesday    19639663               19569587
## 7      2021-03-10               Wednesday    19933433               19840282
##   MAPE_auto.arima_Model
## 1               0.013 %
## 2               0.035 %
## 3               0.097 %
## 4               0.156 %
## 5               0.248 %
## 6               0.357 %
## 7               0.467 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2021-03-11        Thursday                  20110608
## 2 2021-03-12          Friday                  20380713
## 3 2021-03-13        Saturday                  20650688
## 4 2021-03-14          Sunday                  20920585
## 5 2021-03-15          Monday                  21190436
## 6 2021-03-16         Tuesday                  21460260
## 7 2021-03-17       Wednesday                  21730067

plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

MAPE_Mean_All.ARIMA

## [1] "0.196 % MAPE  7 days Covid 19 vaccination cases in England"

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MSE_auto.arima<-(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,MSE_auto.arima,RMSE_auto.arima,MAPE_Mean_All.ARIMA_Model,MAD_auto.arima) # analysis of Error  by using Auto ARIMAA model shows result of correlation ,MSE ,MPER

##   correlation_auto.arima MSE_auto.arima RMSE_auto.arima
## 1                0.99993     2450400138        49501.52
##   MAPE_Mean_All.ARIMA_Model MAD_auto.arima
## 1                     0.196       38262.78

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2021-03-04            Thursday        2299.971           0.013 %
## 2       2021-03-05              Friday        6416.134           0.035 %
## 3       2021-03-06            Saturday       18146.398           0.097 %
## 4       2021-03-07              Sunday       29743.390           0.156 %
## 5       2021-03-08              Monday       48006.461           0.248 %
## 6       2021-03-09             Tuesday       70076.303           0.357 %
## 7       2021-03-10           Wednesday       93150.818           0.467 %
##   REOF_F_auto.arima
## 1           0.013 %
## 2           0.035 %
## 3           0.097 %
## 4           0.156 %
## 5           0.249 %
## 6           0.358 %
## 7            0.47 %

# Table for MAPE For counry
best_recommended_model <- min(MAPE_Mean_All.bats_Model,MAPE_Mean_All.TBATS_Model,MAPE_Mean_All.Holt_Model,MAPE_Mean_All.ARIMA_Model)
paste("System Choose Least Error ==> ( MAPE %) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Choose Least Error ==> ( MAPE %) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  Covid 19 vaccination cases in England"

best_recommended_model

## [1] 0.196

x1<-if(best_recommended_model >= MAPE_Mean_All.bats_Model) {paste("BATS Model")}
x2<-if(best_recommended_model >= MAPE_Mean_All.TBATS_Model) {paste("TBATS Model")}
x3<-if(best_recommended_model >= MAPE_Mean_All.Holt_Model) {paste("Holt Model")}
x4<-if(best_recommended_model >= MAPE_Mean_All.ARIMA_Model) {paste("ARIMA Model")}
result<-c(x1,x2,x3,x4)
table.error<-data.frame(country.name,BATS.Model=MAPE_Mean_All.bats_Model,TBATS.Model=MAPE_Mean_All.TBATS_Model,Holt.Model=MAPE_Mean_All.Holt_Model,ARIMA.Model=MAPE_Mean_All.ARIMA_Model,Best.Model=result)
library(ascii)
print(ascii(table(table.error)), type = "rest")

## 
## +---+--------------+------------+-------------+------------+-------------+-------------+------+
## |   | country.name | BATS.Model | TBATS.Model | Holt.Model | ARIMA.Model | Best.Model  | Freq |
## +===+==============+============+=============+============+=============+=============+======+
## | 1 | England      | 0.388      | 0.496       | 0.446      | 0.196       | ARIMA Model | 1.00 |
## +---+--------------+------------+-------------+------------+-------------+-------------+------+

message("System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",y_lab, sep=" ")

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>Covid 19 vaccination cases in England

message(" Thank you for using our System For Modelling  ==> ",y_lab, sep=" ")

##  Thank you for using our System For Modelling  ==> Covid 19 vaccination cases in England