Forecasting Covid-19 deaths cases in SAARC and China by using BATS, TBATS, Holt’s Linear trend, and ARIMA model

Covid 19 deaths cases in india

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

Pradeep Mishra

Department of Mathematics & Statistics

Jawaharlal Nehru Krishi Vishwavidyalaya, India

#Import
library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

##

library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

require(markovchain)

## Loading required package: markovchain

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

##Global vriable##
Full_original_data <- read_excel("F:/Phd/Covid 19 in SAARC/Covid 19 in SAARC.xlsx", sheet = "india") # path of your data ( time series data)
original_data<-Full_original_data$Cumulative_deaths
y_lab <- "Covid 19 deaths cases in india"   # input name of data
Actual_date_interval <- c("2020/03/01","2021/03/10")
Forecast_date_interval <- c("2021/03/11","2021/03/17")
validation_data_days <-7
frequency<-"day"
country.name <- "india"
# Data Preparation & calculate some of statistics measures
summary(original_data) # Summary your time series

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0     543   40699   63878  133227  158063

# calculate standard deviation 
data.frame(skewness=skewness(original_data))  # calculate Cofficient of skewness

##    skewness
## 1 0.3240957

data.frame(kurtosis=kurtosis(original_data))   # calculate Cofficient of kurtosis

##   kurtosis
## 1 1.381315

data.frame(Standard.deviation =sd(original_data))

##   Standard.deviation
## 1           63024.53

#processing on data (input data)
rows <- NROW(original_data) # calculate number of rows in time series (number of days)
training_data<-original_data[1:(rows-validation_data_days)] # Training data
testing_data<-original_data[(rows-validation_data_days+1):rows] #testing data
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD))  #calculate number of days that you want to forecasting
validation_dates<-tail(AD,validation_data_days) # select validation_dates
validation_data_by_name<-weekdays(validation_dates) # put names of validation dates
forecasting_data_by_name<-weekdays(FD)  # put names of Forecasting dates

##bats model
# Data Modeling
data_series<-ts(training_data) # make your data to time series
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data training data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE      MASE       ACF1
## Training set 1.693479 192.6369 84.80609 NaN  Inf 0.2290658 0.05422684

# Print Model Parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 0.5903846
##   Beta: 0.1460504
##   Damping Parameter: 1
## 
## Seed States:
##            [,1]
## [1,] -0.9843571
## [2,] -0.0233703
## 
## Sigma: 192.6369
## AIC: 7069.399

#ploting BATS Model
plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using bats Model for  ==>  Covid 19 deaths cases in india"

MAPE_Mean_All.bats_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.bats<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 deaths cases in india"

paste(MAPE_Mean_All.bats,"%")

## [1] "0.016 % MAPE  7 day Covid 19 deaths cases in india %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 deaths cases in india"

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-03-04                Thursday      157435         157458.6
## 2 2021-03-05                  Friday      157548         157563.9
## 3 2021-03-06                Saturday      157656         157669.3
## 4 2021-03-07                  Sunday      157756         157774.6
## 5 2021-03-08                  Monday      157853         157879.9
## 6 2021-03-09                 Tuesday      157930         157985.3
## 7 2021-03-10               Wednesday      158063         158090.6
##   MAPE_bats_Model
## 1         0.015 %
## 2          0.01 %
## 3         0.008 %
## 4         0.012 %
## 5         0.017 %
## 6         0.035 %
## 7         0.017 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2021-03-11        Thursday            158196.0
## 2 2021-03-12          Friday            158301.3
## 3 2021-03-13        Saturday            158406.6
## 4 2021-03-14          Sunday            158512.0
## 5 2021-03-15          Monday            158617.3
## 6 2021-03-16         Tuesday            158722.7
## 7 2021-03-17       Wednesday            158828.0

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MSE_bats<-(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,MSE_bats,RMSE_bats,MAPE_Mean_All.bats_Model,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER

##   correlation_bats MSE_bats RMSE_bats MAPE_Mean_All.bats_Model MAD_bats
## 1        0.9986276 839.6221  28.97623                    0.016 25.88978

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2021-03-04            Thursday  23.58229    0.015 %    0.015 %
## 2       2021-03-05              Friday  15.92289     0.01 %     0.01 %
## 3       2021-03-06            Saturday  13.26348    0.008 %    0.008 %
## 4       2021-03-07              Sunday  18.60407    0.012 %    0.012 %
## 5       2021-03-08              Monday  26.94466    0.017 %    0.017 %
## 6       2021-03-09             Tuesday  55.28525    0.035 %    0.035 %
## 7       2021-03-10           Wednesday  27.62584    0.017 %    0.017 %

## TBATS Model
# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                   ME     RMSE      MAE MPE MAPE      MASE       ACF1
## Training set 1.67981 190.2337 89.75114 NaN  Inf 0.2424226 0.06042829

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 0.5912045
##   Beta: 0.1470497
##   Damping Parameter: 1
##   Gamma-1 Values: -0.002843247
##   Gamma-2 Values: 0.004128876
## 
## Seed States:
##            [,1]
## [1,]   2.198960
## [2,]  -1.346886
## [3,]  -6.906433
## [4,] -12.970679
## [5,] -19.045215
## [6,]   7.142075
## 
## Sigma: 190.2337
## AIC: 7070.703

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using TBATS Model for  ==>  Covid 19 deaths cases in india"

MAPE_Mean_All.TBATS_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.TBATS<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Covid 19 deaths cases in india"

paste(MAPE_Mean_All.TBATS,"%")

## [1] "0.009 % MAPE  7 day Covid 19 deaths cases in india %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Covid 19 deaths cases in india"

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-03-04                Thursday      157435          157431.7
## 2 2021-03-05                  Friday      157548          157557.2
## 3 2021-03-06                Saturday      157656          157651.8
## 4 2021-03-07                  Sunday      157756          157757.0
## 5 2021-03-08                  Monday      157853          157889.9
## 6 2021-03-09                 Tuesday      157930          157970.9
## 7 2021-03-10               Wednesday      158063          158055.0
##   MAPE_TBATS_Model
## 1          0.002 %
## 2          0.006 %
## 3          0.003 %
## 4          0.001 %
## 5          0.023 %
## 6          0.026 %
## 7          0.005 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2021-03-11        Thursday             158180.4
## 2 2021-03-12          Friday             158275.0
## 3 2021-03-13        Saturday             158380.3
## 4 2021-03-14          Sunday             158513.2
## 5 2021-03-15          Monday             158594.1
## 6 2021-03-16         Tuesday             158678.2
## 7 2021-03-17       Wednesday             158803.7

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MSE_tbats<-(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,MSE_tbats,RMSE_tbats,MAPE_Mean_All.TBATS_Model,MAD_tbats) # analysis of Error  by using TBATS model shows result of correlation ,MSE ,MPER

##   correlation_tbats MSE_tbats RMSE_tbats MAPE_Mean_All.TBATS_Model MAD_tbats
## 1         0.9963939  458.8629   21.42109                     0.009  10.33376

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2021-03-04            Thursday   3.331479      0.002 %      0.002 %
## 2       2021-03-05              Friday   9.154587      0.006 %      0.006 %
## 3       2021-03-06            Saturday   4.240392      0.003 %      0.003 %
## 4       2021-03-07              Sunday   1.002494      0.001 %      0.001 %
## 5       2021-03-08              Monday  36.938047      0.023 %      0.023 %
## 6       2021-03-09             Tuesday  40.854240      0.026 %      0.026 %
## 7       2021-03-10           Wednesday   8.041190      0.005 %      0.005 %

## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                    ME     RMSE      MAE  MPE MAPE     MASE       ACF1
## Training set -27.2825 200.4808 81.98246 -Inf  Inf 0.221439 -0.1616733

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.3777 
## 
##   Smoothing parameters:
##     alpha = 0.8642 
##     beta  = 0.1275 
## 
##   Initial states:
##     l = -2.6471 
##     b = 2e-04 
## 
##   sigma:  0.3753
## 
##      AIC     AICc      BIC 
## 1750.255 1750.398 1770.527 
## 
## Training set error measures:
##                    ME     RMSE      MAE  MPE MAPE     MASE       ACF1
## Training set -27.2825 200.4808 81.98246 -Inf  Inf 0.221439 -0.1616733

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using holt Model for  ==>  Covid 19 deaths cases in india"

MAPE_Mean_All.Holt_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.Holt<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Covid 19 deaths cases in india"

paste(MAPE_Mean_All.Holt,"%")

## [1] "0.008 % MAPE  7 day Covid 19 deaths cases in india %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Covid 19 deaths cases in india"

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-03-04                Thursday      157435         157450.7
## 2 2021-03-05                  Friday      157548         157554.2
## 3 2021-03-06                Saturday      157656         157657.8
## 4 2021-03-07                  Sunday      157756         157761.4
## 5 2021-03-08                  Monday      157853         157865.1
## 6 2021-03-09                 Tuesday      157930         157968.8
## 7 2021-03-10               Wednesday      158063         158072.5
##   MAPE_holt_Model
## 1          0.01 %
## 2         0.004 %
## 3         0.001 %
## 4         0.003 %
## 5         0.008 %
## 6         0.025 %
## 7         0.006 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2021-03-11        Thursday            158176.3
## 2 2021-03-12          Friday            158280.1
## 3 2021-03-13        Saturday            158384.0
## 4 2021-03-14          Sunday            158487.9
## 5 2021-03-15          Monday            158591.9
## 6 2021-03-16         Tuesday            158695.9
## 7 2021-03-17       Wednesday            158799.9

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MSE_Holt<-(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,MSE_Holt,RMSE_Holt,MAPE_Mean_All.Holt_Model,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_Holt MSE_Holt RMSE_Holt MAPE_Mean_All.Holt_Model MAD_Holt
## 1        0.9986231  293.701  17.13771                    0.008 12.77392

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2021-03-04            Thursday 15.667398      0.01 %      0.01 %
## 2       2021-03-05              Friday  6.203726     0.004 %     0.004 %
## 3       2021-03-06            Saturday  1.782413     0.001 %     0.001 %
## 4       2021-03-07              Sunday  5.403465     0.003 %     0.003 %
## 5       2021-03-08              Monday 12.066890     0.008 %     0.008 %
## 6       2021-03-09             Tuesday 38.772693     0.025 %     0.025 %
## 7       2021-03-10           Wednesday  9.520881     0.006 %     0.006 %

#Auto arima model
##################
require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  Covid 19 deaths cases in india"

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 6.9199, Truncation lag parameter = 5, p-value = 0.01

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -2.6087, Truncation lag parameter = 5, p-value
## = 0.9511
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -3.1442, Lag order = 7, p-value = 0.09758
## alternative hypothesis: stationary

ndiffs(data_series)    # Doing first diffrencing on data

## [1] 2

#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black",  ylab=y_lab,main = "1nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Covid 19 deaths cases in india"

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 2.417, Truncation lag parameter = 5, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## Warning in pp.test(diff1_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -130.11, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -0.37585, Lag order = 7, p-value = 0.9873
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 deaths cases in india"

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.057299, Truncation lag parameter = 5, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -569.12, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## Warning in adf.test(diff2_x1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -12.819, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp"))  #applaying auto arima

## 
##  ARIMA(0,2,0)                    : 6060.457
##  ARIMA(0,2,1)                    : 5727.467
##  ARIMA(0,2,2)                    : 5674.463
##  ARIMA(0,2,3)                    : 5668.703
##  ARIMA(0,2,4)                    : 5667.936
##  ARIMA(0,2,5)                    : 5668.488
##  ARIMA(1,2,0)                    : 5871.378
##  ARIMA(1,2,1)                    : 5691.716
##  ARIMA(1,2,2)                    : 5665.062
##  ARIMA(1,2,3)                    : 5664.799
##  ARIMA(1,2,4)                    : 5664.759
##  ARIMA(2,2,0)                    : 5785.484
##  ARIMA(2,2,1)                    : 5682.463
##  ARIMA(2,2,2)                    : 5665.71
##  ARIMA(2,2,3)                    : 5661.518
##  ARIMA(3,2,0)                    : 5737.996
##  ARIMA(3,2,1)                    : 5680.051
##  ARIMA(3,2,2)                    : 5667.035
##  ARIMA(4,2,0)                    : 5720.373
##  ARIMA(4,2,1)                    : 5680.291
##  ARIMA(5,2,0)                    : 5687.858
## 
## 
## 
##  Best model: ARIMA(2,2,3)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(2,2,3) 
## 
## Coefficients:
##           ar1     ar2      ma1      ma2     ma3
##       -0.5213  0.4419  -0.6850  -0.8120  0.6863
## s.e.   0.0995  0.0938   0.0778   0.0472  0.0577
## 
## sigma^2 estimated as 36036:  log likelihood=-2824.66
## AIC=5661.32   AICc=5661.52   BIC=5685.61

#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE) 
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", 
                  order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], 
                    ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    }
    else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", 
                                                                 names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    }
    else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}


bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel

## [1] 2 2 3

strtoi(bestmodel[3])

## [1] 3

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
acf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))    # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))   # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast)   # install library forecast             
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima  for forecasting
x1_model1  # Show result of best model of auto arima

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ar1     ar2      ma1      ma2     ma3
##       -0.5213  0.4419  -0.6850  -0.8120  0.6863
## s.e.   0.0995  0.0938   0.0778   0.0472  0.0577
## 
## sigma^2 estimated as 35611:  log likelihood = -2824.66,  aic = 5661.32

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  Covid 19 deaths cases in india"

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 1.467933 188.2662 82.17214 1.436279 1.969348 0.2219514
##                      ACF1
## Training set -0.005500256

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,2,3)
## Q* = 4.8113, df = 5, p-value = 0.4393
## 
## Model df: 5.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Covid 19 deaths cases in india"

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 51.48, df = 20, p-value = 0.0001355

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 11034, df = 2, p-value < 2.2e-16

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='black')

#Test data
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using bats Model for  ==>  Covid 19 deaths cases in india"

MAPE_Mean_All.ARIMA_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.ARIMA<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 deaths cases in india"

paste(MAPE_Mean_All.ARIMA,"%")

## [1] "0.029 % MAPE  7 day Covid 19 deaths cases in india %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 deaths cases in india"

data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2021-03-04                Thursday      157435               157460.0
## 2      2021-03-05                  Friday      157548               157574.8
## 3      2021-03-06                Saturday      157656               157683.7
## 4      2021-03-07                  Sunday      157756               157796.1
## 5      2021-03-08                  Monday      157853               157904.0
## 6      2021-03-09                 Tuesday      157930               158015.8
## 7      2021-03-10               Wednesday      158063               158123.6
##   MAPE_auto.arima_Model
## 1               0.016 %
## 2               0.017 %
## 3               0.018 %
## 4               0.025 %
## 5               0.032 %
## 6               0.054 %
## 7               0.038 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2021-03-11        Thursday                  158235.1
## 2 2021-03-12          Friday                  158343.0
## 3 2021-03-13        Saturday                  158454.5
## 4 2021-03-14          Sunday                  158562.4
## 5 2021-03-15          Monday                  158673.8
## 6 2021-03-16         Tuesday                  158781.8
## 7 2021-03-17       Wednesday                  158893.1

plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

MAPE_Mean_All.ARIMA

## [1] "0.029 % MAPE  7 day Covid 19 deaths cases in india"

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MSE_auto.arima<-(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,MSE_auto.arima,RMSE_auto.arima,MAPE_Mean_All.ARIMA_Model,MAD_auto.arima) # analysis of Error  by using Auto ARIMAA model shows result of correlation ,MSE ,MPER

##   correlation_auto.arima MSE_auto.arima RMSE_auto.arima
## 1              0.9986241       2477.043         49.7699
##   MAPE_Mean_All.ARIMA_Model MAD_auto.arima
## 1                     0.029       45.27005

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2021-03-04            Thursday        24.98493           0.016 %
## 2       2021-03-05              Friday        26.81935           0.017 %
## 3       2021-03-06            Saturday        27.70505           0.018 %
## 4       2021-03-07              Sunday        40.06750           0.025 %
## 5       2021-03-08              Monday        50.98852           0.032 %
## 6       2021-03-09             Tuesday        85.76149           0.054 %
## 7       2021-03-10           Wednesday        60.56351           0.038 %
##   REOF_F_auto.arima
## 1           0.016 %
## 2           0.017 %
## 3           0.018 %
## 4           0.025 %
## 5           0.032 %
## 6           0.054 %
## 7           0.038 %

# Table for MAPE For counry
best_recommended_model <- min(MAPE_Mean_All.bats_Model,MAPE_Mean_All.TBATS_Model,MAPE_Mean_All.Holt_Model,MAPE_Mean_All.ARIMA_Model)
paste("System Choose Least Error ==> ( MAPE %) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Choose Least Error ==> ( MAPE %) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  Covid 19 deaths cases in india"

best_recommended_model

## [1] 0.008

x1<-if(best_recommended_model >= MAPE_Mean_All.bats_Model) {paste("BATS Model")}
x2<-if(best_recommended_model >= MAPE_Mean_All.TBATS_Model) {paste("TBATS Model")}
x3<-if(best_recommended_model >= MAPE_Mean_All.Holt_Model) {paste("Holt Model")}
x4<-if(best_recommended_model >= MAPE_Mean_All.ARIMA_Model) {paste("ARIMA Model")}
result<-c(x1,x2,x3,x4)
table.error<-data.frame(country.name,BATS.Model=MAPE_Mean_All.bats_Model,TBATS.Model=MAPE_Mean_All.TBATS_Model,Holt.Model=MAPE_Mean_All.Holt_Model,ARIMA.Model=MAPE_Mean_All.ARIMA_Model,Best.Model=result)
library(ascii)
print(ascii(table(table.error)), type = "rest")

## 
## +---+--------------+------------+-------------+------------+-------------+------------+------+
## |   | country.name | BATS.Model | TBATS.Model | Holt.Model | ARIMA.Model | Best.Model | Freq |
## +===+==============+============+=============+============+=============+============+======+
## | 1 | india        | 0.016      | 0.009       | 0.008      | 0.029       | Holt Model | 1.00 |
## +---+--------------+------------+-------------+------------+-------------+------------+------+

message("System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",y_lab, sep=" ")

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>Covid 19 deaths cases in india

message(" Thank you for using our System For Modelling  ==> ",y_lab, sep=" ")

##  Thank you for using our System For Modelling  ==> Covid 19 deaths cases in india