New Techniques for Forecasting COVID-19 in the Top 10 Most-Infected Countries (India)

By

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb Mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South Ural State University, Chelyabinsk, Russian Federation

# Imports
library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

##

library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

require(markovchain)

## Loading required package: markovchain

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

# ---- User inputs ----
# Population of India = 1387369463
# Source file: WHO COVID-19 global table data, January 11th 2021 at 11.53.00 AM (csv)
# NOTE(review): the path below is absolute and machine-specific.
Full_original_data<-read.csv("F:/Phd/COVID 19 in 2021/WHO_data.csv")
View(Full_original_data)  # open the raw table in the data viewer
y_lab<- "Covid 19 Infection cases in India "   # series label; pasted into plot titles, axis labels and printed messages
Actual_date_interval <- c("2020/01/03","2021/01/10")  # first and last date of the observed series
Forecast_date_interval <- c("2021/01/11","2021/01/17")  # first and last date to forecast
validation_data_days <-7  # number of trailing observations held out as testing data
frequency <-"days"  # step passed to seq() when building the date vectors; also pasted into labels
Population <-1387369463 # population of India

# Data Preparation & calculate some of statistics measures
Covid_data<-Full_original_data[Full_original_data$Country == "India", ]
original_data<-Covid_data$Cumulative_cases
View(original_data)
summary(original_data)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0     3547   731041  3148878  6888527 10450284

sd(original_data)  # calculate standard deviation

## [1] 3817709

skewness(original_data)  # calculate Cofficient of skewness

## [1] 0.7700359

kurtosis(original_data)   # calculate Cofficient of kurtosis

## [1] 1.924422

# Split the series: everything except the last `validation_data_days`
# observations is used for fitting; the remainder is held out for testing.
rows <- NROW(original_data)
training_data <- original_data[1:(rows - validation_data_days)]
testing_data <- original_data[(rows - validation_data_days + 1):rows]

# Calendar dates covering the observed series (AD, also kept as fulldate)
# and the forecast horizon (FD), stepped by `frequency`.
AD <- seq(
  from = as.Date(Actual_date_interval[1]),
  to = as.Date(Actual_date_interval[2]),
  by = frequency
)
fulldate <- AD
FD <- seq(
  from = as.Date(Forecast_date_interval[1]),
  to = as.Date(Forecast_date_interval[2]),
  by = frequency
)
N_forecasting_days <- length(FD)  # number of dates in the forecast horizon

# Dates and weekday names for the validation window and the forecast horizon.
validation_dates <- tail(AD, validation_data_days)
validation_data_by_name <- weekdays(validation_dates)
forecasting_data_by_name <- weekdays(FD)

##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
## Training set 86.56065 3208.077 1816.781 NaN  Inf 0.0644076 -0.02768263

# Print Model Parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 1.379331
##   Beta: 0.5984324
##   Damping Parameter: 1
## 
## Seed States:
##            [,1]
## [1,] -24.982765
## [2,]   2.154116
## 
## Sigma: 3208.077
## AIC: 8101.163

plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  Covid 19 Infection cases in India "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 Infection cases in India "

paste(MAPE_Mean_All,"%")

## [1] "0.073 % MAPE  7 days Covid 19 Infection cases in India  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 Infection cases in India "

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-01-04                  Monday    10340469         10342602
## 2 2021-01-05                 Tuesday    10356844         10361615
## 3 2021-01-06               Wednesday    10374932         10380628
## 4 2021-01-07                Thursday    10395278         10399641
## 5 2021-01-08                  Friday    10413417         10418654
## 6 2021-01-09                Saturday    10413417         10437667
## 7 2021-01-10                  Sunday    10450284         10456680
##   MAPE_bats_Model
## 1         0.021 %
## 2         0.046 %
## 3         0.055 %
## 4         0.042 %
## 5          0.05 %
## 6         0.233 %
## 7         0.061 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2021-01-11          Monday            10475693
## 2 2021-01-12         Tuesday            10494706
## 3 2021-01-13       Wednesday            10513719
## 4 2021-01-14        Thursday            10532732
## 5 2021-01-15          Friday            10551745
## 6 2021-01-16        Saturday            10570759
## 7 2021-01-17          Sunday            10589772

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
# Hold-out error metrics for the BATS forecast: `testing_data` holds the
# actual values and `validation_forecast` the forecasts for the same days.
Error_bats <- abs(testing_data - validation_forecast)  # absolute error of forecast (AEOF)
REOF_A_bats <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # relative error of forecast, % of actual (REOF_A)
REOF_F_bats <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # relative error of forecast, % of forecast (REOF_F)
correlation_bats <- cor(testing_data, validation_forecast, method = c("pearson"))  # Pearson correlation between actual and predicted values
MSE_bats <- sum(Error_bats^2) / validation_data_days  # mean squared forecast error
RMSE_bats <- sqrt(MSE_bats)  # root mean squared forecast error
# Mean absolute deviation: average of the absolute errors. Taking abs() of the
# summed signed errors instead would let over- and under-forecasts cancel.
MAD_bats <- sum(Error_bats) / validation_data_days
AEOF_bats <- c(Error_bats)
REOF_Abats <- c(paste(round(REOF_A_bats, 3), "%"))
REOF_Fbats <- c(paste(round(REOF_F_bats, 3), "%"))
data.frame(correlation_bats, MSE_bats, RMSE_bats, MAPE_Mean_All, MAD_bats)  # BATS error summary: correlation, MSE, RMSE, mean MAPE, MAD

##   correlation_bats  MSE_bats RMSE_bats
## 1         0.985658 105033451  10248.58
##                                             MAPE_Mean_All MAD_bats
## 1 0.073 % MAPE  7 days Covid 19 Infection cases in India  7549.837

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2021-01-04              Monday  2133.374    0.021 %    0.021 %
## 2       2021-01-05             Tuesday  4771.386    0.046 %    0.046 %
## 3       2021-01-06           Wednesday  5696.397    0.055 %    0.055 %
## 4       2021-01-07            Thursday  4363.409    0.042 %    0.042 %
## 5       2021-01-08              Friday  5237.420     0.05 %     0.05 %
## 6       2021-01-09            Saturday 24250.431    0.233 %    0.232 %
## 7       2021-01-10              Sunday  6396.443    0.061 %    0.061 %

## TBATS Model

# Data Modeling
# Fit a TBATS model with an explicitly fixed specification: Box-Cox off,
# use.beta on, damping off, one seasonal period of 6 with k.vector 2.
data_series<-ts(training_data)
# fitSpecificTBATS is reached through `:::`, i.e. it is taken from inside the
# forecast namespace rather than from the functions attached by library().
# NOTE(review): a seasonal period of 6 on daily data looks unusual (a weekly
# cycle would be 7) -- confirm this is intended.
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
## Training set 87.03885 3185.779 1884.273 NaN  Inf 0.0668003 -0.03100345

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 1.387155
##   Beta: 0.6003708
##   Damping Parameter: 1
##   Gamma-1 Values: -0.003017072
##   Gamma-2 Values: 0.002311937
## 
## Seed States:
##             [,1]
## [1,]   -8.306940
## [2,]   -8.246748
## [3,] -209.710848
## [4,]  -69.297703
## [5,]  -68.536786
## [6,]   80.298188
## 
## Sigma: 3185.779
## AIC: 8108.044

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using TBATS Model for  ==>  Covid 19 Infection cases in India "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Covid 19 Infection cases in India "

paste(MAPE_Mean_All,"%")

## [1] "0.082 % MAPE  7 days Covid 19 Infection cases in India  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Covid 19 Infection cases in India "

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-01-04                  Monday    10340469          10343250
## 2 2021-01-05                 Tuesday    10356844          10362372
## 3 2021-01-06               Wednesday    10374932          10381401
## 4 2021-01-07                Thursday    10395278          10401015
## 5 2021-01-08                  Friday    10413417          10419780
## 6 2021-01-09                Saturday    10413417          10438553
## 7 2021-01-10                  Sunday    10450284          10458268
##   MAPE_TBATS_Model
## 1          0.027 %
## 2          0.053 %
## 3          0.062 %
## 4          0.055 %
## 5          0.061 %
## 6          0.241 %
## 7          0.076 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2021-01-11          Monday             10477389
## 2 2021-01-12         Tuesday             10496418
## 3 2021-01-13       Wednesday             10516032
## 4 2021-01-14        Thursday             10534798
## 5 2021-01-15          Friday             10553571
## 6 2021-01-16        Saturday             10573285
## 7 2021-01-17          Sunday             10592407

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model

# Hold-out error metrics for the TBATS forecast: `testing_data` holds the
# actual values and `validation_forecast` the forecasts for the same days.
Error_tbats <- abs(testing_data - validation_forecast)  # absolute error of forecast (AEOF)
REOF_A_tbats1 <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # relative error of forecast, % of actual (REOF_A)
REOF_F_tbats <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # relative error of forecast, % of forecast (REOF_F)
correlation_tbats <- cor(testing_data, validation_forecast, method = c("pearson"))  # Pearson correlation between actual and predicted values
MSE_tbats <- sum(Error_tbats^2) / validation_data_days  # mean squared forecast error
RMSE_tbats <- sqrt(MSE_tbats)  # root mean squared forecast error
# Mean absolute deviation: average of the absolute errors. Taking abs() of the
# summed signed errors instead would let over- and under-forecasts cancel.
MAD_tbats <- sum(Error_tbats) / validation_data_days
AEOF_tbats <- c(Error_tbats)
REOF_A_tbats <- c(paste(round(REOF_A_tbats1, 3), "%"))
REOF_F_tbats <- c(paste(round(REOF_F_tbats, 3), "%"))  # numeric percentages are replaced by their formatted strings
data.frame(correlation_tbats, MSE_tbats, RMSE_tbats, MAPE_Mean_All, MAD_tbats)  # TBATS error summary: correlation, MSE, RMSE, mean MAPE, MAD

##   correlation_tbats MSE_tbats RMSE_tbats
## 1         0.9864285 121301378   11013.69
##                                             MAPE_Mean_All MAD_tbats
## 1 0.082 % MAPE  7 days Covid 19 Infection cases in India   8571.155

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2021-01-04              Monday   2781.479      0.027 %      0.027 %
## 2       2021-01-05             Tuesday   5528.161      0.053 %      0.053 %
## 3       2021-01-06           Wednesday   6468.634      0.062 %      0.062 %
## 4       2021-01-07            Thursday   5736.596      0.055 %      0.055 %
## 5       2021-01-08              Friday   6363.196      0.061 %      0.061 %
## 6       2021-01-09            Saturday  25136.215      0.241 %      0.241 %
## 7       2021-01-10              Sunday   7983.803      0.076 %      0.076 %

## Holt's linear trend


# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE       MASE      ACF1
## Training set -1484.96 4658.704 2534.105 NaN  Inf 0.08983782 0.6358872

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.2971 
## 
##   Smoothing parameters:
##     alpha = 0.9097 
##     beta  = 0.2077 
## 
##   Initial states:
##     l = -3.3681 
##     b = 0.0018 
## 
##   sigma:  0.4004
## 
##      AIC     AICc      BIC 
## 1501.374 1501.540 1520.901 
## 
## Training set error measures:
##                    ME     RMSE      MAE MPE MAPE       MASE      ACF1
## Training set -1484.96 4658.704 2534.105 NaN  Inf 0.08983782 0.6358872

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using holt Model for  ==>  Covid 19 Infection cases in India "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Covid 19 Infection cases in India "

paste(MAPE_Mean_All,"%")

## [1] "0.118 % MAPE  7 days Covid 19 Infection cases in India  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Covid 19 Infection cases in India "

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-01-04                  Monday    10340469         10344178
## 2 2021-01-05                 Tuesday    10356844         10364196
## 3 2021-01-06               Wednesday    10374932         10384241
## 4 2021-01-07                Thursday    10395278         10404313
## 5 2021-01-08                  Friday    10413417         10424412
## 6 2021-01-09                Saturday    10413417         10444539
## 7 2021-01-10                  Sunday    10450284         10464693
##   MAPE_holt_Model
## 1         0.036 %
## 2         0.071 %
## 3          0.09 %
## 4         0.087 %
## 5         0.106 %
## 6         0.299 %
## 7         0.138 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2021-01-11          Monday            10484874
## 2 2021-01-12         Tuesday            10505083
## 3 2021-01-13       Wednesday            10525318
## 4 2021-01-14        Thursday            10545582
## 5 2021-01-15          Friday            10565872
## 6 2021-01-16        Saturday            10586191
## 7 2021-01-17          Sunday            10606536

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
# Hold-out error metrics for Holt's linear trend forecast: `testing_data`
# holds the actual values and `validation_forecast` the forecasts for the
# same days.
Error_Holt <- abs(testing_data - validation_forecast)  # absolute error of forecast (AEOF)
REOF_A_Holt1 <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # relative error of forecast, % of actual (REOF_A)
REOF_F_Holt <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # relative error of forecast, % of forecast (REOF_F)
correlation_Holt <- cor(testing_data, validation_forecast, method = c("pearson"))  # Pearson correlation between actual and predicted values
# MSE and RMSE are kept in separate variables, matching the BATS and TBATS
# sections; assigning the MSE to RMSE_Holt would overwrite the real RMSE.
MSE_Holt <- sum(Error_Holt^2) / validation_data_days  # mean squared forecast error
RMSE_Holt <- sqrt(MSE_Holt)  # root mean squared forecast error
# Mean absolute deviation: average of the absolute errors. Taking abs() of the
# summed signed errors instead would let over- and under-forecasts cancel.
MAD_Holt <- sum(Error_Holt) / validation_data_days
AEOF_Holt <- c(Error_Holt)
REOF_A_Holt <- c(paste(round(REOF_A_Holt1, 3), "%"))
REOF_F_Holt <- c(paste(round(REOF_F_Holt, 3), "%"))  # numeric percentages are replaced by their formatted strings
REOF_A_Holt11 <- mean(abs(((testing_data - validation_forecast) / testing_data) * 100))  # unrounded mean of REOF_A
data.frame(correlation_Holt, MSE_Holt, RMSE_Holt, MAPE_Mean_All, MAD_Holt)  # Holt error summary: correlation, MSE, RMSE, mean MAPE, MAD

##   correlation_Holt RMSE_Holt RMSE_Holt.1
## 1        0.9856679 219023050   219023050
##                                             MAPE_Mean_All MAD_Holt
## 1 0.118 % MAPE  7 days Covid 19 Infection cases in India  12275.77

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2021-01-04              Monday  3709.251     0.036 %     0.036 %
## 2       2021-01-05             Tuesday  7351.894     0.071 %     0.071 %
## 3       2021-01-06           Wednesday  9308.749      0.09 %      0.09 %
## 4       2021-01-07            Thursday  9034.837     0.087 %     0.087 %
## 5       2021-01-08              Friday 10995.180     0.106 %     0.105 %
## 6       2021-01-09            Saturday 31121.798     0.299 %     0.298 %
## 7       2021-01-10              Sunday 14408.714     0.138 %     0.138 %

#Auto arima model
##################

require(tseries) # tseries supplies the stationarity tests used below (kpss.test, pp.test, adf.test)
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  Covid 19 Infection cases in India "

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 5.4104, Truncation lag parameter = 5, p-value = 0.01

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -1.3086, Truncation lag parameter = 5, p-value
## = 0.9824
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## Warning in adf.test(data_series): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -8.1613, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

ndiffs(data_series)    # estimate how many differences the series needs to become stationary (the data itself is not differenced here)

## [1] 2

#Taking the first difference
diff1_x1 <- diff(data_series)
# Title corrected from "1nd" to "1st". The col.*/cex.*/font.* arguments are
# kept as in the other plots; the recorded warning below shows autoplot()
# ignores them.
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main = "1st differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationarity of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Covid 19 Infection cases in India "

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 3.887, Truncation lag parameter = 5, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## Warning in pp.test(diff1_x1): p-value greater than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = 0.19125, Truncation lag parameter = 5, p-value
## = 0.99
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -0.60168, Lag order = 7, p-value = 0.977
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationarity of the second differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 Infection cases in India "

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.58181, Truncation lag parameter = 5, p-value = 0.02429

pp.test(diff2_x1)     # applay pp test after taking Second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -268.05, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -3.5242, Lag order = 7, p-value = 0.0405
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
# Search with stepwise and approximation both switched off, printing every
# candidate model (trace). TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
# NOTE(review): `test` is given all three names; presumably only the first
# ("kpss") is used -- confirm against the auto.arima documentation.
model1 <- auto.arima(data_series, stepwise = FALSE, approximation = FALSE, trace = TRUE, test = c("kpss", "adf", "pp"))  # applying auto arima

## 
##  ARIMA(0,2,0)                    : 6984.838
##  ARIMA(0,2,1)                    : 6986.09
##  ARIMA(0,2,2)                    : 6937.797
##  ARIMA(0,2,3)                    : 6937.621
##  ARIMA(0,2,4)                    : 6939.359
##  ARIMA(0,2,5)                    : 6925.974
##  ARIMA(1,2,0)                    : 6986.599
##  ARIMA(1,2,1)                    : 6970.3
##  ARIMA(1,2,2)                    : 6936.606
##  ARIMA(1,2,3)                    : 6935.194
##  ARIMA(1,2,4)                    : 6937.193
##  ARIMA(2,2,0)                    : 6946.158
##  ARIMA(2,2,1)                    : 6945.256
##  ARIMA(2,2,2)                    : 6937.019
##  ARIMA(2,2,3)                    : 6937.225
##  ARIMA(3,2,0)                    : 6948.202
##  ARIMA(3,2,1)                    : 6945.656
##  ARIMA(3,2,2)                    : 6935.153
##  ARIMA(4,2,0)                    : 6941.324
##  ARIMA(4,2,1)                    : 6931.411
##  ARIMA(5,2,0)                    : 6898.366
## 
## 
## 
##  Best model: ARIMA(5,2,0)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(5,2,0) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.0899  -0.3847  -0.1381  -0.1677  -0.3390
## s.e.   0.0491   0.0485   0.0519   0.0483   0.0489
## 
## sigma^2 estimated as 9243714:  log likelihood=-3443.07
## AIC=6898.13   AICc=6898.37   BIC=6921.53

#Make changes in the source of auto arima to run the best model
# Build a printable label for a fitted ARIMA object, e.g. "ARIMA(5,2,0)" or
# "ARIMA(1,0,1)(0,1,1)[12]".
#
# object  : fitted model; `$arma`, `$xreg` and `names($coef)` are read.
# padding : when FALSE (default) trailing spaces are stripped; when TRUE the
#           fixed-width filler text is kept.
# Returns a single character string.
arima.string <- function(object, padding = FALSE) {
  # Reorder `arma` so positions 1-3 are the values printed in ARIMA(.,.,.),
  # positions 4-6 the values printed in the seasonal (.,.,.) part, and
  # position 7 the period printed in [.].
  spec <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  period <- spec[7]
  is_periodic <- period > 1
  seasonal_sum <- sum(spec[4:6])

  label <- paste0("ARIMA(", spec[1], ",", spec[2], ",", spec[3], ")")

  if (is_periodic && seasonal_sum > 0) {
    label <- paste0(
      label, "(", spec[4], ",", spec[5], ",", spec[6], ")[", period, "]"
    )
  }

  # With padding on and no seasonal terms, add blanks in place of the
  # seasonal part; the filler grows with the number of digits in the period.
  if (padding && is_periodic && seasonal_sum == 0) {
    filler <- if (period <= 9) {
      " "
    } else if (period <= 99) {
      "  "
    } else {
      "   "
    }
    label <- paste0(label, "         ", filler)
  }

  coef_names <- names(object$coef)
  if (is.null(object$xreg)) {
    if ("constant" %in% coef_names || "intercept" %in% coef_names) {
      label <- paste(label, "with non-zero mean")
    } else if (spec[2] == 0 && spec[5] == 0) {
      label <- paste(label, "with zero mean    ")
    } else {
      label <- paste(label, "                  ")
    }
  } else if (NCOL(object$xreg) == 1 && "drift" %in% coef_names) {
    label <- paste(label, "with drift        ")
  } else {
    label <- paste("Regression with", label, "errors")
  }

  if (padding) label else gsub("[ ]*$", "", label)
}






# arima.string() is already defined inline in this script, so an external
# copy is optional: source it only when the file is actually present instead
# of aborting the run when it is missing.
if (file.exists("stringthearima.R")) {
  source("stringthearima.R")
}
# Take the three non-seasonal orders straight from the fitted object. These
# are the same `arma` positions arima.string() prints inside "ARIMA(.,.,.)".
# Cutting them out of the formatted label by fixed character positions only
# works while every order is a single digit.
bestmodel <- as.integer(model1$arma[c(1, 6, 2)])
bestmodel

## [1] 5 2 0

strtoi(bestmodel[3])

## [1] 0

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
# `lag.max` is an argument of acf()/pacf(). Placed inside the paste() that
# builds the title it only appended "20" to the title text and never limited
# the number of lags, so it is passed to the plotting functions directly.
acf(diff2_x1, lag.max = 20,
    xlab = paste("Time in  ", frequency, y_lab, sep = " "),
    col.main = "black", col.lab = "black", col.sub = "black",
    cex.main = 1, cex.lab = 1, cex.sub = 1, font.main = 4, font.lab = 4,
    ylab = y_lab,
    main = paste("ACF-2nd differenced series ", y_lab, sep = " "))    # autocorrelation function of the second-differenced series

pacf(diff2_x1, lag.max = 20,
     xlab = paste("Time in  ", frequency, y_lab, sep = " "),
     col.main = "black", col.lab = "black", col.sub = "black",
     cex.main = 1, cex.lab = 1, cex.sub = 1, font.main = 4, font.lab = 4,
     ylab = y_lab,
     main = paste("PACF-2nd differenced series ", y_lab, sep = " "))   # partial autocorrelation function of the second-differenced series

library(forecast)   # load the forecast package
x1_model1 <- arima(data_series, order=c(bestmodel)) # refit the order selected by auto.arima, used for forecasting
x1_model1  # print the fitted model

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.0899  -0.3847  -0.1381  -0.1677  -0.3390
## s.e.   0.0491   0.0485   0.0519   0.0483   0.0489
## 
## sigma^2 estimated as 9117088:  log likelihood = -3443.07,  aic = 6898.13

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  Covid 19 Infection cases in India "

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME     RMSE     MAE       MPE    MAPE       MASE        ACF1
## Training set 107.9783 3011.213 1797.98 0.5578941 2.58824 0.06374108 -0.02985419

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,2,0)
## Q* = 224.52, df = 5, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Covid 19 Infection cases in India "

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 450.83, df = 20, p-value < 2.2e-16

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 294.01, df = 2, p-value < 2.2e-16

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='black')

# Test data
# Hold-out check of the auto.arima fit: forecast past the end of the training
# series, keep the first `validation_data_days` forecasts and compare them with
# `testing_data` (presumably the last `validation_data_days` observations --
# it is defined earlier in the file).
# The status messages in this section used to say "bats Model" (copied from the
# BATS section); they now name the model that is actually being evaluated.

# Held-out actuals as a time series starting right after the training data.
x1_test <- ts(testing_data, start = (rows - validation_data_days + 1))
# Forecast the validation days plus the future forecast horizon in one go.
forecasting_auto_arima <- forecast(x1_model1, h = N_forecasting_days + validation_data_days)
validation_forecast <- head(forecasting_auto_arima$mean, validation_data_days)
# Absolute percentage error for each validation day, rounded to 3 decimals.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)
paste("MAPE % For ", validation_data_days, frequency, "by using auto arima Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE % For  7 days by using auto arima Model for  ==>  Covid 19 Infection cases in India "

# Mean of the daily percentage errors, formatted as a label.
MAPE_Mean_All <- paste(round(mean(MAPE_Per_Day), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
# Daily errors as "<value> %" strings (both names are kept; they hold the same text).
MAPE_auto_arima <- paste(round(MAPE_Per_Day, 3), "%")
MAPE_auto.arima_Model <- paste(MAPE_Per_Day, "%")
paste(" MAPE that's Error of Forecasting for ", validation_data_days, " days in auto arima Model for  ==> ", y_lab, sep = " ")

## [1] " MAPE that's Error of Forecasting for  7  days in auto arima Model for  ==>  Covid 19 Infection cases in India "

paste(MAPE_Mean_All, "%")

## [1] "0.049 % MAPE  7 days Covid 19 Infection cases in India  %"

paste("MAPE that's Error of Forecasting day by day for ", validation_data_days, " days in auto arima Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in auto arima Model for  ==>  Covid 19 Infection cases in India "

# Per-day table: date, weekday, actual value, forecast and percentage error.
data.frame(date_auto.arima = validation_dates, validation_data_by_name, actual_data = testing_data, forecasting_auto.arima = validation_forecast, MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2021-01-04                  Monday    10340469               10341229
## 2      2021-01-05                 Tuesday    10356844               10358921
## 3      2021-01-06               Wednesday    10374932               10377818
## 4      2021-01-07                Thursday    10395278               10397042
## 5      2021-01-08                  Friday    10413417               10416173
## 6      2021-01-09                Saturday    10413417               10435259
## 7      2021-01-10                  Sunday    10450284               10453991
##   MAPE_auto.arima_Model
## 1               0.007 %
## 2                0.02 %
## 3               0.028 %
## 4               0.017 %
## 5               0.026 %
## 6                0.21 %
## 7               0.035 %

# Future forecasts: the last `N_forecasting_days` values of the forecast mean,
# tabulated against the forecast dates and their weekday names.
data.frame(
  FD,
  forecating_date = forecasting_data_by_name,
  forecasting_by_auto.arima = tail(forecasting_auto_arima$mean, N_forecasting_days)
)

##           FD forecating_date forecasting_by_auto.arima
## 1 2021-01-11          Monday                  10472323
## 2 2021-01-12         Tuesday                  10490737
## 3 2021-01-13       Wednesday                  10509386
## 4 2021-01-14        Thursday                  10528112
## 5 2021-01-15          Friday                  10546917
## 6 2021-01-16        Saturday                  10565774
## 7 2021-01-17          Sunday                  10584518

# Base-graphics forecast plot with the held-out actuals drawn on top in red.
plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start = (rows - validation_data_days + 1))
lines(x1_test, lwd = 2, col = "red")

# ggplot-style version of the same forecast, kept in `graph4` and then printed.
graph4 <- autoplot(
  forecasting_auto_arima,
  xlab = paste("Time in  ", frequency, y_lab, sep = " "),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab
)
graph4

## Error of forecasting
# Hold-out error measures of the auto.arima forecasts (`validation_forecast`)
# against the validation-period actuals (`testing_data`).
Error_auto.arima <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, divided by actual (REOF_A), in %
REOF_F_auto.arima <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, divided by forecast (REOF_F), in %
correlation_auto.arima <- cor(testing_data, validation_forecast, method = c("pearson"))  # correlation coefficient between predicted and actual values
RMSE_auto.arima <- sqrt(sum((Error_auto.arima^2)) / validation_data_days)  # root mean square forecast error
MSE_auto.arima <- (sum((Error_auto.arima^2)) / validation_data_days)  # mean square forecast error
# Mean absolute deviation: average of the ABSOLUTE errors. The previous form,
# abs(sum(signed errors)) / n, let over- and under-forecasts cancel each other
# and could report a near-zero value for a poor forecast.
MAD_auto.arima <- sum(Error_auto.arima) / validation_data_days
AEOF_auto.arima <- c(Error_auto.arima)
# Percentage errors formatted as "<value> %" strings for display.
REOF_auto.arima1 <- c(paste(round(REOF_A_auto.arima, 3), "%"))
REOF_auto.arima2 <- c(paste(round(REOF_F_auto.arima, 3), "%"))
# Summary of the auto.arima validation errors: correlation, MSE, RMSE, MAPE, MAD.
data.frame(correlation_auto.arima, MSE_auto.arima, RMSE_auto.arima, MAPE_Mean_All, MAD_auto.arima)

##   correlation_auto.arima MSE_auto.arima RMSE_auto.arima
## 1              0.9853211       73531713        8575.063
##                                             MAPE_Mean_All MAD_auto.arima
## 1 0.049 % MAPE  7 days Covid 19 Infection cases in India        5113.028

# Per-day analysis of error: AEOF, REOF_A, REOF_F.
data.frame(validation_dates, Validation_day_name = validation_data_by_name, AEOF_auto.arima, REOF_A_auto.arima = REOF_auto.arima1, REOF_F_auto.arima = REOF_auto.arima2)

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2021-01-04              Monday        759.5676           0.007 %
## 2       2021-01-05             Tuesday       2077.3719            0.02 %
## 3       2021-01-06           Wednesday       2885.5444           0.028 %
## 4       2021-01-07            Thursday       1763.8331           0.017 %
## 5       2021-01-08              Friday       2756.3751           0.026 %
## 6       2021-01-09            Saturday      21841.5408            0.21 %
## 7       2021-01-10              Sunday       3706.9649           0.035 %
##   REOF_F_auto.arima
## 1           0.007 %
## 2            0.02 %
## 3           0.028 %
## 4           0.017 %
## 5           0.026 %
## 6           0.209 %
## 7           0.035 %

# SIR Model 
#install.packages("dplyr")
library(deSolve)
# Calibration window for the validation run: the `validation_data_days`
# observations that immediately precede the hold-out period. The bounds are
# derived from `validation_data_days` (they were hard-coded as rows-13 and
# rows-7, which is only right for a 7-day hold-out).
# NOTE(review): assumes `rows` is the number of observations in
# `original_data` -- it is defined earlier in the file; confirm.
first <- rows - 2 * validation_data_days + 1
secondr <- rows - validation_data_days
if (first < 1) {
  stop("Not enough observations for the SIR calibration window", call. = FALSE)
}
vector_SIR <- original_data[first:secondr]
Infected <- c(vector_SIR)
Day <- seq_along(Infected)  # day index 1..n of the calibration window
N <- Population  # total population of the country being modelled
# Right-hand side of the SIR differential equations, in the form expected by
# deSolve::ode().
#   time       - current time (not used by the equations)
#   state      - named vector providing S, I and R
#   parameters - named vector providing beta and gamma
# `N` is looked up by name: in `state`/`parameters` if present there, otherwise
# in the enclosing environment.
# Returns a list whose single element is c(dS, dI, dR).
SIR <- function(time, state, parameters) {
  values <- as.list(c(state, parameters))
  with(values, {
    new_infections <- beta/N * I * S  # flow from S to I
    recoveries <- gamma * I           # flow from I to R
    list(c(-new_infections, new_infections - recoveries, recoveries))
  })
}

# Initial state for the integration: everyone except the first observed count
# is susceptible, the first observed count is infected, nobody has recovered.
init <- c(
  S = N - Infected[1],
  I = Infected[1],
  R = 0
)

# Objective function for the calibration: residual sum of squares between the
# observed counts (`Infected`) and the model's I trajectory for a candidate
# (beta, gamma) pair given as an unnamed length-2 vector.
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  trajectory <- ode(y = init, times = Day, func = SIR, parms = parameters)
  # Column 3 of the solver output is taken as the fitted series: with time in
  # column 1 and the states in `init` order (S, I, R) this is I.
  residuals_I <- Infected - trajectory[, 3]
  sum(residuals_I^2)
}

# optimize with some sensible conditions
# Box-constrained search for (beta, gamma) minimising RSS, starting from
# (0.5, 0.5) with both parameters restricted to [0, 10].
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

# optim() reports success with convergence code 0; anything else means the
# parameters below may not be a minimum, so say so instead of using them
# silently.
if (Opt$convergence != 0) {
  warning("SIR calibration for the validation window did not converge: ",
          Opt$message, call. = FALSE)
}

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##      beta     gamma 
## 0.1215115 0.1184629

# beta     gamma 
# 0.6512503 0.4920399 

# Integrate the calibrated model over the calibration window AND the
# `validation_data_days` days that follow it. Previously the model was only
# integrated over `Day` (the calibration window itself), so the values compared
# with `testing_data` were the in-sample fit of the preceding days rather than
# projections for the validation days.
# NOTE: the recorded SIR validation tables further down in this file were
# produced before this change; re-run the script to refresh them.
validation_times <- seq_len(length(Infected) + validation_data_days)
out <- ode(y = init, times = validation_times, func = SIR, parms = Opt_par)

plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR<-data.frame(out)
# Keep only the projected days that line up with the hold-out observations.
validation_forecast <- tail(result_SIR$I, validation_data_days)

## Error of forecasting
# Hold-out error measures of the SIR values (`validation_forecast`) against the
# validation-period actuals (`testing_data`).
Error_SIR <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_SIR <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, divided by actual (REOF_A), in %
REOF_F_SIR <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, divided by forecast (REOF_F), in %
correlation_SIR <- cor(testing_data, validation_forecast, method = c("pearson"))  # correlation coefficient between predicted and actual values
RMSE_SIR <- sqrt(sum((Error_SIR^2)) / validation_data_days)  # root mean square forecast error
MSE_SIR <- (sum((Error_SIR^2)) / validation_data_days)  # mean square forecast error
# Mean absolute deviation: average of the ABSOLUTE errors. The previous form,
# abs(sum(signed errors)) / n, let over- and under-forecasts cancel each other.
MAD_SIR <- sum(Error_SIR) / validation_data_days
AEOF_SIR <- c(Error_SIR)
# From here on REOF_A_SIR holds display strings ("<value> %"); the numeric mean
# is kept separately in REOF_A_SIR1 for the model comparison.
REOF_A_SIR <- c(paste(round(REOF_A_SIR, 3), "%"))
REOF_A_SIR1 <- mean(abs(((testing_data - validation_forecast) / testing_data) * 100))

REOF_F_SIR <- c(paste(round(REOF_F_SIR, 3), "%"))
# Overwrites the MAPE label with the SIR value.
MAPE_Mean_All <- paste(round(mean(abs(((testing_data - validation_forecast) / testing_data) * 100)), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
# Summary of the SIR validation errors: correlation, MSE, RMSE, MAPE, MAD.
data.frame(correlation_SIR, MSE_SIR, RMSE_SIR, MAPE_Mean_All, MAD_SIR)

##   correlation_SIR     MSE_SIR RMSE_SIR
## 1       0.9839908 15717021519 125367.5
##                                             MAPE_Mean_All  MAD_SIR
## 1 1.205 % MAPE  7 days Covid 19 Infection cases in India  125169.5

# Per-day analysis of error: AEOF, REOF_A, REOF_F, plus the compared values.
data.frame(validation_dates, Validation_day_name = validation_data_by_name, AEOF_SIR, REOF_A_SIR, REOF_F_SIR, validation_forecast, testing_data)

##   validation_dates Validation_day_name AEOF_SIR REOF_A_SIR REOF_F_SIR
## 1       2021-01-04              Monday 132598.0    1.282 %    1.299 %
## 2       2021-01-05             Tuesday 127507.6    1.231 %    1.246 %
## 3       2021-01-06           Wednesday 125191.7    1.207 %    1.221 %
## 4       2021-01-07            Thursday 126203.2    1.214 %    1.229 %
## 5       2021-01-08              Friday 126084.2    1.211 %    1.226 %
## 6       2021-01-09            Saturday 108909.7    1.046 %    1.057 %
## 7       2021-01-10              Sunday 129691.9    1.241 %    1.257 %
##   validation_forecast testing_data
## 1            10207871     10340469
## 2            10229336     10356844
## 3            10249740     10374932
## 4            10269075     10395278
## 5            10287333     10413417
## 6            10304507     10413417
## 7            10320592     10450284

## forecasting by SIR model
# Calibration data for the forecasting run: the most recent
# `validation_data_days` observations of the series.

N <- Population  # population size used to scale the contact term
Infected <- c(tail(original_data, n = validation_data_days))
Day <- seq(1, length(Infected))  # day index of each calibration observation

# SIR derivative function for deSolve::ode() (same equations as the definition
# used for the validation run).
#   time       - current time (not used by the equations)
#   state      - named vector providing S, I and R
#   parameters - named vector providing beta and gamma
# `N` is resolved by name from `state`/`parameters` if present, otherwise from
# the enclosing environment.
# Returns list(c(dS, dI, dR)).
SIR <- function(time, state, parameters) {
  env <- as.list(c(state, parameters))
  with(env, {
    infection_flow <- beta/N * I * S
    recovery_flow <- gamma * I
    derivs <- c(-infection_flow, infection_flow - recovery_flow, recovery_flow)
    list(derivs)
  })
}

# Starting state: first observation infected, the rest of the population
# susceptible, none recovered.
first_obs <- Infected[1]
init <- c(S = N - first_obs, I = first_obs, R = 0)

# Sum of squared differences between the observed counts and the model's
# I trajectory for a candidate c(beta, gamma).
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  sim <- ode(y = init, times = Day, func = SIR, parms = parameters)
  modelled <- sim[, 3]  # third column: I, given time first and `init` ordered S, I, R
  sum((Infected - modelled)^2)
}

# optimize with some sensible conditions
# Box-constrained search for (beta, gamma) minimising RSS on the most recent
# window, starting from (0.5, 0.5) with both parameters restricted to [0, 10].
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"

# The recorded run above ended with an optimiser error, yet its parameters were
# used without comment. optim() signals success with convergence code 0, so
# surface any other outcome as a warning.
if (Opt$convergence != 0) {
  warning("SIR calibration for the forecasting window did not converge: ",
          Opt$message, call. = FALSE)
}

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##      beta     gamma 
## 0.1240022 0.1211474

# beta     gamma 
# 0.6512503 0.4920399 

# Integrate the calibrated model over the calibration window AND the next
# `N_forecasting_days` days. Previously the model was only integrated over
# `Day`, so the table labelled with the forecast dates (`FD`) actually listed
# the fitted values for the calibration days (its first row was the initial
# observation).
# NOTE: the recorded forecasting_by_SIR table below was produced before this
# change; re-run the script to refresh it.
forecast_times <- seq_len(length(Infected) + N_forecasting_days)
out <- ode(y = init, times = forecast_times, func = SIR, parms = Opt_par)

plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR <-data.frame(out)
# Report only the projected days that follow the calibration window.
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_SIR=tail(result_SIR$I, N_forecasting_days))

##           FD forecating_date forecasting_by_SIR
## 1 2021-01-11          Monday           10340469
## 2 2021-01-12         Tuesday           10359861
## 3 2021-01-13       Wednesday           10378108
## 4 2021-01-14        Thursday           10395202
## 5 2021-01-15          Friday           10411137
## 6 2021-01-16        Saturday           10425907
## 7 2021-01-17          Sunday           10439506

# Choose Best model by least error
# Collect one validation MAPE figure per model (M1..M5), announcing each model
# as it is processed, then tabulate the per-day errors.

paste(
  "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ",
  y_lab,
  sep = " "
)

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  Covid 19 Infection cases in India "

# M1: mean of the BATS daily errors.
M1 <- mean(REOF_A_bats)

paste(
  "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==> ",
  y_lab,
  sep = " "
)

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==>  Covid 19 Infection cases in India "

# M2: mean of the TBATS daily errors.
M2 <- mean(REOF_A_tbats1)

paste(
  "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==> ",
  y_lab,
  sep = " "
)

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==>  Covid 19 Infection cases in India "

# M3: Holt figure, taken as-is from REOF_A_Holt11.
M3 <- REOF_A_Holt11

paste(
  "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==> ",
  y_lab,
  sep = " "
)

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==>  Covid 19 Infection cases in India "

# M4: mean of the auto.arima daily errors.
M4 <- mean(REOF_A_auto.arima)
paste(
  "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==> ",
  y_lab,
  sep = " "
)

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==>  Covid 19 Infection cases in India "

# M5: SIR figure, taken as-is from REOF_A_SIR1.
M5 <- REOF_A_SIR1

paste(
  "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==> ",
  y_lab,
  sep = " "
)

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==>  Covid 19 Infection cases in India "

# Per-day errors of the BATS, TBATS, Holt and auto.arima models side by side.
data.frame(
  validation_dates,
  forecating_date = forecasting_data_by_name,
  MAPE_bats_error = REOF_A_bats,
  MAPE_TBATS_error = REOF_A_tbats1,
  MAPE_Holt_error = REOF_A_Holt1,
  MAPE_autoarima_error = REOF_A_auto.arima
)

##   validation_dates forecating_date MAPE_bats_error MAPE_TBATS_error
## 1       2021-01-04          Monday      0.02063131       0.02689896
## 2       2021-01-05         Tuesday      0.04606988       0.05337689
## 3       2021-01-06       Wednesday      0.05490539       0.06234869
## 4       2021-01-07        Thursday      0.04197491       0.05518463
## 5       2021-01-08          Friday      0.05029492       0.06110575
## 6       2021-01-09        Saturday      0.23287679       0.24138297
## 7       2021-01-10          Sunday      0.06120831       0.07639796
##   MAPE_Holt_error MAPE_autoarima_error
## 1      0.03587120          0.007345582
## 2      0.07098585          0.020057963
## 3      0.08972347          0.027812659
## 4      0.08691290          0.016967638
## 5      0.10558667          0.026469458
## 6      0.29886250          0.209744225
## 7      0.13787868          0.035472384

# Gather the five model scores and keep the smallest one.
recommend_Model <- c(M1, M2, M3, M4, M5)
best_recommended_model <- min(recommend_Model)
paste(
  "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==> ",
  y_lab,
  sep = " "
)

## [1] "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==>  Covid 19 Infection cases in India "

# Show the winning (lowest) score.
best_recommended_model

## [1] 0.04912427

# Announce which model(s) reach the minimum score. Each check stays a
# top-level `if` so that the resulting string is printed; a model is named
# when its score is not above the minimum.
paste("Best Model For Forecasting  ==> ", y_lab, sep = " ")

## [1] "Best Model For Forecasting  ==>  Covid 19 Infection cases in India "

if (M1 <= best_recommended_model) {
  paste("System Recommend Bats Model That's better  For forecasting==> ", y_lab, sep = " ")
}
if (M2 <= best_recommended_model) {
  paste("System Recommend  That's better TBATS  For forecasting ==> ", y_lab, sep = " ")
}
if (M3 <= best_recommended_model) {
  paste("System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==> ", y_lab, sep = " ")
}
if (M4 <= best_recommended_model) {
  paste("System Recommend auto arima Model  That's better  For forecasting ==> ", y_lab, sep = " ")
}

## [1] "System Recommend auto arima Model  That's better  For forecasting ==>  Covid 19 Infection cases in India "

if (M5 <= best_recommended_model) {
  paste("System Recommend SIR Model  That's better  For forecasting ==> ", y_lab, sep = " ")
}

# Closing status messages.
message(
  "System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",
  y_lab,
  sep = " "
)

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>Covid 19 Infection cases in India

message(
  " Thank you for using our System For Modelling  ==> ",
  y_lab,
  sep = " "
)

##  Thank you for using our System For Modelling  ==> Covid 19 Infection cases in India