New Techniques for Forecasting COVID-19 in the Top 10 Most-Infected Countries (USA)

By

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb Mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South Ural State University, Chelyabinsk, Russian Federation

# Imports
library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

##

library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

require(markovchain)

## Loading required package: markovchain

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

# ---- Input data and run configuration ----
# Population of the USA = 332049624
# Source file: WHO COVID-19 global table data, exported
# January 11th 2021 at 11:53:00 AM (.csv).
# NOTE(review): absolute, machine-specific path -- adjust before running
# this script on another machine.
Full_original_data <- read.csv("F:/Phd/COVID 19 in 2021/WHO_data.csv")
# View() opens a GUI data viewer; only call it in interactive sessions so
# batch / knitr runs do not depend on a display being available.
if (interactive()) {
  View(Full_original_data)
}
y_lab <- "Covid 19 Infection cases in USA "   # input name of data (plot label)
Actual_date_interval <- c("2020/01/03", "2021/01/10")     # observed date range
Forecast_date_interval <- c("2021/01/11", "2021/01/17")   # dates to forecast
validation_data_days <- 7   # trailing observations held out for validation
# NOTE: this variable shares its name with stats::frequency(); calls such as
# frequency(x) still resolve to the function.
frequency <- "days"
Population <- 332049624

# Data Preparation & calculation of some descriptive statistics
# %in% never yields NA (unlike ==), so rows whose Country is missing are
# dropped instead of being returned as all-NA rows.
Covid_data <- Full_original_data[
  Full_original_data$Country %in% "United States of America",
]
original_data <- Covid_data$Cumulative_cases
if (interactive()) {
  View(original_data)   # GUI viewer: skipped in batch / knitr runs
}
summary(original_data)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0   282186  2900335  4996211  7458574 21761186

sd(original_data)  # standard deviation

## [1] 5544147

skewness(original_data)  # coefficient of skewness

## [1] 1.237707

kurtosis(original_data)   # coefficient of kurtosis

## [1] 3.74154

rows <- NROW(original_data)
# The hold-out window must leave at least one training observation.
stopifnot(validation_data_days >= 0, validation_data_days < rows)
# head()/tail() avoid the 1:n indexing pitfall (1:0 is c(1, 0), not empty).
training_data <- head(original_data, rows - validation_data_days)
testing_data <- tail(original_data, validation_data_days)
# Calendar dates of the observed series
AD <- fulldate <- seq(
  as.Date(Actual_date_interval[1]),
  as.Date(Actual_date_interval[2]),
  by = frequency
)
if (length(AD) != rows) {
  warning(
    "Actual_date_interval spans ", length(AD), " dates but the data has ",
    rows, " observations; validation dates may be misaligned.",
    call. = FALSE
  )
}
# Calendar dates to forecast
FD <- seq(
  as.Date(Forecast_date_interval[1]),
  as.Date(Forecast_date_interval[2]),
  by = frequency
)
N_forecasting_days <- length(FD)
validation_dates <- tail(AD, validation_data_days)
validation_data_by_name <- weekdays(validation_dates)
forecasting_data_by_name <- weekdays(FD)

## BATS model
# Data Modeling: wrap the training observations in a ts object and plot them
data_series <- ts(training_data)
autoplot(
  data_series,
  xlab = paste("Time in  ", frequency),
  ylab = y_lab,
  main = paste("Actual Data :", y_lab)
)

# Fit the BATS model and report its accuracy on the training data
model_bats <- bats(data_series)
accuracy(model_bats)

##                    ME     RMSE      MAE  MPE MAPE      MASE        ACF1
## Training set 1544.871 15778.47 7448.289 -Inf  Inf 0.1364783 0.006312369

# Print the fitted model and its parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 1.118853
##   Beta: 0.3392747
##   Damping Parameter: 1
## 
## Seed States:
##           [,1]
## [1,] -118.8717
## [2,]  212.7390
## 
## Sigma: 15778.47
## AIC: 9270.407

# Plot the fitted BATS model
plot(
  model_bats,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4
)

# Testing Data Evaluation
# Forecast over the validation window plus the future horizon; the first
# validation_data_days values are compared with the held-out data.
forecasting_bats <- predict(
  model_bats,
  h = N_forecasting_days + validation_data_days
)
validation_forecast <- head(forecasting_bats$mean, validation_data_days)
# Absolute percentage error per validation day, rounded to 3 decimals
MAPE_Per_Day <- round(
  100 * abs((testing_data - validation_forecast) / testing_data),
  3
)
paste("MAPE % For ", validation_data_days, frequency, "by using bats Model for  ==> ", y_lab)

## [1] "MAPE % For  7 days by using bats Model for  ==>  Covid 19 Infection cases in USA "

MAPE_Mean_All <- paste(
  round(mean(MAPE_Per_Day), 3), "% MAPE ",
  validation_data_days, frequency, y_lab
)
# MAPE_Per_Day is already rounded to 3 decimals, so both labels are equal
MAPE_bats_Model <- paste(MAPE_Per_Day, "%")
MAPE_bats <- MAPE_bats_Model
paste(" MAPE that's Error of Forecasting for ", validation_data_days, " days in bats Model for  ==> ", y_lab)

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 Infection cases in USA "

paste(MAPE_Mean_All, "%")

## [1] "0.972 % MAPE  7 days Covid 19 Infection cases in USA  %"

paste("MAPE that's Error of Forecasting day by day for ", validation_data_days, " days in bats Model for  ==> ", y_lab)

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 Infection cases in USA "

# Validation table: date, weekday, actual value, forecast and daily error
data.frame(
  date_bats = validation_dates,
  validation_data_by_name,
  actual_data = testing_data,
  forecasting_bats = validation_forecast,
  MAPE_bats_Model
)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-01-04                  Monday    20258725         20161996
## 2 2021-01-05                 Tuesday    20470169         20354567
## 3 2021-01-06               Wednesday    20643544         20547137
## 4 2021-01-07                Thursday    20870913         20739708
## 5 2021-01-08                  Friday    21170475         20932278
## 6 2021-01-09                Saturday    21447670         21124849
## 7 2021-01-10                  Sunday    21761186         21317419
##   MAPE_bats_Model
## 1         0.477 %
## 2         0.565 %
## 3         0.467 %
## 4         0.629 %
## 5         1.125 %
## 6         1.505 %
## 7         2.039 %

# Forecast table for the future dates: the last N_forecasting_days values of
# the BATS forecast mean, labelled with their weekday.
data.frame(
  FD,
  forecating_date = forecasting_data_by_name,
  forecasting_by_bats = tail(forecasting_bats$mean, N_forecasting_days)
)

##           FD forecating_date forecasting_by_bats
## 1 2021-01-11          Monday            21509990
## 2 2021-01-12         Tuesday            21702560
## 3 2021-01-13       Wednesday            21895131
## 4 2021-01-14        Thursday            22087701
## 5 2021-01-15          Friday            22280272
## 6 2021-01-16        Saturday            22472842
## 7 2021-01-17          Sunday            22665413

# Base-graphics forecast plot with the held-out observations drawn in red
plot(forecasting_bats)
x1_test <- ts(testing_data, start = rows - validation_data_days + 1)
lines(x1_test, col = "red", lwd = 2)

# ggplot2 version of the forecast plot, stored and then printed
graph1 <- autoplot(
  forecasting_bats,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab
)
graph1

## Error of forecasting (BATS model)
Error_bats <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
# Relative error of forecast, divided by the actual value (REOF_A), in %
REOF_A_bats <- abs(((testing_data - validation_forecast) / testing_data) * 100)
# Relative error of forecast, divided by the forecast value (REOF_F), in %
REOF_F_bats <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)
# Pearson correlation between actual and predicted values
correlation_bats <- cor(testing_data, validation_forecast, method = "pearson")
RMSE_bats <- sqrt(sum(Error_bats^2) / validation_data_days)  # root mean square forecast error
MSE_bats <- sum(Error_bats^2) / validation_data_days         # mean square forecast error
# Mean absolute deviation: average of the absolute errors. The previous
# abs(sum(errors)) / n let errors of opposite sign cancel each other out.
MAD_bats <- sum(Error_bats) / validation_data_days
AEOF_bats <- c(Error_bats)
REOF_Abats <- paste(round(REOF_A_bats, 3), "%")
REOF_Fbats <- paste(round(REOF_F_bats, 3), "%")
# Summary of the BATS validation error: correlation, MSE, RMSE, MAPE, MAD
data.frame(correlation_bats, MSE_bats, RMSE_bats, MAPE_Mean_All, MAD_bats)

##   correlation_bats    MSE_bats RMSE_bats
## 1        0.9949353 58158566307  241160.9
##                                           MAPE_Mean_All MAD_bats
## 1 0.972 % MAPE  7 days Covid 19 Infection cases in USA  206389.8

# Day-by-day error table: AEOF, REOF_A, REOF_F
data.frame(
  validation_dates,
  Validation_day_name = validation_data_by_name,
  AEOF_bats,
  REOF_Abats,
  REOF_Fbats
)

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2021-01-04              Monday  96728.89    0.477 %     0.48 %
## 2       2021-01-05             Tuesday 115602.39    0.565 %    0.568 %
## 3       2021-01-06           Wednesday  96406.89    0.467 %    0.469 %
## 4       2021-01-07            Thursday 131205.38    0.629 %    0.633 %
## 5       2021-01-08              Friday 238196.88    1.125 %    1.138 %
## 6       2021-01-09            Saturday 322821.38    1.505 %    1.528 %
## 7       2021-01-10              Sunday 443766.87    2.039 %    2.082 %

## TBATS Model

# Data Modeling
data_series <- ts(training_data)
# NOTE(review): fitSpecificTBATS is reached through ':::', i.e. it is an
# unexported forecast function and may change between package versions.
model_TBATS <- forecast:::fitSpecificTBATS(
  data_series,
  use.box.cox = FALSE,
  use.beta = TRUE,
  seasonal.periods = 6,
  use.damping = FALSE,
  k.vector = 2
)
accuracy(model_TBATS)  # accuracy on the training data

##                    ME     RMSE     MAE MPE MAPE      MASE        ACF1
## Training set 1538.606 15686.37 7886.49 NaN  Inf 0.1445076 0.005151201

# Print the fitted model and its parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 1.117863
##   Beta: 0.3415187
##   Damping Parameter: 1
##   Gamma-1 Values: -0.001567755
##   Gamma-2 Values: 0.0001247185
## 
## Seed States:
##             [,1]
## [1,]    81.33683
## [2,]   140.94673
## [3,]    61.83254
## [4,]  -607.04111
## [5,] -1449.16443
## [6,]   552.40239
## 
## Sigma: 15686.37
## AIC: 9278.11

# Plot the fitted TBATS model
plot(
  model_TBATS,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab
)

# Testing Data Evaluation
# Forecast over the validation window plus the future horizon; the first
# validation_data_days values are compared with the held-out data.
forecasting_tbats <- predict(
  model_TBATS,
  h = N_forecasting_days + validation_data_days
)
validation_forecast <- head(forecasting_tbats$mean, validation_data_days)
# Absolute percentage error per validation day, rounded to 3 decimals
MAPE_Per_Day <- round(
  100 * abs((testing_data - validation_forecast) / testing_data),
  3
)
paste("MAPE % For ", validation_data_days, frequency, "by using TBATS Model for  ==> ", y_lab)

## [1] "MAPE % For  7 days by using TBATS Model for  ==>  Covid 19 Infection cases in USA "

MAPE_Mean_All <- paste(
  round(mean(MAPE_Per_Day), 3), "% MAPE ",
  validation_data_days, frequency, y_lab
)
# MAPE_Per_Day is already rounded to 3 decimals, so both labels are equal
MAPE_TBATS_Model <- paste(MAPE_Per_Day, "%")
MAPE_TBATS <- MAPE_TBATS_Model
paste(" MAPE that's Error of Forecasting for ", validation_data_days, " days in TBATS Model for  ==> ", y_lab)

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Covid 19 Infection cases in USA "

paste(MAPE_Mean_All, "%")

## [1] "0.961 % MAPE  7 days Covid 19 Infection cases in USA  %"

paste("MAPE that's Error of Forecasting day by day for ", validation_data_days, " days in TBATS Model for  ==> ", y_lab)

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Covid 19 Infection cases in USA "

# Validation table: date, weekday, actual value, forecast and daily error
data.frame(
  date_TBATS = validation_dates,
  validation_data_by_name,
  actual_data = testing_data,
  forecasting_TBATS = validation_forecast,
  MAPE_TBATS_Model
)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-01-04                  Monday    20258725          20162694
## 2 2021-01-05                 Tuesday    20470169          20355038
## 3 2021-01-06               Wednesday    20643544          20548519
## 4 2021-01-07                Thursday    20870913          20744013
## 5 2021-01-08                  Friday    21170475          20936503
## 6 2021-01-09                Saturday    21447670          21127621
## 7 2021-01-10                  Sunday    21761186          21320609
##   MAPE_TBATS_Model
## 1          0.474 %
## 2          0.562 %
## 3           0.46 %
## 4          0.608 %
## 5          1.105 %
## 6          1.492 %
## 7          2.025 %

# Forecast table for the future dates: the last N_forecasting_days values of
# the TBATS forecast mean, labelled with their weekday.
data.frame(
  FD,
  forecating_date = forecasting_data_by_name,
  forecasting_by_TBATS = tail(forecasting_tbats$mean, N_forecasting_days)
)

##           FD forecating_date forecasting_by_TBATS
## 1 2021-01-11          Monday             21512953
## 2 2021-01-12         Tuesday             21706433
## 3 2021-01-13       Wednesday             21901928
## 4 2021-01-14        Thursday             22094417
## 5 2021-01-15          Friday             22285536
## 6 2021-01-16        Saturday             22478523
## 7 2021-01-17          Sunday             22670867

# Base-graphics forecast plot with the held-out observations drawn in red
plot(forecasting_tbats)
x1_test <- ts(testing_data, start = rows - validation_data_days + 1)
lines(x1_test, col = "red", lwd = 2)

# ggplot2 version of the forecast plot, stored and then printed
graph2 <- autoplot(
  forecasting_tbats,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab
)
graph2

## Error of forecasting TBATS Model

Error_tbats <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
# Relative error of forecast, divided by the actual value (REOF_A), in %
REOF_A_tbats1 <- abs(((testing_data - validation_forecast) / testing_data) * 100)
# Relative error of forecast, divided by the forecast value (REOF_F), in %
REOF_F_tbats <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)
# Pearson correlation between actual and predicted values
correlation_tbats <- cor(testing_data, validation_forecast, method = "pearson")
RMSE_tbats <- sqrt(sum(Error_tbats^2) / validation_data_days)  # root mean square forecast error
MSE_TBATS <- sum(Error_tbats^2) / validation_data_days         # mean square forecast error
# Mean absolute deviation: average of the absolute errors. The previous
# abs(sum(errors)) / n let errors of opposite sign cancel each other out.
MAD_tbats <- sum(Error_tbats) / validation_data_days
AEOF_tbats <- c(Error_tbats)
REOF_A_tbats <- paste(round(REOF_A_tbats1, 3), "%")
# The numeric REOF_F values are replaced by their formatted "%" labels
REOF_F_tbats <- paste(round(REOF_F_tbats, 3), "%")
# Summary of the TBATS validation error: correlation, MSE, RMSE, MAPE, MAD
data.frame(correlation_tbats, MSE_TBATS, RMSE_tbats, MAPE_Mean_All, MAD_tbats)

##   correlation_tbats   MSE_TBATS RMSE_tbats
## 1         0.9947608 56984714520   238714.7
##                                           MAPE_Mean_All MAD_tbats
## 1 0.961 % MAPE  7 days Covid 19 Infection cases in USA     203955

# Day-by-day error table: AEOF, REOF_A, REOF_F
data.frame(
  validation_dates,
  Validation_day_name = validation_data_by_name,
  AEOF_tbats,
  REOF_A_tbats,
  REOF_F_tbats
)

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2021-01-04              Monday   96030.90      0.474 %      0.476 %
## 2       2021-01-05             Tuesday  115130.85      0.562 %      0.566 %
## 3       2021-01-06           Wednesday   95025.14       0.46 %      0.462 %
## 4       2021-01-07            Thursday  126899.75      0.608 %      0.612 %
## 5       2021-01-08              Friday  233972.39      1.105 %      1.118 %
## 6       2021-01-09            Saturday  320048.58      1.492 %      1.515 %
## 7       2021-01-10              Sunday  440577.41      2.025 %      2.066 %

## Holt's linear trend


# Data Modeling
data_series <- ts(training_data)
# holt() fits the model and forecasts h steps ahead in a single call;
# lambda = "auto" asks for an automatically chosen Box-Cox parameter.
model_holt <- holt(
  data_series,
  h = N_forecasting_days + validation_data_days,
  lambda = "auto"
)
accuracy(model_holt)  # accuracy on the training data

##                   ME     RMSE      MAE MPE MAPE      MASE      ACF1
## Training set 466.398 16007.22 7811.952 NaN  Inf 0.1431419 0.1840721

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.5231 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.2768 
## 
##   Initial states:
##     l = -2.3769 
##     b = -0.0404 
## 
##   sigma:  12.574
## 
##      AIC     AICc      BIC 
## 4031.462 4031.628 4050.989 
## 
## Training set error measures:
##                   ME     RMSE      MAE MPE MAPE      MASE      ACF1
## Training set 466.398 16007.22 7811.952 NaN  Inf 0.1431419 0.1840721

# Testing Data Evaluation
# Forecast over the validation window plus the future horizon; the first
# validation_data_days values are compared with the held-out data.
forecasting_holt <- predict(
  model_holt,
  h = N_forecasting_days + validation_data_days,
  lambda = "auto"
)
validation_forecast <- head(forecasting_holt$mean, validation_data_days)
# Absolute percentage error per validation day, rounded to 3 decimals
MAPE_Per_Day <- round(
  100 * abs((testing_data - validation_forecast) / testing_data),
  3
)
paste("MAPE % For ", validation_data_days, frequency, "by using holt Model for  ==> ", y_lab)

## [1] "MAPE % For  7 days by using holt Model for  ==>  Covid 19 Infection cases in USA "

MAPE_Mean_All <- paste(
  round(mean(MAPE_Per_Day), 3), "% MAPE ",
  validation_data_days, frequency, y_lab
)
# MAPE_Per_Day is already rounded to 3 decimals, so both labels are equal
MAPE_holt_Model <- paste(MAPE_Per_Day, "%")
MAPE_holt <- MAPE_holt_Model
paste(" MAPE that's Error of Forecasting for ", validation_data_days, " days in holt Model for  ==> ", y_lab)

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Covid 19 Infection cases in USA "

paste(MAPE_Mean_All, "%")

## [1] "0.811 % MAPE  7 days Covid 19 Infection cases in USA  %"

paste("MAPE that's Error of Forecasting day by day for ", validation_data_days, " days in holt Model for  ==> ", y_lab)

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Covid 19 Infection cases in USA "

# Validation table: date, weekday, actual value, forecast and daily error
data.frame(
  date_holt = validation_dates,
  validation_data_by_name,
  actual_data = testing_data,
  forecasting_holt = validation_forecast,
  MAPE_holt_Model
)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-01-04                  Monday    20258725         20172449
## 2 2021-01-05                 Tuesday    20470169         20371412
## 3 2021-01-06               Wednesday    20643544         20571306
## 4 2021-01-07                Thursday    20870913         20772131
## 5 2021-01-08                  Friday    21170475         20973886
## 6 2021-01-09                Saturday    21447670         21176571
## 7 2021-01-10                  Sunday    21761186         21380185
##   MAPE_holt_Model
## 1         0.426 %
## 2         0.482 %
## 3          0.35 %
## 4         0.473 %
## 5         0.929 %
## 6         1.264 %
## 7         1.751 %

# Forecast table for the future dates: the last N_forecasting_days values of
# the Holt forecast mean, labelled with their weekday.
data.frame(
  FD,
  forecating_date = forecasting_data_by_name,
  forecasting_by_holt = tail(forecasting_holt$mean, N_forecasting_days)
)

##           FD forecating_date forecasting_by_holt
## 1 2021-01-11          Monday            21584728
## 2 2021-01-12         Tuesday            21790199
## 3 2021-01-13       Wednesday            21996599
## 4 2021-01-14        Thursday            22203926
## 5 2021-01-15          Friday            22412181
## 6 2021-01-16        Saturday            22621362
## 7 2021-01-17          Sunday            22831470

# Base-graphics forecast plot with the held-out observations drawn in red
plot(forecasting_holt)
x1_test <- ts(testing_data, start = rows - validation_data_days + 1)
lines(x1_test, col = "red", lwd = 2)

# ggplot2 version of the forecast plot, stored and then printed
graph3 <- autoplot(
  forecasting_holt,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab
)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
# Relative error of forecast, divided by the actual value (REOF_A), in %
REOF_A_Holt1 <- abs(((testing_data - validation_forecast) / testing_data) * 100)
# Relative error of forecast, divided by the forecast value (REOF_F), in %
REOF_F_Holt <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)
# Pearson correlation between actual and predicted values
correlation_Holt <- cor(testing_data, validation_forecast, method = "pearson")
RMSE_Holt <- sqrt(sum(Error_Holt^2) / validation_data_days)  # root mean square forecast error
MSE_HOLT <- sum(Error_Holt^2) / validation_data_days         # mean square forecast error
# Mean absolute deviation: average of the absolute errors. The previous
# abs(sum(errors)) / n let errors of opposite sign cancel each other out.
MAD_Holt <- sum(Error_Holt) / validation_data_days
AEOF_Holt <- c(Error_Holt)
REOF_A_Holt <- paste(round(REOF_A_Holt1, 3), "%")
# The numeric REOF_F values are replaced by their formatted "%" labels
REOF_F_Holt <- paste(round(REOF_F_Holt, 3), "%")
# Unrounded mean absolute percentage error over the validation window
REOF_A_Holt11 <- mean(abs(((testing_data - validation_forecast) / testing_data) * 100))
# Summary of the Holt validation error: correlation, MSE, RMSE, MAPE, MAD
data.frame(correlation_Holt, MSE_HOLT, RMSE_Holt, MAPE_Mean_All, MAD_Holt)

##   correlation_Holt    MSE_HOLT RMSE_Holt
## 1        0.9953134 41353763826  203356.2
##                                           MAPE_Mean_All MAD_Holt
## 1 0.811 % MAPE  7 days Covid 19 Infection cases in USA  172105.9

# Day-by-day error table: AEOF, REOF_A, REOF_F
data.frame(
  validation_dates,
  Validation_day_name = validation_data_by_name,
  AEOF_Holt,
  REOF_A_Holt,
  REOF_F_Holt
)

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2021-01-04              Monday  86276.07     0.426 %     0.428 %
## 2       2021-01-05             Tuesday  98756.83     0.482 %     0.485 %
## 3       2021-01-06           Wednesday  72237.59      0.35 %     0.351 %
## 4       2021-01-07            Thursday  98781.79     0.473 %     0.476 %
## 5       2021-01-08              Friday 196588.83     0.929 %     0.937 %
## 6       2021-01-09            Saturday 271099.14     1.264 %      1.28 %
## 7       2021-01-10              Sunday 381001.12     1.751 %     1.782 %

# Auto ARIMA model
##################

# tseries provides the stationarity tests used below (kpss.test, pp.test,
# adf.test). library() fails loudly if the package is missing, whereas
# require() would only return FALSE and let later calls fail obscurely.
library(tseries)
paste("tests For Check Stationarity in series  ==> ", y_lab)

## [1] "tests For Check Stationarity in series  ==>  Covid 19 Infection cases in USA "

kpss.test(data_series) # apply the KPSS test for level stationarity

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 5.323, Truncation lag parameter = 5, p-value = 0.01

pp.test(data_series)   # apply the Phillips-Perron unit root test

## Warning in pp.test(data_series): p-value greater than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = 4.7393, Truncation lag parameter = 5, p-value
## = 0.99
## alternative hypothesis: stationary

adf.test(data_series)  # apply the Augmented Dickey-Fuller test

## Warning in adf.test(data_series): p-value greater than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -0.27305, Lag order = 7, p-value = 0.99
## alternative hypothesis: stationary

ndiffs(data_series)    # estimate how many differences are needed (the data itself is not differenced here)

## [1] 2

# Taking the first difference
diff1_x1 <- diff(data_series)
# Only arguments autoplot() understands are passed: the base-graphics
# settings (col.main, cex.lab, font.main, ...) were ignored with an
# "Ignoring unknown parameters" warning.
autoplot(
  diff1_x1,
  xlab = paste("Time in  ", frequency, y_lab),
  ylab = y_lab,
  main = "1st differenced series"
)

## Testing the stationarity of the first differenced series
paste("tests For Check Stationarity in series after taking first differences in  ==> ", y_lab)

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Covid 19 Infection cases in USA "

kpss.test(diff1_x1)   # apply the KPSS test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 4.2066, Truncation lag parameter = 5, p-value = 0.01

pp.test(diff1_x1)     # apply the Phillips-Perron test after taking first differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -25.067, Truncation lag parameter = 5, p-value
## = 0.02321
## alternative hypothesis: stationary

adf.test(diff1_x1)    # apply the Augmented Dickey-Fuller test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -1.1075, Lag order = 7, p-value = 0.9205
## alternative hypothesis: stationary

# Taking the second difference
diff2_x1 <- diff(diff1_x1)
# Only arguments autoplot() understands are passed: the base-graphics
# settings (col.main, cex.lab, font.main, ...) were ignored with an
# "Ignoring unknown parameters" warning.
autoplot(
  diff2_x1,
  xlab = paste("Time in  ", frequency, y_lab),
  ylab = y_lab,
  main = "2nd differenced series"
)

## Testing the stationarity of the second differenced series
paste("tests For Check Stationarity in series after taking Second differences in", y_lab)

## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 Infection cases in USA "

kpss.test(diff2_x1)   # apply the KPSS test after taking second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.080196, Truncation lag parameter = 5, p-value = 0.1

pp.test(diff2_x1)     # apply the Phillips-Perron test after taking second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -398.14, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # apply the Augmented Dickey-Fuller test after taking second differences

## Warning in adf.test(diff2_x1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -7.7882, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

#### Fitting an ARIMA Model
# 1. Using the auto.arima function
# Exhaustive (non-stepwise, non-approximated) search with the candidate
# models traced. auto.arima() applies a single unit-root test: passing the
# full c("kpss", "adf", "pp") vector selected only its first element, so
# "kpss" is now stated explicitly.
model1 <- auto.arima(
  data_series,
  stepwise = FALSE,
  approximation = FALSE,
  trace = TRUE,
  test = "kpss"
)

## 
##  ARIMA(0,2,0)                    : 8183.762
##  ARIMA(0,2,1)                    : 8106.168
##  ARIMA(0,2,2)                    : 8100.86
##  ARIMA(0,2,3)                    : 8095.087
##  ARIMA(0,2,4)                    : 8094.533
##  ARIMA(0,2,5)                    : 8091.759
##  ARIMA(1,2,0)                    : 8133.178
##  ARIMA(1,2,1)                    : 8098.971
##  ARIMA(1,2,2)                    : 8100.889
##  ARIMA(1,2,3)                    : 8096.263
##  ARIMA(1,2,4)                    : 8087.348
##  ARIMA(2,2,0)                    : 8131.241
##  ARIMA(2,2,1)                    : 8100.641
##  ARIMA(2,2,2)                    : 8099.247
##  ARIMA(2,2,3)                    : Inf
##  ARIMA(3,2,0)                    : 8113.968
##  ARIMA(3,2,1)                    : 8088.309
##  ARIMA(3,2,2)                    : Inf
##  ARIMA(4,2,0)                    : 8096.493
##  ARIMA(4,2,1)                    : 8084.196
##  ARIMA(5,2,0)                    : 8080.2
## 
## 
## 
##  Best model: ARIMA(5,2,0)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(5,2,0) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.5276  -0.3099  -0.3863  -0.3463  -0.2273
## s.e.   0.0514   0.0560   0.0553   0.0570   0.0523
## 
## sigma^2 estimated as 235605094:  log likelihood=-4033.98
## AIC=8079.96   AICc=8080.2   BIC=8103.36

# Local copy of the auto.arima model-label helper, used to recover the best
# model's order from a fitted object.
#
# Builds a label such as "ARIMA(5,2,0)" or "ARIMA(1,1,1)(1,1,1)[12]" from
# the `arma` component of a fitted model, followed by a note on the mean,
# drift or regression terms taken from `coef` and `xreg`.
#
# Args:
#   object:  fitted model (list-like) with components `arma` and `coef`,
#            and optionally `xreg`.
#   padding: if TRUE keep the fixed-width trailing blanks; if FALSE
#            (default) strip trailing blanks from the label.
# Returns:
#   A single character string.
arima.string <- function (object, padding = FALSE) 
{
  # Reordered so that spec[1:3] is the non-seasonal part, spec[4:6] the
  # seasonal part and spec[7] the period.
  spec <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  period <- spec[7]
  coef_names <- names(object$coef)

  label <- paste0("ARIMA(", spec[1], ",", spec[2], ",", spec[3], ")")

  if (period > 1 && sum(spec[4:6]) > 0) {
    # Seasonal part present: append "(P,D,Q)[period]"
    label <- paste0(
      label, "(", spec[4], ",", spec[5], ",", spec[6], ")[", period, "]"
    )
  }
  if (padding && period > 1 && sum(spec[4:6]) == 0) {
    # Seasonal period but no seasonal terms: pad with blanks, with more
    # blanks for periods that have more digits
    extra_blanks <- if (period <= 9) {
      " "
    } else if (period <= 99) {
      "  "
    } else {
      "   "
    }
    label <- paste0(label, "         ", extra_blanks)
  }

  if (!is.null(object$xreg)) {
    has_only_drift <- NCOL(object$xreg) == 1 && "drift" %in% coef_names
    if (has_only_drift) {
      label <- paste(label, "with drift        ")
    } else {
      label <- paste("Regression with", label, "errors")
    }
  } else if ("constant" %in% coef_names || "intercept" %in% coef_names) {
    label <- paste(label, "with non-zero mean")
  } else if (spec[2] == 0 && spec[5] == 0) {
    label <- paste(label, "with zero mean    ")
  } else {
    label <- paste(label, "                  ")
  }

  if (padding) {
    return(label)
  }
  gsub("[ ]*$", "", label)
}






# Optional external helper file; only sourced when present so that a missing
# file does not abort the script.
if (file.exists("stringthearima.R")) {
  source("stringthearima.R")
}
# Non-seasonal order (p, d, q) of the selected model, read from the fitted
# object's `arma` component. The previous approach parsed characters 7-11 of
# the printed label "ARIMA(p,d,q)", which breaks as soon as any order has
# two digits.
bestmodel <- as.integer(model1$arma[c(1, 6, 2)])
bestmodel

## [1] 5 2 0

bestmodel[3]

## [1] 0

# 2. Using the ACF and PACF functions
# par(mfrow = c(1, 2))  # uncomment to draw both plots side by side
# lag.max is an argument of acf()/pacf(); it was previously placed inside
# paste(), which appended "20" to the title and left the lag range at its
# default.
# ACF (autocorrelation function) of the second differenced series
acf(
  diff2_x1,
  lag.max = 20,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab,
  main = paste("ACF-2nd differenced series ", y_lab)
)

# PACF (partial autocorrelation function) of the second differenced series
pacf(
  diff2_x1,
  lag.max = 20,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab,
  main = paste("PACF-2nd differenced series ", y_lab)
)

library(forecast)   # load the forecast package
# Refit the order selected by auto.arima; this model is used for forecasting
x1_model1 <- arima(data_series, order = bestmodel)
x1_model1  # show the fitted model

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.5276  -0.3099  -0.3863  -0.3463  -0.2273
## s.e.   0.0514   0.0560   0.0553   0.0570   0.0523
## 
## sigma^2 estimated as 232377627:  log likelihood = -4033.98,  aic = 8079.96

paste("accuracy of autoarima Model For  ==> ", y_lab)

## [1] "accuracy of autoarima Model For  ==>  Covid 19 Infection cases in USA "

# Training-set accuracy of the refitted best model
accuracy(x1_model1)

##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 1445.599 15202.34 6877.758 0.9124656 3.277377 0.1260242
##                     ACF1
## Training set -0.03536632

# Prints NULL: this fitted object has no `x` component
x1_model1$x

## NULL

# Residual diagnostics for the best model (prints a Ljung-Box test)
checkresiduals(
  x1_model1,
  xlab = paste("Time in  ", frequency, y_lab),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4,
  ylab = y_lab
)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,2,0)
## Q* = 8.5643, df = 5, p-value = 0.1278
## 
## Model df: 5.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ", y_lab)

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Covid 19 Infection cases in USA "

# Ljung-Box test on the squared residuals, using 20 lags
Box.test(x1_model1$residuals^2, lag = 20, type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 52.732, df = 20, p-value = 8.897e-05

library(tseries)
# Jarque-Bera test on the model residuals
jarque.bera.test(x1_model1$residuals)

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 51183, df = 2, p-value < 2.2e-16

# Actual vs Fitted: observed training series in red, fitted values in black
plot(
  data_series,
  col = "red",
  lwd = 2,
  main = "Actual vs Fitted Plot",
  xlab = "Time in (days)",
  ylab = y_lab
)
lines(fitted(x1_model1), col = "black")

# Validation of the auto-ARIMA model on the held-out days ----

# Held-out observations as a time series whose index starts at
# rows - validation_data_days + 1; it is overlaid on the forecast plot below.
x1_test <- ts(testing_data, start = (rows - validation_data_days + 1))

# One forecast spans the validation window followed by the future horizon.
# Its first validation_data_days values are compared with the actual data,
# its last N_forecasting_days values are reported as the forecast.
forecasting_auto_arima <- forecast(x1_model1, h = N_forecasting_days + validation_data_days)
validation_forecast <- head(forecasting_auto_arima$mean, validation_data_days)

# Absolute percentage error for each validation day, rounded to 3 decimals.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)

# The messages in this section previously said "bats Model" (copied from the
# BATS section); they now name the model that is actually evaluated here.
paste("MAPE % For ", validation_data_days, frequency, "by using auto arima Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE % For  7 days by using auto arima Model for  ==>  Covid 19 Infection cases in USA "

MAPE_Mean_All <- paste(round(mean(MAPE_Per_Day), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
# MAPE_Per_Day is already rounded, so both historical names hold the same labels.
MAPE_auto.arima_Model <- paste(MAPE_Per_Day, "%")
MAPE_auto_arima <- MAPE_auto.arima_Model
paste(" MAPE that's Error of Forecasting for ", validation_data_days, " days in auto arima Model for  ==> ", y_lab, sep = " ")

## [1] " MAPE that's Error of Forecasting for  7  days in auto arima Model for  ==>  Covid 19 Infection cases in USA "

# MAPE_Mean_All already carries its own "% MAPE" label, so it is printed as is
# (appending another "%" produced a stray trailing percent sign).
MAPE_Mean_All

## [1] "1.102 % MAPE  7 days Covid 19 Infection cases in USA "

paste("MAPE that's Error of Forecasting day by day for ", validation_data_days, " days in auto arima Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in auto arima Model for  ==>  Covid 19 Infection cases in USA "

# Per-day comparison of actual values and validation forecasts.
data.frame(date_auto.arima = validation_dates, validation_data_by_name, actual_data = testing_data, forecasting_auto.arima = validation_forecast, MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2021-01-04                  Monday    20258725               20147555
## 2      2021-01-05                 Tuesday    20470169               20321109
## 3      2021-01-06               Wednesday    20643544               20509920
## 4      2021-01-07                Thursday    20870913               20710167
## 5      2021-01-08                  Friday    21170475               20911420
## 6      2021-01-09                Saturday    21447670               21101338
## 7      2021-01-10                  Sunday    21761186               21287130
##   MAPE_auto.arima_Model
## 1               0.549 %
## 2               0.728 %
## 3               0.647 %
## 4                0.77 %
## 5               1.224 %
## 6               1.615 %
## 7               2.178 %

# Out-of-sample forecast: the last N_forecasting_days values of the forecast.
data.frame(FD, forecating_date = forecasting_data_by_name, forecasting_by_auto.arima = tail(forecasting_auto_arima$mean, N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2021-01-11          Monday                  21470795
## 2 2021-01-12         Tuesday                  21658292
## 3 2021-01-13       Wednesday                  21849717
## 4 2021-01-14        Thursday                  22042709
## 5 2021-01-15          Friday                  22233851
## 6 2021-01-16        Saturday                  22423122
## 7 2021-01-17          Sunday                  22611117

# Forecast plot with the held-out actual values overlaid in red
# (x1_test was built above; it does not need to be rebuilt here).
plot(forecasting_auto_arima)
lines(x1_test, col = "red", lwd = 2)

graph4 <- autoplot(forecasting_auto_arima, xlab = paste("Time in  ", frequency, y_lab, sep = " "), col.main = "black", col.lab = "black", col.sub = "black", cex.main = 1, cex.lab = 1, cex.sub = 1, font.main = 4, font.lab = 4, ylab = y_lab)
graph4

# Forecast-error analysis for the auto-ARIMA model over the validation days ----
Error_auto.arima <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, % of actual (REOF_A)
REOF_F_auto.arima <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, % of forecast (REOF_F)
correlation_auto.arima <- cor(testing_data, validation_forecast, method = c("pearson"))  # correlation between predicted and actual values
MSE_ARIMA <- sum(Error_auto.arima^2) / validation_data_days  # Mean squared forecast error
RMSE_auto.arima <- sqrt(MSE_ARIMA)  # Root mean squared forecast error
# Mean absolute deviation: average of the absolute errors. Taking abs() of the
# summed signed errors instead would let positive and negative errors cancel.
MAD_auto.arima <- sum(Error_auto.arima) / validation_data_days
AEOF_auto.arima <- c(Error_auto.arima)
REOF_auto.arima1 <- c(paste(round(REOF_A_auto.arima, 3), "%"))
REOF_auto.arima2 <- c(paste(round(REOF_F_auto.arima, 3), "%"))
# Summary of the auto-ARIMA validation errors: correlation, MSE, RMSE, MAPE, MAD.
data.frame(correlation_auto.arima, MSE_ARIMA, RMSE_auto.arima, MAPE_Mean_All, MAD_auto.arima)

##   correlation_auto.arima   MSE_ARIMA RMSE_auto.arima
## 1              0.9954913 70008098665        264590.4
##                                           MAPE_Mean_All MAD_auto.arima
## 1 1.102 % MAPE  7 days Covid 19 Infection cases in USA        233434.8

# Per-day errors: AEOF, REOF_A and REOF_F.
data.frame(validation_dates, Validation_day_name = validation_data_by_name, AEOF_auto.arima, REOF_A_auto.arima = REOF_auto.arima1, REOF_F_auto.arima = REOF_auto.arima2)

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2021-01-04              Monday        111170.2           0.549 %
## 2       2021-01-05             Tuesday        149060.5           0.728 %
## 3       2021-01-06           Wednesday        133624.0           0.647 %
## 4       2021-01-07            Thursday        160746.1            0.77 %
## 5       2021-01-08              Friday        259054.9           1.224 %
## 6       2021-01-09            Saturday        346331.5           1.615 %
## 7       2021-01-10              Sunday        474056.1           2.178 %
##   REOF_F_auto.arima
## 1           0.552 %
## 2           0.734 %
## 3           0.652 %
## 4           0.776 %
## 5           1.239 %
## 6           1.641 %
## 7           2.227 %

# SIR Model
#install.packages("deSolve")
library(deSolve)
# Training window for the validation fit: the validation_data_days observations
# that end just before the held-out validation days. Deriving both bounds from
# validation_data_days keeps the window adjacent to the validation period when
# that setting changes (with 7 days this is rows-13 .. rows-7, as before).
first <- rows - 2 * validation_data_days + 1
secondr <- rows - validation_data_days
vector_SIR <- original_data[first:secondr]
Infected <- c(vector_SIR)
Day <- seq_along(Infected)  # day index 1..n; stays empty if Infected is empty
N <- Population # population of the us
# Right-hand side of the SIR equations in the form expected by ode().
#   time       : current time (not used in the equations)
#   state      : named vector with elements S, I and R
#   parameters : named vector with elements beta and gamma
# Returns a list holding c(dS, dI, dR). N is not an argument; it is looked up
# in the enclosing environment.
SIR <- function(time, state, parameters) {
  values <- as.list(c(state, parameters))
  with(values, {
    new_infections <- beta / N * I * S
    recoveries <- gamma * I
    list(c(-new_infections, new_infections - recoveries, recoveries))
  })
}

# Initial state for the solver: the first observed count is taken as I,
# everyone else in the population N as S, and R starts at zero.
init <- c(
  S = N - Infected[1],
  I = Infected[1],
  R = 0
)
# Residual sum of squares between the observed counts (Infected) and the
# model's I trajectory for a candidate c(beta, gamma).
#   parameters : numeric vector of length 2, interpreted as beta then gamma
# Reads init, Day, Infected and SIR from the enclosing environment. Column 3 of
# the solver output is used as the fit (I, with init ordered S, I, R after time).
RSS <- function(parameters) {
  named_rates <- setNames(parameters, c("beta", "gamma"))
  trajectory <- ode(y = init, times = Day, func = SIR, parms = named_rates)
  deviations <- Infected - trajectory[, 3]
  sum(deviations^2)
}

# Estimate beta and gamma by minimising RSS with box-constrained L-BFGS-B,
# starting from (0.5, 0.5) and keeping both rates within [0, 10].
Opt <- optim(
  par = c(0.5, 0.5),
  fn = RSS,
  method = "L-BFGS-B",
  lower = c(0, 0),
  upper = c(10, 10)
)
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

# Label the optimum so it can be handed to ode() as named parameters.
Opt_par <- Opt$par
names(Opt_par) <- c("beta", "gamma")
Opt_par

##        beta       gamma 
## 0.017059499 0.006290767

# NOTE(review): the figures below differ from the output recorded above;
# presumably they are left over from another run -- confirm or remove.
# beta     gamma 
# 0.6512503 0.4920399 

# Simulate the fitted SIR model over the training days and then
# validation_data_days further, so that the validation "forecast" is genuinely
# out of sample. Previously the solver was run on the training days only and
# the fitted values of that earlier week were compared with the held-out week.
out <- ode(
  y = init,
  times = seq_len(length(Day) + validation_data_days),
  func = SIR,
  parms = Opt_par
)

plot(out)
# Overlay the observed training counts on the simulated I curve.
plot(out, obs = data.frame(time = Day, I = Infected))

result_SIR <- data.frame(out)
# The last validation_data_days simulated values are the days that follow the
# training window, i.e. the validation period.
validation_forecast <- tail(result_SIR$I, validation_data_days)
# NOTE: SIR error figures recorded further down in this file were produced with
# the in-sample values and change with this fix; re-run to regenerate them.

## Error of forecasting
# Forecast-error analysis for the SIR model over the validation days.
Error_SIR <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_SIR <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, % of actual (REOF_A)
REOF_F_SIR <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, % of forecast (REOF_F)
correlation_SIR <- cor(testing_data, validation_forecast, method = c("pearson"))  # correlation between predicted and actual values
MSE_SIR <- sum(Error_SIR^2) / validation_data_days  # Mean squared forecast error
RMSE_SIR <- sqrt(MSE_SIR)  # Root mean squared forecast error
# Mean absolute deviation: average of the absolute errors. Taking abs() of the
# summed signed errors instead would let positive and negative errors cancel.
MAD_SIR <- sum(Error_SIR) / validation_data_days
AEOF_SIR <- c(Error_SIR)
# Mean percentage error (MAPE); taken while REOF_A_SIR is still numeric,
# because the two REOF_* vectors are turned into "%" labels just below.
REOF_A_SIR1 <- mean(REOF_A_SIR)
# Note: this replaces the MAPE_Mean_All label built for the previous model.
MAPE_Mean_All <- paste(round(REOF_A_SIR1, 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
REOF_A_SIR <- c(paste(round(REOF_A_SIR, 3), "%"))
REOF_F_SIR <- c(paste(round(REOF_F_SIR, 3), "%"))
# Summary of the SIR validation errors: correlation, MSE, RMSE, MAPE, MAD.
data.frame(correlation_SIR, MSE_SIR, RMSE_SIR, MAPE_Mean_All, MAD_SIR)

##   correlation_SIR      MSE_SIR RMSE_SIR
## 1       0.9955887 2.435812e+12  1560709
##                                           MAPE_Mean_All MAD_SIR
## 1 7.415 % MAPE  7 days Covid 19 Infection cases in USA  1555274

# Per-day errors (AEOF, REOF_A, REOF_F) next to forecast and actual values.
data.frame(validation_dates, Validation_day_name = validation_data_by_name, AEOF_SIR, REOF_A_SIR, REOF_F_SIR, validation_forecast, testing_data)

##   validation_dates Validation_day_name AEOF_SIR REOF_A_SIR REOF_F_SIR
## 1       2021-01-04              Monday  1431425    7.066 %    7.603 %
## 2       2021-01-05             Tuesday  1457575     7.12 %    7.666 %
## 3       2021-01-06           Wednesday  1444134    6.996 %    7.522 %
## 4       2021-01-07            Thursday  1483159    7.106 %     7.65 %
## 5       2021-01-08              Friday  1592842    7.524 %    8.136 %
## 6       2021-01-09            Saturday  1678617    7.827 %    8.491 %
## 7       2021-01-10              Sunday  1799166    8.268 %    9.013 %
##   validation_forecast testing_data
## 1            18827300     20258725
## 2            19012594     20470169
## 3            19199410     20643544
## 4            19387754     20870913
## 5            19577633     21170475
## 6            19769053     21447670
## 7            19962020     21761186

## forecasting by SIR model

# Refit on the most recent validation_data_days observations of the series.
Infected <- c(tail(original_data, validation_data_days))
Day <- seq_along(Infected)  # day index 1..n; stays empty if Infected is empty
N <- Population # population of the us

# SIR derivative function for ode() (same equations as the definition used for
# the validation fit earlier in the file).
#   time       : current time (not used in the equations)
#   state      : named vector with elements S, I and R
#   parameters : named vector with elements beta and gamma
# Returns list(c(dS, dI, dR)); N comes from the enclosing environment.
SIR <- function(time, state, parameters) {
  with(as.list(c(state, parameters)), {
    infection_flow <- beta / N * I * S
    recovery_flow <- gamma * I
    list(c(-infection_flow, infection_flow - recovery_flow, recovery_flow))
  })
}

# Initial state for the forecasting fit: first count of the refit window as I,
# the remainder of the population N as S, and R at zero.
init <- c(
  S = N - Infected[1],
  I = Infected[1],
  R = 0
)
# Objective for the optimiser: sum of squared differences between the observed
# counts (Infected) and the simulated I values for a candidate c(beta, gamma).
#   parameters : numeric vector of length 2, interpreted as beta then gamma
# Uses init, Day, Infected and SIR from the enclosing environment; the fit is
# taken from column 3 of the solver output.
RSS <- function(parameters) {
  rates <- parameters
  names(rates) <- c("beta", "gamma")
  simulated <- ode(y = init, times = Day, func = SIR, parms = rates)
  squared_gap <- (Infected - simulated[, 3])^2
  sum(squared_gap)
}

# Re-estimate beta and gamma on the refit window: box-constrained L-BFGS-B
# from the start point (0.5, 0.5), both rates limited to [0, 10].
Opt <- optim(
  par = c(0.5, 0.5),
  fn = RSS,
  method = "L-BFGS-B",
  lower = c(0, 0),
  upper = c(10, 10)
)
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

# Name the optimum so ode() can use it as its parameter vector.
Opt_par <- Opt$par
names(Opt_par) <- c("beta", "gamma")
Opt_par

##      beta     gamma 
## 0.0120714 0.0000000

# NOTE(review): the figures below differ from the output recorded above;
# presumably they are left over from another run -- confirm or remove.
# beta     gamma 
# 0.6512503 0.4920399 

# Simulate the refitted model over the observed window and then
# N_forecasting_days beyond it. Previously the solver was run on the observed
# days only, so the values reported as the forecast were the fitted values of
# the last observed week.
out <- ode(
  y = init,
  times = seq_len(length(Day) + N_forecasting_days),
  func = SIR,
  parms = Opt_par
)

plot(out)
# Overlay the observed counts of the refit window on the simulated I curve.
plot(out, obs = data.frame(time = Day, I = Infected))

result_SIR <- data.frame(out)
# Report only the simulated values that fall after the last observation.
data.frame(FD, forecating_date = forecasting_data_by_name, forecasting_by_SIR = tail(result_SIR$I, N_forecasting_days))

# NOTE: the table below was recorded before the horizon fix. It lists the
# fitted values of the observed week (its first row equals the actual count for
# 2021-01-04); re-run the script to regenerate it.
##           FD forecating_date forecasting_by_SIR
## 1 2021-01-11          Monday           20258725
## 2 2021-01-12         Tuesday           20489578
## 3 2021-01-13       Wednesday           20722887
## 4 2021-01-14        Thursday           20958674
## 5 2021-01-15          Friday           21196958
## 6 2021-01-16        Saturday           21437766
## 7 2021-01-17          Sunday           21681118

# Choose Best model by least error

# Collect the validation MAPE of every model (M1..M5) for the comparison below.
# The banner now lists the models that are actually compared.
paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using BATS, TBATS, Holt's Linear, auto arima and SIR Models for  ==> ", y_lab, sep = " ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using BATS, TBATS, Holt's Linear, auto arima and SIR Models for  ==>  Covid 19 Infection cases in USA "

M1 <- mean(REOF_A_bats)  # BATS

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==> ", y_lab, sep = " ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==>  Covid 19 Infection cases in USA "

M2 <- mean(REOF_A_tbats1)  # TBATS

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==> ", y_lab, sep = " ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==>  Covid 19 Infection cases in USA "

# NOTE(review): used without mean(); presumably REOF_A_Holt11 is already the
# mean error -- it is defined outside this section, confirm there.
M3 <- REOF_A_Holt11  # Holt

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==> ", y_lab, sep = " ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==>  Covid 19 Infection cases in USA "

M4 <- mean(REOF_A_auto.arima)  # auto arima
paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==> ", y_lab, sep = " ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==>  Covid 19 Infection cases in USA "

M5 <- REOF_A_SIR1  # SIR

# Banner for the day-by-day comparison table (previously labelled "autoarima"
# although the table covers four models).
paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  day by day for BATS, TBATS, Holt's Linear and auto arima Models For ==> ", y_lab, sep = " ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  day by day for BATS, TBATS, Holt's Linear and auto arima Models For ==>  Covid 19 Infection cases in USA "

# Day-by-day percentage errors. The rows are validation dates, so they are
# labelled with the validation day names (not the forecast-period day names).
data.frame(validation_dates, Validation_day_name = validation_data_by_name, MAPE_bats_error = REOF_A_bats, MAPE_TBATS_error = REOF_A_tbats1, MAPE_Holt_error = REOF_A_Holt1, MAPE_autoarima_error = REOF_A_auto.arima)

##   validation_dates Validation_day_name MAPE_bats_error MAPE_TBATS_error
## 1       2021-01-04              Monday       0.4774678        0.4740224
## 2       2021-01-05             Tuesday       0.5647359        0.5624324
## 3       2021-01-06           Wednesday       0.4670074        0.4603141
## 4       2021-01-07            Thursday       0.6286519        0.6080221
## 5       2021-01-08              Friday       1.1251372        1.1051825
## 6       2021-01-09            Saturday       1.5051583        1.4922301
## 7       2021-01-10              Sunday       2.0392587        2.0246020
##   MAPE_Holt_error MAPE_autoarima_error
## 1       0.4258712            0.5487524
## 2       0.4824427            0.7281840
## 3       0.3499283            0.6472919
## 4       0.4732988            0.7701919
## 5       0.9285991            1.2236615
## 6       1.2640027            1.6147747
## 7       1.7508288            2.1784478

# Pick the model with the smallest validation MAPE ----
recommend_Model <- c(M1, M2, M3, M4, M5)
best_recommended_model <- min(recommend_Model)
paste("lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==> ", y_lab, sep = " ")

## [1] "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==>  Covid 19 Infection cases in USA "

best_recommended_model

## [1] 0.8107102

paste("Best Model For Forecasting  ==> ", y_lab, sep = " ")

## [1] "Best Model For Forecasting  ==>  Covid 19 Infection cases in USA "

# Print a recommendation for every model whose MAPE is not above the minimum,
# checked in the order BATS, TBATS, Holt, auto arima, SIR. local() keeps the
# helper variables out of the workspace.
local({
  candidates <- list(
    list(mape = M1, text = "System Recommend Bats Model That's better  For forecasting==> "),
    list(mape = M2, text = "System Recommend  That's better TBATS  For forecasting ==> "),
    list(mape = M3, text = "System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==> "),
    list(mape = M4, text = "System Recommend auto arima Model  That's better  For forecasting ==> "),
    list(mape = M5, text = "System Recommend SIR Model  That's better  For forecasting ==> ")
  )
  for (candidate in candidates) {
    if (best_recommended_model >= candidate$mape) {
      print(paste(candidate$text, y_lab, sep = " "))
    }
  }
})

## [1] "System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==>  Covid 19 Infection cases in USA "

# Closing status messages.
message(
  "System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",
  y_lab,
  sep = " "
)

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>Covid 19 Infection cases in USA

message(
  " Thank you for using our System For Modelling  ==> ",
  y_lab,
  sep = " "
)

##  Thank you for using our System For Modelling  ==> Covid 19 Infection cases in USA