New techniques For Forecasting Covid 19 In top 10 countries infected (Brazil)

By

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

# Imports
library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

##

library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

require(markovchain)

## Loading required package: markovchain

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

#population in Brazil  = 332049624
#WHO COVID-19 global table data January 11th 2021 at 11.53.00 AM.csv
Full_original_data<-read.csv("F:/Phd/COVID 19 in 2021/WHO_data.csv")
View(Full_original_data)
y_lab<- "Covid 19 Infection cases in Brazil "   # input name of data
Actual_date_interval <- c("2020/01/03","2021/01/10")
Forecast_date_interval <- c("2021/01/11","2021/01/17")
validation_data_days <-7
frequency <-"days"
Population <-213376438 # population in Brazil

# Data Preparation & calculate some of statistics measures
Covid_data<-Full_original_data[Full_original_data$Country == "Brazil", ]
original_data<-Covid_data$Cumulative_cases
View(original_data)
summary(original_data)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    9362 1613170 2549528 4992806 8013708

sd(original_data)  # calculate standard deviation

## [1] 2626118

skewness(original_data)  # calculate Cofficient of skewness

## [1] 0.5166867

kurtosis(original_data)   # calculate Cofficient of kurtosis

## [1] 1.773888

rows <- NROW(original_data)
training_data<-original_data[1:(rows-validation_data_days)]
testing_data<-original_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) 
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)

##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE      MASE       ACF1
## Training set 641.5894 8698.253 5471.335 NaN  Inf 0.2600465 0.01545096

# Print Model Parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 1.507906
##   Beta: 0.1643973
##   Damping Parameter: 1
## 
## Seed States:
##            [,1]
## [1,] -45.610897
## [2,]   8.731625
## 
## Sigma: 8698.253
## AIC: 8833.292

plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  Covid 19 Infection cases in Brazil "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 Infection cases in Brazil "

paste(MAPE_Mean_All,"%")

## [1] "0.412 % MAPE  7 days Covid 19 Infection cases in Brazil  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 Infection cases in Brazil "

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-01-04                  Monday     7716405          7726003
## 2 2021-01-05                 Tuesday     7733746          7764721
## 3 2021-01-06               Wednesday     7753752          7803440
## 4 2021-01-07                Thursday     7810400          7842158
## 5 2021-01-08                  Friday     7873830          7880876
## 6 2021-01-09                Saturday     7961673          7919594
## 7 2021-01-10                  Sunday     8013708          7958313
##   MAPE_bats_Model
## 1         0.124 %
## 2         0.401 %
## 3         0.641 %
## 4         0.407 %
## 5         0.089 %
## 6         0.529 %
## 7         0.691 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2021-01-11          Monday             7997031
## 2 2021-01-12         Tuesday             8035749
## 3 2021-01-13       Wednesday             8074467
## 4 2021-01-14        Thursday             8113186
## 5 2021-01-15          Friday             8151904
## 6 2021-01-16        Saturday             8190622
## 7 2021-01-17          Sunday             8229340

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MSE_bats<-(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,MSE_bats,RMSE_bats,MAPE_Mean_All,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER

##   correlation_bats   MSE_bats RMSE_bats
## 1        0.9748807 1345417652  36679.94
##                                              MAPE_Mean_All MAD_bats
## 1 0.412 % MAPE  7 days Covid 19 Infection cases in Brazil  4513.061

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2021-01-04              Monday  9598.145    0.124 %    0.124 %
## 2       2021-01-05             Tuesday 30975.403    0.401 %    0.399 %
## 3       2021-01-06           Wednesday 49687.660    0.641 %    0.637 %
## 4       2021-01-07            Thursday 31757.918    0.407 %    0.405 %
## 5       2021-01-08              Friday  7046.176    0.089 %    0.089 %
## 6       2021-01-09            Saturday 42078.567    0.529 %    0.531 %
## 7       2021-01-10              Sunday 55395.309    0.691 %    0.696 %

## TBATS Model

# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE      MASE       ACF1
## Training set 633.1078 8567.274 5667.245 NaN  Inf 0.2693579 0.01600741

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 1.514693
##   Beta: 0.1664232
##   Damping Parameter: 1
##   Gamma-1 Values: -0.002481143
##   Gamma-2 Values: 0.004364751
## 
## Seed States:
##            [,1]
## [1,]   48.43941
## [2,]  -34.39969
## [3,] -699.63011
## [4,]  454.63022
## [5,] -767.45649
## [6,]   85.79901
## 
## Sigma: 8567.274
## AIC: 8834.155

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using TBATS Model for  ==>  Covid 19 Infection cases in Brazil "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Covid 19 Infection cases in Brazil "

paste(MAPE_Mean_All,"%")

## [1] "0.407 % MAPE  7 days Covid 19 Infection cases in Brazil  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Covid 19 Infection cases in Brazil "

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-01-04                  Monday     7716405           7723780
## 2 2021-01-05                 Tuesday     7733746           7763074
## 3 2021-01-06               Wednesday     7753752           7804200
## 4 2021-01-07                Thursday     7810400           7841214
## 5 2021-01-08                  Friday     7873830           7879536
## 6 2021-01-09                Saturday     7961673           7919130
## 7 2021-01-10                  Sunday     8013708           7955585
##   MAPE_TBATS_Model
## 1          0.096 %
## 2          0.379 %
## 3          0.651 %
## 4          0.395 %
## 5          0.072 %
## 6          0.534 %
## 7          0.725 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2021-01-11          Monday              7994879
## 2 2021-01-12         Tuesday              8036005
## 3 2021-01-13       Wednesday              8073018
## 4 2021-01-14        Thursday              8111340
## 5 2021-01-15          Friday              8150935
## 6 2021-01-16        Saturday              8187389
## 7 2021-01-17          Sunday              8226684

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model

Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MSE_tbats<-(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,MSE_tbats,RMSE_tbats,MAPE_Mean_All,MAD_tbats) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_tbats  MSE_tbats RMSE_tbats
## 1         0.9727568 1375685097   37090.23
##                                              MAPE_Mean_All MAD_tbats
## 1 0.407 % MAPE  7 days Covid 19 Infection cases in Brazil   3286.423

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2021-01-04              Monday   7375.034      0.096 %      0.095 %
## 2       2021-01-05             Tuesday  29328.331      0.379 %      0.378 %
## 3       2021-01-06           Wednesday  50448.295      0.651 %      0.646 %
## 4       2021-01-07            Thursday  30813.621      0.395 %      0.393 %
## 5       2021-01-08              Friday   5705.593      0.072 %      0.072 %
## 6       2021-01-09            Saturday  42542.668      0.534 %      0.537 %
## 7       2021-01-10              Sunday  58123.243      0.725 %      0.731 %

## Holt's linear trend


# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                     ME     RMSE      MAE MPE MAPE      MASE      ACF1
## Training set -467.1298 10003.29 6482.739 Inf  Inf 0.3081175 0.3828341

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.4566 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.1548 
## 
##   Initial states:
##     l = -3.0736 
##     b = -1.0619 
## 
##   sigma:  3.6118
## 
##      AIC     AICc      BIC 
## 3115.860 3116.026 3135.387 
## 
## Training set error measures:
##                     ME     RMSE      MAE MPE MAPE      MASE      ACF1
## Training set -467.1298 10003.29 6482.739 Inf  Inf 0.3081175 0.3828341

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using holt Model for  ==>  Covid 19 Infection cases in Brazil "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Covid 19 Infection cases in Brazil "

paste(MAPE_Mean_All,"%")

## [1] "0.492 % MAPE  7 days Covid 19 Infection cases in Brazil  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Covid 19 Infection cases in Brazil "

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-01-04                  Monday     7716405          7741394
## 2 2021-01-05                 Tuesday     7733746          7782326
## 3 2021-01-06               Wednesday     7753752          7823375
## 4 2021-01-07                Thursday     7810400          7864542
## 5 2021-01-08                  Friday     7873830          7905825
## 6 2021-01-09                Saturday     7961673          7947227
## 7 2021-01-10                  Sunday     8013708          7988746
##   MAPE_holt_Model
## 1         0.324 %
## 2         0.628 %
## 3         0.898 %
## 4         0.693 %
## 5         0.406 %
## 6         0.181 %
## 7         0.311 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2021-01-11          Monday             8030382
## 2 2021-01-12         Tuesday             8072136
## 3 2021-01-13       Wednesday             8114008
## 4 2021-01-14        Thursday             8155998
## 5 2021-01-15          Friday             8198105
## 6 2021-01-16        Saturday             8240330
## 7 2021-01-17          Sunday             8282673

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MSE_Holt<-(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,MSE_Holt,RMSE_Holt,MAPE_Mean_All,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_Holt   MSE_Holt RMSE_Holt
## 1        0.9753755 1802673206   42457.9
##                                              MAPE_Mean_All MAD_Holt
## 1 0.492 % MAPE  7 days Covid 19 Infection cases in Brazil  27131.59

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2021-01-04              Monday  24989.38     0.324 %     0.323 %
## 2       2021-01-05             Tuesday  48580.11     0.628 %     0.624 %
## 3       2021-01-06           Wednesday  69623.16     0.898 %      0.89 %
## 4       2021-01-07            Thursday  54141.59     0.693 %     0.688 %
## 5       2021-01-08              Friday  31995.45     0.406 %     0.405 %
## 6       2021-01-09            Saturday  14446.21     0.181 %     0.182 %
## 7       2021-01-10              Sunday  24962.34     0.311 %     0.312 %

#Auto arima model
##################

require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  Covid 19 Infection cases in Brazil "

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 5.9017, Truncation lag parameter = 5, p-value = 0.01

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -1.7739, Truncation lag parameter = 5, p-value
## = 0.9751
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -3.1481, Lag order = 7, p-value = 0.09694
## alternative hypothesis: stationary

ndiffs(data_series)    # Doing first diffrencing on data

## [1] 2

#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main = "1nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Covid 19 Infection cases in Brazil "

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 4.3318, Truncation lag parameter = 5, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## Warning in pp.test(diff1_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -83.636, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -1.3113, Lag order = 7, p-value = 0.8676
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 Infection cases in Brazil "

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.03997, Truncation lag parameter = 5, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -217.44, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## Warning in adf.test(diff2_x1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -8.8838, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp"))  #applaying auto arima

## 
##  ARIMA(0,2,0)                    : 7781.254
##  ARIMA(0,2,1)                    : 7783.125
##  ARIMA(0,2,2)                    : 7666.805
##  ARIMA(0,2,3)                    : 7667.607
##  ARIMA(0,2,4)                    : 7647.428
##  ARIMA(0,2,5)                    : 7630.406
##  ARIMA(1,2,0)                    : 7783.215
##  ARIMA(1,2,1)                    : 7699.599
##  ARIMA(1,2,2)                    : 7668.17
##  ARIMA(1,2,3)                    : 7669.045
##  ARIMA(1,2,4)                    : 7632.891
##  ARIMA(2,2,0)                    : 7753.15
##  ARIMA(2,2,1)                    : 7645.097
##  ARIMA(2,2,2)                    : 7585.275
##  ARIMA(2,2,3)                    : Inf
##  ARIMA(3,2,0)                    : 7740.24
##  ARIMA(3,2,1)                    : 7631.318
##  ARIMA(3,2,2)                    : 7572.113
##  ARIMA(4,2,0)                    : 7682.875
##  ARIMA(4,2,1)                    : 7574.361
##  ARIMA(5,2,0)                    : 7553.166
## 
## 
## 
##  Best model: ARIMA(5,2,0)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(5,2,0) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.3625  -0.5278  -0.4630  -0.4729  -0.5639
## s.e.   0.0439   0.0409   0.0438   0.0413   0.0444
## 
## sigma^2 estimated as 55286712:  log likelihood=-3770.47
## AIC=7552.93   AICc=7553.17   BIC=7576.33

#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE) 
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", 
                  order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], 
                    ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    }
    else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", 
                                                                 names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    }
    else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}






source("stringthearima.R")  
bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel

## [1] 5 2 0

strtoi(bestmodel[3])

## [1] 0

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
acf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))    # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))   # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast)   # install library forecast             
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima  for forecasting
x1_model1  # Show result of best model of auto arima

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.3625  -0.5278  -0.4630  -0.4729  -0.5639
## s.e.   0.0439   0.0409   0.0438   0.0413   0.0444
## 
## sigma^2 estimated as 54529360:  log likelihood = -3770.47,  aic = 7552.93

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  Covid 19 Infection cases in Brazil "

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME     RMSE      MAE      MPE     MAPE     MASE       ACF1
## Training set 332.0835 7364.251 4605.471 1.111238 3.168168 0.218893 -0.2665656

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,2,0)
## Q* = 89.025, df = 5, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Covid 19 Infection cases in Brazil "

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 134.44, df = 20, p-value < 2.2e-16

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 219, df = 2, p-value < 2.2e-16

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='black')

#Test data

x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  Covid 19 Infection cases in Brazil "

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 Infection cases in Brazil "

paste(MAPE_Mean_All,"%")

## [1] "0.262 % MAPE  7 days Covid 19 Infection cases in Brazil  %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 Infection cases in Brazil "

data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2021-01-04                  Monday     7716405                7718452
## 2      2021-01-05                 Tuesday     7733746                7735149
## 3      2021-01-06               Wednesday     7753752                7771917
## 4      2021-01-07                Thursday     7810400                7819727
## 5      2021-01-08                  Friday     7873830                7874810
## 6      2021-01-09                Saturday     7961673                7916489
## 7      2021-01-10                  Sunday     8013708                7945248
##   MAPE_auto.arima_Model
## 1               0.027 %
## 2               0.018 %
## 3               0.234 %
## 4               0.119 %
## 5               0.012 %
## 6               0.568 %
## 7               0.854 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2021-01-11          Monday                   7965856
## 2 2021-01-12         Tuesday                   7992777
## 3 2021-01-13       Wednesday                   8029930
## 4 2021-01-14        Thursday                   8077486
## 5 2021-01-15          Friday                   8124089
## 6 2021-01-16        Saturday                   8162421
## 7 2021-01-17          Sunday                   8191038

plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MSE_auto.arima<-(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,MSE_auto.arima,RMSE_auto.arima,MAPE_Mean_All,MAD_auto.arima) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_auto.arima MSE_auto.arima RMSE_auto.arima
## 1              0.9856113     1021780393         31965.3
##                                              MAPE_Mean_All MAD_auto.arima
## 1 0.262 % MAPE  7 days Covid 19 Infection cases in Brazil        11674.67

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2021-01-04              Monday        2046.947           0.027 %
## 2       2021-01-05             Tuesday        1403.110           0.018 %
## 3       2021-01-06           Wednesday       18165.040           0.234 %
## 4       2021-01-07            Thursday        9326.717           0.119 %
## 5       2021-01-08              Friday         979.649           0.012 %
## 6       2021-01-09            Saturday       45183.953           0.568 %
## 7       2021-01-10              Sunday       68460.196           0.854 %
##   REOF_F_auto.arima
## 1           0.027 %
## 2           0.018 %
## 3           0.234 %
## 4           0.119 %
## 5           0.012 %
## 6           0.571 %
## 7           0.862 %

# SIR Model 
#install.packages("dplyr")
library(deSolve)
first<-rows-13
secondr<-rows-7
vector_SIR<-original_data[first:secondr]
Infected <- c(vector_SIR)
Day <- 1:(length(Infected))
N <- Population # population of the us
SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}

# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##        beta       gamma 
## 0.012239536 0.006763319

# beta     gamma 
# 0.6512503 0.4920399 

out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)

plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR<-data.frame(out)
validation_forecast<-result_SIR$I

## Error of forecasting
Error_SIR<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_SIR<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_SIR<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_SIR<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_SIR<-sqrt(sum((Error_SIR^2))/validation_data_days)   #  Root mean square forecast error
MSE_SIR<-(sum((Error_SIR^2))/validation_data_days)   #  Root mean square forecast error
MAD_SIR<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_SIR<-c(Error_SIR)
REOF_A_SIR<-c(paste(round(REOF_A_SIR,3),"%"))
REOF_A_SIR1<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))

REOF_F_SIR<-c(paste(round(REOF_F_SIR,3),"%"))
MAPE_Mean_All<-paste(round(mean(abs(((testing_data-validation_forecast)/testing_data)*100)),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
data.frame(correlation_SIR,MSE_SIR,RMSE_SIR,MAPE_Mean_All,MAD_SIR) # analysis of Error  by using SIR's linear model shows result of correlation ,MSE ,MPER

##   correlation_SIR     MSE_SIR RMSE_SIR
## 1        0.975576 67830317745 260442.5
##                                              MAPE_Mean_All  MAD_SIR
## 1 3.284 % MAPE  7 days Covid 19 Infection cases in Brazil  257784.1

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_SIR,REOF_A_SIR,REOF_F_SIR,validation_forecast,testing_data)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_SIR REOF_A_SIR REOF_F_SIR
## 1       2021-01-04              Monday 250599.0    3.248 %    3.357 %
## 2       2021-01-05             Tuesday 230176.6    2.976 %    3.068 %
## 3       2021-01-06           Wednesday 212266.4    2.738 %    2.815 %
## 4       2021-01-07            Thursday 230845.4    2.956 %    3.046 %
## 5       2021-01-08              Friday 256053.3    3.252 %    3.361 %
## 6       2021-01-09            Saturday 305520.9    3.837 %    3.991 %
## 7       2021-01-10              Sunday 319026.9    3.981 %    4.146 %
##   validation_forecast testing_data
## 1             7465806      7716405
## 2             7503569      7733746
## 3             7541486      7753752
## 4             7579555      7810400
## 5             7617777      7873830
## 6             7656152      7961673
## 7             7694681      8013708

## forecasting by SIR model

Infected <- c(tail(original_data,validation_data_days))
Day <- 1:(length(Infected))
N <- Population # population of the us

SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}

# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##        beta       gamma 
## 0.005859088 0.000000000

# beta     gamma 
# 0.6512503 0.4920399 

out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)

plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR <-data.frame(out)
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_SIR=result_SIR$I)

##           FD forecating_date forecasting_by_SIR
## 1 2021-01-11          Monday            7716405
## 2 2021-01-12         Tuesday            7760100
## 3 2021-01-13       Wednesday            7804033
## 4 2021-01-14        Thursday            7848205
## 5 2021-01-15          Friday            7892617
## 6 2021-01-16        Saturday            7937271
## 7 2021-01-17          Sunday            7982168

# Choose Best model by least error

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  Covid 19 Infection cases in Brazil "

M1<-mean(REOF_A_bats)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==>  Covid 19 Infection cases in Brazil "

M2<-mean(REOF_A_tbats1)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==>  Covid 19 Infection cases in Brazil "

M3<-REOF_A_Holt11

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==>  Covid 19 Infection cases in Brazil "

M4<-mean(REOF_A_auto.arima)
paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==>  Covid 19 Infection cases in Brazil "

M5<-REOF_A_SIR1

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==>  Covid 19 Infection cases in Brazil "

data.frame(validation_dates,forecating_date=forecasting_data_by_name,MAPE_bats_error=REOF_A_bats,MAPE_TBATS_error=REOF_A_tbats1,MAPE_Holt_error=REOF_A_Holt1,MAPE_autoarima_error = REOF_A_auto.arima)

##   validation_dates forecating_date MAPE_bats_error MAPE_TBATS_error
## 1       2021-01-04          Monday      0.12438623       0.09557603
## 2       2021-01-05         Tuesday      0.40052263       0.37922542
## 3       2021-01-06       Wednesday      0.64082086       0.65063075
## 4       2021-01-07        Thursday      0.40661065       0.39452040
## 5       2021-01-08          Friday      0.08948854       0.07246274
## 6       2021-01-09        Saturday      0.52851413       0.53434332
## 7       2021-01-10          Sunday      0.69125690       0.72529774
##   MAPE_Holt_error MAPE_autoarima_error
## 1       0.3238474           0.02652721
## 2       0.6281575           0.01814269
## 3       0.8979287           0.23427419
## 4       0.6931987           0.11941407
## 5       0.4063518           0.01244184
## 6       0.1814470           0.56751832
## 7       0.3114955           0.85428863

recommend_Model<-c(M1,M2,M3,M4,M5)
best_recommended_model<-min(recommend_Model)
paste ("lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==> ", y_lab , sep=" ")

## [1] "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==>  Covid 19 Infection cases in Brazil "

best_recommended_model

## [1] 0.261801

paste ("Best Model For Forecasting  ==> ",y_lab, sep=" ")

## [1] "Best Model For Forecasting  ==>  Covid 19 Infection cases in Brazil "

if(best_recommended_model >= M1) {paste("System Recommend Bats Model That's better  For forecasting==> ",y_lab, sep=" ")}
if(best_recommended_model >= M2) {paste("System Recommend  That's better TBATS  For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M3) {paste("System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M4) {paste("System Recommend auto arima Model  That's better  For forecasting ==> ",y_lab, sep=" ")}

## [1] "System Recommend auto arima Model  That's better  For forecasting ==>  Covid 19 Infection cases in Brazil "

if(best_recommended_model >= M5) {paste("System Recommend SIR Model  That's better  For forecasting ==> ",y_lab, sep=" ")}

message("System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",y_lab, sep=" ")

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>Covid 19 Infection cases in Brazil

message(" Thank you for using our System For Modelling  ==> ",y_lab, sep=" ")

##  Thank you for using our System For Modelling  ==> Covid 19 Infection cases in Brazil