Deep-learning models and time series models for forecasting Covid-19 infection cases and deaths in Chelyabinsk region"

That algorithm contains the best 5 models for forecasting Covid 19 Infection cases in Chelyabinsk region

South Ural State University

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

#Import
library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

##

library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

require(markovchain)

## Loading required package: markovchain

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

##Global vriable##
Full_original_data <- read.csv("F:/Phd/University Conference/Chelyabinsk covid 19 data.csv") # path of your data ( time series data)
View(Full_original_data)
original_data<-Full_original_data$Infections
y_lab <- "Covid 19 infection cases in Chelyabinsk"   # input name of data
Actual_date_interval <- c("2020/03/12","2021/02/22")
Forecast_date_interval <- c("2021/02/23","2021/03/1")
validation_data_days <-7
frequency<-"day"
Population <-3466369 # population in Spain ( population size for SIR Model)
country.name <- "Chelyabinsk"
# Data Preparation & calculate some of statistics measures
View(original_data) # View data in table in R
summary(original_data) # Summary your time series

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    3585   13790   16373   24505   49551

describe((original_data)) # describe your time series

## (original_data) 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      348        0      331        1    16373    15878     12.7     52.4 
##      .25      .50      .75      .90      .95 
##   3585.2  13789.5  24505.2  40141.6  45104.8 
## 
## lowest :     0     1     2     5     9, highest: 48620 48858 49091 49322 49551

# calculate standard deviation 
library(pastecs)

## 
## Attaching package: 'pastecs'

## The following objects are masked from 'package:data.table':
## 
##     first, last

stat.desc(original_data)

##      nbr.val     nbr.null       nbr.na          min          max        range 
## 3.480000e+02 9.000000e+00 0.000000e+00 0.000000e+00 4.955100e+04 4.955100e+04 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 5.697694e+06 1.378950e+04 1.637268e+04 7.653170e+02 1.505244e+03 2.038271e+08 
##      std.dev     coef.var 
## 1.427680e+04 8.719892e-01

data.frame(skewness=skewness(original_data))  # calculate Cofficient of skewness

##    skewness
## 1 0.7384615

data.frame(kurtosis=kurtosis(original_data))   # calculate Cofficient of kurtosis

##   kurtosis
## 1 2.509104

sd(original_data)

## [1] 14276.8

#processing on data (input data)
rows <- NROW(original_data) # calculate number of rows in time series (number of days)
training_data<-original_data[1:(rows-validation_data_days)] # Training data
testing_data<-original_data[(rows-validation_data_days+1):rows] #testing data
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD))  #calculate number of days that you want to forecasting
validation_dates<-tail(AD,validation_data_days) # select validation_dates
validation_data_by_name<-weekdays(validation_dates) # put names of validation dates
forecasting_data_by_name<-weekdays(FD)  # put names of Forecasting dates

#NNAR Model 
data_series<-ts(training_data)
model_NNAR<-nnetar(data_series, size = 5)
saveRDS(model_NNAR, file = "model_NNAR.RDS")
my_model <- readRDS("model_NNAR.RDS")
accuracy(model_NNAR)  # accuracy on training data

##                      ME     RMSE      MAE  MPE MAPE      MASE      ACF1
## Training set -0.5649494 18.85687 15.11495 -Inf  Inf 0.1073841 0.7776563

#Print Model Parameters
model_NNAR

## Series: data_series 
## Model:  NNAR(1,5) 
## Call:   nnetar(y = data_series, size = 5)
## 
## Average of 20 networks, each of which is
## a 1-5-1 network with 16 weights
## options were - linear output units 
## 
## sigma^2 estimated as 355.6

# Testing Data Evaluation
forecasting_NNAR <- predict(model_NNAR, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_NNAR$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using NNAR Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using NNAR Model for  ==>  Covid 19 infection cases in Chelyabinsk"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_Mean_All_NNAR<-round(mean(MAPE_Per_Day),3)
MAPE_NNAR<-paste(round(MAPE_Per_Day,3),"%")
MAPE_NNAR_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in NNAR Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in NNAR Model for  ==>  Covid 19 infection cases in Chelyabinsk"

paste(MAPE_Mean_All,"%")

## [1] "0.382 % MAPE  7 day Covid 19 infection cases in Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in NNAR Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in NNAR Model for  ==>  Covid 19 infection cases in Chelyabinsk"

data.frame(date_NNAR=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_NNAR=validation_forecast,MAPE_NNAR_Model)

##    date_NNAR validation_data_by_name actual_data forecasting_NNAR
## 1 2021-02-16                 Tuesday       48116         48070.48
## 2 2021-02-17               Wednesday       48371         48277.64
## 3 2021-02-18                Thursday       48620         48478.39
## 4 2021-02-19                  Friday       48858         48672.63
## 5 2021-02-20                Saturday       49091         48860.28
## 6 2021-02-21                  Sunday       49322         49041.32
## 7 2021-02-22                  Monday       49551         49215.71
##   MAPE_NNAR_Model
## 1         0.095 %
## 2         0.193 %
## 3         0.291 %
## 4         0.379 %
## 5          0.47 %
## 6         0.569 %
## 7         0.677 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_NNAR=tail(forecasting_NNAR$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_NNAR
## 1 2021-02-23         Tuesday            49383.46
## 2 2021-02-24       Wednesday            49544.59
## 3 2021-02-25        Thursday            49699.14
## 4 2021-02-26          Friday            49847.19
## 5 2021-02-27        Saturday            49988.81
## 6 2021-02-28          Sunday            50124.11
## 7 2021-03-01          Monday            50253.20

plot(forecasting_NNAR)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_NNAR,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

saveRDS(model_NNAR, file = "model_NNAR.RDS")

## Error of forecasting
Error_NNAR<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_NNAR<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_NNAR<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_NNAR<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_NNAR<-sqrt(sum((Error_NNAR^2))/validation_data_days)   #  Root mean square forecast error
MSE_NNAR<-(sum((Error_NNAR^2))/validation_data_days)   #  Root mean square forecast error
MAD_NNAR<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_NNAR<-c(Error_NNAR)
REOF_ANNAR<-c(paste(round(REOF_A_NNAR,3),"%"))
REOF_FNNAR<-c(paste(round(REOF_F_NNAR,3),"%"))
data.frame(correlation_NNAR,MSE_NNAR,RMSE_NNAR,MAPE_Mean_All,MAD_NNAR) # analysis of Error  by using NNAR Model shows result of correlation ,MSE ,MPER

##   correlation_NNAR MSE_NNAR RMSE_NNAR
## 1        0.9999408 44233.85  210.3185
##                                                 MAPE_Mean_All MAD_NNAR
## 1 0.382 % MAPE  7 day Covid 19 infection cases in Chelyabinsk 187.5077

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_NNAR,REOF_ANNAR,REOF_FNNAR)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_NNAR REOF_ANNAR REOF_FNNAR
## 1       2021-02-16             Tuesday  45.52443    0.095 %    0.095 %
## 2       2021-02-17           Wednesday  93.35689    0.193 %    0.193 %
## 3       2021-02-18            Thursday 141.61001    0.291 %    0.292 %
## 4       2021-02-19              Friday 185.37405    0.379 %    0.381 %
## 5       2021-02-20            Saturday 230.71646     0.47 %    0.472 %
## 6       2021-02-21              Sunday 280.68157    0.569 %    0.572 %
## 7       2021-02-22              Monday 335.29064    0.677 %    0.681 %

##bats model
# Data Modeling
data_series<-ts(training_data) # make your data to time series
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE       MASE         ACF1
## Training set 1.037871 12.04264 6.933465 NaN  Inf 0.04925879 -0.005528907

# Print Model Parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 1.013369
##   Beta: 0.7537345
##   Damping Parameter: 1
## 
## Seed States:
##           [,1]
## [1,] -6.177342
## [2,] -5.825198
## 
## Sigma: 12.04264
## AIC: 3693.797

#ploting BATS Model
plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using bats Model for  ==>  Covid 19 infection cases in Chelyabinsk"

MAPE_Mean_All.bats_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.bats<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 infection cases in Chelyabinsk"

paste(MAPE_Mean_All.bats,"%")

## [1] "0.109 % MAPE  7 day Covid 19 infection cases in Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 infection cases in Chelyabinsk"

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-02-16                 Tuesday       48116         48117.94
## 2 2021-02-17               Wednesday       48371         48378.87
## 3 2021-02-18                Thursday       48620         48639.80
## 4 2021-02-19                  Friday       48858         48900.73
## 5 2021-02-20                Saturday       49091         49161.67
## 6 2021-02-21                  Sunday       49322         49422.60
## 7 2021-02-22                  Monday       49551         49683.53
##   MAPE_bats_Model
## 1         0.004 %
## 2         0.016 %
## 3         0.041 %
## 4         0.087 %
## 5         0.144 %
## 6         0.204 %
## 7         0.267 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2021-02-23         Tuesday            49944.46
## 2 2021-02-24       Wednesday            50205.39
## 3 2021-02-25        Thursday            50466.32
## 4 2021-02-26          Friday            50727.26
## 5 2021-02-27        Saturday            50988.19
## 6 2021-02-28          Sunday            51249.12
## 7 2021-03-01          Monday            51510.05

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MSE_bats<-(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,MSE_bats,RMSE_bats,MAPE_Mean_All.bats_Model,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER

##   correlation_bats MSE_bats RMSE_bats MAPE_Mean_All.bats_Model MAD_bats
## 1        0.9997926 4994.453  70.67144                    0.109 53.73321

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name  AEOF_bats REOF_Abats REOF_Fbats
## 1       2021-02-16             Tuesday   1.937401    0.004 %    0.004 %
## 2       2021-02-17           Wednesday   7.869337    0.016 %    0.016 %
## 3       2021-02-18            Thursday  19.801272    0.041 %    0.041 %
## 4       2021-02-19              Friday  42.733207    0.087 %    0.087 %
## 5       2021-02-20            Saturday  70.665142    0.144 %    0.144 %
## 6       2021-02-21              Sunday 100.597077    0.204 %    0.204 %
## 7       2021-02-22              Monday 132.529013    0.267 %    0.267 %

## TBATS Model
# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE      MASE         ACF1
## Training set 1.033371 11.89959 7.029362 NaN  Inf 0.0499401 -0.006947839

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 1.017233
##   Beta: 0.7577115
##   Damping Parameter: 1
##   Gamma-1 Values: -0.00192764
##   Gamma-2 Values: 0.001607352
## 
## Seed States:
##            [,1]
## [1,] -6.3556673
## [2,] -5.7532877
## [3,]  0.7054920
## [4,] -0.5224022
## [5,]  1.3257028
## [6,]  0.2717464
## 
## Sigma: 11.89959
## AIC: 3697.647

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using TBATS Model for  ==>  Covid 19 infection cases in Chelyabinsk"

MAPE_Mean_All.TBATS_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.TBATS<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Covid 19 infection cases in Chelyabinsk"

paste(MAPE_Mean_All.TBATS,"%")

## [1] "0.113 % MAPE  7 day Covid 19 infection cases in Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Covid 19 infection cases in Chelyabinsk"

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-02-16                 Tuesday       48116          48118.05
## 2 2021-02-17               Wednesday       48371          48380.98
## 3 2021-02-18                Thursday       48620          48643.88
## 4 2021-02-19                  Friday       48858          48903.45
## 5 2021-02-20                Saturday       49091          49163.23
## 6 2021-02-21                  Sunday       49322          49424.48
## 7 2021-02-22                  Monday       49551          49685.55
##   MAPE_TBATS_Model
## 1          0.004 %
## 2          0.021 %
## 3          0.049 %
## 4          0.093 %
## 5          0.147 %
## 6          0.208 %
## 7          0.272 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2021-02-23         Tuesday             49948.47
## 2 2021-02-24       Wednesday             50211.37
## 3 2021-02-25        Thursday             50470.94
## 4 2021-02-26          Friday             50730.73
## 5 2021-02-27        Saturday             50991.97
## 6 2021-02-28          Sunday             51253.04
## 7 2021-03-01          Monday             51515.96

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MSE_tbats<-(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,MSE_tbats,RMSE_tbats,MAPE_Mean_All.TBATS_Model,MAD_tbats) # analysis of Error  by using TBATS model shows result of correlation ,MSE ,MPER

##   correlation_tbats MSE_tbats RMSE_tbats MAPE_Mean_All.TBATS_Model MAD_tbats
## 1         0.9998241  5223.166   72.27148                     0.113  55.80314

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2021-02-16             Tuesday   2.053372      0.004 %      0.004 %
## 2       2021-02-17           Wednesday   9.979516      0.021 %      0.021 %
## 3       2021-02-18            Thursday  23.877050      0.049 %      0.049 %
## 4       2021-02-19              Friday  45.452617      0.093 %      0.093 %
## 5       2021-02-20            Saturday  72.234973      0.147 %      0.147 %
## 6       2021-02-21              Sunday 102.479200      0.208 %      0.207 %
## 7       2021-02-22              Monday 134.545250      0.272 %      0.271 %

## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                       ME    RMSE      MAE MPE MAPE       MASE     ACF1
## Training set -0.06367956 12.3186 7.391879 Inf  Inf 0.05251559 0.162771

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.612 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.5964 
## 
##   Initial states:
##     l = -1.6433 
##     b = -0.4258 
## 
##   sigma:  0.7914
## 
##      AIC     AICc      BIC 
## 1835.101 1835.280 1854.260 
## 
## Training set error measures:
##                       ME    RMSE      MAE MPE MAPE       MASE     ACF1
## Training set -0.06367956 12.3186 7.391879 Inf  Inf 0.05251559 0.162771

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using holt Model for  ==>  Covid 19 infection cases in Chelyabinsk"

MAPE_Mean_All.Holt_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.Holt<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Covid 19 infection cases in Chelyabinsk"

paste(MAPE_Mean_All.Holt,"%")

## [1] "0.127 % MAPE  7 day Covid 19 infection cases in Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Covid 19 infection cases in Chelyabinsk"

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-02-16                 Tuesday       48116         48119.01
## 2 2021-02-17               Wednesday       48371         48381.58
## 3 2021-02-18                Thursday       48620         48644.70
## 4 2021-02-19                  Friday       48858         48908.37
## 5 2021-02-20                Saturday       49091         49172.60
## 6 2021-02-21                  Sunday       49322         49437.37
## 7 2021-02-22                  Monday       49551         49702.70
##   MAPE_holt_Model
## 1         0.006 %
## 2         0.022 %
## 3         0.051 %
## 4         0.103 %
## 5         0.166 %
## 6         0.234 %
## 7         0.306 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2021-02-23         Tuesday            49968.58
## 2 2021-02-24       Wednesday            50235.01
## 3 2021-02-25        Thursday            50501.99
## 4 2021-02-26          Friday            50769.52
## 5 2021-02-27        Saturday            51037.60
## 6 2021-02-28          Sunday            51306.22
## 7 2021-03-01          Monday            51575.39

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MSE_Holt<-(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,MSE_Holt,RMSE_Holt,MAPE_Mean_All.Holt_Model,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_Holt MSE_Holt RMSE_Holt MAPE_Mean_All.Holt_Model MAD_Holt
## 1        0.9997549 6607.374  81.28575                    0.127 62.47614

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2021-02-16             Tuesday   3.01150     0.006 %     0.006 %
## 2       2021-02-17           Wednesday  10.57754     0.022 %     0.022 %
## 3       2021-02-18            Thursday  24.69755     0.051 %     0.051 %
## 4       2021-02-19              Friday  50.37087     0.103 %     0.103 %
## 5       2021-02-20            Saturday  81.59682     0.166 %     0.166 %
## 6       2021-02-21              Sunday 115.37475     0.234 %     0.233 %
## 7       2021-02-22              Monday 151.70397     0.306 %     0.305 %

#Auto arima model
##################
require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  Covid 19 infection cases in Chelyabinsk"

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 5.3479, Truncation lag parameter = 5, p-value = 0.01

pp.test(data_series)   # applay pp test

## Warning in pp.test(data_series): p-value greater than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = 1.4225, Truncation lag parameter = 5, p-value
## = 0.99
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -1.9863, Lag order = 6, p-value = 0.5825
## alternative hypothesis: stationary

ndiffs(data_series)    # Doing first diffrencing on data

## [1] 2

#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black",  ylab=y_lab,main = "1nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Covid 19 infection cases in Chelyabinsk"

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 4.1385, Truncation lag parameter = 5, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -5.4344, Truncation lag parameter = 5, p-value
## = 0.8058
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -1.2398, Lag order = 6, p-value = 0.8974
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 infection cases in Chelyabinsk"

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.10905, Truncation lag parameter = 5, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -377.65, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## Warning in adf.test(diff2_x1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -7.5188, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp"))  #applaying auto arima

## 
##  ARIMA(0,2,0)                    : 2669.501
##  ARIMA(0,2,1)                    : 2654.278
##  ARIMA(0,2,2)                    : 2656.264
##  ARIMA(0,2,3)                    : 2653.029
##  ARIMA(0,2,4)                    : 2653.228
##  ARIMA(0,2,5)                    : 2648.254
##  ARIMA(1,2,0)                    : 2654.612
##  ARIMA(1,2,1)                    : 2655.823
##  ARIMA(1,2,2)                    : Inf
##  ARIMA(1,2,3)                    : 2654.571
##  ARIMA(1,2,4)                    : 2650.166
##  ARIMA(2,2,0)                    : 2656.581
##  ARIMA(2,2,1)                    : 2656.578
##  ARIMA(2,2,2)                    : 2658.076
##  ARIMA(2,2,3)                    : 2649.378
##  ARIMA(3,2,0)                    : 2657.563
##  ARIMA(3,2,1)                    : 2656.149
##  ARIMA(3,2,2)                    : 2651.42
##  ARIMA(4,2,0)                    : 2646.378
##  ARIMA(4,2,1)                    : 2645.2
##  ARIMA(5,2,0)                    : 2646.253
## 
## 
## 
##  Best model: ARIMA(4,2,1)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(4,2,1) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     ma1
##       -0.6629  -0.1277  -0.1110  -0.2185  0.4492
## s.e.   0.1682   0.0729   0.0635   0.0533  0.1686
## 
## sigma^2 estimated as 140.2:  log likelihood=-1316.47
## AIC=2644.95   AICc=2645.2   BIC=2667.9

#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE) 
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", 
                  order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], 
                    ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    }
    else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", 
                                                                 names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    }
    else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}


bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel

## [1] 4 2 1

strtoi(bestmodel[3])

## [1] 1

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
acf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))    # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))   # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast)   # install library forecast             
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima  for forecasting
x1_model1  # Show result of best model of auto arima

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     ma1
##       -0.6629  -0.1277  -0.1110  -0.2185  0.4492
## s.e.   0.1682   0.0729   0.0635   0.0533  0.1686
## 
## sigma^2 estimated as 138.1:  log likelihood = -1316.47,  aic = 2644.95

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  Covid 19 infection cases in Chelyabinsk"

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME    RMSE      MAE       MPE    MAPE       MASE
## Training set 1.122016 11.7181 6.823088 0.4087639 2.10094 0.04847462
##                      ACF1
## Training set -0.009778106

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,2,1)
## Q* = 3.8452, df = 5, p-value = 0.5719
## 
## Model df: 5.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Covid 19 infection cases in Chelyabinsk"

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 135.86, df = 20, p-value < 2.2e-16

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 1609.8, df = 2, p-value < 2.2e-16

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='black')

#Test data
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 day by using bats Model for  ==>  Covid 19 infection cases in Chelyabinsk"

MAPE_Mean_All.ARIMA_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.ARIMA<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Covid 19 infection cases in Chelyabinsk"

paste(MAPE_Mean_All.ARIMA,"%")

## [1] "0.115 % MAPE  7 day Covid 19 infection cases in Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Covid 19 infection cases in Chelyabinsk"

data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2021-02-16                 Tuesday       48116               48118.69
## 2      2021-02-17               Wednesday       48371               48380.23
## 3      2021-02-18                Thursday       48620               48642.11
## 4      2021-02-19                  Friday       48858               48903.49
## 5      2021-02-20                Saturday       49091               49165.03
## 6      2021-02-21                  Sunday       49322               49426.52
## 7      2021-02-22                  Monday       49551               49688.00
##   MAPE_auto.arima_Model
## 1               0.006 %
## 2               0.019 %
## 3               0.045 %
## 4               0.093 %
## 5               0.151 %
## 6               0.212 %
## 7               0.276 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2021-02-23         Tuesday                  49949.58
## 2 2021-02-24       Wednesday                  50211.07
## 3 2021-02-25        Thursday                  50472.62
## 4 2021-02-26          Friday                  50734.13
## 5 2021-02-27        Saturday                  50995.65
## 6 2021-02-28          Sunday                  51257.18
## 7 2021-03-01          Monday                  51518.70

plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

MAPE_Mean_All.ARIMA

## [1] "0.115 % MAPE  7 day Covid 19 infection cases in Chelyabinsk"

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MSE_auto.arima<-(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,MSE_auto.arima,RMSE_auto.arima,MAPE_Mean_All.ARIMA_Model,MAD_auto.arima) # analysis of Error  by using Auto ARIMAA model shows result of correlation ,MSE ,MPER

##   correlation_auto.arima MSE_auto.arima RMSE_auto.arima
## 1              0.9997959       5403.305        73.50718
##   MAPE_Mean_All.ARIMA_Model MAD_auto.arima
## 1                     0.115       56.43831

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2021-02-16             Tuesday         2.68858           0.006 %
## 2       2021-02-17           Wednesday         9.23370           0.019 %
## 3       2021-02-18            Thursday        22.11205           0.045 %
## 4       2021-02-19              Friday        45.49282           0.093 %
## 5       2021-02-20            Saturday        74.02632           0.151 %
## 6       2021-02-21              Sunday       104.51645           0.212 %
## 7       2021-02-22              Monday       136.99826           0.276 %
##   REOF_F_auto.arima
## 1           0.006 %
## 2           0.019 %
## 3           0.045 %
## 4           0.093 %
## 5           0.151 %
## 6           0.211 %
## 7           0.276 %

# SIR Model 
#install.packages("dplyr")
library(deSolve)
first<-rows-(validation_data_days+6)
secondr<-rows-7
vector_SIR<-original_data[first:secondr]
Infected <- c(vector_SIR)
Day <- 1:(length(Infected))
N <- Population # population of the us
SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}

# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##      beta     gamma 
## 0.1168318 0.1092187

# beta     gamma 
# 0.6512503 0.4920399 
out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)
plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR<-data.frame(out)
validation_forecast<-result_SIR$I
## Error of forecasting
Error_SIR<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_SIR<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_SIR<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_SIR<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_SIR<-sqrt(sum((Error_SIR^2))/validation_data_days)   #  Root mean square forecast error
MSE_SIR<-(sum((Error_SIR^2))/validation_data_days)   #  Root mean square forecast error
MAD_SIR<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_SIR<-c(Error_SIR)
REOF_A_SIR<-c(paste(round(REOF_A_SIR,3),"%"))
REOF_A_SIR1<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
REOF_F_SIR<-c(paste(round(REOF_F_SIR,3),"%"))
MAPE_Mean_All.SIR<-round(mean(abs(((testing_data-validation_forecast)/testing_data)*100)),3)
MAPE_Mean_All<-paste(round(mean(abs(((testing_data-validation_forecast)/testing_data)*100)),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
data.frame(correlation_SIR,MSE_SIR,RMSE_SIR,MAPE_Mean_All.SIR,MAD_SIR) # analysis of Error  by using SIR model shows result of correlation ,MSE ,MPER

##   correlation_SIR MSE_SIR RMSE_SIR MAPE_Mean_All.SIR  MAD_SIR
## 1        0.999981 3139786 1771.944             3.628 1771.453

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_SIR,REOF_A_SIR,REOF_F_SIR,validation_forecast,testing_data)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_SIR REOF_A_SIR REOF_F_SIR
## 1       2021-02-16             Tuesday 1836.000    3.816 %    3.967 %
## 2       2021-02-17           Wednesday 1814.196    3.751 %    3.897 %
## 3       2021-02-18            Thursday 1793.194    3.688 %    3.829 %
## 4       2021-02-19              Friday 1768.168    3.619 %    3.755 %
## 5       2021-02-20            Saturday 1745.285    3.555 %    3.686 %
## 6       2021-02-21              Sunday 1727.713    3.503 %     3.63 %
## 7       2021-02-22              Monday 1715.612    3.462 %    3.586 %
##   validation_forecast testing_data
## 1            46280.00        48116
## 2            46556.80        48371
## 3            46826.81        48620
## 4            47089.83        48858
## 5            47345.71        49091
## 6            47594.29        49322
## 7            47835.39        49551

## forecasting by SIR model
Infected <- c(tail(original_data,N_forecasting_days))
Day <- 1:(length(Infected))
N <- Population # population of the us

SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}

# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Opt_par <- setNames(Opt$par, c("beta", "gamma"))

# beta     gamma 
# 0.6512503 0.4920399 

out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)

result_SIR <-data.frame(out)
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_SIR=result_SIR$I)

##           FD forecating_date forecasting_by_SIR
## 1 2021-02-23         Tuesday           48116.00
## 2 2021-02-24       Wednesday           48379.37
## 3 2021-02-25        Thursday           48631.85
## 4 2021-02-26          Friday           48873.21
## 5 2021-02-27        Saturday           49103.25
## 6 2021-02-28          Sunday           49321.76
## 7 2021-03-01          Monday           49528.53

# Table for MAPE For counry
best_recommended_model <- min(MAPE_Mean_All_NNAR,MAPE_Mean_All.bats_Model,MAPE_Mean_All.TBATS_Model,MAPE_Mean_All.Holt_Model,MAPE_Mean_All.ARIMA_Model,MAPE_Mean_All.SIR)
paste("System Choose Least Error ==> ( MAPE %) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Choose Least Error ==> ( MAPE %) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  Covid 19 infection cases in Chelyabinsk"

best_recommended_model

## [1] 0.109

x1<-if(best_recommended_model >= MAPE_Mean_All.bats_Model) {paste("BATS Model")}
x2<-if(best_recommended_model >= MAPE_Mean_All.TBATS_Model) {paste("TBATS Model")}
x3<-if(best_recommended_model >= MAPE_Mean_All.Holt_Model) {paste("Holt Model")}
x4<-if(best_recommended_model >= MAPE_Mean_All.ARIMA_Model) {paste("ARIMA Model")}
x5<-if(best_recommended_model >= MAPE_Mean_All.SIR) {paste("SIR Model")}
x6<-if(best_recommended_model >= MAPE_Mean_All_NNAR) {paste("NNAR Model")}
result<-c(x1,x2,x3,x4,x5,x6)

table.error<-data.frame(country.name,NNAR.model=MAPE_Mean_All_NNAR, BATS.Model=MAPE_Mean_All.bats_Model,TBATS.Model=MAPE_Mean_All.TBATS_Model,Holt.Model=MAPE_Mean_All.Holt_Model,ARIMA.Model=MAPE_Mean_All.ARIMA_Model,SIR.Model=MAPE_Mean_All.SIR,Best.Model=result)

library(ascii)
print(ascii(table(table.error)), type = "rest")

## 
## +---+--------------+------------+------------+-------------+------------+-------------+-----------+------------+------+
## |   | country.name | NNAR.model | BATS.Model | TBATS.Model | Holt.Model | ARIMA.Model | SIR.Model | Best.Model | Freq |
## +===+==============+============+============+=============+============+=============+===========+============+======+
## | 1 | Chelyabinsk  | 0.382      | 0.109      | 0.113       | 0.127      | 0.115       | 3.628     | BATS Model | 1.00 |
## +---+--------------+------------+------------+-------------+------------+-------------+-----------+------------+------+

message("System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",y_lab, sep=" ")

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>Covid 19 infection cases in Chelyabinsk

message(" Thank you for using our System For Modelling  ==> ",y_lab, sep=" ")

##  Thank you for using our System For Modelling  ==> Covid 19 infection cases in Chelyabinsk