New package “Epidemic. ta” for forecasting Covid-19 infection cases apply Example For forecasting infection cases in the Chelyabinsk region

That’s Algorithm Developed By

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

# Imports
library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages --------------------------------------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

## Warning: package 'ggplot2' was built under R version 4.0.3

## Warning: package 'forecast' was built under R version 4.0.3

##

library(forecast)
library(ggplot2)
library("readxl")

## Warning: package 'readxl' was built under R version 4.0.3

library(moments)

## Warning: package 'moments' was built under R version 4.0.3

library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

## Warning: package 'tseries' was built under R version 4.0.3

require(markovchain)

## Loading required package: markovchain

## Warning: package 'markovchain' was built under R version 4.0.3

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

#usa population =332002416
# Russia population =145966453
#population in japan = 126279505
#population in china =1442182072
#population in cheleabinsk =1130319
#population in moscow =12537954
#population in  MO =7690863
#Новгородская обл =1250615
# when you use data from russian websites use ==> unlist for define time series

Full_original_data<-read_excel("F:/Phd/ALL Russia Analysis/Regin In Russia 29_11_2020.xlsx",sheet = "Chelyabinsk region")
y_lab<- "COVID 19 Infection cases in Chelyabinsk region"   # input name of data
Actual_date_interval <- c("2020/01/22","2020/11/28")
Forecast_date_interval <- c("2020/11/29","2020/12/5")
validation_data_days <-7
frequency<-"days"
Population <-1130319   # Population size in City for applaying SIR model

# Data Preparation & calculate some of statistics measures
original_data<-Full_original_data$infection

summary(original_data)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    1525    9946    9449   15331   24973

sd(original_data)  # calculate standard deviation

## [1] 7486.128

skewness(original_data)  # calculate Cofficient of skewness

## [1] 0.1732697

kurtosis(original_data)   # calculate Cofficient of kurtosis

## [1] 1.77532

rows <- NROW(original_data)
training_data<-original_data[1:(rows-validation_data_days)]
testing_data<-original_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) 
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)

##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE       MASE         ACF1
## Training set 1.303325 13.62489 8.140653 NaN  Inf 0.08918101 -0.001222465

# Print Model Parameters
model_bats

## BATS(1, {0,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 1.055851
##   Beta: 0.6679532
##   Damping Parameter: 1
## 
## Seed States:
##           [,1]
## [1,] -6.176125
## [2,] -5.825195
## 
## Sigma: 13.62489
## AIC: 2764.857

plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

paste(MAPE_Mean_All,"%")

## [1] "0.259 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2020-11-22             воскресенье       23496         23494.25
## 2 2020-11-23             понедельник       23719         23711.29
## 3 2020-11-24                 вторник       23947         23928.32
## 4 2020-11-25                   среда       24188         24145.36
## 5 2020-11-26                 четверг       24440         24362.40
## 6 2020-11-27                 пятница       24701         24579.44
## 7 2020-11-28                 суббота       24973         24796.48
##   MAPE_bats_Model
## 1         0.007 %
## 2         0.033 %
## 3         0.078 %
## 4         0.176 %
## 5         0.318 %
## 6         0.492 %
## 7         0.707 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2020-11-29     воскресенье            25013.52
## 2 2020-11-30     понедельник            25230.55
## 3 2020-12-01         вторник            25447.59
## 4 2020-12-02           среда            25664.63
## 5 2020-12-03         четверг            25881.67
## 6 2020-12-04         пятница            26098.71
## 7 2020-12-05         суббота            26315.74

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,RMSE_bats,MAPE_Mean_All,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER

##   correlation_bats RMSE_bats
## 1        0.9993435  87.98434
##                                                         MAPE_Mean_All MAD_bats
## 1 0.259 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region 63.78074

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name  AEOF_bats REOF_Abats REOF_Fbats
## 1       2020-11-22         воскресенье   1.752550    0.007 %    0.007 %
## 2       2020-11-23         понедельник   7.714328    0.033 %    0.033 %
## 3       2020-11-24             вторник  18.676106    0.078 %    0.078 %
## 4       2020-11-25               среда  42.637884    0.176 %    0.177 %
## 5       2020-11-26             четверг  77.599662    0.318 %    0.319 %
## 6       2020-11-27             пятница 121.561440    0.492 %    0.495 %
## 7       2020-11-28             суббота 176.523218    0.707 %    0.712 %

## TBATS Model

# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                    ME     RMSE      MAE MPE MAPE       MASE         ACF1
## Training set 1.288604 13.40525 8.294981 NaN  Inf 0.09087168 -0.002631398

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 1.0549
##   Beta: 0.679838
##   Damping Parameter: 1
##   Gamma-1 Values: -0.002854131
##   Gamma-2 Values: 0.002116084
## 
## Seed States:
##            [,1]
## [1,] -6.4357280
## [2,] -5.7209404
## [3,]  0.9525842
## [4,] -0.6092764
## [5,]  1.8923816
## [6,]  0.3628063
## 
## Sigma: 13.40525
## AIC: 2768.536

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using TBATS Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

paste(MAPE_Mean_All,"%")

## [1] "0.23 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2020-11-22             воскресенье       23496          23495.53
## 2 2020-11-23             понедельник       23719          23713.80
## 3 2020-11-24                 вторник       23947          23934.98
## 4 2020-11-25                   среда       24188          24155.75
## 5 2020-11-26                 четверг       24440          24371.91
## 6 2020-11-27                 пятница       24701          24588.50
## 7 2020-11-28                 суббота       24973          24806.80
##   MAPE_TBATS_Model
## 1          0.002 %
## 2          0.022 %
## 3           0.05 %
## 4          0.133 %
## 5          0.279 %
## 6          0.455 %
## 7          0.666 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2020-11-29     воскресенье             25025.07
## 2 2020-11-30     понедельник             25246.25
## 3 2020-12-01         вторник             25467.02
## 4 2020-12-02           среда             25683.19
## 5 2020-12-03         четверг             25899.78
## 6 2020-12-04         пятница             26118.08
## 7 2020-12-05         суббота             26336.35

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model

Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,RMSE_tbats,MAPE_Mean_All,MAD_tbats) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_tbats RMSE_tbats
## 1         0.9992241   81.17533
##                                                        MAPE_Mean_All MAD_tbats
## 1 0.23 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region  56.67648

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name  AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2020-11-22         воскресенье   0.4730111      0.002 %      0.002 %
## 2       2020-11-23         понедельник   5.2005034      0.022 %      0.022 %
## 3       2020-11-24             вторник  12.0247469       0.05 %       0.05 %
## 4       2020-11-25               среда  32.2506072      0.133 %      0.134 %
## 5       2020-11-26             четверг  68.0892084      0.279 %      0.279 %
## 6       2020-11-27             пятница 112.4986602      0.455 %      0.458 %
## 7       2020-11-28             суббота 166.1986201      0.666 %       0.67 %

## Holt's linear trend


# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                     ME     RMSE      MAE MPE MAPE       MASE       ACF1
## Training set 0.5645154 13.72531 8.252742 NaN  Inf 0.09040895 0.07040979

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.7518 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.6551 
## 
##   Initial states:
##     l = -2.0004 
##     b = -0.1621 
## 
##   sigma:  2.2406
## 
##      AIC     AICc      BIC 
## 1838.581 1838.821 1856.307 
## 
## Training set error measures:
##                     ME     RMSE      MAE MPE MAPE       MASE       ACF1
## Training set 0.5645154 13.72531 8.252742 NaN  Inf 0.09040895 0.07040979

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using holt Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

paste(MAPE_Mean_All,"%")

## [1] "0.233 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2020-11-22             воскресенье       23496         23494.65
## 2 2020-11-23             понедельник       23719         23712.80
## 3 2020-11-24                 вторник       23947         23931.46
## 4 2020-11-25                   среда       24188         24150.60
## 5 2020-11-26                 четверг       24440         24370.25
## 6 2020-11-27                 пятница       24701         24590.38
## 7 2020-11-28                 суббота       24973         24811.01
##   MAPE_holt_Model
## 1         0.006 %
## 2         0.026 %
## 3         0.065 %
## 4         0.155 %
## 5         0.285 %
## 6         0.448 %
## 7         0.649 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2020-11-29     воскресенье            25032.13
## 2 2020-11-30     понедельник            25253.73
## 3 2020-12-01         вторник            25475.81
## 4 2020-12-02           среда            25698.38
## 5 2020-12-03         четверг            25921.42
## 6 2020-12-04         пятница            26144.94
## 7 2020-12-05         суббота            26368.94

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,RMSE_Holt,MAPE_Mean_All,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_Holt RMSE_Holt
## 1        0.9994123  80.19753
##                                                         MAPE_Mean_All MAD_Holt
## 1 0.233 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region 57.54857

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name  AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2020-11-22         воскресенье   1.349110     0.006 %     0.006 %
## 2       2020-11-23         понедельник   6.196339     0.026 %     0.026 %
## 3       2020-11-24             вторник  15.544264     0.065 %     0.065 %
## 4       2020-11-25               среда  37.395189     0.155 %     0.155 %
## 5       2020-11-26             четверг  69.751389     0.285 %     0.286 %
## 6       2020-11-27             пятница 110.615114     0.448 %      0.45 %
## 7       2020-11-28             суббота 161.988591     0.649 %     0.653 %

#Auto arima model
##################

require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  COVID 19 Infection cases in Chelyabinsk region"

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 4.3421, Truncation lag parameter = 5, p-value = 0.01

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -3.7306, Truncation lag parameter = 5, p-value
## = 0.9009
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -2.4425, Lag order = 6, p-value = 0.3895
## alternative hypothesis: stationary

ndiffs(data_series)    # Doing first diffrencing on data

## [1] 2

#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main = "1nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  COVID 19 Infection cases in Chelyabinsk region"

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 1.7061, Truncation lag parameter = 5, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -6.6049, Truncation lag parameter = 5, p-value
## = 0.7397
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -0.98881, Lag order = 6, p-value = 0.9389
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in COVID 19 Infection cases in Chelyabinsk region"

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.13267, Truncation lag parameter = 5, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -277.38, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## Warning in adf.test(diff2_x1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -7.0415, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp"))  #applaying auto arima

## 
##  ARIMA(0,2,0)                    : 2068.079
##  ARIMA(0,2,1)                    : 2053.64
##  ARIMA(0,2,2)                    : 2055.032
##  ARIMA(0,2,3)                    : 2051.817
##  ARIMA(0,2,4)                    : 2052.36
##  ARIMA(0,2,5)                    : 2049.213
##  ARIMA(1,2,0)                    : 2054.906
##  ARIMA(1,2,1)                    : 2053.539
##  ARIMA(1,2,2)                    : 2055.122
##  ARIMA(1,2,3)                    : 2053.426
##  ARIMA(1,2,4)                    : 2050.519
##  ARIMA(2,2,0)                    : 2056.588
##  ARIMA(2,2,1)                    : 2054.786
##  ARIMA(2,2,2)                    : 2056.403
##  ARIMA(2,2,3)                    : 2053.244
##  ARIMA(3,2,0)                    : 2057.104
##  ARIMA(3,2,1)                    : 2054.427
##  ARIMA(3,2,2)                    : 2052.079
##  ARIMA(4,2,0)                    : 2046.353
##  ARIMA(4,2,1)                    : 2046.943
##  ARIMA(5,2,0)                    : 2047.731
## 
## 
## 
##  Best model: ARIMA(4,2,0)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(4,2,0) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4
##       -0.2703  -0.0700  -0.1335  -0.2203
## s.e.   0.0610   0.0628   0.0625   0.0607
## 
## sigma^2 estimated as 180:  log likelihood=-1018.06
## AIC=2046.11   AICc=2046.35   BIC=2063.8

#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE) 
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", 
                  order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], 
                    ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    }
    else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", 
                                                                 names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    }
    else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}






source("stringthearima.R")  
bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel

## [1] 4 2 0

strtoi(bestmodel[3])

## [1] 0

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
acf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))    # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))   # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast)   # install library forecast             
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima  for forecasting
x1_model1  # Show result of best model of auto arima

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4
##       -0.2703  -0.0700  -0.1335  -0.2203
## s.e.   0.0610   0.0628   0.0625   0.0607
## 
## sigma^2 estimated as 177.2:  log likelihood = -1018.06,  aic = 2046.11

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  COVID 19 Infection cases in Chelyabinsk region"

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME     RMSE      MAE       MPE     MAPE       MASE
## Training set 1.435101 13.25908 8.083879 0.6111157 2.781743 0.08855906
##                       ACF1
## Training set -0.0008462041

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,2,0)
## Q* = 3.5355, df = 6, p-value = 0.7392
## 
## Model df: 4.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  COVID 19 Infection cases in Chelyabinsk region"

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 72, df = 20, p-value = 8.558e-08

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 773.82, df = 2, p-value < 2.2e-16

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='blue')

#Test data

x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 days by using bats Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

paste(MAPE_Mean_All,"%")

## [1] "0.263 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  COVID 19 Infection cases in Chelyabinsk region"

data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2020-11-22             воскресенье       23496               23493.27
## 2      2020-11-23             понедельник       23719               23710.25
## 3      2020-11-24                 вторник       23947               23927.80
## 4      2020-11-25                   среда       24188               24144.28
## 5      2020-11-26                 четверг       24440               24361.30
## 6      2020-11-27                 пятница       24701               24578.02
## 7      2020-11-28                 суббота       24973               24794.79
##   MAPE_auto.arima_Model
## 1               0.012 %
## 2               0.037 %
## 3                0.08 %
## 4               0.181 %
## 5               0.322 %
## 6               0.498 %
## 7               0.714 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2020-11-29     воскресенье                  25011.74
## 2 2020-11-30     понедельник                  25228.55
## 3 2020-12-01         вторник                  25445.45
## 4 2020-12-02           среда                  25662.30
## 5 2020-12-03         четверг                  25879.14
## 6 2020-12-04         пятница                  26096.00
## 7 2020-12-05         суббота                  26312.84

plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,RMSE_auto.arima,MAPE_Mean_All,MAD_auto.arima) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_auto.arima RMSE_auto.arima
## 1              0.9993289        88.99512
##                                                         MAPE_Mean_All
## 1 0.263 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region
##   MAD_auto.arima
## 1       64.89781

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2020-11-22         воскресенье        2.725618           0.012 %
## 2       2020-11-23         понедельник        8.754141           0.037 %
## 3       2020-11-24             вторник       19.195955            0.08 %
## 4       2020-11-25               среда       43.716343           0.181 %
## 5       2020-11-26             четверг       78.699214           0.322 %
## 6       2020-11-27             пятница      122.983753           0.498 %
## 7       2020-11-28             суббота      178.209642           0.714 %
##   REOF_F_auto.arima
## 1           0.012 %
## 2           0.037 %
## 3            0.08 %
## 4           0.181 %
## 5           0.323 %
## 6             0.5 %
## 7           0.719 %

# SIR Model 
#install.packages("dplyr")
library(deSolve)
first<-rows-13
secondr<-rows-7
vector_SIR<-original_data[first:secondr]
Infected <- c(vector_SIR)
Day <- 1:(length(Infected))
N <- Population # population of the us
SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}

# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##       beta      gamma 
## 0.09747856 0.08558046

# beta     gamma 
# 0.6512503 0.4920399 

out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)

plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR<-data.frame(out)
validation_forecast<-result_SIR$I

## Error of forecasting
Error_SIR<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_SIR<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_SIR<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_SIR<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_SIR<-sqrt(sum((Error_SIR^2))/validation_data_days)   #  Root mean square forecast error
MAD_SIR<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_SIR<-c(Error_SIR)
REOF_A_SIR<-c(paste(round(REOF_A_SIR,3),"%"))
REOF_A_SIR1<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))

REOF_F_SIR<-c(paste(round(REOF_F_SIR,3),"%"))
MAPE_Mean_All<-paste(round(mean(abs(((testing_data-validation_forecast)/testing_data)*100)),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
data.frame(correlation_SIR,RMSE_SIR,MAPE_Mean_All,MAD_SIR) # analysis of Error  by using SIR's linear model shows result of correlation ,MSE ,MPER

##   correlation_SIR RMSE_SIR
## 1       0.9989854 1579.909
##                                                         MAPE_Mean_All  MAD_SIR
## 1 6.517 % MAPE  7 days COVID 19 Infection cases in Chelyabinsk region 1578.448

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_SIR,REOF_A_SIR,REOF_F_SIR,validation_forecast,testing_data)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_SIR REOF_A_SIR REOF_F_SIR
## 1       2020-11-22         воскресенье 1512.000    6.435 %    6.878 %
## 2       2020-11-23         понедельник 1516.025    6.392 %    6.828 %
## 3       2020-11-24             вторник 1526.966    6.376 %    6.811 %
## 4       2020-11-25               среда 1552.951     6.42 %    6.861 %
## 5       2020-11-26             четверг 1592.113    6.514 %    6.968 %
## 6       2020-11-27             пятница 1642.584     6.65 %    7.124 %
## 7       2020-11-28             суббота 1706.495    6.833 %    7.335 %
##   validation_forecast testing_data
## 1            21984.00        23496
## 2            22202.97        23719
## 3            22420.03        23947
## 4            22635.05        24188
## 5            22847.89        24440
## 6            23058.42        24701
## 7            23266.51        24973

## forecasting by SIR model

Infected <- c(tail(original_data,validation_data_days))
Day <- 1:(length(Infected))
N <- Population # population of the us

SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}

# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", 
             lower = c(0, 0), upper = c(10, 10))
Opt$message

## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par

##       beta      gamma 
## 0.03218839 0.02148695

# beta     gamma 
# 0.6512503 0.4920399 

out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)

plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))

result_SIR <-data.frame(out)
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_SIR=result_SIR$I)

##           FD forecating_date forecasting_by_SIR
## 1 2020-11-29     воскресенье           23496.00
## 2 2020-11-30     понедельник           23732.66
## 3 2020-12-01         вторник           23971.18
## 4 2020-12-02           среда           24211.59
## 5 2020-12-03         четверг           24453.88
## 6 2020-12-04         пятница           24698.05
## 7 2020-12-05         суббота           24944.10

# Choose Best model by least error

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  COVID 19 Infection cases in Chelyabinsk region"

M1<-mean(REOF_A_bats)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==>  COVID 19 Infection cases in Chelyabinsk region"

M2<-mean(REOF_A_tbats1)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==>  COVID 19 Infection cases in Chelyabinsk region"

M3<-REOF_A_Holt11

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==>  COVID 19 Infection cases in Chelyabinsk region"

M4<-mean(REOF_A_auto.arima)
paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using SIR Model For ==>  COVID 19 Infection cases in Chelyabinsk region"

M5<-REOF_A_SIR1

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==>  COVID 19 Infection cases in Chelyabinsk region"

data.frame(validation_dates,forecating_date=forecasting_data_by_name,MAPE_bats_error=REOF_A_bats,MAPE_TBATS_error=REOF_A_tbats1,MAPE_Holt_error=REOF_A_Holt1,MAPE_autoarima_error = REOF_A_auto.arima)

##   validation_dates forecating_date MAPE_bats_error MAPE_TBATS_error
## 1       2020-11-22     воскресенье     0.007458928      0.002013156
## 2       2020-11-23     понедельник     0.032523832      0.021925475
## 3       2020-11-24         вторник     0.077989334      0.050214001
## 4       2020-11-25           среда     0.176277013      0.133333088
## 5       2020-11-26         четверг     0.317510892      0.278597416
## 6       2020-11-27         пятница     0.492131654      0.455441724
## 7       2020-11-28         суббота     0.706856277      0.665513235
##   MAPE_Holt_error MAPE_autoarima_error
## 1      0.00574187           0.01160035
## 2      0.02612395           0.03690772
## 3      0.06491111           0.08016017
## 4      0.15460224           0.18073567
## 5      0.28539848           0.32200988
## 6      0.44781634           0.49788977
## 7      0.64865491           0.71360927

recommend_Model<-c(M1,M2,M3,M4,M5)
best_recommended_model<-min(recommend_Model)
paste ("lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==> ", y_lab , sep=" ")

## [1] "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==>  COVID 19 Infection cases in Chelyabinsk region"

best_recommended_model

## [1] 0.2295769

paste ("Best Model For Forecasting  ==> ",y_lab, sep=" ")

## [1] "Best Model For Forecasting  ==>  COVID 19 Infection cases in Chelyabinsk region"

if(best_recommended_model >= M1) {paste("System Recommend Bats Model That's better  For forecasting==> ",y_lab, sep=" ")}
if(best_recommended_model >= M2) {paste("System Recommend  That's better TBATS  For forecasting ==> ",y_lab, sep=" ")}

## [1] "System Recommend  That's better TBATS  For forecasting ==>  COVID 19 Infection cases in Chelyabinsk region"

if(best_recommended_model >= M3) {paste("System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M4) {paste("System Recommend auto arima Model  That's better  For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M5) {paste("System Recommend SIR Model  That's better  For forecasting ==> ",y_lab, sep=" ")}

message("System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>",y_lab, sep=" ")

## System finished Forecasting  by using autoarima and Holt's ,TBATS, and SIR  Model ==>COVID 19 Infection cases in Chelyabinsk region

message(" Thank you for using our System For Modelling  ==> ",y_lab, sep=" ")

##  Thank you for using our System For Modelling  ==> COVID 19 Infection cases in Chelyabinsk region

## Markov Chain For COVID 19 infection cases

require(markovchain)
require(data.table)
xx9<-original_data[rows]
xx8<-original_data[rows-1]
xx7<-original_data[rows-2]
xx6<-original_data[rows-3]
xx5<-original_data[rows-4]
xx4<-original_data[rows-5]
xx3<-original_data[rows-6]
xx2<-original_data[rows-7]
xx1<-original_data[rows-8]
infection_vector1<-c(xx1,xx2,xx3)
infection_vector2<-c(xx4,xx5,xx6)
infection_vector3<-c(xx7,xx8,xx9)
sum_vector1<-sum(infection_vector1)
sum_vector2<-sum(infection_vector2)
sum_vector3<-sum(infection_vector3)
proba_vector1<-c(infection_vector1/sum_vector1)
proba_vector2<-c(infection_vector2/sum_vector2)
proba_vector3<-c(infection_vector3/sum_vector3)
CovidStates = c("Low Infections", "Mid Infections", "Hight Infections")
byRow = TRUE


CovidMatrix = matrix(data = c(proba_vector1,
                              proba_vector2,
                              proba_vector3), byrow = byRow, nrow = 3,
                     
                     dimnames = list(CovidStates, CovidStates))


mcCovid = new("markovchain", states = CovidStates, byrow = byRow,
              transitionMatrix = CovidMatrix, name = "Cvid 19")

mcCovid = new("markovchain", states = c("Low Infections", "Mid Infections", "Hight Infections"),
              transitionMatrix = matrix(data = c(proba_vector1,
                                                 proba_vector2,
                                                 proba_vector3), byrow = byRow, nrow = 3),
              name = "Cvid 19")

name = ("Cvid 19")
initialState = c(0,1,0)
after2Days = initialState * (mcCovid * mcCovid)
after7Days = initialState * (mcCovid^7)
after30days =initialState * (mcCovid^30)
after7Days

##      Low Infections Mid Infections Hight Infections
## [1,]      0.3300215       0.333295        0.3366835

plot(mcCovid,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main = "Markov Chain")