South Ural State University, Chelyabinsk, Russian Federation
# Import packages
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.2 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
##
library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
require(forecast)
require(tseries)
## Loading required package: tseries
require(markovchain)
## Loading required package: markovchain
## Package: markovchain
## Version: 0.8.5-3
## Date: 2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues
require(data.table)
## Loading required package: data.table
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Global variables ##
Full_original_data <- read.csv("F:/Phd/University Conference/Chelyabinsk covid 19 data.csv") # path to your data (time series CSV)
View(Full_original_data)
original_data<-Full_original_data$Infections
y_lab <- "Covid 19 infection cases in Chelyabinsk" # input name of data
Actual_date_interval <- c("2020/03/12","2021/02/22")
Forecast_date_interval <- c("2021/02/23","2021/03/1")
validation_data_days <-7
frequency<-"day"
Population <-3466369 # population of the Chelyabinsk region (population size for the SIR model)
country.name <- "Chelyabinsk"
# Data preparation & calculation of some basic statistical measures
View(original_data) # View data as a table in R
summary(original_data) # Summarize your time series
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 3585 13790 16373 24505 49551
describe((original_data)) # describe your time series
## (original_data)
## n missing distinct Info Mean Gmd .05 .10
## 348 0 331 1 16373 15878 12.7 52.4
## .25 .50 .75 .90 .95
## 3585.2 13789.5 24505.2 40141.6 45104.8
##
## lowest : 0 1 2 5 9, highest: 48620 48858 49091 49322 49551
# calculate standard deviation
library(pastecs)
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:data.table':
##
## first, last
stat.desc(original_data)
## nbr.val nbr.null nbr.na min max range
## 3.480000e+02 9.000000e+00 0.000000e+00 0.000000e+00 4.955100e+04 4.955100e+04
## sum median mean SE.mean CI.mean.0.95 var
## 5.697694e+06 1.378950e+04 1.637268e+04 7.653170e+02 1.505244e+03 2.038271e+08
## std.dev coef.var
## 1.427680e+04 8.719892e-01
data.frame(skewness=skewness(original_data)) # calculate the coefficient of skewness
## skewness
## 1 0.7384615
data.frame(kurtosis=kurtosis(original_data)) # calculate the coefficient of kurtosis
## kurtosis
## 1 2.509104
sd(original_data)
## [1] 14276.8
# Processing the input data
rows <- NROW(original_data) # calculate number of rows in time series (number of days)
training_data<-original_data[1:(rows-validation_data_days)] # Training data
testing_data<-original_data[(rows-validation_data_days+1):rows] #testing data
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency) #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency) #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) # calculate the number of days to forecast
validation_dates<-tail(AD,validation_data_days) # select the validation dates
validation_data_by_name<-weekdays(validation_dates) # weekday names of the validation dates
forecasting_data_by_name<-weekdays(FD) # weekday names of the forecasting dates
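# Optional sanity check (a minimal sketch added here, not part of the original workflow):
# the training and testing pieces should add up to the full series, and the number of
# validation dates should match validation_data_days.
stopifnot(length(training_data) + length(testing_data) == rows,
          length(validation_dates) == validation_data_days)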
#NNAR Model
data_series<-ts(training_data)
model_NNAR<-nnetar(data_series, size = 5)
saveRDS(model_NNAR, file = "model_NNAR.RDS")
my_model <- readRDS("model_NNAR.RDS")
accuracy(model_NNAR) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.5649494 18.85687 15.11495 -Inf Inf 0.1073841 0.7776563
#Print Model Parameters
model_NNAR
## Series: data_series
## Model: NNAR(1,5)
## Call: nnetar(y = data_series, size = 5)
##
## Average of 20 networks, each of which is
## a 1-5-1 network with 16 weights
## options were - linear output units
##
## sigma^2 estimated as 355.6
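# Note (added for clarity): the label NNAR(1,5) means one lagged input (p = 1) and
# five hidden nodes, averaged over 20 networks. A sketch of setting these explicitly
# instead of letting nnetar() choose p automatically; model_NNAR_alt is an illustrative
# name and is not used below.
model_NNAR_alt <- nnetar(data_series, p = 1, size = 5, repeats = 20)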
# Testing Data Evaluation
forecasting_NNAR <- predict(model_NNAR, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_NNAR$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using NNAR Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 day by using NNAR Model for ==> Covid 19 infection cases in Chelyabinsk"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_Mean_All_NNAR<-round(mean(MAPE_Per_Day),3)
MAPE_NNAR<-paste(round(MAPE_Per_Day,3),"%")
MAPE_NNAR_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in NNAR Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in NNAR Model for ==> Covid 19 infection cases in Chelyabinsk"
paste(MAPE_Mean_All,"%")
## [1] "0.382 % MAPE 7 day Covid 19 infection cases in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in NNAR Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in NNAR Model for ==> Covid 19 infection cases in Chelyabinsk"
data.frame(date_NNAR=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_NNAR=validation_forecast,MAPE_NNAR_Model)
## date_NNAR validation_data_by_name actual_data forecasting_NNAR
## 1 2021-02-16 Tuesday 48116 48070.48
## 2 2021-02-17 Wednesday 48371 48277.64
## 3 2021-02-18 Thursday 48620 48478.39
## 4 2021-02-19 Friday 48858 48672.63
## 5 2021-02-20 Saturday 49091 48860.28
## 6 2021-02-21 Sunday 49322 49041.32
## 7 2021-02-22 Monday 49551 49215.71
## MAPE_NNAR_Model
## 1 0.095 %
## 2 0.193 %
## 3 0.291 %
## 4 0.379 %
## 5 0.47 %
## 6 0.569 %
## 7 0.677 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_NNAR=tail(forecasting_NNAR$mean,N_forecasting_days))
## FD forecating_date forecasting_by_NNAR
## 1 2021-02-23 Tuesday 49383.46
## 2 2021-02-24 Wednesday 49544.59
## 3 2021-02-25 Thursday 49699.14
## 4 2021-02-26 Friday 49847.19
## 5 2021-02-27 Saturday 49988.81
## 6 2021-02-28 Sunday 50124.11
## 7 2021-03-01 Monday 50253.20
plot(forecasting_NNAR)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_NNAR,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

saveRDS(model_NNAR, file = "model_NNAR.RDS")
## Error of forecasting
Error_NNAR<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_NNAR<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_NNAR<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_NNAR<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_NNAR<-sqrt(sum((Error_NNAR^2))/validation_data_days) # Root mean square forecast error
MSE_NNAR<-(sum((Error_NNAR^2))/validation_data_days) # Mean square forecast error (MSE)
MAD_NNAR<-abs((sum(testing_data-validation_forecast))/validation_data_days) # absolute mean forecast error (labelled MAD)
AEOF_NNAR<-c(Error_NNAR)
REOF_ANNAR<-c(paste(round(REOF_A_NNAR,3),"%"))
REOF_FNNAR<-c(paste(round(REOF_F_NNAR,3),"%"))
data.frame(correlation_NNAR,MSE_NNAR,RMSE_NNAR,MAPE_Mean_All,MAD_NNAR) # error analysis for the NNAR model: correlation, MSE, RMSE, MAPE, MAD
## correlation_NNAR MSE_NNAR RMSE_NNAR
## 1 0.9999408 44233.85 210.3185
## MAPE_Mean_All MAD_NNAR
## 1 0.382 % MAPE 7 day Covid 19 infection cases in Chelyabinsk 187.5077
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_NNAR,REOF_ANNAR,REOF_FNNAR) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_NNAR REOF_ANNAR REOF_FNNAR
## 1 2021-02-16 Tuesday 45.52443 0.095 % 0.095 %
## 2 2021-02-17 Wednesday 93.35689 0.193 % 0.193 %
## 3 2021-02-18 Thursday 141.61001 0.291 % 0.292 %
## 4 2021-02-19 Friday 185.37405 0.379 % 0.381 %
## 5 2021-02-20 Saturday 230.71646 0.47 % 0.472 %
## 6 2021-02-21 Sunday 280.68157 0.569 % 0.572 %
## 7 2021-02-22 Monday 335.29064 0.677 % 0.681 %
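# The error block above is repeated for every model below. A compact helper (a sketch,
# not used in the original script; the name forecast_errors is hypothetical) that
# returns the same summary measures for any actual/forecast pair:
forecast_errors <- function(actual, forecast) {
  err <- actual - forecast
  data.frame(correlation = cor(actual, forecast),
             MSE  = mean(err^2),
             RMSE = sqrt(mean(err^2)),
             MAPE = mean(abs(err / actual)) * 100,
             MAD  = abs(mean(err)))
}
# Example: forecast_errors(testing_data, validation_forecast)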
## BATS Model
# Data Modeling
data_series<-ts(training_data) # convert the training data to a time series object
autoplot(data_series ,xlab=paste ("Time in ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.037871 12.04264 6.933465 NaN Inf 0.04925879 -0.005528907
# Print Model Parameters
model_bats
## BATS(1, {0,0}, 1, -)
##
## Call: bats(y = data_series)
##
## Parameters
## Alpha: 1.013369
## Beta: 0.7537345
## Damping Parameter: 1
##
## Seed States:
## [,1]
## [1,] -6.177342
## [2,] -5.825198
##
## Sigma: 12.04264
## AIC: 3693.797
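# Reading the printed specification (forecast package notation, stated here for clarity):
# BATS(1, {0,0}, 1, -) means a Box-Cox parameter of 1 (no transformation), ARMA(0,0)
# errors, a damping parameter of 1 (no damping) and no seasonal periods.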
# Plotting the BATS model
plot(model_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 day by using bats Model for ==> Covid 19 infection cases in Chelyabinsk"
MAPE_Mean_All.bats_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.bats<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in bats Model for ==> Covid 19 infection cases in Chelyabinsk"
paste(MAPE_Mean_All.bats,"%")
## [1] "0.109 % MAPE 7 day Covid 19 infection cases in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in bats Model for ==> Covid 19 infection cases in Chelyabinsk"
data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)
## date_bats validation_data_by_name actual_data forecasting_bats
## 1 2021-02-16 Tuesday 48116 48117.94
## 2 2021-02-17 Wednesday 48371 48378.87
## 3 2021-02-18 Thursday 48620 48639.80
## 4 2021-02-19 Friday 48858 48900.73
## 5 2021-02-20 Saturday 49091 49161.67
## 6 2021-02-21 Sunday 49322 49422.60
## 7 2021-02-22 Monday 49551 49683.53
## MAPE_bats_Model
## 1 0.004 %
## 2 0.016 %
## 3 0.041 %
## 4 0.087 %
## 5 0.144 %
## 6 0.204 %
## 7 0.267 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))
## FD forecating_date forecasting_by_bats
## 1 2021-02-23 Tuesday 49944.46
## 2 2021-02-24 Wednesday 50205.39
## 3 2021-02-25 Thursday 50466.32
## 4 2021-02-26 Friday 50727.26
## 5 2021-02-27 Saturday 50988.19
## 6 2021-02-28 Sunday 51249.12
## 7 2021-03-01 Monday 51510.05
plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days) # Root mean square forecast error
MSE_bats<-(sum((Error_bats^2))/validation_data_days) # Mean square forecast error (MSE)
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days) # absolute mean forecast error (labelled MAD)
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,MSE_bats,RMSE_bats,MAPE_Mean_All.bats_Model,MAD_bats) # error analysis for the BATS model: correlation, MSE, RMSE, MAPE, MAD
## correlation_bats MSE_bats RMSE_bats MAPE_Mean_All.bats_Model MAD_bats
## 1 0.9997926 4994.453 70.67144 0.109 53.73321
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1 2021-02-16 Tuesday 1.937401 0.004 % 0.004 %
## 2 2021-02-17 Wednesday 7.869337 0.016 % 0.016 %
## 3 2021-02-18 Thursday 19.801272 0.041 % 0.041 %
## 4 2021-02-19 Friday 42.733207 0.087 % 0.087 %
## 5 2021-02-20 Saturday 70.665142 0.144 % 0.144 %
## 6 2021-02-21 Sunday 100.597077 0.204 % 0.204 %
## 7 2021-02-22 Monday 132.529013 0.267 % 0.267 %
## TBATS Model
# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE, seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2)) # fit a TBATS model with a fixed seasonal period of 6 using the internal forecast helper
accuracy(model_TBATS) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.033371 11.89959 7.029362 NaN Inf 0.0499401 -0.006947839
# Print Model Parameters
model_TBATS
## TBATS(1, {0,0}, 1, {<6,2>})
##
## Call: NULL
##
## Parameters
## Alpha: 1.017233
## Beta: 0.7577115
## Damping Parameter: 1
## Gamma-1 Values: -0.00192764
## Gamma-2 Values: 0.001607352
##
## Seed States:
## [,1]
## [1,] -6.3556673
## [2,] -5.7532877
## [3,] 0.7054920
## [4,] -0.5224022
## [5,] 1.3257028
## [6,] 0.2717464
##
## Sigma: 11.89959
## AIC: 3697.647
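# Reading the printed specification: TBATS(1, {0,0}, 1, {<6,2>}) corresponds to the
# explicit arguments above -- no Box-Cox transformation, ARMA(0,0) errors, no damping,
# and one seasonal period of length 6 modelled with k = 2 Fourier terms.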
plot(model_TBATS,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 day by using TBATS Model for ==> Covid 19 infection cases in Chelyabinsk"
MAPE_Mean_All.TBATS_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.TBATS<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in TBATS Model for ==> Covid 19 infection cases in Chelyabinsk"
paste(MAPE_Mean_All.TBATS,"%")
## [1] "0.113 % MAPE 7 day Covid 19 infection cases in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in TBATS Model for ==> Covid 19 infection cases in Chelyabinsk"
data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)
## date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2021-02-16 Tuesday 48116 48118.05
## 2 2021-02-17 Wednesday 48371 48380.98
## 3 2021-02-18 Thursday 48620 48643.88
## 4 2021-02-19 Friday 48858 48903.45
## 5 2021-02-20 Saturday 49091 49163.23
## 6 2021-02-21 Sunday 49322 49424.48
## 7 2021-02-22 Monday 49551 49685.55
## MAPE_TBATS_Model
## 1 0.004 %
## 2 0.021 %
## 3 0.049 %
## 4 0.093 %
## 5 0.147 %
## 6 0.208 %
## 7 0.272 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))
## FD forecating_date forecasting_by_TBATS
## 1 2021-02-23 Tuesday 49948.47
## 2 2021-02-24 Wednesday 50211.37
## 3 2021-02-25 Thursday 50470.94
## 4 2021-02-26 Friday 50730.73
## 5 2021-02-27 Saturday 50991.97
## 6 2021-02-28 Sunday 51253.04
## 7 2021-03-01 Monday 51515.96
plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days) # Root mean square forecast error
MSE_tbats<-(sum((Error_tbats^2))/validation_data_days) # Mean square forecast error (MSE)
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days) # absolute mean forecast error (labelled MAD)
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,MSE_tbats,RMSE_tbats,MAPE_Mean_All.TBATS_Model,MAD_tbats) # error analysis for the TBATS model: correlation, MSE, RMSE, MAPE, MAD
## correlation_tbats MSE_tbats RMSE_tbats MAPE_Mean_All.TBATS_Model MAD_tbats
## 1 0.9998241 5223.166 72.27148 0.113 55.80314
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1 2021-02-16 Tuesday 2.053372 0.004 % 0.004 %
## 2 2021-02-17 Wednesday 9.979516 0.021 % 0.021 %
## 3 2021-02-18 Thursday 23.877050 0.049 % 0.049 %
## 4 2021-02-19 Friday 45.452617 0.093 % 0.093 %
## 5 2021-02-20 Saturday 72.234973 0.147 % 0.147 %
## 6 2021-02-21 Sunday 102.479200 0.208 % 0.207 %
## 7 2021-02-22 Monday 134.545250 0.272 % 0.271 %
## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.06367956 12.3186 7.391879 Inf Inf 0.05251559 0.162771
# Print Model Parameters
summary(model_holt$model)
## Holt's method
##
## Call:
## holt(y = data_series, h = N_forecasting_days + validation_data_days,
##
## Call:
## lambda = "auto")
##
## Box-Cox transformation: lambda= 0.612
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.5964
##
## Initial states:
## l = -1.6433
## b = -0.4258
##
## sigma: 0.7914
##
## AIC AICc BIC
## 1835.101 1835.280 1854.260
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.06367956 12.3186 7.391879 Inf Inf 0.05251559 0.162771
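# The option lambda = "auto" selects a Box-Cox transformation automatically; a quick
# check (a sketch, assuming forecast's default Guerrero method, which holt() uses
# internally for lambda = "auto") should give a value close to the printed 0.612.
BoxCox.lambda(data_series)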
# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 day by using holt Model for ==> Covid 19 infection cases in Chelyabinsk"
MAPE_Mean_All.Holt_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.Holt<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in holt Model for ==> Covid 19 infection cases in Chelyabinsk"
paste(MAPE_Mean_All.Holt,"%")
## [1] "0.127 % MAPE 7 day Covid 19 infection cases in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in holt Model for ==> Covid 19 infection cases in Chelyabinsk"
data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)
## date_holt validation_data_by_name actual_data forecasting_holt
## 1 2021-02-16 Tuesday 48116 48119.01
## 2 2021-02-17 Wednesday 48371 48381.58
## 3 2021-02-18 Thursday 48620 48644.70
## 4 2021-02-19 Friday 48858 48908.37
## 5 2021-02-20 Saturday 49091 49172.60
## 6 2021-02-21 Sunday 49322 49437.37
## 7 2021-02-22 Monday 49551 49702.70
## MAPE_holt_Model
## 1 0.006 %
## 2 0.022 %
## 3 0.051 %
## 4 0.103 %
## 5 0.166 %
## 6 0.234 %
## 7 0.306 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))
## FD forecating_date forecasting_by_holt
## 1 2021-02-23 Tuesday 49968.58
## 2 2021-02-24 Wednesday 50235.01
## 3 2021-02-25 Thursday 50501.99
## 4 2021-02-26 Friday 50769.52
## 5 2021-02-27 Saturday 51037.60
## 6 2021-02-28 Sunday 51306.22
## 7 2021-03-01 Monday 51575.39
plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days) # Root mean square forecast error
MSE_Holt<-(sum((Error_Holt^2))/validation_data_days) # Mean square forecast error (MSE)
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days) # absolute mean forecast error (labelled MAD)
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,MSE_Holt,RMSE_Holt,MAPE_Mean_All.Holt_Model,MAD_Holt) # error analysis for Holt's linear model: correlation, MSE, RMSE, MAPE, MAD
## correlation_Holt MSE_Holt RMSE_Holt MAPE_Mean_All.Holt_Model MAD_Holt
## 1 0.9997549 6607.374 81.28575 0.127 62.47614
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1 2021-02-16 Tuesday 3.01150 0.006 % 0.006 %
## 2 2021-02-17 Wednesday 10.57754 0.022 % 0.022 %
## 3 2021-02-18 Thursday 24.69755 0.051 % 0.051 %
## 4 2021-02-19 Friday 50.37087 0.103 % 0.103 %
## 5 2021-02-20 Saturday 81.59682 0.166 % 0.166 %
## 6 2021-02-21 Sunday 115.37475 0.234 % 0.233 %
## 7 2021-02-22 Monday 151.70397 0.306 % 0.305 %
# Auto ARIMA model
##################
require(tseries) # tseries is needed to test stationarity of the time series
paste ("tests For Check Stationarity in series ==> ",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series ==> Covid 19 infection cases in Chelyabinsk"
kpss.test(data_series) # apply the KPSS test
## Warning in kpss.test(data_series): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: data_series
## KPSS Level = 5.3479, Truncation lag parameter = 5, p-value = 0.01
pp.test(data_series) # apply the PP test
## Warning in pp.test(data_series): p-value greater than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: data_series
## Dickey-Fuller Z(alpha) = 1.4225, Truncation lag parameter = 5, p-value
## = 0.99
## alternative hypothesis: stationary
adf.test(data_series) # apply the ADF test
##
## Augmented Dickey-Fuller Test
##
## data: data_series
## Dickey-Fuller = -1.9863, Lag order = 6, p-value = 0.5825
## alternative hypothesis: stationary
ndiffs(data_series) # estimate the number of differences required for stationarity
## [1] 2
#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", ylab=y_lab,main = "1st differenced series")
## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

## Testing the stationarity of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in ==> ",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series after taking first differences in ==> Covid 19 infection cases in Chelyabinsk"
kpss.test(diff1_x1) # apply the KPSS test after first differencing
## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff1_x1
## KPSS Level = 4.1385, Truncation lag parameter = 5, p-value = 0.01
pp.test(diff1_x1) # apply the PP test after first differencing
##
## Phillips-Perron Unit Root Test
##
## data: diff1_x1
## Dickey-Fuller Z(alpha) = -5.4344, Truncation lag parameter = 5, p-value
## = 0.8058
## alternative hypothesis: stationary
adf.test(diff1_x1) # apply the ADF test after first differencing
##
## Augmented Dickey-Fuller Test
##
## data: diff1_x1
## Dickey-Fuller = -1.2398, Lag order = 6, p-value = 0.8974
## alternative hypothesis: stationary
#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", ylab=y_lab ,main = "2nd differenced series")
## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub

## Testing the stationarity of the second differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series after taking Second differences in Covid 19 infection cases in Chelyabinsk"
kpss.test(diff2_x1) # apply the KPSS test after second differencing
## Warning in kpss.test(diff2_x1): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff2_x1
## KPSS Level = 0.10905, Truncation lag parameter = 5, p-value = 0.1
pp.test(diff2_x1) # apply the PP test after second differencing
## Warning in pp.test(diff2_x1): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: diff2_x1
## Dickey-Fuller Z(alpha) = -377.65, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary
adf.test(diff2_x1) # apply the ADF test after second differencing
## Warning in adf.test(diff2_x1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff2_x1
## Dickey-Fuller = -7.5188, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
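# Cross-check (an optional sketch, not in the original script): ndiffs() can repeat the
# unit-root tests used above and report the implied number of differences per test.
ndiffs(data_series, test = "kpss")
ndiffs(data_series, test = "adf")
ndiffs(data_series, test = "pp")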
####Fitting an ARIMA Model
# 1. Using the auto.arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp")) # applying auto.arima
##
## ARIMA(0,2,0) : 2669.501
## ARIMA(0,2,1) : 2654.278
## ARIMA(0,2,2) : 2656.264
## ARIMA(0,2,3) : 2653.029
## ARIMA(0,2,4) : 2653.228
## ARIMA(0,2,5) : 2648.254
## ARIMA(1,2,0) : 2654.612
## ARIMA(1,2,1) : 2655.823
## ARIMA(1,2,2) : Inf
## ARIMA(1,2,3) : 2654.571
## ARIMA(1,2,4) : 2650.166
## ARIMA(2,2,0) : 2656.581
## ARIMA(2,2,1) : 2656.578
## ARIMA(2,2,2) : 2658.076
## ARIMA(2,2,3) : 2649.378
## ARIMA(3,2,0) : 2657.563
## ARIMA(3,2,1) : 2656.149
## ARIMA(3,2,2) : 2651.42
## ARIMA(4,2,0) : 2646.378
## ARIMA(4,2,1) : 2645.2
## ARIMA(5,2,0) : 2646.253
##
##
##
## Best model: ARIMA(4,2,1)
model1 # show the result of auto.arima
## Series: data_series
## ARIMA(4,2,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1
## -0.6629 -0.1277 -0.1110 -0.2185 0.4492
## s.e. 0.1682 0.0729 0.0635 0.0533 0.1686
##
## sigma^2 estimated as 140.2: log likelihood=-1316.47
## AIC=2644.95 AICc=2645.2 BIC=2667.9
# Helper copied from the forecast package source to extract the best model's order as a string
arima.string <- function (object, padding = FALSE)
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, " ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, " ", sep = "")
    }
    else {
      result <- paste(result, " ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean ")
    }
    else {
      result <- paste(result, " ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}
bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel
## [1] 4 2 1
strtoi(bestmodel[3])
## [1] 1
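# A simpler alternative to the string parsing above (a sketch, assuming a reasonably
# recent forecast version): arimaorder() returns the fitted (p, d, q) order directly.
arimaorder(model1)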
# 2. Using the ACF and PACF functions
#par(mfrow=c(1,2)) # code for putting two plots in one graph
acf(diff2_x1, lag.max=20, xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF - 2nd differenced series ",y_lab, sep=" ")) # plot the ACF (autocorrelation function) after second differencing

pacf(diff2_x1, lag.max=20, xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("PACF - 2nd differenced series ",y_lab, sep=" ")) # plot the PACF (partial autocorrelation function) after second differencing

library(forecast) # load the forecast package (already attached above)
x1_model1= arima(data_series, order=c(bestmodel)) # fit the best order found by auto.arima
x1_model1 # Show result of best model of auto arima
##
## Call:
## arima(x = data_series, order = c(bestmodel))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1
## -0.6629 -0.1277 -0.1110 -0.2185 0.4492
## s.e. 0.1682 0.0729 0.0635 0.0533 0.1686
##
## sigma^2 estimated as 138.1: log likelihood = -1316.47, aic = 2644.95
paste ("accuracy of autoarima Model For ==> ",y_lab, sep=" ")
## [1] "accuracy of autoarima Model For ==> Covid 19 infection cases in Chelyabinsk"
accuracy(x1_model1) # accuracy of the best model from auto.arima
## ME RMSE MAE MPE MAPE MASE
## Training set 1.122016 11.7181 6.823088 0.4087639 2.10094 0.04847462
## ACF1
## Training set -0.009778106
x1_model1$x # stats::arima() does not store the original series, so this returns NULL
## NULL
checkresiduals(x1_model1,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab) # check the residuals of the best auto.arima model

##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,2,1)
## Q* = 3.8452, df = 5, p-value = 0.5719
##
## Model df: 5. Total lags used: 10
paste("Box-Ljung test , Ljung-Box test For Modelling for ==> ",y_lab, sep=" ")
## [1] "Box-Ljung test , Ljung-Box test For Modelling for ==> Covid 19 infection cases in Chelyabinsk"
Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box") # Ljung-Box test on the squared residuals
##
## Box-Ljung test
##
## data: x1_model1$residuals^2
## X-squared = 135.86, df = 20, p-value < 2.2e-16
library(tseries)
jarque.bera.test(x1_model1$residuals) # Jarque-Bera test for normality of the residuals
##
## Jarque Bera Test
##
## data: x1_model1$residuals
## X-squared = 1609.8, df = 2, p-value < 2.2e-16
#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model
lines(fitted(x1_model1), col='black')

#Test data
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make the testing data a time series starting at rows - validation_data_days + 1
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 day by using bats Model for ==> Covid 19 infection cases in Chelyabinsk"
MAPE_Mean_All.ARIMA_Model<-round(mean(MAPE_Per_Day),3)
MAPE_Mean_All.ARIMA<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in bats Model for ==> Covid 19 infection cases in Chelyabinsk"
paste(MAPE_Mean_All.ARIMA,"%")
## [1] "0.115 % MAPE 7 day Covid 19 infection cases in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in bats Model for ==> Covid 19 infection cases in Chelyabinsk"
data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)
## date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1 2021-02-16 Tuesday 48116 48118.69
## 2 2021-02-17 Wednesday 48371 48380.23
## 3 2021-02-18 Thursday 48620 48642.11
## 4 2021-02-19 Friday 48858 48903.49
## 5 2021-02-20 Saturday 49091 49165.03
## 6 2021-02-21 Sunday 49322 49426.52
## 7 2021-02-22 Monday 49551 49688.00
## MAPE_auto.arima_Model
## 1 0.006 %
## 2 0.019 %
## 3 0.045 %
## 4 0.093 %
## 5 0.151 %
## 6 0.212 %
## 7 0.276 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))
## FD forecating_date forecasting_by_auto.arima
## 1 2021-02-23 Tuesday 49949.58
## 2 2021-02-24 Wednesday 50211.07
## 3 2021-02-25 Thursday 50472.62
## 4 2021-02-26 Friday 50734.13
## 5 2021-02-27 Saturday 50995.65
## 6 2021-02-28 Sunday 51257.18
## 7 2021-03-01 Monday 51518.70
plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

MAPE_Mean_All.ARIMA
## [1] "0.115 % MAPE 7 day Covid 19 infection cases in Chelyabinsk"
## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days) # Root mean square forecast error
MSE_auto.arima<-(sum((Error_auto.arima^2))/validation_data_days) # Mean square forecast error (MSE)
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days) # absolute mean forecast error (labelled MAD)
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,MSE_auto.arima,RMSE_auto.arima,MAPE_Mean_All.ARIMA_Model,MAD_auto.arima) # error analysis for the auto ARIMA model: correlation, MSE, RMSE, MAPE, MAD
## correlation_auto.arima MSE_auto.arima RMSE_auto.arima
## 1 0.9997959 5403.305 73.50718
## MAPE_Mean_All.ARIMA_Model MAD_auto.arima
## 1 0.115 56.43831
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1 2021-02-16 Tuesday 2.68858 0.006 %
## 2 2021-02-17 Wednesday 9.23370 0.019 %
## 3 2021-02-18 Thursday 22.11205 0.045 %
## 4 2021-02-19 Friday 45.49282 0.093 %
## 5 2021-02-20 Saturday 74.02632 0.151 %
## 6 2021-02-21 Sunday 104.51645 0.212 %
## 7 2021-02-22 Monday 136.99826 0.276 %
## REOF_F_auto.arima
## 1 0.006 %
## 2 0.019 %
## 3 0.045 %
## 4 0.093 %
## 5 0.151 %
## 6 0.211 %
## 7 0.276 %
# SIR Model
#install.packages("dplyr")
library(deSolve)
first<-rows-(validation_data_days+6)
secondr<-rows-7
vector_SIR<-original_data[first:secondr]
Infected <- c(vector_SIR)
Day <- 1:(length(Infected))
N <- Population # population of the Chelyabinsk region
SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S            # change in susceptibles
    dI <- beta/N * I * S - gamma * I # change in infected
    dR <- gamma * I                  # change in recovered (removed)
    list(c(dS, dI, dR))
  })
}
init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]          # column 3 of the ODE output is the infected compartment I
  sum((Infected - fit)^2)   # residual sum of squares against the observed cases
}
# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B",
lower = c(0, 0), upper = c(10, 10))
Opt$message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
Opt_par <- setNames(Opt$par, c("beta", "gamma"))
Opt_par
## beta gamma
## 0.1168318 0.1092187
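# Basic reproduction number implied by the fitted parameters (standard SIR relation
# R0 = beta / gamma; this line is an addition, not part of the original output).
as.numeric(Opt_par["beta"] / Opt_par["gamma"])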
out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)
plot(out)
plot(out, obs=data.frame(time=Day, I=Infected))


result_SIR<-data.frame(out)
validation_forecast<-result_SIR$I
## Error of forecasting
Error_SIR<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_SIR<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_SIR<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_SIR<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_SIR<-sqrt(sum((Error_SIR^2))/validation_data_days) # Root mean square forecast error
MSE_SIR<-(sum((Error_SIR^2))/validation_data_days) # Mean square forecast error (MSE)
MAD_SIR<-abs((sum(testing_data-validation_forecast))/validation_data_days) # absolute mean forecast error (labelled MAD)
AEOF_SIR<-c(Error_SIR)
REOF_A_SIR<-c(paste(round(REOF_A_SIR,3),"%"))
REOF_A_SIR1<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
REOF_F_SIR<-c(paste(round(REOF_F_SIR,3),"%"))
MAPE_Mean_All.SIR<-round(mean(abs(((testing_data-validation_forecast)/testing_data)*100)),3)
MAPE_Mean_All<-paste(round(mean(abs(((testing_data-validation_forecast)/testing_data)*100)),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
data.frame(correlation_SIR,MSE_SIR,RMSE_SIR,MAPE_Mean_All.SIR,MAD_SIR) # error analysis for the SIR model: correlation, MSE, RMSE, MAPE, MAD
## correlation_SIR MSE_SIR RMSE_SIR MAPE_Mean_All.SIR MAD_SIR
## 1 0.999981 3139786 1771.944 3.628 1771.453
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_SIR,REOF_A_SIR,REOF_F_SIR,validation_forecast,testing_data) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_SIR REOF_A_SIR REOF_F_SIR
## 1 2021-02-16 Tuesday 1836.000 3.816 % 3.967 %
## 2 2021-02-17 Wednesday 1814.196 3.751 % 3.897 %
## 3 2021-02-18 Thursday 1793.194 3.688 % 3.829 %
## 4 2021-02-19 Friday 1768.168 3.619 % 3.755 %
## 5 2021-02-20 Saturday 1745.285 3.555 % 3.686 %
## 6 2021-02-21 Sunday 1727.713 3.503 % 3.63 %
## 7 2021-02-22 Monday 1715.612 3.462 % 3.586 %
## validation_forecast testing_data
## 1 46280.00 48116
## 2 46556.80 48371
## 3 46826.81 48620
## 4 47089.83 48858
## 5 47345.71 49091
## 6 47594.29 49322
## 7 47835.39 49551
## forecasting by SIR model
Infected <- c(tail(original_data,N_forecasting_days))
Day <- 1:(length(Infected))
N <- Population # population of the Chelyabinsk region
SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}
init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init, times = Day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((Infected - fit)^2)
}
# optimize with some sensible conditions
Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B",
lower = c(0, 0), upper = c(10, 10))
Opt$message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
Opt_par <- setNames(Opt$par, c("beta", "gamma"))
out <- ode(y = init, times = Day, func = SIR, parms = Opt_par)
result_SIR <-data.frame(out)
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_SIR=result_SIR$I)
## FD forecating_date forecasting_by_SIR
## 1 2021-02-23 Tuesday 48116.00
## 2 2021-02-24 Wednesday 48379.37
## 3 2021-02-25 Thursday 48631.85
## 4 2021-02-26 Friday 48873.21
## 5 2021-02-27 Saturday 49103.25
## 6 2021-02-28 Sunday 49321.76
## 7 2021-03-01 Monday 49528.53
# Table of MAPE values for the country/region
best_recommended_model <- min(MAPE_Mean_All_NNAR,MAPE_Mean_All.bats_Model,MAPE_Mean_All.TBATS_Model,MAPE_Mean_All.Holt_Model,MAPE_Mean_All.ARIMA_Model,MAPE_Mean_All.SIR)
paste("System Choose Least Error ==> ( MAPE %) of Forecasting by using bats model and BATS Model, Holt's Linear Models , and autoarima for ==> ", y_lab , sep=" ")
## [1] "System Choose Least Error ==> ( MAPE %) of Forecasting by using bats model and BATS Model, Holt's Linear Models , and autoarima for ==> Covid 19 infection cases in Chelyabinsk"
best_recommended_model
## [1] 0.109
x1<-if(best_recommended_model >= MAPE_Mean_All.bats_Model) {paste("BATS Model")}
x2<-if(best_recommended_model >= MAPE_Mean_All.TBATS_Model) {paste("TBATS Model")}
x3<-if(best_recommended_model >= MAPE_Mean_All.Holt_Model) {paste("Holt Model")}
x4<-if(best_recommended_model >= MAPE_Mean_All.ARIMA_Model) {paste("ARIMA Model")}
x5<-if(best_recommended_model >= MAPE_Mean_All.SIR) {paste("SIR Model")}
x6<-if(best_recommended_model >= MAPE_Mean_All_NNAR) {paste("NNAR Model")}
result<-c(x1,x2,x3,x4,x5,x6)
table.error<-data.frame(country.name,NNAR.model=MAPE_Mean_All_NNAR, BATS.Model=MAPE_Mean_All.bats_Model,TBATS.Model=MAPE_Mean_All.TBATS_Model,Holt.Model=MAPE_Mean_All.Holt_Model,ARIMA.Model=MAPE_Mean_All.ARIMA_Model,SIR.Model=MAPE_Mean_All.SIR,Best.Model=result)
library(ascii)
print(ascii(table(table.error)), type = "rest")
##
## +---+--------------+------------+------------+-------------+------------+-------------+-----------+------------+------+
## | | country.name | NNAR.model | BATS.Model | TBATS.Model | Holt.Model | ARIMA.Model | SIR.Model | Best.Model | Freq |
## +===+==============+============+============+=============+============+=============+===========+============+======+
## | 1 | Chelyabinsk | 0.382 | 0.109 | 0.113 | 0.127 | 0.115 | 3.628 | BATS Model | 1.00 |
## +---+--------------+------------+------------+-------------+------------+-------------+-----------+------------+------+
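# A compact alternative (a sketch) to the if-chains above: pick the name of the model
# with the smallest mean MAPE directly.
mape_all <- c(NNAR = MAPE_Mean_All_NNAR, BATS = MAPE_Mean_All.bats_Model,
              TBATS = MAPE_Mean_All.TBATS_Model, Holt = MAPE_Mean_All.Holt_Model,
              ARIMA = MAPE_Mean_All.ARIMA_Model, SIR = MAPE_Mean_All.SIR)
names(which.min(mape_all))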
message("System finished Forecasting by using autoarima and Holt's ,TBATS, and SIR Model ==>",y_lab, sep=" ")
## System finished Forecasting by using autoarima and Holt's ,TBATS, and SIR Model ==>Covid 19 infection cases in Chelyabinsk
message(" Thank you for using our System For Modelling ==> ",y_lab, sep=" ")
## Thank you for using our System For Modelling ==> Covid 19 infection cases in Chelyabinsk