New package “FORECAST.TA” for forecasting — applied example: forecasting SAARC milk production in Pakistan

This algorithm was developed by:

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb Mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South Ural State University, Chelyabinsk, Russian Federation

# Imports
library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages --------------------------------------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

## Warning: package 'ggplot2' was built under R version 4.0.3

## Warning: package 'forecast' was built under R version 4.0.3

##

library(forecast)
library(ggplot2)
library("readxl")

## Warning: package 'readxl' was built under R version 4.0.3

library(moments)

## Warning: package 'moments' was built under R version 4.0.3

library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

## Warning: package 'tseries' was built under R version 4.0.3

require(markovchain)

## Loading required package: markovchain

## Warning: package 'markovchain' was built under R version 4.0.3

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

# ---- Run configuration (user inputs) ----
# Workbook holding the observed series; the path is absolute and machine-specific.
Full_original_data<-read_excel("F:/Phd/ALL Russia Analysis/SAARC milk production final.xlsx")
# Label reused for plot axes/titles and in every printed message.
y_lab<- "SAARC milk production in Pakistan"   # input name of data
# First and last date of the observed series (expanded with seq() further down).
Actual_date_interval <- c("1961/12/31","2018/12/31")
# First and last date of the forecast horizon.
Forecast_date_interval <- c("2019/12/31","2025/12/31")
# Number of trailing observations held out for validation. Despite the name,
# the unit is whatever `frequency` says (years in this run).
validation_data_days <-7
# Step between consecutive dates; passed to seq() as `by` and used in labels.
frequency<-"years"

# Data Preparation & calculate some of statistics measures
original_data<-Full_original_data$Pakistan

summary(original_data)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  4209000  5773750 10380500 12627086 19084250 28109000

sd(original_data)  # calculate standard deviation

## [1] 7540331

skewness(original_data)  # calculate Cofficient of skewness

## [1] 0.5334464

kurtosis(original_data)   # calculate Cofficient of kurtosis

## [1] 1.880945

# Split the series into a training part and a trailing hold-out part, and
# build the date axes for the observed and the forecast periods.
rows <- NROW(original_data)
# The hold-out must be non-empty and must leave at least one training point;
# otherwise index ranges such as 1:0 would silently pick the wrong elements.
stopifnot(validation_data_days >= 1, validation_data_days < rows)
training_data <- head(original_data, rows - validation_data_days)
testing_data <- tail(original_data, validation_data_days)
AD <- fulldate <- seq(as.Date(Actual_date_interval[1]), as.Date(Actual_date_interval[2]), frequency)  # dates of the observed series
FD <- seq(as.Date(Forecast_date_interval[1]), as.Date(Forecast_date_interval[2]), frequency)  # dates of the forecast horizon
N_forecasting_days <- length(FD)  # number of periods to forecast beyond the data
validation_dates <- tail(AD, validation_data_days)
# Weekday names of the validation / forecast dates (shown in the result tables).
validation_data_by_name <- weekdays(validation_dates)
forecasting_data_by_name <- weekdays(FD)

##bats model
# Data Modeling
# Wrap the training observations in a ts object (ts() defaults: start = 1, frequency = 1).
data_series<-ts(training_data)
# Plot the training series; `frequency` and `y_lab` only feed the axis label and title.
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

# Fit a BATS model; bats() is called with all of its defaults.
model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                    ME   RMSE      MAE       MPE     MAPE      MASE        ACF1
## Training set 16437.32 166425 99196.88 0.2896213 1.073816 0.2616504 -0.03808877

# Print Model Parameters
model_bats

## BATS(0.544, {0,0}, 0.998, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Lambda: 0.544361
##   Alpha: 0.9867143
##   Beta: 0.4119543
##   Damping Parameter: 0.998458
## 
## Seed States:
##              [,1]
## [1,] 7707.8541597
## [2,]    0.0208483
## attr(,"lambda")
## [1] 0.5443614
## 
## Sigma: 102.5477
## AIC: 1430.339

plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
# Forecast the hold-out window and the future horizon in one call; the first
# `validation_data_days` values follow directly after the training data and
# are compared with `testing_data`.
forecasting_bats <- predict(model_bats, h = N_forecasting_days + validation_data_days)
validation_forecast <- head(forecasting_bats$mean, validation_data_days)
# Absolute percentage error of each hold-out observation, rounded to 3 digits.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)
paste("MAPE % For ", validation_data_days, frequency, "by using bats Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE % For  7 years by using bats Model for  ==>  SAARC milk production in Pakistan"

MAPE_Mean_All <- paste(round(mean(MAPE_Per_Day), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
MAPE_bats <- paste(round(MAPE_Per_Day, 3), "%")
MAPE_bats_Model <- paste(MAPE_Per_Day, "%")
# The time unit is taken from `frequency` rather than a hard-coded " days ".
paste(" MAPE that's Error of Forecasting for ", validation_data_days, frequency, "in bats Model for  ==> ", y_lab, sep = " ")

## [1] " MAPE that's Error of Forecasting for  7 years in bats Model for  ==>  SAARC milk production in Pakistan"

# MAPE_Mean_All already contains its "%" sign, so it is printed unchanged.
MAPE_Mean_All

## [1] "0.751 % MAPE  7 years SAARC milk production in Pakistan"

paste("MAPE that's Error of Forecasting per period for ", validation_data_days, frequency, "in bats Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE that's Error of Forecasting per period for  7 years in bats Model for  ==>  SAARC milk production in Pakistan"

# Side-by-side table: date, weekday, actual value, forecast and its percentage error.
data.frame(date_bats = validation_dates, validation_data_by_name, actual_data = testing_data, forecasting_bats = validation_forecast, MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2012-12-31             понедельник    23652000         23604177
## 2 2013-12-31                 вторник    24370000         24261379
## 3 2014-12-31                   среда    25001000         24925762
## 4 2015-12-31                 четверг    25744000         25597270
## 5 2016-12-31                 суббота    26510000         26275848
## 6 2017-12-31             воскресенье    27298000         26961440
## 7 2018-12-31             понедельник    28109000         27653994
##   MAPE_bats_Model
## 1         0.202 %
## 2         0.446 %
## 3         0.301 %
## 4          0.57 %
## 5         0.883 %
## 6         1.233 %
## 7         1.619 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2019-12-31         вторник            28353455
## 2 2020-12-31         четверг            29059771
## 3 2021-12-31         пятница            29772889
## 4 2022-12-31         суббота            30492759
## 5 2023-12-31     воскресенье            31219327
## 6 2024-12-31         вторник            31952545
## 7 2025-12-31           среда            32692361

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
# Hold-out error metrics for the BATS forecast.
Error_bats <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, % of the actual value (REOF_A)
REOF_F_bats <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, % of the forecast value (REOF_F)
correlation_bats <- cor(testing_data, validation_forecast, method = "pearson")  # correlation between predicted and actual values
RMSE_bats <- sqrt(sum(Error_bats^2) / validation_data_days)  # Root mean square forecast error
# Mean absolute deviation: average of the absolute errors. The previous
# abs(sum(errors)) / n let positive and negative errors cancel each other,
# which understates the error whenever the forecast is not one-sided.
MAD_bats <- sum(Error_bats) / validation_data_days
AEOF_bats <- c(Error_bats)
REOF_Abats <- c(paste(round(REOF_A_bats, 3), "%"))
REOF_Fbats <- c(paste(round(REOF_F_bats, 3), "%"))
data.frame(correlation_bats, RMSE_bats, MAPE_Mean_All, MAD_bats)  # BATS error summary: correlation, RMSE, mean MAPE, MAD

##   correlation_bats RMSE_bats
## 1        0.9995564  243898.6
##                                             MAPE_Mean_All MAD_bats
## 1 0.751 % MAPE  7 years SAARC milk production in Pakistan 200589.8

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2012-12-31         понедельник  47822.94    0.202 %    0.203 %
## 2       2013-12-31             вторник 108620.65    0.446 %    0.448 %
## 3       2014-12-31               среда  75237.59    0.301 %    0.302 %
## 4       2015-12-31             четверг 146729.65     0.57 %    0.573 %
## 5       2016-12-31             суббота 234152.08    0.883 %    0.891 %
## 6       2017-12-31         воскресенье 336559.51    1.233 %    1.248 %
## 7       2018-12-31         понедельник 455005.94    1.619 %    1.645 %

## TBATS Model

# Data Modeling
# Rebuild the training series (same object as used for the BATS model).
data_series<-ts(training_data)
# Fit one fixed TBATS specification instead of letting tbats() search:
# no Box-Cox transform, trend with a beta term, no damping, and a single
# seasonal period of 6 described by 2 harmonics (k.vector).
# NOTE(review): fitSpecificTBATS is reached through `:::`, i.e. it is not an
# exported function of forecast and may change between package versions.
# NOTE(review): a seasonal period of 6 on a yearly series looks arbitrary —
# confirm it is intended.
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 25338.39 179751.4 120493.9 0.2595034 1.455225 0.3178252
##                     ACF1
## Training set -0.01911074

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 0.9868946
##   Beta: 0.3584389
##   Damping Parameter: 1
##   Gamma-1 Values: -0.005289493
##   Gamma-2 Values: 0.009289828
## 
## Seed States:
##              [,1]
## [1,] 4730320.9197
## [2,]  144576.0120
## [3,]    1161.5291
## [4,]  -23156.1313
## [5,]     725.8252
## [6,]  -25038.9612
## 
## Sigma: 179751.4
## AIC: 1454.655

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
# Forecast the hold-out window and the future horizon in one call; the first
# `validation_data_days` values follow directly after the training data and
# are compared with `testing_data`.
forecasting_tbats <- predict(model_TBATS, h = N_forecasting_days + validation_data_days)
validation_forecast <- head(forecasting_tbats$mean, validation_data_days)
# Absolute percentage error of each hold-out observation, rounded to 3 digits.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)
paste("MAPE % For ", validation_data_days, frequency, "by using TBATS Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE % For  7 years by using TBATS Model for  ==>  SAARC milk production in Pakistan"

MAPE_Mean_All <- paste(round(mean(MAPE_Per_Day), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
MAPE_TBATS <- paste(round(MAPE_Per_Day, 3), "%")
MAPE_TBATS_Model <- paste(MAPE_Per_Day, "%")
# The time unit is taken from `frequency` rather than a hard-coded " days ".
paste(" MAPE that's Error of Forecasting for ", validation_data_days, frequency, "in TBATS Model for  ==> ", y_lab, sep = " ")

## [1] " MAPE that's Error of Forecasting for  7 years in TBATS Model for  ==>  SAARC milk production in Pakistan"

# MAPE_Mean_All already contains its "%" sign, so it is printed unchanged.
MAPE_Mean_All

## [1] "1.746 % MAPE  7 years SAARC milk production in Pakistan"

paste("MAPE that's Error of Forecasting per period for ", validation_data_days, frequency, "in TBATS Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE that's Error of Forecasting per period for  7 years in TBATS Model for  ==>  SAARC milk production in Pakistan"

# Side-by-side table: date, weekday, actual value, forecast and its percentage error.
data.frame(date_TBATS = validation_dates, validation_data_by_name, actual_data = testing_data, forecasting_TBATS = validation_forecast, MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2012-12-31             понедельник    23652000          23502822
## 2 2013-12-31                 вторник    24370000          24121649
## 3 2014-12-31                   среда    25001000          24779868
## 4 2015-12-31                 четверг    25744000          25331920
## 5 2016-12-31                 суббота    26510000          25948036
## 6 2017-12-31             воскресенье    27298000          26600471
## 7 2018-12-31             понедельник    28109000          27149451
##   MAPE_TBATS_Model
## 1          0.631 %
## 2          1.019 %
## 3          0.884 %
## 4          1.601 %
## 5           2.12 %
## 6          2.555 %
## 7          3.414 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2019-12-31         вторник             27768278
## 2 2020-12-31         четверг             28426497
## 3 2021-12-31         пятница             28978550
## 4 2022-12-31         суббота             29594665
## 5 2023-12-31     воскресенье             30247100
## 6 2024-12-31         вторник             30796080
## 7 2025-12-31           среда             31414907

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model

# Hold-out error metrics for the TBATS forecast.
Error_tbats <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1 <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, % of the actual value (REOF_A)
REOF_F_tbats <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, % of the forecast value (REOF_F)
correlation_tbats <- cor(testing_data, validation_forecast, method = "pearson")  # correlation between predicted and actual values
RMSE_tbats <- sqrt(sum(Error_tbats^2) / validation_data_days)  # Root mean square forecast error
# Mean absolute deviation: average of the absolute errors. The previous
# abs(sum(errors)) / n let positive and negative errors cancel each other,
# which understates the error whenever the forecast is not one-sided.
MAD_tbats <- sum(Error_tbats) / validation_data_days
AEOF_tbats <- c(Error_tbats)
REOF_A_tbats <- c(paste(round(REOF_A_tbats1, 3), "%"))
# From here on REOF_F_tbats holds the formatted "x %" strings, not numbers.
REOF_F_tbats <- c(paste(round(REOF_F_tbats, 3), "%"))
data.frame(correlation_tbats, RMSE_tbats, MAPE_Mean_All, MAD_tbats)  # TBATS error summary: correlation, RMSE, mean MAPE, MAD

##   correlation_tbats RMSE_tbats
## 1         0.9987025   537949.2
##                                             MAPE_Mean_All MAD_tbats
## 1 1.746 % MAPE  7 years SAARC milk production in Pakistan  464254.6

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2012-12-31         понедельник   149178.0      0.631 %      0.635 %
## 2       2013-12-31             вторник   248351.2      1.019 %       1.03 %
## 3       2014-12-31               среда   221131.7      0.884 %      0.892 %
## 4       2015-12-31             четверг   412079.5      1.601 %      1.627 %
## 5       2016-12-31             суббота   561964.3       2.12 %      2.166 %
## 6       2017-12-31         воскресенье   697528.8      2.555 %      2.622 %
## 7       2018-12-31         понедельник   959548.8      3.414 %      3.534 %

## Holt's linear trend


# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                     ME     RMSE      MAE         MPE      MAPE      MASE
## Training set -15056.54 164178.6 79405.29 -0.06626589 0.6611748 0.2094463
##                    ACF1
## Training set -0.1201909

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= -0.249 
## 
##   Smoothing parameters:
##     alpha = 0.9986 
##     beta  = 0.5667 
## 
##   Initial states:
##     l = 3.926 
##     b = 7e-04 
## 
##   sigma:  2e-04
## 
##       AIC      AICc       BIC 
## -651.6374 -650.3041 -641.9783 
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE      MAPE      MASE
## Training set -15056.54 164178.6 79405.29 -0.06626589 0.6611748 0.2094463
##                    ACF1
## Training set -0.1201909

# Testing Data Evaluation
# Forecast the hold-out window and the future horizon in one call; the first
# `validation_data_days` values follow directly after the training data and
# are compared with `testing_data`.
forecasting_holt <- predict(model_holt, h = N_forecasting_days + validation_data_days, lambda = "auto")
validation_forecast <- head(forecasting_holt$mean, validation_data_days)
# Absolute percentage error of each hold-out observation, rounded to 3 digits.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)
paste("MAPE % For ", validation_data_days, frequency, "by using holt Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE % For  7 years by using holt Model for  ==>  SAARC milk production in Pakistan"

MAPE_Mean_All <- paste(round(mean(MAPE_Per_Day), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
MAPE_holt <- paste(round(MAPE_Per_Day, 3), "%")
MAPE_holt_Model <- paste(MAPE_Per_Day, "%")
# The time unit is taken from `frequency` rather than a hard-coded " days ".
paste(" MAPE that's Error of Forecasting for ", validation_data_days, frequency, "in holt Model for  ==> ", y_lab, sep = " ")

## [1] " MAPE that's Error of Forecasting for  7 years in holt Model for  ==>  SAARC milk production in Pakistan"

# MAPE_Mean_All already contains its "%" sign, so it is printed unchanged.
MAPE_Mean_All

## [1] "0.632 % MAPE  7 years SAARC milk production in Pakistan"

paste("MAPE that's Error of Forecasting per period for ", validation_data_days, frequency, "in holt Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE that's Error of Forecasting per period for  7 years in holt Model for  ==>  SAARC milk production in Pakistan"

# Side-by-side table: date, weekday, actual value, forecast and its percentage error.
data.frame(date_holt = validation_dates, validation_data_by_name, actual_data = testing_data, forecasting_holt = validation_forecast, MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2012-12-31             понедельник    23652000         23654698
## 2 2013-12-31                 вторник    24370000         24381244
## 3 2014-12-31                   среда    25001000         25135875
## 4 2015-12-31                 четверг    25744000         25919904
## 5 2016-12-31                 суббота    26510000         26734715
## 6 2017-12-31             воскресенье    27298000         27581768
## 7 2018-12-31             понедельник    28109000         28462607
##   MAPE_holt_Model
## 1         0.011 %
## 2         0.046 %
## 3         0.539 %
## 4         0.683 %
## 5         0.848 %
## 6          1.04 %
## 7         1.258 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2019-12-31         вторник            29378860
## 2 2020-12-31         четверг            30332248
## 3 2021-12-31         пятница            31324592
## 4 2022-12-31         суббота            32357815
## 5 2023-12-31     воскресенье            33433955
## 6 2024-12-31         вторник            34555167
## 7 2025-12-31           среда            35723735

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
# Hold-out error metrics for Holt's linear trend forecast.
Error_Holt <- abs(testing_data - validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1 <- abs(((testing_data - validation_forecast) / testing_data) * 100)  # Relative error of forecast, % of the actual value (REOF_A)
REOF_F_Holt <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)  # Relative error of forecast, % of the forecast value (REOF_F)
correlation_Holt <- cor(testing_data, validation_forecast, method = "pearson")  # correlation between predicted and actual values
RMSE_Holt <- sqrt(sum(Error_Holt^2) / validation_data_days)  # Root mean square forecast error
# Mean absolute deviation: average of the absolute errors. The previous
# abs(sum(errors)) / n let positive and negative errors cancel each other,
# which understates the error whenever the forecast is not one-sided.
MAD_Holt <- sum(Error_Holt) / validation_data_days
AEOF_Holt <- c(Error_Holt)
REOF_A_Holt <- c(paste(round(REOF_A_Holt1, 3), "%"))
# From here on REOF_F_Holt holds the formatted "x %" strings, not numbers.
REOF_F_Holt <- c(paste(round(REOF_F_Holt, 3), "%"))
REOF_A_Holt11 <- mean(abs(((testing_data - validation_forecast) / testing_data) * 100))  # unrounded mean absolute percentage error
data.frame(correlation_Holt, RMSE_Holt, MAPE_Mean_All, MAD_Holt)  # Holt error summary: correlation, RMSE, mean MAPE, MAD

##   correlation_Holt RMSE_Holt
## 1        0.9999184  208849.4
##                                             MAPE_Mean_All MAD_Holt
## 1 0.632 % MAPE  7 years SAARC milk production in Pakistan 169544.4

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name  AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2012-12-31         понедельник   2697.936     0.011 %     0.011 %
## 2       2013-12-31             вторник  11243.738     0.046 %     0.046 %
## 3       2014-12-31               среда 134875.030     0.539 %     0.537 %
## 4       2015-12-31             четверг 175903.989     0.683 %     0.679 %
## 5       2016-12-31             суббота 224714.844     0.848 %     0.841 %
## 6       2017-12-31         воскресенье 283768.436      1.04 %     1.029 %
## 7       2018-12-31         понедельник 353607.097     1.258 %     1.242 %

#Auto arima model
##################

require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  SAARC milk production in Pakistan"

kpss.test(data_series) # applay kpss test

## Warning in kpss.test(data_series): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 1.3026, Truncation lag parameter = 3, p-value = 0.01

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -1.4196, Truncation lag parameter = 3, p-value
## = 0.9786
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -1.9457, Lag order = 3, p-value = 0.5962
## alternative hypothesis: stationary

ndiffs(data_series)    # estimate how many differences the series needs to become stationary (no differencing is applied here)

## [1] 2

# Taking the first difference
diff1_x1 <- diff(data_series)
# Only labels and title are passed: base-graphics arguments (col.main, cex.lab,
# font.main, ...) are not understood by autoplot() for a ts object and merely
# produced an "Ignoring unknown parameters" warning.
autoplot(diff1_x1, xlab = paste("Time in  ", frequency, y_lab, sep = " "), ylab = y_lab, main = "1st differenced series")

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  SAARC milk production in Pakistan"

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value smaller than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 1.0309, Truncation lag parameter = 3, p-value = 0.01

pp.test(diff1_x1)     # applay pp test after taking first differences

## Warning in pp.test(diff1_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -28.598, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -1.9938, Lag order = 3, p-value = 0.5768
## alternative hypothesis: stationary

# Taking the second difference
diff2_x1 <- diff(diff1_x1)
# Only labels and title are passed: base-graphics arguments (col.main, cex.lab,
# font.main, ...) are not understood by autoplot() for a ts object and merely
# produced an "Ignoring unknown parameters" warning.
autoplot(diff2_x1, xlab = paste("Time in  ", frequency, y_lab, sep = " "), ylab = y_lab, main = "2nd differenced series")

##Testing the stationarity of the second differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in SAARC milk production in Pakistan"

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.059702, Truncation lag parameter = 3, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## Warning in pp.test(diff2_x1): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -62.285, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## Warning in adf.test(diff2_x1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -4.3805, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary

#### Fitting an ARIMA Model
# 1. Using auto.arima(): exhaustive search (stepwise = FALSE) with exact
#    likelihoods (approximation = FALSE); every candidate is printed (trace).
#    Only one unit-root test is used to choose d. The former
#    test = c("kpss", "adf", "pp") equalled the argument's default vector and
#    therefore resolved to its first entry, "kpss", which is now spelled out.
model1 <- auto.arima(data_series, stepwise = FALSE, approximation = FALSE, trace = TRUE, test = "kpss")

## 
##  ARIMA(0,2,0)                    : 1328.649
##  ARIMA(0,2,1)                    : 1316.333
##  ARIMA(0,2,2)                    : 1318.577
##  ARIMA(0,2,3)                    : 1320.952
##  ARIMA(0,2,4)                    : 1323.391
##  ARIMA(0,2,5)                    : 1325.159
##  ARIMA(1,2,0)                    : 1319.856
##  ARIMA(1,2,1)                    : 1318.576
##  ARIMA(1,2,2)                    : Inf
##  ARIMA(1,2,3)                    : Inf
##  ARIMA(1,2,4)                    : Inf
##  ARIMA(2,2,0)                    : 1319.76
##  ARIMA(2,2,1)                    : 1320.951
##  ARIMA(2,2,2)                    : Inf
##  ARIMA(2,2,3)                    : Inf
##  ARIMA(3,2,0)                    : 1321.209
##  ARIMA(3,2,1)                    : 1323.417
##  ARIMA(3,2,2)                    : Inf
##  ARIMA(4,2,0)                    : 1323.637
##  ARIMA(4,2,1)                    : 1326.021
##  ARIMA(5,2,0)                    : 1325.644
## 
## 
## 
##  Best model: ARIMA(0,2,1)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.5712
## s.e.   0.1130
## 
## sigma^2 estimated as 2.524e+10:  log likelihood=-656.04
## AIC=1316.07   AICc=1316.33   BIC=1319.86

#Make changes in the source of auto arima to run the best model
# Build a printable label such as "ARIMA(0,2,1)" or
# "ARIMA(1,1,1)(1,1,1)[12]" for a fitted ARIMA object.
#
# object   fitted model; its `arma`, `coef` and `xreg` components are read.
# padding  when TRUE, keep the fixed-width blank padding so that labels of
#          different models line up; when FALSE, trailing blanks are removed.
#
# Returns a single character string.
arima.string <- function(object, padding = FALSE) {
  # Reorder the compact `arma` specification so that positions 1-3 are the
  # non-seasonal order, 4-6 the seasonal order and 7 the period.
  spec <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  period <- spec[7]
  seasonal_terms <- sum(spec[4:6])
  coef_names <- names(object$coef)

  label <- paste0("ARIMA(", spec[1], ",", spec[2], ",", spec[3], ")")

  if (period > 1 && seasonal_terms > 0) {
    label <- paste0(
      label, "(", spec[4], ",", spec[5], ",", spec[6], ")[", period, "]"
    )
  }

  # With padding, reserve the width an absent seasonal part would have taken;
  # the extra blanks account for the number of digits in the period.
  if (padding && period > 1 && seasonal_terms == 0) {
    period_pad <- if (period <= 9) {
      " "
    } else if (period <= 99) {
      "  "
    } else {
      "   "
    }
    label <- paste0(label, "         ", period_pad)
  }

  # Suffix describing regressors / drift / mean; every variant is 18 wide.
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", coef_names)) {
      label <- paste(label, "with drift        ")
    } else {
      label <- paste("Regression with", label, "errors")
    }
  } else if (any(c("constant", "intercept") %in% coef_names)) {
    label <- paste(label, "with non-zero mean")
  } else if (spec[2] == 0 && spec[5] == 0) {
    label <- paste(label, "with zero mean    ")
  } else {
    label <- paste(label, "                  ")
  }

  if (padding) {
    return(label)
  }
  gsub("[ ]*$", "", label)
}






# NOTE(review): "stringthearima.R" presumably holds the same arima.string()
# helper that is defined inline in this document — confirm. It is loaded
# only when present so that a missing file no longer aborts the run.
if (file.exists("stringthearima.R")) {
  source("stringthearima.R")
}
# Take the non-seasonal order (p, d, q) directly from the fitted model:
# positions 1, 6 and 2 of `arma` are p, d and q. The previous approach cut
# characters 7-11 out of the printed label "ARIMA(p,d,q)", which only works
# while p, d and q are all single digits.
bestmodel <- as.integer(model1$arma[c(1, 6, 2)])
bestmodel

## [1] 0 2 1

bestmodel[3]

## [1] 1

## [1] 1

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
# ACF of the twice-differenced series. `lag.max` is an argument of acf();
# it used to sit inside paste(), where it was only appended to the title
# ("... 20") and never limited the number of lags.
acf(diff2_x1, lag.max = 20, xlab = paste("Time in  ", frequency, y_lab, sep = " "), col.main = "black", col.lab = "blue", col.sub = "black", cex.main = 1, cex.lab = 1, cex.sub = 1, font.main = 4, font.lab = 4, ylab = y_lab, main = paste("ACF-2nd differenced series ", y_lab, sep = " "))

# PACF of the twice-differenced series, same fix for `lag.max`.
pacf(diff2_x1, lag.max = 20, xlab = paste("Time in  ", frequency, y_lab, sep = " "), col.main = "black", col.lab = "blue", col.sub = "black", cex.main = 1, cex.lab = 1, cex.sub = 1, font.main = 4, font.lab = 4, ylab = y_lab, main = paste("PACF-2nd differenced series ", y_lab, sep = " "))

library(forecast)   # attach forecast (library() loads the package, it does not install it)
# Refit the order chosen by auto.arima() with arima(); `bestmodel` is c(p, d, q).
x1_model1 <- arima(data_series, order=c(bestmodel))
x1_model1  # print coefficients, sigma^2, log-likelihood and AIC of the refit

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## Coefficients:
##           ma1
##       -0.5712
## s.e.   0.1130
## 
## sigma^2 estimated as 2.472e+10:  log likelihood = -656.04,  aic = 1316.07

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  SAARC milk production in Pakistan"

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                    ME   RMSE      MAE       MPE    MAPE      MASE       ACF1
## Training set 22995.25 154127 85842.84 0.2592826 0.72093 0.2264266 -0.0486627

x1_model1$x          # NOTE(review): prints NULL (see output below) — this fitted object has no `x` component, so nothing is shown

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,1)
## Q* = 11.374, df = 9, p-value = 0.251
## 
## Model df: 1.   Total lags used: 10

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  SAARC milk production in Pakistan"

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = 9.0818, df = 20, p-value = 0.9819

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 242.57, df = 2, p-value < 2.2e-16

# Actual vs fitted: training series in red, in-sample ARIMA fit in blue.
# The x-axis unit follows `frequency` instead of a hard-coded "days".
plot(data_series, col = "red", lwd = 2, main = "Actual vs Fitted Plot", xlab = paste0("Time in (", frequency, ")"), ylab = y_lab)
lines(fitted(x1_model1), col = "blue")

# Test data: forecast the validation window with the auto ARIMA model and
# report the absolute percentage error per period and on average.
#
# Fixes relative to the previous version:
#   * captions said "bats Model" (copied from the BATS section) although the
#     numbers come from the auto ARIMA model;
#   * captions hard-coded "days" instead of the configured `frequency`;
#   * the mean MAPE line was printed with a second, dangling "%".

# Validation observations as a ts indexed from rows - validation_data_days + 1
# (presumably `rows` is the total number of observations -- verify upstream).
x1_test <- ts(testing_data, start = rows - validation_data_days + 1)

# A single forecast covers the validation window followed by the future
# horizon; its first `validation_data_days` points are compared with the
# held-out `testing_data`.
forecasting_auto_arima <- forecast(x1_model1, h = N_forecasting_days + validation_data_days)
validation_forecast <- head(forecasting_auto_arima$mean, validation_data_days)

# Absolute percentage error of every validation point, rounded to 3 decimals.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)
paste("MAPE % For ", validation_data_days, frequency, "by using auto arima Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE % For  7 years by using auto arima Model for  ==>  SAARC milk production in Pakistan"

# Mean MAPE as a display string (reused later in the error summary table).
MAPE_Mean_All <- paste(round(mean(MAPE_Per_Day), 3), "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
# Per-period MAPE as display strings. MAPE_Per_Day is already rounded, so both
# historical names hold the same values; both are kept for compatibility.
MAPE_auto.arima_Model <- paste(MAPE_Per_Day, "%")
MAPE_auto_arima <- MAPE_auto.arima_Model
paste(" MAPE that's Error of Forecasting for ", validation_data_days, frequency, "in auto arima Model for  ==> ", y_lab, sep = " ")

## [1] " MAPE that's Error of Forecasting for  7 years in auto arima Model for  ==>  SAARC milk production in Pakistan"

# The string already contains the "%" unit, so it is printed as is.
MAPE_Mean_All

## [1] "1.179 % MAPE  7 years SAARC milk production in Pakistan"

paste("MAPE that's Error of Forecasting for each of the ", validation_data_days, frequency, "in auto arima Model for  ==> ", y_lab, sep = " ")

## [1] "MAPE that's Error of Forecasting for each of the  7 years in auto arima Model for  ==>  SAARC milk production in Pakistan"

# Validation table: date, day name, actual value, forecast and its MAPE.
data.frame(
  date_auto.arima = validation_dates,
  validation_data_by_name,
  actual_data = testing_data,
  forecasting_auto.arima = validation_forecast,
  MAPE_auto.arima_Model
)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2012-12-31             понедельник    23652000               23590412
## 2      2013-12-31                 вторник    24370000               24225825
## 3      2014-12-31                   среда    25001000               24861237
## 4      2015-12-31                 четверг    25744000               25496650
## 5      2016-12-31                 суббота    26510000               26132062
## 6      2017-12-31             воскресенье    27298000               26767474
## 7      2018-12-31             понедельник    28109000               27402887
##   MAPE_auto.arima_Model
## 1                0.26 %
## 2               0.592 %
## 3               0.559 %
## 4               0.961 %
## 5               1.426 %
## 6               1.943 %
## 7               2.512 %

# Future forecast table: the last `N_forecasting_days` point forecasts (those
# after the validation window) next to their dates and day names.
data.frame(
  FD,
  forecating_date = forecasting_data_by_name,
  forecasting_by_auto.arima = tail(forecasting_auto_arima$mean, N_forecasting_days)
)

##           FD forecating_date forecasting_by_auto.arima
## 1 2019-12-31         вторник                  28038299
## 2 2020-12-31         четверг                  28673711
## 3 2021-12-31         пятница                  29309124
## 4 2022-12-31         суббота                  29944536
## 5 2023-12-31     воскресенье                  30579949
## 6 2024-12-31         вторник                  31215361
## 7 2025-12-31           среда                  31850773

# Base-graphics forecast plot with the held-out observations drawn on top in red.
plot(forecasting_auto_arima)
x1_test <- ts(
  testing_data,
  start = (rows - validation_data_days + 1)
)
lines(x1_test, lwd = 2, col = "red")

# ggplot2 version of the same forecast, kept in `graph4` and then displayed.
graph4 <- autoplot(
  forecasting_auto_arima,
  xlab = paste("Time in  ", frequency, y_lab, sep = " "),
  col.main = "black",
  col.lab = "blue",
  col.sub = "black",
  cex.main = 1,
  cex.lab = 1,
  cex.sub = 1,
  font.main = 4,
  font.lab = 4,
  ylab = y_lab
)
graph4

## Error of forecasting (auto ARIMA model, validation window)

# Absolute error of forecast (AEOF) for every validation point.
Error_auto.arima <- abs(testing_data - validation_forecast)
# Relative error of forecast in percent, divided by the actual value (REOF_A).
REOF_A_auto.arima <- abs(((testing_data - validation_forecast) / testing_data) * 100)
# Relative error of forecast in percent, divided by the forecast value (REOF_F).
REOF_F_auto.arima <- abs(((testing_data - validation_forecast) / validation_forecast) * 100)
# Pearson correlation between actual and predicted values.
correlation_auto.arima <- cor(testing_data, validation_forecast, method = "pearson")
# Root mean square forecast error.
RMSE_auto.arima <- sqrt(sum(Error_auto.arima^2) / validation_data_days)
# Mean absolute deviation: the average of the absolute errors. The previous
# formula, abs(sum(actual - forecast)) / n, was the absolute *mean* error, so
# over- and under-forecasts cancelled each other out and understated the error.
MAD_auto.arima <- sum(Error_auto.arima) / validation_data_days

# Display versions: c() drops any time-series attributes, paste() adds the unit.
AEOF_auto.arima <- c(Error_auto.arima)
REOF_auto.arima1 <- c(paste(round(REOF_A_auto.arima, 3), "%"))
REOF_auto.arima2 <- c(paste(round(REOF_F_auto.arima, 3), "%"))

# Summary of the auto ARIMA validation error: correlation, RMSE, mean MAPE, MAD.
data.frame(correlation_auto.arima, RMSE_auto.arima, MAPE_Mean_All, MAD_auto.arima)

##   correlation_auto.arima RMSE_auto.arima
## 1              0.9992705        383254.1
##                                             MAPE_Mean_All MAD_auto.arima
## 1 1.179 % MAPE  7 years SAARC milk production in Pakistan       315350.5

# Per-period error table: AEOF, REOF_A and REOF_F for every validation date.
data.frame(
  validation_dates,
  Validation_day_name = validation_data_by_name,
  AEOF_auto.arima,
  REOF_A_auto.arima = REOF_auto.arima1,
  REOF_F_auto.arima = REOF_auto.arima2
)

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2012-12-31         понедельник        61587.61            0.26 %
## 2       2013-12-31             вторник       144175.23           0.592 %
## 3       2014-12-31               среда       139762.84           0.559 %
## 4       2015-12-31             четверг       247350.45           0.961 %
## 5       2016-12-31             суббота       377938.06           1.426 %
## 6       2017-12-31         воскресенье       530525.68           1.943 %
## 7       2018-12-31         понедельник       706113.29           2.512 %
##   REOF_F_auto.arima
## 1           0.261 %
## 2           0.595 %
## 3           0.562 %
## 4            0.97 %
## 5           1.446 %
## 6           1.982 %
## 7           2.577 %

# Choose Best model by least error
#
# M1..M4 hold the mean validation MAPE of the BATS, TBATS, Holt and auto ARIMA
# models; each is the mean of the per-period error column shown in the
# comparison table at the end of this section.

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  SAARC milk production in Pakistan"

M1 <- mean(REOF_A_bats)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==>  SAARC milk production in Pakistan"

M2 <- mean(REOF_A_tbats1)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==>  SAARC milk production in Pakistan"

# Computed like the other three scores, from the same per-period column that is
# displayed in the table below (previously read from a separate object).
M3 <- mean(REOF_A_Holt1)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==>  SAARC milk production in Pakistan"

M4 <- mean(REOF_A_auto.arima)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==>  SAARC milk production in Pakistan"

# Per-period MAPE of all four models on the validation dates.
# The day-name column now uses `validation_data_by_name`, which belongs to
# `validation_dates`. It previously showed `forecasting_data_by_name`, the day
# names of the *future* forecast dates, so every row had the wrong weekday.
data.frame(
  validation_dates,
  Validation_day_name = validation_data_by_name,
  MAPE_bats_error = REOF_A_bats,
  MAPE_TBATS_error = REOF_A_tbats1,
  MAPE_Holt_error = REOF_A_Holt1,
  MAPE_autoarima_error = REOF_A_auto.arima
)

##   validation_dates Validation_day_name MAPE_bats_error MAPE_TBATS_error
## 1       2012-12-31         понедельник       0.2021941        0.6307203
## 2       2013-12-31             вторник       0.4457146        1.0190857
## 3       2014-12-31               среда       0.3009383        0.8844916
## 4       2015-12-31             четверг       0.5699567        1.6006818
## 5       2016-12-31             суббота       0.8832595        2.1198201
## 6       2017-12-31         воскресенье       1.2329090        2.5552376
## 7       2018-12-31         понедельник       1.6187198        3.4136712
##   MAPE_Holt_error MAPE_autoarima_error
## 1      0.01140680            0.2603907
## 2      0.04613762            0.5916095
## 3      0.53947854            0.5590290
## 4      0.68328150            0.9608082
## 5      0.84766067            1.4256434
## 6      1.03952098            1.9434599
## 7      1.25798533            2.5120541

# Pick the model with the smallest mean validation MAPE and report it.
# Order of the scores: M1 = BATS, M2 = TBATS, M3 = Holt, M4 = auto ARIMA.
recommend_Model <- c(M1, M2, M3, M4)

# which.min() returns the position of the first minimum and skips NA scores.
# The previous four independent `if (min >= Mi)` statements printed several
# recommendations on ties and failed with an opaque error if any score was NA.
best_model_index <- which.min(recommend_Model)
if (length(best_model_index) != 1L) {
  stop("No model has a usable MAPE; cannot recommend a model.", call. = FALSE)
}
best_recommended_model <- recommend_Model[[best_model_index]]
paste ("lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==> ", y_lab , sep=" ")

## [1] "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==>  SAARC milk production in Pakistan"

best_recommended_model

## [1] 0.6322102

paste ("Best Model For Forecasting  ==> ",y_lab, sep=" ")

## [1] "Best Model For Forecasting  ==>  SAARC milk production in Pakistan"

# Recommendation texts, in the same order as `recommend_Model`.
recommendation_text <- c(
  "System Recommend Bats Model That's better  For forecasting==> ",
  "System Recommend  That's better TBATS  For forecasting ==> ",
  "System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==> ",
  "System Recommend auto arima Model  That's better  For forecasting ==> "
)
paste(recommendation_text[[best_model_index]], y_lab, sep = " ")

## [1] "System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==>  SAARC milk production in Pakistan"

# message() has no `sep` argument: the former `sep=" "` was simply appended to
# the text, so no space separated "==>" from the label. The separator is now
# part of the message text itself.
message("System finished Forecasting  by using autoarima and Holt's ,and TBATS Model ==> ", y_lab)

## System finished Forecasting  by using autoarima and Holt's ,and TBATS Model ==> SAARC milk production in Pakistan

message(" Thank you for using our System For Modelling  ==> ", y_lab)

##  Thank you for using our System For Modelling  ==> SAARC milk production in Pakistan