New package “FORECAST.TA” for forecasting apply Example For forecasting Rent price per m2 in Chelyabinsk

That’s Algorithm Developed By

Makarovskikh Tatyana Anatolyevna “Макаровских Татьяна Анатольевна”

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

# Imports
library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages --------------------------------------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

## Warning: package 'ggplot2' was built under R version 4.0.3

## Warning: package 'forecast' was built under R version 4.0.3

##

library(forecast)
library(ggplot2)
library("readxl")

## Warning: package 'readxl' was built under R version 4.0.3

library(moments)

## Warning: package 'moments' was built under R version 4.0.3

library(forecast)
require(forecast)  
require(tseries)

## Loading required package: tseries

## Warning: package 'tseries' was built under R version 4.0.3

require(markovchain)

## Loading required package: markovchain

## Warning: package 'markovchain' was built under R version 4.0.3

## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues

require(data.table)

## Loading required package: data.table

Full_original_data<-read_excel("F:/Phd/ALL Russia Analysis/Prices rent and sales.xlsx",sheet = "Rent Price")

## New names:
## * Октябрьский -> Октябрьский...21
## * Железногорск -> Железногорск...113
## * Железногорск -> Железногорск...129
## * Октябрьский -> Октябрьский...183

y_lab<- "Rent price per m2 in  Chelyabinsk"   # input name of data
Actual_date_interval <- c("2018/11/01","2020/10/01")
Forecast_date_interval <- c("2020/11/01","2021/05/01")
validation_data_days <-7
frequency<-"month"

# Data Preparation & calculate some of statistics measures
original_data<-Full_original_data$chelyabinsk

summary(original_data)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   278.0   281.0   283.0   283.5   286.0   292.0

sd(original_data)  # calculate standard deviation

## [1] 3.562842

skewness(original_data)  # calculate Cofficient of skewness

## [1] 0.3621266

kurtosis(original_data)   # calculate Cofficient of kurtosis

## [1] 2.400009

rows <- NROW(original_data)
training_data<-original_data[1:(rows-validation_data_days)]
testing_data<-original_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) 
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)

##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data

##                      ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -0.3743609 1.356817 1.005067 -0.1342393 0.3553133 0.7309576
##                    ACF1
## Training set 0.02210875

# Print Model Parameters
model_bats

## BATS(1, {3,0}, 1, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: -0.02009994
##   Beta: 0.03990649
##   Damping Parameter: 1
##   AR coefficients: 0.569574 -0.064634 -0.547707
## 
## Seed States:
##             [,1]
## [1,] 278.4725499
## [2,]   0.5792238
## [3,]   0.0000000
## [4,]   0.0000000
## [5,]   0.0000000
## 
## Sigma: 1.356817
## AIC: 78.53944

plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 month by using bats Model for  ==>  Rent price per m2 in  Chelyabinsk"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Rent price per m2 in  Chelyabinsk"

paste(MAPE_Mean_All,"%")

## [1] "1.178 % MAPE  7 month Rent price per m2 in  Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Rent price per m2 in  Chelyabinsk"

data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)

##    date_bats validation_data_by_name actual_data forecasting_bats
## 1 2020-04-01                   среда         292         287.5392
## 2 2020-05-01                 пятница         286         286.5313
## 3 2020-06-01             понедельник         286         286.3371
## 4 2020-07-01                   среда         286         286.8940
## 5 2020-08-01                 суббота         286         288.1258
## 6 2020-09-01                 вторник         281         289.2478
## 7 2020-10-01                 четверг         283         289.8523
##   MAPE_bats_Model
## 1         1.528 %
## 2         0.186 %
## 3         0.118 %
## 4         0.313 %
## 5         0.743 %
## 6         2.935 %
## 7         2.421 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_bats
## 1 2020-11-01     воскресенье            289.7994
## 2 2020-12-01         вторник            289.4657
## 3 2021-01-01         пятница            289.2980
## 4 2021-02-01     понедельник            289.6031
## 5 2021-03-01     понедельник            290.3205
## 6 2021-04-01         четверг            291.1512
## 7 2021-05-01         суббота            291.7610

plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,RMSE_bats,MAPE_Mean_All,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER

##   correlation_bats RMSE_bats
## 1       -0.5408407  4.481608
##                                             MAPE_Mean_All MAD_bats
## 1 1.178 % MAPE  7 month Rent price per m2 in  Chelyabinsk 2.075372

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1       2020-04-01               среда 4.4608046    1.528 %    1.551 %
## 2       2020-05-01             пятница 0.5313435    0.186 %    0.185 %
## 3       2020-06-01         понедельник 0.3371058    0.118 %    0.118 %
## 4       2020-07-01               среда 0.8940256    0.313 %    0.312 %
## 5       2020-08-01             суббота 2.1258201    0.743 %    0.738 %
## 6       2020-09-01             вторник 8.2478333    2.935 %    2.851 %
## 7       2020-10-01             четверг 6.8522834    2.421 %    2.364 %

## TBATS Model

# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data

##                     ME     RMSE      MAE        MPE   MAPE      MASE      ACF1
## Training set -0.462536 1.661304 1.355726 -0.1665239 0.4813 0.9859826 0.5140716

# Print Model Parameters
model_TBATS

## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: -0.02010764
##   Beta: 0.03450868
##   Damping Parameter: 1
##   Gamma-1 Values: -7.435622e-05
##   Gamma-2 Values: 0.0001299956
## 
## Seed States:
##              [,1]
## [1,] 278.68036382
## [2,]   0.55929243
## [3,]  -2.02088793
## [4,]  -0.01307079
## [5,]   0.15385144
## [6,]   0.45133680
## 
## Sigma: 1.661304
## AIC: 85.42312

plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 month by using TBATS Model for  ==>  Rent price per m2 in  Chelyabinsk"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in TBATS Model for  ==>  Rent price per m2 in  Chelyabinsk"

paste(MAPE_Mean_All,"%")

## [1] "1.344 % MAPE  7 month Rent price per m2 in  Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in TBATS Model for  ==>  Rent price per m2 in  Chelyabinsk"

data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)

##   date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2020-04-01                   среда         292          285.0716
## 2 2020-05-01                 пятница         286          284.8535
## 3 2020-06-01             понедельник         286          286.6957
## 4 2020-07-01                   среда         286          288.2228
## 5 2020-08-01                 суббота         286          289.7592
## 6 2020-09-01                 вторник         281          289.3140
## 7 2020-10-01                 четверг         283          286.7993
##   MAPE_TBATS_Model
## 1          2.373 %
## 2          0.401 %
## 3          0.243 %
## 4          0.777 %
## 5          1.314 %
## 6          2.959 %
## 7          1.342 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_TBATS
## 1 2020-11-01     воскресенье             286.5812
## 2 2020-12-01         вторник             288.4234
## 3 2021-01-01         пятница             289.9505
## 4 2021-02-01     понедельник             291.4869
## 5 2021-03-01     понедельник             291.0417
## 6 2021-04-01         четверг             288.5269
## 7 2021-05-01         суббота             288.3089

plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model

Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,RMSE_tbats,MAPE_Mean_All,MAD_tbats) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_tbats RMSE_tbats
## 1        -0.5578186   4.666468
##                                             MAPE_Mean_All MAD_tbats
## 1 1.344 % MAPE  7 month Rent price per m2 in  Chelyabinsk   1.53088

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1       2020-04-01               среда  6.9284268      2.373 %       2.43 %
## 2       2020-05-01             пятница  1.1464678      0.401 %      0.402 %
## 3       2020-06-01         понедельник  0.6957123      0.243 %      0.243 %
## 4       2020-07-01               среда  2.2228337      0.777 %      0.771 %
## 5       2020-08-01             суббота  3.7592479      1.314 %      1.297 %
## 6       2020-09-01             вторник  8.3140079      2.959 %      2.874 %
## 7       2020-10-01             четверг  3.7992542      1.342 %      1.325 %

## Holt's linear trend


# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt)  # accuracy on training data

##                      ME     RMSE      MAE         MPE      MAPE     MASE
## Training set 0.02237606 2.246147 1.594429 0.002624528 0.5629071 1.159585
##                  ACF1
## Training set 0.484728

# Print Model Parameters
summary(model_holt$model)

## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= -0.9999 
## 
##   Smoothing parameters:
##     alpha = 0.3166 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 0.9965 
##     b = 0 
## 
##   sigma:  0
## 
##       AIC      AICc       BIC 
## -298.3716 -292.9171 -294.2055 
## 
## Training set error measures:
##                      ME     RMSE      MAE         MPE      MAPE     MASE
## Training set 0.02237606 2.246147 1.594429 0.002624528 0.5629071 1.159585
##                  ACF1
## Training set 0.484728

# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 month by using holt Model for  ==>  Rent price per m2 in  Chelyabinsk"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in holt Model for  ==>  Rent price per m2 in  Chelyabinsk"

paste(MAPE_Mean_All,"%")

## [1] "1.711 % MAPE  7 month Rent price per m2 in  Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in holt Model for  ==>  Rent price per m2 in  Chelyabinsk"

data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)

##    date_holt validation_data_by_name actual_data forecasting_holt
## 1 2020-04-01                   среда         292         287.6209
## 2 2020-05-01                 пятница         286         288.1875
## 3 2020-06-01             понедельник         286         288.7562
## 4 2020-07-01                   среда         286         289.3272
## 5 2020-08-01                 суббота         286         289.9005
## 6 2020-09-01                 вторник         281         290.4761
## 7 2020-10-01                 четверг         283         291.0539
##   MAPE_holt_Model
## 1           1.5 %
## 2         0.765 %
## 3         0.964 %
## 4         1.163 %
## 5         1.364 %
## 6         3.372 %
## 7         2.846 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_holt
## 1 2020-11-01     воскресенье            291.6341
## 2 2020-12-01         вторник            292.2165
## 3 2021-01-01         пятница            292.8013
## 4 2021-02-01     понедельник            293.3885
## 5 2021-03-01     понедельник            293.9780
## 6 2021-04-01         четверг            294.5699
## 7 2021-05-01         суббота            295.1641

plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,RMSE_Holt,MAPE_Mean_All,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_Holt RMSE_Holt
## 1       -0.8385015  5.509793
##                                             MAPE_Mean_All MAD_Holt
## 1 1.711 % MAPE  7 month Rent price per m2 in  Chelyabinsk 3.617488

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1       2020-04-01               среда  4.379069       1.5 %     1.523 %
## 2       2020-05-01             пятница  2.187463     0.765 %     0.759 %
## 3       2020-06-01         понедельник  2.756231     0.964 %     0.955 %
## 4       2020-07-01               среда  3.327249     1.163 %      1.15 %
## 5       2020-08-01             суббота  3.900529     1.364 %     1.345 %
## 6       2020-09-01             вторник  9.476085     3.372 %     3.262 %
## 7       2020-10-01             четверг  8.053932     2.846 %     2.767 %

#Auto arima model
##################

require(tseries) # need to install tseries tj test Stationarity in time series 
paste ("tests For Check Stationarity in series  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series  ==>  Rent price per m2 in  Chelyabinsk"

kpss.test(data_series) # applay kpss test

## 
##  KPSS Test for Level Stationarity
## 
## data:  data_series
## KPSS Level = 0.48391, Truncation lag parameter = 2, p-value = 0.04529

pp.test(data_series)   # applay pp test

## 
##  Phillips-Perron Unit Root Test
## 
## data:  data_series
## Dickey-Fuller Z(alpha) = -9.1929, Truncation lag parameter = 2, p-value
## = 0.5293
## alternative hypothesis: stationary

adf.test(data_series)  # applay adf test

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_series
## Dickey-Fuller = -3.8485, Lag order = 2, p-value = 0.03225
## alternative hypothesis: stationary

ndiffs(data_series)    # Doing first diffrencing on data

## [1] 1

#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main = "1nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in  ==> ",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking first differences in  ==>  Rent price per m2 in  Chelyabinsk"

kpss.test(diff1_x1)   # applay kpss test after taking first differences

## Warning in kpss.test(diff1_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff1_x1
## KPSS Level = 0.067045, Truncation lag parameter = 2, p-value = 0.1

pp.test(diff1_x1)     # applay pp test after taking first differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff1_x1
## Dickey-Fuller Z(alpha) = -13.336, Truncation lag parameter = 2, p-value
## = 0.2517
## alternative hypothesis: stationary

adf.test(diff1_x1)    # applay adf test after taking first differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1_x1
## Dickey-Fuller = -3.7408, Lag order = 2, p-value = 0.03994
## alternative hypothesis: stationary

#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab ,main = "2nd differenced series")

## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")

## [1] "tests For Check Stationarity in series after taking Second differences in Rent price per m2 in  Chelyabinsk"

kpss.test(diff2_x1)   # applay kpss test after taking Second differences

## Warning in kpss.test(diff2_x1): p-value greater than printed p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff2_x1
## KPSS Level = 0.06343, Truncation lag parameter = 2, p-value = 0.1

pp.test(diff2_x1)     # applay pp test after taking Second differences

## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff2_x1
## Dickey-Fuller Z(alpha) = -19.202, Truncation lag parameter = 2, p-value
## = 0.03373
## alternative hypothesis: stationary

adf.test(diff2_x1)    # applay adf test after taking Second differences

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2_x1
## Dickey-Fuller = -2.7029, Lag order = 2, p-value = 0.3046
## alternative hypothesis: stationary

####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp"))  #applaying auto arima

## 
##  ARIMA(0,1,0)                    : 72.19537
##  ARIMA(0,1,0) with drift         : 73.42088
##  ARIMA(0,1,1)                    : 74.34256
##  ARIMA(0,1,1) with drift         : 76.28196
##  ARIMA(0,1,2)                    : Inf
##  ARIMA(0,1,2) with drift         : Inf
##  ARIMA(0,1,3)                    : Inf
##  ARIMA(0,1,3) with drift         : Inf
##  ARIMA(0,1,4)                    : Inf
##  ARIMA(0,1,4) with drift         : Inf
##  ARIMA(0,1,5)                    : Inf
##  ARIMA(0,1,5) with drift         : Inf
##  ARIMA(1,1,0)                    : 74.2766
##  ARIMA(1,1,0) with drift         : 76.28559
##  ARIMA(1,1,1)                    : 77.35221
##  ARIMA(1,1,1) with drift         : 79.91318
##  ARIMA(1,1,2)                    : Inf
##  ARIMA(1,1,2) with drift         : Inf
##  ARIMA(1,1,3)                    : Inf
##  ARIMA(1,1,3) with drift         : Inf
##  ARIMA(1,1,4)                    : Inf
##  ARIMA(1,1,4) with drift         : Inf
##  ARIMA(2,1,0)                    : 77.34514
##  ARIMA(2,1,0) with drift         : 79.83089
##  ARIMA(2,1,1)                    : 80.50989
##  ARIMA(2,1,1) with drift         : Inf
##  ARIMA(2,1,2)                    : Inf
##  ARIMA(2,1,2) with drift         : Inf
##  ARIMA(2,1,3)                    : Inf
##  ARIMA(2,1,3) with drift         : Inf
##  ARIMA(3,1,0)                    : 75.17712
##  ARIMA(3,1,0) with drift         : 76.8228
##  ARIMA(3,1,1)                    : 79.31198
##  ARIMA(3,1,1) with drift         : Inf
##  ARIMA(3,1,2)                    : Inf
##  ARIMA(3,1,2) with drift         : Inf
##  ARIMA(4,1,0)                    : 79.12483
##  ARIMA(4,1,0) with drift         : 80.43408
##  ARIMA(4,1,1)                    : Inf
##  ARIMA(4,1,1) with drift         : 87.05119
##  ARIMA(5,1,0)                    : 83.74669
##  ARIMA(5,1,0) with drift         : 87.07906
## 
## 
## 
##  Best model: ARIMA(0,1,0)

model1 # show the result of autoarima

## Series: data_series 
## ARIMA(0,1,0) 
## 
## sigma^2 estimated as 4.63:  log likelihood=-34.95
## AIC=71.91   AICc=72.2   BIC=72.68

#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE) 
{
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", 
                  order[3], ")", sep = "")
  if (m > 1 && sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], 
                    ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding && m > 1 && sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    }
    else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    }
    else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    }
    else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) || is.element("intercept", 
                                                                 names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    }
    else if (order[2] == 0 && order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    }
    else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}






source("stringthearima.R")  
bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel

## [1] 0 1 0

strtoi(bestmodel[3])

## [1] 0

#2. Using ACF and PACF Function
#par(mfrow=c(1,2))  # Code for making two plot in one graph 
acf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))    # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20))   # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast)   # install library forecast             
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima  for forecasting
x1_model1  # Show result of best model of auto arima

## 
## Call:
## arima(x = data_series, order = c(bestmodel))
## 
## 
## sigma^2 estimated as 4.625:  log likelihood = -34.95,  aic = 71.91

paste ("accuracy of autoarima Model For  ==> ",y_lab, sep=" ")

## [1] "accuracy of autoarima Model For  ==>  Rent price per m2 in  Chelyabinsk"

accuracy(x1_model1)  # aacuracy of best model from auto arima

##                     ME    RMSE      MAE       MPE      MAPE      MASE      ACF1
## Training set 0.6045882 2.08746 1.310471 0.2110455 0.4631463 0.9530695 0.1159181

x1_model1$x          # show result of best model from auto arima

## NULL

checkresiduals(x1_model1,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto arima

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 8.2134, df = 3, p-value = 0.0418
## 
## Model df: 0.   Total lags used: 3

paste("Box-Ljung test , Ljung-Box test For Modelling for   ==> ",y_lab, sep=" ")

## [1] "Box-Ljung test , Ljung-Box test For Modelling for   ==>  Rent price per m2 in  Chelyabinsk"

Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box")   # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling

## 
##  Box-Ljung test
## 
## data:  x1_model1$residuals^2
## X-squared = NA, df = 20, p-value = NA

library(tseries)
jarque.bera.test(x1_model1$residuals)  # Do test jarque.bera.test

## 
##  Jarque Bera Test
## 
## data:  x1_model1$residuals
## X-squared = 23.641, df = 2, p-value = 7.353e-06

#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model 
lines(fitted(x1_model1), col='blue')

#Test data

x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE % For  7 month by using bats Model for  ==>  Rent price per m2 in  Chelyabinsk"

MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] " MAPE that's Error of Forecasting for  7  days in bats Model for  ==>  Rent price per m2 in  Chelyabinsk"

paste(MAPE_Mean_All,"%")

## [1] "1.203 % MAPE  7 month Rent price per m2 in  Chelyabinsk %"

paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")

## [1] "MAPE that's Error of Forecasting day by day for  7  days in bats Model for  ==>  Rent price per m2 in  Chelyabinsk"

data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)

##   date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1      2020-04-01                   среда         292                    288
## 2      2020-05-01                 пятница         286                    288
## 3      2020-06-01             понедельник         286                    288
## 4      2020-07-01                   среда         286                    288
## 5      2020-08-01                 суббота         286                    288
## 6      2020-09-01                 вторник         281                    288
## 7      2020-10-01                 четверг         283                    288
##   MAPE_auto.arima_Model
## 1                1.37 %
## 2               0.699 %
## 3               0.699 %
## 4               0.699 %
## 5               0.699 %
## 6               2.491 %
## 7               1.767 %

data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

##           FD forecating_date forecasting_by_auto.arima
## 1 2020-11-01     воскресенье                       288
## 2 2020-12-01         вторник                       288
## 3 2021-01-01         пятница                       288
## 4 2021-02-01     понедельник                       288
## 5 2021-03-01     понедельник                       288
## 6 2021-04-01         четверг                       288
## 7 2021-05-01         суббота                       288

plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values

## Warning in cor(testing_data, validation_forecast, method = c("pearson")):
## ñòàíäàðòíîå îòêëîíåíèå íóëåâîå

RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days)   #  Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,RMSE_auto.arima,MAPE_Mean_All,MAD_auto.arima) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER

##   correlation_auto.arima RMSE_auto.arima
## 1                     NA        3.891382
##                                             MAPE_Mean_All MAD_auto.arima
## 1 1.203 % MAPE  7 month Rent price per m2 in  Chelyabinsk       2.285714

data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2)   # Analysis of error shows result AEOF,REOF_A,REOF_F

##   validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1       2020-04-01               среда               4            1.37 %
## 2       2020-05-01             пятница               2           0.699 %
## 3       2020-06-01         понедельник               2           0.699 %
## 4       2020-07-01               среда               2           0.699 %
## 5       2020-08-01             суббота               2           0.699 %
## 6       2020-09-01             вторник               7           2.491 %
## 7       2020-10-01             четверг               5           1.767 %
##   REOF_F_auto.arima
## 1           1.389 %
## 2           0.694 %
## 3           0.694 %
## 4           0.694 %
## 5           0.694 %
## 6           2.431 %
## 7           1.736 %

# Choose Best model by least error

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using bats model and BATS Model, Holt's Linear Models , and autoarima for  ==>  Rent price per m2 in  Chelyabinsk"

M1<-mean(REOF_A_bats)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using TBATS  Model For ==>  Rent price per m2 in  Chelyabinsk"

M2<-mean(REOF_A_tbats1)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using Holt's Linear << Exponential Smoothing >>  For ==>  Rent price per m2 in  Chelyabinsk"

M3<-REOF_A_Holt11

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using auto arima  Model For ==>  Rent price per m2 in  Chelyabinsk"

M4<-mean(REOF_A_auto.arima)

paste("System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==> ", y_lab , sep=" ")

## [1] "System Summarizes  Error ==> ( MAPE ) of Forecasting  by using autoarima  Model For ==>  Rent price per m2 in  Chelyabinsk"

data.frame(validation_dates,forecating_date=forecasting_data_by_name,MAPE_bats_error=REOF_A_bats,MAPE_TBATS_error=REOF_A_tbats1,MAPE_Holt_error=REOF_A_Holt1,MAPE_autoarima_error = REOF_A_auto.arima)

##   validation_dates forecating_date MAPE_bats_error MAPE_TBATS_error
## 1       2020-04-01     воскресенье       1.5276728        2.3727489
## 2       2020-05-01         вторник       0.1857845        0.4008629
## 3       2020-06-01         пятница       0.1178692        0.2432560
## 4       2020-07-01     понедельник       0.3125964        0.7772146
## 5       2020-08-01     понедельник       0.7432938        1.3144223
## 6       2020-09-01         четверг       2.9351720        2.9587217
## 7       2020-10-01         суббота       2.4213016        1.3424926
##   MAPE_Holt_error MAPE_autoarima_error
## 1       1.4996812            1.3698630
## 2       0.7648471            0.6993007
## 3       0.9637171            0.6993007
## 4       1.1633736            0.6993007
## 5       1.3638213            0.6993007
## 6       3.3722724            2.4911032
## 7       2.8459123            1.7667845

recommend_Model<-c(M1,M2,M3,M4)
best_recommended_model<-min(recommend_Model)
paste ("lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==> ", y_lab , sep=" ")

## [1] "lodaing .....   ... . .Select Minimum MAPE from Models for select best Model ==>  Rent price per m2 in  Chelyabinsk"

best_recommended_model

## [1] 1.17767

paste ("Best Model For Forecasting  ==> ",y_lab, sep=" ")

## [1] "Best Model For Forecasting  ==>  Rent price per m2 in  Chelyabinsk"

if(best_recommended_model >= M1) {paste("System Recommend Bats Model That's better  For forecasting==> ",y_lab, sep=" ")}

## [1] "System Recommend Bats Model That's better  For forecasting==>  Rent price per m2 in  Chelyabinsk"

if(best_recommended_model >= M2) {paste("System Recommend  That's better TBATS  For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M3) {paste("System Recommend Holt's Linear Model < Exponential Smoothing Model >   That's better  For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M4) {paste("System Recommend auto arima Model  That's better  For forecasting ==> ",y_lab, sep=" ")}

message("System finished Forecasting  by using autoarima and Holt's ,and TBATS Model ==>",y_lab, sep=" ")

## System finished Forecasting  by using autoarima and Holt's ,and TBATS Model ==>Rent price per m2 in  Chelyabinsk

message(" Thank you for using our System For Modelling  ==> ",y_lab, sep=" ")

##  Thank you for using our System For Modelling  ==> Rent price per m2 in  Chelyabinsk