South ural state university, Chelyabinsk, Russian federation
# Imports
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages --------------------------------------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.2 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'forecast' was built under R version 4.0.3
##
library(forecast)
library(ggplot2)
library("readxl")
## Warning: package 'readxl' was built under R version 4.0.3
library(moments)
## Warning: package 'moments' was built under R version 4.0.3
library(forecast)
require(forecast)
require(tseries)
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 4.0.3
require(markovchain)
## Loading required package: markovchain
## Warning: package 'markovchain' was built under R version 4.0.3
## Package: markovchain
## Version: 0.8.5-3
## Date: 2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues
require(data.table)
## Loading required package: data.table
Full_original_data<-read_excel("F:/Phd/ALL Russia Analysis/Prices rent and sales.xlsx",sheet = "Rent Price")
## New names:
## * Октябрьский -> Октябрьский...21
## * Железногорск -> Железногорск...113
## * Железногорск -> Железногорск...129
## * Октябрьский -> Октябрьский...183
y_lab<- "Rent price per m2 in Chelyabinsk" # input name of data
Actual_date_interval <- c("2018/11/01","2020/10/01")
Forecast_date_interval <- c("2020/11/01","2021/05/01")
validation_data_days <-7
frequency<-"month"
# Data Preparation & calculate some of statistics measures
original_data<-Full_original_data$chelyabinsk
summary(original_data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 278.0 281.0 283.0 283.5 286.0 292.0
sd(original_data) # calculate standard deviation
## [1] 3.562842
skewness(original_data) # calculate Cofficient of skewness
## [1] 0.3621266
kurtosis(original_data) # calculate Cofficient of kurtosis
## [1] 2.400009
rows <- NROW(original_data)
training_data<-original_data[1:(rows-validation_data_days)]
testing_data<-original_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency) #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency) #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD))
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)
##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual Data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3743609 1.356817 1.005067 -0.1342393 0.3553133 0.7309576
## ACF1
## Training set 0.02210875
# Print Model Parameters
model_bats
## BATS(1, {3,0}, 1, -)
##
## Call: bats(y = data_series)
##
## Parameters
## Alpha: -0.02009994
## Beta: 0.03990649
## Damping Parameter: 1
## AR coefficients: 0.569574 -0.064634 -0.547707
##
## Seed States:
## [,1]
## [1,] 278.4725499
## [2,] 0.5792238
## [3,] 0.0000000
## [4,] 0.0000000
## [5,] 0.0000000
##
## Sigma: 1.356817
## AIC: 78.53944
plot(model_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4)

# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 month by using bats Model for ==> Rent price per m2 in Chelyabinsk"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in bats Model for ==> Rent price per m2 in Chelyabinsk"
paste(MAPE_Mean_All,"%")
## [1] "1.178 % MAPE 7 month Rent price per m2 in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in bats Model for ==> Rent price per m2 in Chelyabinsk"
data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)
## date_bats validation_data_by_name actual_data forecasting_bats
## 1 2020-04-01 среда 292 287.5392
## 2 2020-05-01 пятница 286 286.5313
## 3 2020-06-01 понедельник 286 286.3371
## 4 2020-07-01 среда 286 286.8940
## 5 2020-08-01 суббота 286 288.1258
## 6 2020-09-01 вторник 281 289.2478
## 7 2020-10-01 четверг 283 289.8523
## MAPE_bats_Model
## 1 1.528 %
## 2 0.186 %
## 3 0.118 %
## 4 0.313 %
## 5 0.743 %
## 6 2.935 %
## 7 2.421 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))
## FD forecating_date forecasting_by_bats
## 1 2020-11-01 воскресенье 289.7994
## 2 2020-12-01 вторник 289.4657
## 3 2021-01-01 пятница 289.2980
## 4 2021-02-01 понедельник 289.6031
## 5 2021-03-01 понедельник 290.3205
## 6 2021-04-01 четверг 291.1512
## 7 2021-05-01 суббота 291.7610
plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days) # Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,RMSE_bats,MAPE_Mean_All,MAD_bats) # analysis of Error by using Bats Model shows result of correlation ,MSE ,MPER
## correlation_bats RMSE_bats
## 1 -0.5408407 4.481608
## MAPE_Mean_All MAD_bats
## 1 1.178 % MAPE 7 month Rent price per m2 in Chelyabinsk 2.075372
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_bats REOF_Abats REOF_Fbats
## 1 2020-04-01 среда 4.4608046 1.528 % 1.551 %
## 2 2020-05-01 пятница 0.5313435 0.186 % 0.185 %
## 3 2020-06-01 понедельник 0.3371058 0.118 % 0.118 %
## 4 2020-07-01 среда 0.8940256 0.313 % 0.312 %
## 5 2020-08-01 суббота 2.1258201 0.743 % 0.738 %
## 6 2020-09-01 вторник 8.2478333 2.935 % 2.851 %
## 7 2020-10-01 четверг 6.8522834 2.421 % 2.364 %
## TBATS Model
# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE, seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.462536 1.661304 1.355726 -0.1665239 0.4813 0.9859826 0.5140716
# Print Model Parameters
model_TBATS
## TBATS(1, {0,0}, 1, {<6,2>})
##
## Call: NULL
##
## Parameters
## Alpha: -0.02010764
## Beta: 0.03450868
## Damping Parameter: 1
## Gamma-1 Values: -7.435622e-05
## Gamma-2 Values: 0.0001299956
##
## Seed States:
## [,1]
## [1,] 278.68036382
## [2,] 0.55929243
## [3,] -2.02088793
## [4,] -0.01307079
## [5,] 0.15385144
## [6,] 0.45133680
##
## Sigma: 1.661304
## AIC: 85.42312
plot(model_TBATS,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 month by using TBATS Model for ==> Rent price per m2 in Chelyabinsk"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in TBATS Model for ==> Rent price per m2 in Chelyabinsk"
paste(MAPE_Mean_All,"%")
## [1] "1.344 % MAPE 7 month Rent price per m2 in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in TBATS Model for ==> Rent price per m2 in Chelyabinsk"
data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)
## date_TBATS validation_data_by_name actual_data forecasting_TBATS
## 1 2020-04-01 среда 292 285.0716
## 2 2020-05-01 пятница 286 284.8535
## 3 2020-06-01 понедельник 286 286.6957
## 4 2020-07-01 среда 286 288.2228
## 5 2020-08-01 суббота 286 289.7592
## 6 2020-09-01 вторник 281 289.3140
## 7 2020-10-01 четверг 283 286.7993
## MAPE_TBATS_Model
## 1 2.373 %
## 2 0.401 %
## 3 0.243 %
## 4 0.777 %
## 5 1.314 %
## 6 2.959 %
## 7 1.342 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))
## FD forecating_date forecasting_by_TBATS
## 1 2020-11-01 воскресенье 286.5812
## 2 2020-12-01 вторник 288.4234
## 3 2021-01-01 пятница 289.9505
## 4 2021-02-01 понедельник 291.4869
## 5 2021-03-01 понедельник 291.0417
## 6 2021-04-01 четверг 288.5269
## 7 2021-05-01 суббота 288.3089
plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days) # Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,RMSE_tbats,MAPE_Mean_All,MAD_tbats) # analysis of Error by using Holt's linear model shows result of correlation ,MSE ,MPER
## correlation_tbats RMSE_tbats
## 1 -0.5578186 4.666468
## MAPE_Mean_All MAD_tbats
## 1 1.344 % MAPE 7 month Rent price per m2 in Chelyabinsk 1.53088
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_tbats REOF_A_tbats REOF_F_tbats
## 1 2020-04-01 среда 6.9284268 2.373 % 2.43 %
## 2 2020-05-01 пятница 1.1464678 0.401 % 0.402 %
## 3 2020-06-01 понедельник 0.6957123 0.243 % 0.243 %
## 4 2020-07-01 среда 2.2228337 0.777 % 0.771 %
## 5 2020-08-01 суббота 3.7592479 1.314 % 1.297 %
## 6 2020-09-01 вторник 8.3140079 2.959 % 2.874 %
## 7 2020-10-01 четверг 3.7992542 1.342 % 1.325 %
## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
accuracy(model_holt) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02237606 2.246147 1.594429 0.002624528 0.5629071 1.159585
## ACF1
## Training set 0.484728
# Print Model Parameters
summary(model_holt$model)
## Holt's method
##
## Call:
## holt(y = data_series, h = N_forecasting_days + validation_data_days,
##
## Call:
## lambda = "auto")
##
## Box-Cox transformation: lambda= -0.9999
##
## Smoothing parameters:
## alpha = 0.3166
## beta = 1e-04
##
## Initial states:
## l = 0.9965
## b = 0
##
## sigma: 0
##
## AIC AICc BIC
## -298.3716 -292.9171 -294.2055
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02237606 2.246147 1.594429 0.002624528 0.5629071 1.159585
## ACF1
## Training set 0.484728
# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 month by using holt Model for ==> Rent price per m2 in Chelyabinsk"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in holt Model for ==> Rent price per m2 in Chelyabinsk"
paste(MAPE_Mean_All,"%")
## [1] "1.711 % MAPE 7 month Rent price per m2 in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in holt Model for ==> Rent price per m2 in Chelyabinsk"
data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)
## date_holt validation_data_by_name actual_data forecasting_holt
## 1 2020-04-01 среда 292 287.6209
## 2 2020-05-01 пятница 286 288.1875
## 3 2020-06-01 понедельник 286 288.7562
## 4 2020-07-01 среда 286 289.3272
## 5 2020-08-01 суббота 286 289.9005
## 6 2020-09-01 вторник 281 290.4761
## 7 2020-10-01 четверг 283 291.0539
## MAPE_holt_Model
## 1 1.5 %
## 2 0.765 %
## 3 0.964 %
## 4 1.163 %
## 5 1.364 %
## 6 3.372 %
## 7 2.846 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))
## FD forecating_date forecasting_by_holt
## 1 2020-11-01 воскресенье 291.6341
## 2 2020-12-01 вторник 292.2165
## 3 2021-01-01 пятница 292.8013
## 4 2021-02-01 понедельник 293.3885
## 5 2021-03-01 понедельник 293.9780
## 6 2021-04-01 четверг 294.5699
## 7 2021-05-01 суббота 295.1641
plot(forecasting_holt)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days) # Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,RMSE_Holt,MAPE_Mean_All,MAD_Holt) # analysis of Error by using Holt's linear model shows result of correlation ,MSE ,MPER
## correlation_Holt RMSE_Holt
## 1 -0.8385015 5.509793
## MAPE_Mean_All MAD_Holt
## 1 1.711 % MAPE 7 month Rent price per m2 in Chelyabinsk 3.617488
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_Holt REOF_A_Holt REOF_F_Holt
## 1 2020-04-01 среда 4.379069 1.5 % 1.523 %
## 2 2020-05-01 пятница 2.187463 0.765 % 0.759 %
## 3 2020-06-01 понедельник 2.756231 0.964 % 0.955 %
## 4 2020-07-01 среда 3.327249 1.163 % 1.15 %
## 5 2020-08-01 суббота 3.900529 1.364 % 1.345 %
## 6 2020-09-01 вторник 9.476085 3.372 % 3.262 %
## 7 2020-10-01 четверг 8.053932 2.846 % 2.767 %
#Auto arima model
##################
require(tseries) # need to install tseries tj test Stationarity in time series
paste ("tests For Check Stationarity in series ==> ",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series ==> Rent price per m2 in Chelyabinsk"
kpss.test(data_series) # applay kpss test
##
## KPSS Test for Level Stationarity
##
## data: data_series
## KPSS Level = 0.48391, Truncation lag parameter = 2, p-value = 0.04529
pp.test(data_series) # applay pp test
##
## Phillips-Perron Unit Root Test
##
## data: data_series
## Dickey-Fuller Z(alpha) = -9.1929, Truncation lag parameter = 2, p-value
## = 0.5293
## alternative hypothesis: stationary
adf.test(data_series) # applay adf test
##
## Augmented Dickey-Fuller Test
##
## data: data_series
## Dickey-Fuller = -3.8485, Lag order = 2, p-value = 0.03225
## alternative hypothesis: stationary
ndiffs(data_series) # Doing first diffrencing on data
## [1] 1
#Taking the first difference
diff1_x1<-diff(data_series)
autoplot(diff1_x1, xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main = "1nd differenced series")
## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking first differences in ==> ",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series after taking first differences in ==> Rent price per m2 in Chelyabinsk"
kpss.test(diff1_x1) # applay kpss test after taking first differences
## Warning in kpss.test(diff1_x1): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff1_x1
## KPSS Level = 0.067045, Truncation lag parameter = 2, p-value = 0.1
pp.test(diff1_x1) # applay pp test after taking first differences
##
## Phillips-Perron Unit Root Test
##
## data: diff1_x1
## Dickey-Fuller Z(alpha) = -13.336, Truncation lag parameter = 2, p-value
## = 0.2517
## alternative hypothesis: stationary
adf.test(diff1_x1) # applay adf test after taking first differences
##
## Augmented Dickey-Fuller Test
##
## data: diff1_x1
## Dickey-Fuller = -3.7408, Lag order = 2, p-value = 0.03994
## alternative hypothesis: stationary
#Taking the second difference
diff2_x1=diff(diff1_x1)
autoplot(diff2_x1, xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab ,main = "2nd differenced series")
## Warning: Ignoring unknown parameters: col.main, col.lab, col.sub, cex.main,
## cex.lab, cex.sub, font.main, font.lab

##Testing the stationary of the first differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series after taking Second differences in Rent price per m2 in Chelyabinsk"
kpss.test(diff2_x1) # applay kpss test after taking Second differences
## Warning in kpss.test(diff2_x1): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff2_x1
## KPSS Level = 0.06343, Truncation lag parameter = 2, p-value = 0.1
pp.test(diff2_x1) # applay pp test after taking Second differences
##
## Phillips-Perron Unit Root Test
##
## data: diff2_x1
## Dickey-Fuller Z(alpha) = -19.202, Truncation lag parameter = 2, p-value
## = 0.03373
## alternative hypothesis: stationary
adf.test(diff2_x1) # applay adf test after taking Second differences
##
## Augmented Dickey-Fuller Test
##
## data: diff2_x1
## Dickey-Fuller = -2.7029, Lag order = 2, p-value = 0.3046
## alternative hypothesis: stationary
####Fitting an ARIMA Model
#1. Using auto arima function
model1 <- auto.arima(data_series,stepwise=FALSE, approximation=FALSE, trace=T, test = c("kpss", "adf", "pp")) #applaying auto arima
##
## ARIMA(0,1,0) : 72.19537
## ARIMA(0,1,0) with drift : 73.42088
## ARIMA(0,1,1) : 74.34256
## ARIMA(0,1,1) with drift : 76.28196
## ARIMA(0,1,2) : Inf
## ARIMA(0,1,2) with drift : Inf
## ARIMA(0,1,3) : Inf
## ARIMA(0,1,3) with drift : Inf
## ARIMA(0,1,4) : Inf
## ARIMA(0,1,4) with drift : Inf
## ARIMA(0,1,5) : Inf
## ARIMA(0,1,5) with drift : Inf
## ARIMA(1,1,0) : 74.2766
## ARIMA(1,1,0) with drift : 76.28559
## ARIMA(1,1,1) : 77.35221
## ARIMA(1,1,1) with drift : 79.91318
## ARIMA(1,1,2) : Inf
## ARIMA(1,1,2) with drift : Inf
## ARIMA(1,1,3) : Inf
## ARIMA(1,1,3) with drift : Inf
## ARIMA(1,1,4) : Inf
## ARIMA(1,1,4) with drift : Inf
## ARIMA(2,1,0) : 77.34514
## ARIMA(2,1,0) with drift : 79.83089
## ARIMA(2,1,1) : 80.50989
## ARIMA(2,1,1) with drift : Inf
## ARIMA(2,1,2) : Inf
## ARIMA(2,1,2) with drift : Inf
## ARIMA(2,1,3) : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(3,1,0) : 75.17712
## ARIMA(3,1,0) with drift : 76.8228
## ARIMA(3,1,1) : 79.31198
## ARIMA(3,1,1) with drift : Inf
## ARIMA(3,1,2) : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(4,1,0) : 79.12483
## ARIMA(4,1,0) with drift : 80.43408
## ARIMA(4,1,1) : Inf
## ARIMA(4,1,1) with drift : 87.05119
## ARIMA(5,1,0) : 83.74669
## ARIMA(5,1,0) with drift : 87.07906
##
##
##
## Best model: ARIMA(0,1,0)
model1 # show the result of autoarima
## Series: data_series
## ARIMA(0,1,0)
##
## sigma^2 estimated as 4.63: log likelihood=-34.95
## AIC=71.91 AICc=72.2 BIC=72.68
#Make changes in the source of auto arima to run the best model
arima.string <- function (object, padding = FALSE)
{
order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
m <- order[7]
result <- paste("ARIMA(", order[1], ",", order[2], ",",
order[3], ")", sep = "")
if (m > 1 && sum(order[4:6]) > 0) {
result <- paste(result, "(", order[4], ",", order[5],
",", order[6], ")[", m, "]", sep = "")
}
if (padding && m > 1 && sum(order[4:6]) == 0) {
result <- paste(result, " ", sep = "")
if (m <= 9) {
result <- paste(result, " ", sep = "")
}
else if (m <= 99) {
result <- paste(result, " ", sep = "")
}
else {
result <- paste(result, " ", sep = "")
}
}
if (!is.null(object$xreg)) {
if (NCOL(object$xreg) == 1 && is.element("drift", names(object$coef))) {
result <- paste(result, "with drift ")
}
else {
result <- paste("Regression with", result, "errors")
}
}
else {
if (is.element("constant", names(object$coef)) || is.element("intercept",
names(object$coef))) {
result <- paste(result, "with non-zero mean")
}
else if (order[2] == 0 && order[5] == 0) {
result <- paste(result, "with zero mean ")
}
else {
result <- paste(result, " ")
}
}
if (!padding) {
result <- gsub("[ ]*$", "", result)
}
return(result)
}
source("stringthearima.R")
bestmodel <- arima.string(model1, padding = TRUE)
bestmodel <- substring(bestmodel,7,11)
bestmodel <- gsub(" ", "", bestmodel)
bestmodel <- gsub(")", "", bestmodel)
bestmodel <- strsplit(bestmodel, ",")[[1]]
bestmodel <- c(strtoi(bestmodel[1]),strtoi(bestmodel[2]),strtoi(bestmodel[3]))
bestmodel
## [1] 0 1 0
strtoi(bestmodel[3])
## [1] 0
#2. Using ACF and PACF Function
#par(mfrow=c(1,2)) # Code for making two plot in one graph
acf(diff2_x1,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab, main=paste("ACF-2nd differenced series ",y_lab, sep=" ",lag.max=20)) # plot ACF "auto correlation function after taking second diffrences

pacf(diff2_x1,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab,main=paste("PACF-2nd differenced series ",y_lab, sep=" ",lag.max=20)) # plot PACF " Partial auto correlation function after taking second diffrences

library(forecast) # install library forecast
x1_model1= arima(data_series, order=c(bestmodel)) # Run Best model of auto arima for forecasting
x1_model1 # Show result of best model of auto arima
##
## Call:
## arima(x = data_series, order = c(bestmodel))
##
##
## sigma^2 estimated as 4.625: log likelihood = -34.95, aic = 71.91
paste ("accuracy of autoarima Model For ==> ",y_lab, sep=" ")
## [1] "accuracy of autoarima Model For ==> Rent price per m2 in Chelyabinsk"
accuracy(x1_model1) # aacuracy of best model from auto arima
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.6045882 2.08746 1.310471 0.2110455 0.4631463 0.9530695 0.1159181
x1_model1$x # show result of best model from auto arima
## NULL
checkresiduals(x1_model1,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab) # checkresiduals from best model from using auto arima

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 8.2134, df = 3, p-value = 0.0418
##
## Model df: 0. Total lags used: 3
paste("Box-Ljung test , Ljung-Box test For Modelling for ==> ",y_lab, sep=" ")
## [1] "Box-Ljung test , Ljung-Box test For Modelling for ==> Rent price per m2 in Chelyabinsk"
Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box") # Do test for resdulas by using Box-Ljung test , Ljung-Box test For Modelling
##
## Box-Ljung test
##
## data: x1_model1$residuals^2
## X-squared = NA, df = 20, p-value = NA
library(tseries)
jarque.bera.test(x1_model1$residuals) # Do test jarque.bera.test
##
## Jarque Bera Test
##
## data: x1_model1$residuals
## X-squared = 23.641, df = 2, p-value = 7.353e-06
#Actual Vs Fitted
plot(data_series, col='red',lwd=2, main="Actual vs Fitted Plot", xlab='Time in (days)', ylab=y_lab) # plot actual and Fitted model
lines(fitted(x1_model1), col='blue')

#Test data
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) ) # make testing data in time series and start from rows-6
forecasting_auto_arima <- forecast(x1_model1, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_auto_arima$mean,validation_data_days)
MAPE_Per_Day<-round(abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 7 month by using bats Model for ==> Rent price per m2 in Chelyabinsk"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_auto_arima<-paste(round(MAPE_Per_Day,3),"%")
MAPE_auto.arima_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 7 days in bats Model for ==> Rent price per m2 in Chelyabinsk"
paste(MAPE_Mean_All,"%")
## [1] "1.203 % MAPE 7 month Rent price per m2 in Chelyabinsk %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 7 days in bats Model for ==> Rent price per m2 in Chelyabinsk"
data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)
## date_auto.arima validation_data_by_name actual_data forecasting_auto.arima
## 1 2020-04-01 среда 292 288
## 2 2020-05-01 пятница 286 288
## 3 2020-06-01 понедельник 286 288
## 4 2020-07-01 среда 286 288
## 5 2020-08-01 суббота 286 288
## 6 2020-09-01 вторник 281 288
## 7 2020-10-01 четверг 283 288
## MAPE_auto.arima_Model
## 1 1.37 %
## 2 0.699 %
## 3 0.699 %
## 4 0.699 %
## 5 0.699 %
## 6 2.491 %
## 7 1.767 %
data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))
## FD forecating_date forecasting_by_auto.arima
## 1 2020-11-01 воскресенье 288
## 2 2020-12-01 вторник 288
## 3 2021-01-01 пятница 288
## 4 2021-02-01 понедельник 288
## 5 2021-03-01 понедельник 288
## 6 2021-04-01 четверг 288
## 7 2021-05-01 суббота 288
plot(forecasting_auto_arima)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph4<-autoplot(forecasting_auto_arima,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)
graph4

## Error of forecasting
Error_auto.arima<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
REOF_A_auto.arima<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_auto.arima<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_auto.arima<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
## Warning in cor(testing_data, validation_forecast, method = c("pearson")):
## ñòàíäàðòíîå îòêëîíåíèå íóëåâîå
RMSE_auto.arima<-sqrt(sum((Error_auto.arima^2))/validation_data_days) # Root mean square forecast error
MAD_auto.arima<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_auto.arima<-c(Error_auto.arima)
REOF_auto.arima1<-c(paste(round(REOF_A_auto.arima,3),"%"))
REOF_auto.arima2<-c(paste(round(REOF_F_auto.arima,3),"%"))
data.frame(correlation_auto.arima,RMSE_auto.arima,MAPE_Mean_All,MAD_auto.arima) # analysis of Error by using Holt's linear model shows result of correlation ,MSE ,MPER
## correlation_auto.arima RMSE_auto.arima
## 1 NA 3.891382
## MAPE_Mean_All MAD_auto.arima
## 1 1.203 % MAPE 7 month Rent price per m2 in Chelyabinsk 2.285714
data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2) # Analysis of error shows result AEOF,REOF_A,REOF_F
## validation_dates Validation_day_name AEOF_auto.arima REOF_A_auto.arima
## 1 2020-04-01 среда 4 1.37 %
## 2 2020-05-01 пятница 2 0.699 %
## 3 2020-06-01 понедельник 2 0.699 %
## 4 2020-07-01 среда 2 0.699 %
## 5 2020-08-01 суббота 2 0.699 %
## 6 2020-09-01 вторник 7 2.491 %
## 7 2020-10-01 четверг 5 1.767 %
## REOF_F_auto.arima
## 1 1.389 %
## 2 0.694 %
## 3 0.694 %
## 4 0.694 %
## 5 0.694 %
## 6 2.431 %
## 7 1.736 %
# Choose Best model by least error
paste("System Summarizes Error ==> ( MAPE ) of Forecasting by using bats model and BATS Model, Holt's Linear Models , and autoarima for ==> ", y_lab , sep=" ")
## [1] "System Summarizes Error ==> ( MAPE ) of Forecasting by using bats model and BATS Model, Holt's Linear Models , and autoarima for ==> Rent price per m2 in Chelyabinsk"
M1<-mean(REOF_A_bats)
paste("System Summarizes Error ==> ( MAPE ) of Forecasting by using TBATS Model For ==> ", y_lab , sep=" ")
## [1] "System Summarizes Error ==> ( MAPE ) of Forecasting by using TBATS Model For ==> Rent price per m2 in Chelyabinsk"
M2<-mean(REOF_A_tbats1)
paste("System Summarizes Error ==> ( MAPE ) of Forecasting by using Holt's Linear << Exponential Smoothing >> For ==> ", y_lab , sep=" ")
## [1] "System Summarizes Error ==> ( MAPE ) of Forecasting by using Holt's Linear << Exponential Smoothing >> For ==> Rent price per m2 in Chelyabinsk"
M3<-REOF_A_Holt11
paste("System Summarizes Error ==> ( MAPE ) of Forecasting by using auto arima Model For ==> ", y_lab , sep=" ")
## [1] "System Summarizes Error ==> ( MAPE ) of Forecasting by using auto arima Model For ==> Rent price per m2 in Chelyabinsk"
M4<-mean(REOF_A_auto.arima)
paste("System Summarizes Error ==> ( MAPE ) of Forecasting by using autoarima Model For ==> ", y_lab , sep=" ")
## [1] "System Summarizes Error ==> ( MAPE ) of Forecasting by using autoarima Model For ==> Rent price per m2 in Chelyabinsk"
data.frame(validation_dates,forecating_date=forecasting_data_by_name,MAPE_bats_error=REOF_A_bats,MAPE_TBATS_error=REOF_A_tbats1,MAPE_Holt_error=REOF_A_Holt1,MAPE_autoarima_error = REOF_A_auto.arima)
## validation_dates forecating_date MAPE_bats_error MAPE_TBATS_error
## 1 2020-04-01 воскресенье 1.5276728 2.3727489
## 2 2020-05-01 вторник 0.1857845 0.4008629
## 3 2020-06-01 пятница 0.1178692 0.2432560
## 4 2020-07-01 понедельник 0.3125964 0.7772146
## 5 2020-08-01 понедельник 0.7432938 1.3144223
## 6 2020-09-01 четверг 2.9351720 2.9587217
## 7 2020-10-01 суббота 2.4213016 1.3424926
## MAPE_Holt_error MAPE_autoarima_error
## 1 1.4996812 1.3698630
## 2 0.7648471 0.6993007
## 3 0.9637171 0.6993007
## 4 1.1633736 0.6993007
## 5 1.3638213 0.6993007
## 6 3.3722724 2.4911032
## 7 2.8459123 1.7667845
recommend_Model<-c(M1,M2,M3,M4)
best_recommended_model<-min(recommend_Model)
paste ("lodaing ..... ... . .Select Minimum MAPE from Models for select best Model ==> ", y_lab , sep=" ")
## [1] "lodaing ..... ... . .Select Minimum MAPE from Models for select best Model ==> Rent price per m2 in Chelyabinsk"
best_recommended_model
## [1] 1.17767
paste ("Best Model For Forecasting ==> ",y_lab, sep=" ")
## [1] "Best Model For Forecasting ==> Rent price per m2 in Chelyabinsk"
if(best_recommended_model >= M1) {paste("System Recommend Bats Model That's better For forecasting==> ",y_lab, sep=" ")}
## [1] "System Recommend Bats Model That's better For forecasting==> Rent price per m2 in Chelyabinsk"
if(best_recommended_model >= M2) {paste("System Recommend That's better TBATS For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M3) {paste("System Recommend Holt's Linear Model < Exponential Smoothing Model > That's better For forecasting ==> ",y_lab, sep=" ")}
if(best_recommended_model >= M4) {paste("System Recommend auto arima Model That's better For forecasting ==> ",y_lab, sep=" ")}
message("System finished Forecasting by using autoarima and Holt's ,and TBATS Model ==>",y_lab, sep=" ")
## System finished Forecasting by using autoarima and Holt's ,and TBATS Model ==>Rent price per m2 in Chelyabinsk
message(" Thank you for using our System For Modelling ==> ",y_lab, sep=" ")
## Thank you for using our System For Modelling ==> Rent price per m2 in Chelyabinsk