Forecasting wind speed in England by using BATS, TBATS, and Holt’s Linear trend model

That algorithm contains the best 3 models for forecasting wind Speed in England

Makarovskikh Tatyana Anatolyevna Макаровских Татьяна Анатольевна"

Abotaleb mostafa “Аботалеб Мостафа”

Department of Electrical Engineering and Computer Science

South ural state university, Chelyabinsk, Russian federation

Dr Pradeep Mishra

Department of Mathematics & Statistics

Jawaharlal Nehru Krishi Vishwavidyalaya, India

# Imports
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3
## 
library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
library(here)
## here() starts at F:/Phd/Wind Speed
require(forecast)  
require(tseries)
## Loading required package: tseries
require(markovchain)
## Loading required package: markovchain
## Package:  markovchain
## Version:  0.8.5-3
## Date:     2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues
require(data.table)
## Loading required package: data.table
#Global variable
Full_original_data <- read.csv("F:/Phd/Wind Speed/data/WindSpeed.csv")
y_lab<- "Wind Speed in England"   # input name of data
Actual_date_interval <- c("1994/07/07","2015/12/31")
Forecast_date_interval <- c("2016/01/01","2016/01/31")
validation_data_days <-1551 #(20% of dataset for testing )
frequency<-"day"
# Data Preparation & calculate some of statistics measures
original_data<-as.numeric(Full_original_data$Wind_speed)

summary(original_data)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.479   2.575   3.101   4.179  16.421
sd(original_data)  # calculate standard deviation
## [1] 2.167933
skewness(original_data)  # calculate Cofficient of skewness
## [1] 1.30852
kurtosis(original_data)   # calculate Cofficient of kurtosis
## [1] 5.058105
rows <- NROW(original_data)
training_data<-original_data[1:(rows-validation_data_days)]
testing_data<-original_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency)  #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency)  #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD)) 
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)
data_series<-ts(training_data)
##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in  ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual training data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats)  # accuracy on training data
##                      ME     RMSE      MAE  MPE MAPE      MASE         ACF1
## Training set 0.01076643 1.462041 1.070622 -Inf  Inf 0.8947959 3.177563e-05
# Print Model Parameters
model_bats
## BATS(1, {3,2}, 0.958, -)
## 
## Call: bats(y = data_series)
## 
## Parameters
##   Alpha: 0.04377292
##   Beta: -0.001422928
##   Damping Parameter: 0.957934
##   AR coefficients: -0.181957 0.138062 0.019664
##   MA coefficients: 0.6357 0.085392
## 
## Seed States:
##            [,1]
## [1,] 1.07610507
## [2,] 0.08115185
## [3,] 0.00000000
## [4,] 0.00000000
## [5,] 0.00000000
## [6,] 0.00000000
## [7,] 0.00000000
## 
## Sigma: 1.462041
## AIC: 58932.69
plot(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "))

checkresiduals(model_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using BATS model

## 
##  Ljung-Box test
## 
## data:  Residuals from BATS
## Q* = 43.824, df = 3, p-value = 1.645e-09
## 
## Model df: 15.   Total lags used: 18
# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
#forecasting_bats
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE % For  1551 day by using bats Model for  ==>  Wind Speed in England"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for  1551  days in bats Model for  ==>  Wind Speed in England"
paste(MAPE_Mean_All,"%")
## [1] "Inf % MAPE  1551 day Wind Speed in England %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for  1551  days in bats Model for  ==>  Wind Speed in England"
#data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))
par(bg = "#F5F5F5")
plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "), ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
MSE_Bats<-((sum(Error_bats^2)/validation_data_days))
MSE_Bats
## [1] 5.200144
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days)   #  Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,RMSE_bats,MAPE_Mean_All,MAD_bats) # analysis of Error  by using Bats Model shows result of correlation ,MSE ,MPER
##   correlation_bats RMSE_bats                              MAPE_Mean_All
## 1      -0.01763502  2.280382 Inf % MAPE  1551 day Wind Speed in England
##    MAD_bats
## 1 0.2439334
#(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F
## TBATS Model

# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE,  seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS)  # accuracy on training data
##                       ME     RMSE      MAE MPE MAPE      MASE      ACF1
## Training set -0.02863277 1.578318 1.143113 NaN  Inf 0.9553818 0.1879452
# Print Model Parameters
model_TBATS
## TBATS(1, {0,0}, 1, {<6,2>})
## 
## Call: NULL
## 
## Parameters
##   Alpha: 0.3745714
##   Beta: 0.0004507664
##   Damping Parameter: 1
##   Gamma-1 Values: -0.0001219981
##   Gamma-2 Values: 0.0001986376
## 
## Seed States:
##              [,1]
## [1,]  1.084195150
## [2,]  0.078593289
## [3,] -0.006296349
## [4,] -0.014116594
## [5,] -0.073376805
## [6,]  0.008367949
## 
## Sigma: 1.578318
## AIC: 59872.38
checkresiduals(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "))  # checkresiduals from best model from TBATS model

## 
##  Ljung-Box test
## 
## data:  Residuals from TBATS(1, {0,0}, 1, {<6,2>})
## Q* = 645.76, df = 3, p-value < 2.2e-16
## 
## Model df: 10.   Total lags used: 13
plot(model_TBATS,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "), ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE % For  1551 day by using TBATS Model for  ==>  Wind Speed in England"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for  1551  days in TBATS Model for  ==>  Wind Speed in England"
paste(MAPE_Mean_All,"%")
## [1] "Inf % MAPE  1551 day Wind Speed in England %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for  1551  days in TBATS Model for  ==>  Wind Speed in England"
#data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))
plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "), ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
sqrt(sum(Error_tbats^2)/validation_data_days)
## [1] 3.591204
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days)   #  Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,RMSE_tbats,MAPE_Mean_All,MAD_tbats) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER
##   correlation_tbats RMSE_tbats                              MAPE_Mean_All
## 1         0.0203281   3.591204 Inf % MAPE  1551 day Wind Speed in England
##   MAD_tbats
## 1  2.714993
#data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats)   # Analysis of error shows result AEOF,REOF_A,REOF_F
## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
checkresiduals(model_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab)  # checkresiduals from best model from using auto Holts linear trend model 

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt's method
## Q* = 249.55, df = 6, p-value < 2.2e-16
## 
## Model df: 4.   Total lags used: 10
accuracy(model_holt)  # accuracy on training data
##                     ME    RMSE      MAE MPE MAPE      MASE         ACF1
## Training set 0.1254765 1.59638 1.125681 NaN  Inf 0.9408125 -0.005215349
# Print Model Parameters
summary(model_holt$model)
## Holt's method 
## 
## Call:
##  holt(y = data_series, h = N_forecasting_days + validation_data_days,  
## 
##  Call:
##      lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 0.0635 
## 
##   Smoothing parameters:
##     alpha = 0.6556 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 0.1011 
##     b = 1e-04 
## 
##   sigma:  0.6748
## 
##      AIC     AICc      BIC 
## 49313.13 49313.14 49346.80 
## 
## Training set error measures:
##                     ME    RMSE      MAE MPE MAPE      MASE         ACF1
## Training set 0.1254765 1.59638 1.125681 NaN  Inf 0.9408125 -0.005215349
# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round(  abs(((testing_data-validation_forecast)/testing_data)*100)  ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE % For  1551 day by using holt Model for  ==>  Wind Speed in England"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for  1551  days in holt Model for  ==>  Wind Speed in England"
paste(MAPE_Mean_All,"%")
## [1] "Inf % MAPE  1551 day Wind Speed in England %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for  ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for  1551  days in holt Model for  ==>  Wind Speed in England"
#data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))
plot(forecasting_holt,ylim=c(0,20))
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in  ", frequency ,y_lab , sep=" "),  ylab=y_lab, ylim=c(0,20))
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast)  # Absolute error of forecast (AEOF)
sqrt(sum(Error_Holt^2)/validation_data_days)
## [1] 2.808644
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100)  #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100)  #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson"))     # correlation coefficient between predicted and actual values 
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days)   #  Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days)   # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,RMSE_Holt,MAPE_Mean_All,MAD_Holt) # analysis of Error  by using Holt's linear model shows result of correlation ,MSE ,MPER
##   correlation_Holt RMSE_Holt                              MAPE_Mean_All
## 1      -0.01699457  2.808644 Inf % MAPE  1551 day Wind Speed in England
##   MAD_Holt
## 1 1.649879
#data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt)   # Analysis of error shows result AEOF,REOF_A,REOF_F