Jawaharlal Nehru Krishi Vishwavidyalaya, India
# Imports
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.2 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
##
library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
library(here)
## here() starts at F:/Phd/Wind Speed
require(forecast)
require(tseries)
## Loading required package: tseries
require(markovchain)
## Loading required package: markovchain
## Package: markovchain
## Version: 0.8.5-3
## Date: 2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues
require(data.table)
## Loading required package: data.table
#Global variable
Full_original_data <- read.csv("F:/Phd/Wind Speed/data/WindSpeed.csv")
y_lab<- "Wind Speed in England" # input name of data
Actual_date_interval <- c("1994/07/07","2015/12/31")
Forecast_date_interval <- c("2016/01/01","2016/01/31")
validation_data_days <-1551 #(20% of dataset for testing )
frequency<-"day"
# Data Preparation & calculate some of statistics measures
original_data<-as.numeric(Full_original_data$Wind_speed)
summary(original_data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.479 2.575 3.101 4.179 16.421
sd(original_data) # calculate standard deviation
## [1] 2.167933
skewness(original_data) # calculate Cofficient of skewness
## [1] 1.30852
kurtosis(original_data) # calculate Cofficient of kurtosis
## [1] 5.058105
rows <- NROW(original_data)
training_data<-original_data[1:(rows-validation_data_days)]
testing_data<-original_data[(rows-validation_data_days+1):rows]
AD<-fulldate<-seq(as.Date(Actual_date_interval[1]),as.Date(Actual_date_interval[2]), frequency) #input range for actual date
FD<-seq(as.Date(Forecast_date_interval[1]),as.Date(Forecast_date_interval[2]), frequency) #input range forecasting date
N_forecasting_days<-nrow(data.frame(FD))
validation_dates<-tail(AD,validation_data_days)
validation_data_by_name<-weekdays(validation_dates)
forecasting_data_by_name<-weekdays(FD)
data_series<-ts(training_data)
##bats model
# Data Modeling
data_series<-ts(training_data)
autoplot(data_series ,xlab=paste ("Time in ", frequency, sep=" "), ylab = y_lab, main=paste ("Actual training data :", y_lab, sep=" "))

model_bats<-bats(data_series)
accuracy(model_bats) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.01076643 1.462041 1.070622 -Inf Inf 0.8947959 3.177563e-05
# Print Model Parameters
model_bats
## BATS(1, {3,2}, 0.958, -)
##
## Call: bats(y = data_series)
##
## Parameters
## Alpha: 0.04377292
## Beta: -0.001422928
## Damping Parameter: 0.957934
## AR coefficients: -0.181957 0.138062 0.019664
## MA coefficients: 0.6357 0.085392
##
## Seed States:
## [,1]
## [1,] 1.07610507
## [2,] 0.08115185
## [3,] 0.00000000
## [4,] 0.00000000
## [5,] 0.00000000
## [6,] 0.00000000
## [7,] 0.00000000
##
## Sigma: 1.462041
## AIC: 58932.69
plot(model_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "))

checkresiduals(model_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab) # checkresiduals from best model from using BATS model

##
## Ljung-Box test
##
## data: Residuals from BATS
## Q* = 43.824, df = 3, p-value = 1.645e-09
##
## Model df: 15. Total lags used: 18
# Testing Data Evaluation
forecasting_bats <- predict(model_bats, h=N_forecasting_days+validation_data_days)
#forecasting_bats
validation_forecast<-head(forecasting_bats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 1551 day by using bats Model for ==> Wind Speed in England"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_bats<-paste(round(MAPE_Per_Day,3),"%")
MAPE_bats_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 1551 days in bats Model for ==> Wind Speed in England"
paste(MAPE_Mean_All,"%")
## [1] "Inf % MAPE 1551 day Wind Speed in England %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in bats Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 1551 days in bats Model for ==> Wind Speed in England"
#data.frame(date_bats=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_bats=validation_forecast,MAPE_bats_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_bats=tail(forecasting_bats$mean,N_forecasting_days))
par(bg = "#F5F5F5")
plot(forecasting_bats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph1<-autoplot(forecasting_bats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), ylab=y_lab)
graph1

## Error of forecasting
Error_bats<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
MSE_Bats<-((sum(Error_bats^2)/validation_data_days))
MSE_Bats
## [1] 5.200144
REOF_A_bats<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_bats<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_bats<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_bats<-sqrt(sum((Error_bats^2))/validation_data_days) # Root mean square forecast error
MAD_bats<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_bats<-c(Error_bats)
REOF_Abats<-c(paste(round(REOF_A_bats,3),"%"))
REOF_Fbats<-c(paste(round(REOF_F_bats,3),"%"))
data.frame(correlation_bats,RMSE_bats,MAPE_Mean_All,MAD_bats) # analysis of Error by using Bats Model shows result of correlation ,MSE ,MPER
## correlation_bats RMSE_bats MAPE_Mean_All
## 1 -0.01763502 2.280382 Inf % MAPE 1551 day Wind Speed in England
## MAD_bats
## 1 0.2439334
#(validation_dates,Validation_day_name=validation_data_by_name,AEOF_bats,REOF_Abats,REOF_Fbats) # Analysis of error shows result AEOF,REOF_A,REOF_F
## TBATS Model
# Data Modeling
data_series<-ts(training_data)
model_TBATS<-forecast:::fitSpecificTBATS(data_series,use.box.cox=FALSE, use.beta=TRUE, seasonal.periods=c(6),use.damping=FALSE,k.vector=c(2))
accuracy(model_TBATS) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.02863277 1.578318 1.143113 NaN Inf 0.9553818 0.1879452
# Print Model Parameters
model_TBATS
## TBATS(1, {0,0}, 1, {<6,2>})
##
## Call: NULL
##
## Parameters
## Alpha: 0.3745714
## Beta: 0.0004507664
## Damping Parameter: 1
## Gamma-1 Values: -0.0001219981
## Gamma-2 Values: 0.0001986376
##
## Seed States:
## [,1]
## [1,] 1.084195150
## [2,] 0.078593289
## [3,] -0.006296349
## [4,] -0.014116594
## [5,] -0.073376805
## [6,] 0.008367949
##
## Sigma: 1.578318
## AIC: 59872.38
checkresiduals(model_TBATS,xlab = paste ("Time in ", frequency ,y_lab , sep=" ")) # checkresiduals from best model from TBATS model

##
## Ljung-Box test
##
## data: Residuals from TBATS(1, {0,0}, 1, {<6,2>})
## Q* = 645.76, df = 3, p-value < 2.2e-16
##
## Model df: 10. Total lags used: 13
plot(model_TBATS,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), ylab=y_lab)

# Testing Data Evaluation
forecasting_tbats <- predict(model_TBATS, h=N_forecasting_days+validation_data_days)
validation_forecast<-head(forecasting_tbats$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 1551 day by using TBATS Model for ==> Wind Speed in England"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_TBATS<-paste(round(MAPE_Per_Day,3),"%")
MAPE_TBATS_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 1551 days in TBATS Model for ==> Wind Speed in England"
paste(MAPE_Mean_All,"%")
## [1] "Inf % MAPE 1551 day Wind Speed in England %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in TBATS Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 1551 days in TBATS Model for ==> Wind Speed in England"
#data.frame(date_TBATS=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_TBATS=validation_forecast,MAPE_TBATS_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_TBATS=tail(forecasting_tbats$mean,N_forecasting_days))
plot(forecasting_tbats)
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph2<-autoplot(forecasting_tbats,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), ylab=y_lab)
graph2

## Error of forecasting TBATS Model
Error_tbats<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
sqrt(sum(Error_tbats^2)/validation_data_days)
## [1] 3.591204
REOF_A_tbats1<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_tbats<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_tbats<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_tbats<-sqrt(sum((Error_tbats^2))/validation_data_days) # Root mean square forecast error
MAD_tbats<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_tbats<-c(Error_tbats)
REOF_A_tbats<-c(paste(round(REOF_A_tbats1,3),"%"))
REOF_F_tbats<-c(paste(round(REOF_F_tbats,3),"%"))
data.frame(correlation_tbats,RMSE_tbats,MAPE_Mean_All,MAD_tbats) # analysis of Error by using Holt's linear model shows result of correlation ,MSE ,MPER
## correlation_tbats RMSE_tbats MAPE_Mean_All
## 1 0.0203281 3.591204 Inf % MAPE 1551 day Wind Speed in England
## MAD_tbats
## 1 2.714993
#data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_tbats,REOF_A_tbats,REOF_F_tbats) # Analysis of error shows result AEOF,REOF_A,REOF_F
## Holt's linear trend
# Data Modeling
data_series<-ts(training_data)
model_holt<-holt(data_series,h=N_forecasting_days+validation_data_days,lambda = "auto")
checkresiduals(model_holt,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="blue", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab) # checkresiduals from best model from using auto Holts linear trend model

##
## Ljung-Box test
##
## data: Residuals from Holt's method
## Q* = 249.55, df = 6, p-value < 2.2e-16
##
## Model df: 4. Total lags used: 10
accuracy(model_holt) # accuracy on training data
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1254765 1.59638 1.125681 NaN Inf 0.9408125 -0.005215349
# Print Model Parameters
summary(model_holt$model)
## Holt's method
##
## Call:
## holt(y = data_series, h = N_forecasting_days + validation_data_days,
##
## Call:
## lambda = "auto")
##
## Box-Cox transformation: lambda= 0.0635
##
## Smoothing parameters:
## alpha = 0.6556
## beta = 1e-04
##
## Initial states:
## l = 0.1011
## b = 1e-04
##
## sigma: 0.6748
##
## AIC AICc BIC
## 49313.13 49313.14 49346.80
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1254765 1.59638 1.125681 NaN Inf 0.9408125 -0.005215349
# Testing Data Evaluation
forecasting_holt <- predict(model_holt, h=N_forecasting_days+validation_data_days,lambda = "auto")
validation_forecast<-head(forecasting_holt$mean,validation_data_days)
MAPE_Per_Day<-round( abs(((testing_data-validation_forecast)/testing_data)*100) ,3)
paste ("MAPE % For ",validation_data_days,frequency,"by using holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE % For 1551 day by using holt Model for ==> Wind Speed in England"
MAPE_Mean_All<-paste(round(mean(MAPE_Per_Day),3),"% MAPE ",validation_data_days,frequency,y_lab,sep=" ")
MAPE_holt<-paste(round(MAPE_Per_Day,3),"%")
MAPE_holt_Model<-paste(MAPE_Per_Day ,"%")
paste (" MAPE that's Error of Forecasting for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] " MAPE that's Error of Forecasting for 1551 days in holt Model for ==> Wind Speed in England"
paste(MAPE_Mean_All,"%")
## [1] "Inf % MAPE 1551 day Wind Speed in England %"
paste ("MAPE that's Error of Forecasting day by day for ",validation_data_days," days in holt Model for ==> ",y_lab, sep=" ")
## [1] "MAPE that's Error of Forecasting day by day for 1551 days in holt Model for ==> Wind Speed in England"
#data.frame(date_holt=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_holt=validation_forecast,MAPE_holt_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_holt=tail(forecasting_holt$mean,N_forecasting_days))
plot(forecasting_holt,ylim=c(0,20))
x1_test <- ts(testing_data, start =(rows-validation_data_days+1) )
lines(x1_test, col='red',lwd=2)

graph3<-autoplot(forecasting_holt,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), ylab=y_lab, ylim=c(0,20))
graph3

## Error of forecasting by using Holt's linear model
Error_Holt<-abs(testing_data-validation_forecast) # Absolute error of forecast (AEOF)
sqrt(sum(Error_Holt^2)/validation_data_days)
## [1] 2.808644
REOF_A_Holt1<-abs(((testing_data-validation_forecast)/testing_data)*100) #Relative error of forecast (divided by actual)(REOF_A)
REOF_F_Holt<-abs(((testing_data-validation_forecast)/validation_forecast)*100) #Relative error of forecast (divided by forecast)(REOF_F)
correlation_Holt<-cor(testing_data,validation_forecast, method = c("pearson")) # correlation coefficient between predicted and actual values
RMSE_Holt<-sqrt(sum((Error_Holt^2))/validation_data_days) # Root mean square forecast error
MAD_Holt<-abs((sum(testing_data-validation_forecast))/validation_data_days) # average forecast accuracy
AEOF_Holt<-c(Error_Holt)
REOF_A_Holt<-c(paste(round(REOF_A_Holt1,3),"%"))
REOF_F_Holt<-c(paste(round(REOF_F_Holt,3),"%"))
REOF_A_Holt11<-mean(abs(((testing_data-validation_forecast)/testing_data)*100))
data.frame(correlation_Holt,RMSE_Holt,MAPE_Mean_All,MAD_Holt) # analysis of Error by using Holt's linear model shows result of correlation ,MSE ,MPER
## correlation_Holt RMSE_Holt MAPE_Mean_All
## 1 -0.01699457 2.808644 Inf % MAPE 1551 day Wind Speed in England
## MAD_Holt
## 1 1.649879
#data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_Holt,REOF_A_Holt,REOF_F_Holt) # Analysis of error shows result AEOF,REOF_A,REOF_F