# Jawaharlal Nehru Krishi Vishwavidyalaya, India
# Imports
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.2 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
##
library(forecast)
library(ggplot2)
library("readxl")
library(moments)
library(forecast)
library(here)
## here() starts at F:/Phd/Wind Speed
require(forecast)
require(tseries)
## Loading required package: tseries
require(markovchain)
## Loading required package: markovchain
## Package: markovchain
## Version: 0.8.5-3
## Date: 2020-12-03
## BugReport: https://github.com/spedygiorgio/markovchain/issues
require(data.table)
## Loading required package: data.table
# Global settings ----
# Raw wind-speed observations; the `Wind_speed` column is extracted below.
# The file is located relative to the project root reported by here()
# ("F:/Phd/Wind Speed" in the recorded run), so the script no longer depends
# on one machine's absolute drive path.
Full_original_data <- read.csv(here::here("data", "WindSpeed.csv"))
y_lab <- "Wind Speed in England" # series label used in plot axes and messages
# First and last calendar date of the observed series.
Actual_date_interval <- c("1994/07/07", "2015/12/31")
# First and last calendar date of the period to forecast.
Forecast_date_interval <- c("2016/01/01", "2016/01/31")
validation_data_days <- 1551 # hold-out size (20% of the data set, used for testing)
frequency <- "day" # step used when the date sequences are generated
# Data preparation and descriptive statistics ----
original_data <- as.numeric(Full_original_data$Wind_speed)
summary(original_data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.479 2.575 3.101 4.179 16.421
sd(original_data) # standard deviation
## [1] 2.167933
skewness(original_data) # coefficient of skewness
## [1] 1.30852
kurtosis(original_data) # coefficient of kurtosis
## [1] 5.058105

# Chronological split: the last `validation_data_days` observations are held
# out for testing; everything before them is used to fit the models.
rows <- NROW(original_data)
training_data <- original_data[seq(1, rows - validation_data_days)]
testing_data <- original_data[seq(rows - validation_data_days + 1, rows)]

# Calendar dates of the observed series and of the forecast horizon.
fulldate <- seq(
  from = as.Date(Actual_date_interval[1]),
  to = as.Date(Actual_date_interval[2]),
  by = frequency
)
AD <- fulldate
FD <- seq(
  from = as.Date(Forecast_date_interval[1]),
  to = as.Date(Forecast_date_interval[2]),
  by = frequency
)
N_forecasting_days <- length(FD)

# Dates of the hold-out window, plus weekday names for both date vectors.
validation_dates <- tail(AD, validation_data_days)
validation_data_by_name <- weekdays(validation_dates)
forecasting_data_by_name <- weekdays(FD)

# Training observations as a time-series object.
data_series <- ts(training_data)
# Auto ARIMA model ----
# Stationarity checks on the undifferenced training series (tests from tseries).
require(tseries)
paste("tests For Check Stationarity in series ==> ", y_lab)
## [1] "tests For Check Stationarity in series ==> Wind Speed in England"
kpss.test(data_series) # KPSS test for level stationarity
## Warning in kpss.test(data_series): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: data_series
## KPSS Level = 22.084, Truncation lag parameter = 11, p-value = 0.01
pp.test(data_series) # Phillips-Perron unit root test
## Warning in pp.test(data_series): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: data_series
## Dickey-Fuller Z(alpha) = -3370.5, Truncation lag parameter = 11,
## p-value = 0.01
## alternative hypothesis: stationary
adf.test(data_series) # augmented Dickey-Fuller test
## Warning in adf.test(data_series): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: data_series
## Dickey-Fuller = -9.8023, Lag order = 18, p-value = 0.01
## alternative hypothesis: stationary
ndiffs(data_series) # number of differences reported by ndiffs() (no differencing is applied here)
## [1] 1
# First difference of the training series ----
diff1_x1 <- diff(data_series)
# Only xlab / ylab / main are passed. The base-graphics settings col.main,
# col.lab and col.sub were not recognised by autoplot() and only produced an
# "Ignoring unknown parameters" warning, so they are dropped.
autoplot(
  diff1_x1,
  xlab = paste("Time in ", frequency, y_lab, sep = " "),
  ylab = y_lab,
  main = "1st differenced series"
)

# Stationarity checks on the first-differenced series
paste ("tests For Check Stationarity in series after taking first differences in ==> ",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series after taking first differences in ==> Wind Speed in England"
kpss.test(diff1_x1) # KPSS test after taking first differences
## Warning in kpss.test(diff1_x1): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff1_x1
## KPSS Level = 0.0016108, Truncation lag parameter = 11, p-value = 0.1
pp.test(diff1_x1) # Phillips-Perron test after taking first differences
## Warning in pp.test(diff1_x1): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: diff1_x1
## Dickey-Fuller Z(alpha) = -5409, Truncation lag parameter = 11, p-value
## = 0.01
## alternative hypothesis: stationary
adf.test(diff1_x1) # augmented Dickey-Fuller test after taking first differences
## Warning in adf.test(diff1_x1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff1_x1
## Dickey-Fuller = -29.839, Lag order = 18, p-value = 0.01
## alternative hypothesis: stationary
# Second difference of the training series ----
diff2_x1 <- diff(diff1_x1)
# Only xlab / ylab / main are passed. The base-graphics settings col.main,
# col.lab and col.sub were not recognised by autoplot() and only produced an
# "Ignoring unknown parameters" warning, so they are dropped.
autoplot(
  diff2_x1,
  xlab = paste("Time in ", frequency, y_lab, sep = " "),
  ylab = y_lab,
  main = "2nd differenced series"
)

# Stationarity checks on the second-differenced series
paste ("tests For Check Stationarity in series after taking Second differences in",y_lab, sep=" ")
## [1] "tests For Check Stationarity in series after taking Second differences in Wind Speed in England"
kpss.test(diff2_x1) # KPSS test after taking second differences
## Warning in kpss.test(diff2_x1): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff2_x1
## KPSS Level = 0.00094204, Truncation lag parameter = 11, p-value = 0.1
pp.test(diff2_x1) # Phillips-Perron test after taking second differences
## Warning in pp.test(diff2_x1): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: diff2_x1
## Dickey-Fuller Z(alpha) = -7424.2, Truncation lag parameter = 11,
## p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff2_x1) # augmented Dickey-Fuller test after taking second differences
## Warning in adf.test(diff2_x1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff2_x1
## Dickey-Fuller = -39.362, Lag order = 18, p-value = 0.01
## alternative hypothesis: stationary
# Fitting an ARIMA model ----
# 1. Automatic order selection with auto.arima(): stepwise search and the
#    approximation are both switched off, and every candidate tried is traced.
model1 <- auto.arima(
  data_series,
  stepwise = FALSE,
  approximation = FALSE,
  trace = TRUE,
  test = c("kpss", "adf", "pp")
)
##
## ARIMA(0,1,0) : 24176.48
## ARIMA(0,1,0) with drift : 24178.49
## ARIMA(0,1,1) : 23271.97
## ARIMA(0,1,1) with drift : 23273.97
## ARIMA(0,1,2) : 22466.57
## ARIMA(0,1,2) with drift : 22468.51
## ARIMA(0,1,3) : 22372.72
## ARIMA(0,1,3) with drift : 22374.6
## ARIMA(0,1,4) : 22356.46
## ARIMA(0,1,4) with drift : 22358.31
## ARIMA(0,1,5) : 22352.47
## ARIMA(0,1,5) with drift : 22354.31
## ARIMA(1,1,0) : 23798.76
## ARIMA(1,1,0) with drift : 23800.76
## ARIMA(1,1,1) : 22357.8
## ARIMA(1,1,1) with drift : 22359.61
## ARIMA(1,1,2) : 22347.88
## ARIMA(1,1,2) with drift : 22349.71
## ARIMA(1,1,3) : 22342.96
## ARIMA(1,1,3) with drift : 22344.77
## ARIMA(1,1,4) : 22343.14
## ARIMA(1,1,4) with drift : 22344.92
## ARIMA(2,1,0) : 23377.9
## ARIMA(2,1,0) with drift : 23379.9
## ARIMA(2,1,1) : 22349.66
## ARIMA(2,1,1) with drift : 22351.49
## ARIMA(2,1,2) : 22337.75
## ARIMA(2,1,2) with drift : 22348.56
## ARIMA(2,1,3) : Inf
## ARIMA(2,1,3) with drift : 22339.1
## ARIMA(3,1,0) : 23158.48
## ARIMA(3,1,0) with drift : 23160.48
## ARIMA(3,1,1) : 22343.21
## ARIMA(3,1,1) with drift : 22345.02
## ARIMA(3,1,2) : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(4,1,0) : 23020.23
## ARIMA(4,1,0) with drift : 23022.23
## ARIMA(4,1,1) : 22343.34
## ARIMA(4,1,1) with drift : 22345.14
## ARIMA(5,1,0) : 22949.39
## ARIMA(5,1,0) with drift : 22951.4
##
##
##
## Best model: ARIMA(2,1,2)
print(model1) # summary of the model selected by auto.arima()
## Series: data_series
## ARIMA(2,1,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.4591 0.3958 -0.0475 -0.9042
## s.e. 0.0281 0.0193 0.0238 0.0235
##
## sigma^2 estimated as 2.141: log likelihood=-11163.87
## AIC=22337.74 AICc=22337.75 BIC=22371.4
#Make changes in the source of auto arima to run the best model
# Build a readable label such as "ARIMA(2,1,2)" for a fitted ARIMA model.
#
# Args:
#   object:  fitted model; its `arma`, `xreg` and `coef` components are read.
#   padding: if FALSE (default) trailing spaces are stripped from the label;
#            if TRUE they are kept.
# Returns:
#   A character string with the three leading orders, a second "(.,.,.)[m]"
#   group when the period m exceeds 1 and those orders are not all zero, and a
#   suffix describing drift, regression errors or the mean term.
arima.string <- function(object, padding = FALSE) {
  # Reorder `arma` so positions 1-3 and 4-6 are the two order triples that get
  # printed and position 7 is the period.
  ord <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  period <- ord[7]
  second_triple_total <- sum(ord[4:6])

  label <- paste0("ARIMA(", ord[1], ",", ord[2], ",", ord[3], ")")
  if (period > 1 && second_triple_total > 0) {
    label <- paste0(
      label, "(", ord[4], ",", ord[5], ",", ord[6], ")[", period, "]"
    )
  }

  # Padding for a period > 1 model whose second triple is all zero.
  if (padding && period > 1 && second_triple_total == 0) {
    label <- paste0(label, " ")
    if (period <= 9) {
      label <- paste0(label, " ")
    } else if (period <= 99) {
      label <- paste0(label, " ")
    } else {
      label <- paste0(label, " ")
    }
  }

  coef_names <- names(object$coef)
  if (is.null(object$xreg)) {
    # No regressors: describe the mean term instead.
    if (any(c("constant", "intercept") %in% coef_names)) {
      label <- paste(label, "with non-zero mean")
    } else if (ord[2] == 0 && ord[5] == 0) {
      label <- paste(label, "with zero mean ")
    } else {
      label <- paste(label, " ")
    }
  } else if (NCOL(object$xreg) == 1 && "drift" %in% coef_names) {
    label <- paste(label, "with drift ")
  } else {
    label <- paste("Regression with", label, "errors")
  }

  if (padding) {
    return(label)
  }
  gsub("[ ]*$", "", label)
}
# (p, d, q) order of the model selected by auto.arima().
# It is read directly from the `arma` component (positions 1, 6 and 2, the same
# positions arima.string() prints as the first three orders) instead of being
# parsed back out of the formatted label: substring(label, 7, 11) only covered
# single-digit orders and silently truncated anything longer.
bestmodel <- as.integer(model1$arma[c(1, 6, 2)])
bestmodel
## [1] 2 1 2
bestmodel[3]
## [1] 2
# 2. ACF and PACF of the twice-differenced series
# par(mfrow = c(1, 2)) # uncomment to draw both plots in one figure
# `lag.max = 20` is now an argument of acf() / pacf(). It used to sit inside
# paste(), where it was only appended to the plot title as "20" and never
# limited the number of lags.
acf(
  diff2_x1,
  lag.max = 20,
  xlab = paste("Time in ", frequency, y_lab, sep = " "),
  ylab = y_lab,
  main = paste("ACF-2nd differenced series ", y_lab, sep = " "),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4
) # autocorrelation function after taking second differences

pacf(
  diff2_x1,
  lag.max = 20,
  xlab = paste("Time in ", frequency, y_lab, sep = " "),
  ylab = y_lab,
  main = paste("PACF-2nd differenced series ", y_lab, sep = " "),
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4
) # partial autocorrelation function after taking second differences

# Refit the selected order with arima() and run residual diagnostics ----
library(forecast) # attaches forecast (this call loads the package; it does not install it)
# Fit an ARIMA model to the training series using the (p, d, q) order held in
# `bestmodel`.
x1_model1= arima(data_series, order=c(bestmodel)) # refit of the order chosen by auto.arima
x1_model1 # print the fitted coefficients, sigma^2, log likelihood and AIC
##
## Call:
## arima(x = data_series, order = c(bestmodel))
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.4591 0.3958 -0.0475 -0.9042
## s.e. 0.0281 0.0193 0.0238 0.0235
##
## sigma^2 estimated as 2.14: log likelihood = -11163.87, aic = 22337.74
paste ("accuracy of autoarima Model For ==> ",y_lab, sep=" ")
## [1] "accuracy of autoarima Model For ==> Wind Speed in England"
# Training-set accuracy measures. MPE and MAPE are reported as -Inf / Inf in
# the output below.
# NOTE(review): presumably caused by zero values in the series - confirm.
accuracy(x1_model1) # accuracy of the refitted model on the training set
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.00872683 1.462684 1.071619 -Inf Inf 0.8956295 0.00681155
x1_model1$x # prints NULL (see output below): this fit has no `x` component
## NULL
# Residual diagnostics for the fitted model; the Ljung-Box result is printed below.
checkresiduals(x1_model1,xlab = paste ("Time in ", frequency ,y_lab , sep=" "), col.main="black", col.lab="black", col.sub="black", cex.main=1, cex.lab=1, cex.sub=1,font.main=4, font.lab=4, ylab=y_lab) # check residuals of the refitted model

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,2)
## Q* = 28.495, df = 6, p-value = 7.578e-05
##
## Model df: 4. Total lags used: 10
paste("Box-Ljung test , Ljung-Box test For Modelling for ==> ",y_lab, sep=" ")
## [1] "Box-Ljung test , Ljung-Box test For Modelling for ==> Wind Speed in England"
# Note that the test is applied to the SQUARED residuals, with 20 lags.
Box.test(x1_model1$residuals^2, lag=20, type="Ljung-Box") # Ljung-Box test on the squared residuals
##
## Box-Ljung test
##
## data: x1_model1$residuals^2
## X-squared = 1219.2, df = 20, p-value < 2.2e-16
library(tseries)
jarque.bera.test(x1_model1$residuals) # Jarque-Bera test on the residuals
##
## Jarque Bera Test
##
## data: x1_model1$residuals
## X-squared = 3147.8, df = 2, p-value < 2.2e-16
# Actual vs fitted ----
# Training series in red, with the model's fitted values overlaid in black.
plot(
  data_series,
  main = "Actual vs Fitted Plot",
  xlab = "Time in (days)",
  ylab = y_lab,
  col = "red",
  lwd = 2
)
lines(fitted(x1_model1), col = "black")

# Hold-out evaluation and forecast ----
# Test data as a time series indexed from the first observation after the
# training sample.
x1_test <- ts(testing_data, start = (rows - validation_data_days + 1))
# Forecast across the whole validation window plus the requested future dates.
forecasting_auto_arima <- forecast(x1_model1, h = N_forecasting_days + validation_data_days)
validation_forecast <- head(forecasting_auto_arima$mean, validation_data_days)

# Absolute percentage error per validation day. A day whose actual value is 0
# yields Inf (or NaN) here because the error is divided by the actual value.
MAPE_Per_Day <- round(abs(((testing_data - validation_forecast) / testing_data) * 100), 3)
# The messages below used to say "bats Model" although the forecasts come from
# the ARIMA fit; the label now names the right model.
paste("MAPE % For ", validation_data_days, frequency, "by using auto ARIMA Model for ==> ", y_lab, sep = " ")
# The recorded run reported a mean MAPE of Inf, so the mean is taken only over
# days with a finite percentage error (days with a zero actual are excluded).
MAPE_Mean_All.ARIMA_Model <- round(mean(MAPE_Per_Day[is.finite(MAPE_Per_Day)]), 3)
MAPE_Mean_All.ARIMA <- paste(MAPE_Mean_All.ARIMA_Model, "% MAPE ", validation_data_days, frequency, y_lab, sep = " ")
MAPE_auto_arima <- paste(round(MAPE_Per_Day, 3), "%")
MAPE_auto.arima_Model <- paste(MAPE_Per_Day, "%")
paste(" MAPE that's Error of Forecasting for ", validation_data_days, " days in auto ARIMA Model for ==> ", y_lab, sep = " ")
# Printed as-is: the string already contains the "%" sign, so no second one is
# appended.
MAPE_Mean_All.ARIMA
paste("MAPE that's Error of Forecasting day by day for ", validation_data_days, " days in auto ARIMA Model for ==> ", y_lab, sep = " ")
#data.frame(date_auto.arima=validation_dates,validation_data_by_name,actual_data=testing_data,forecasting_auto.arima=validation_forecast,MAPE_auto.arima_Model)
#data.frame(FD,forecating_date=forecasting_data_by_name,forecasting_by_auto.arima=tail(forecasting_auto_arima$mean,N_forecasting_days))

# Forecast plot with the actual hold-out observations overlaid in red.
plot(forecasting_auto_arima)
lines(x1_test, col = "red", lwd = 2)

graph4 <- autoplot(
  forecasting_auto_arima,
  xlab = paste("Time in ", frequency, y_lab, sep = " "),
  ylab = y_lab,
  col.main = "black", col.lab = "black", col.sub = "black",
  cex.main = 1, cex.lab = 1, cex.sub = 1,
  font.main = 4, font.lab = 4
)
graph4

MAPE_Mean_All.ARIMA
## Error of forecasting
# Every metric is computed once; the original section repeated the same
# assignments a second time with identical results.
Error_auto.arima <- abs(testing_data - validation_forecast) # absolute error of forecast (AEOF)
REOF_A_auto.arima <- abs(((testing_data - validation_forecast) / testing_data) * 100) # relative error of forecast, divided by actual (REOF_A)
REOF_F_auto.arima <- abs(((testing_data - validation_forecast) / validation_forecast) * 100) # relative error of forecast, divided by forecast (REOF_F)
correlation_auto.arima <- cor(testing_data, validation_forecast, method = "pearson") # correlation between predicted and actual values
MSE_auto.arima <- sum(Error_auto.arima^2) / validation_data_days # mean squared forecast error
RMSE_auto.arima <- sqrt(MSE_auto.arima) # root mean squared forecast error
RMSE_auto.arima
## [1] 2.302737
# Mean absolute deviation: the average of the absolute errors. The previous
# abs(sum(actual - forecast) / n) let positive and negative errors cancel
# before abs() was applied, which measures bias rather than deviation.
MAD_auto.arima <- sum(Error_auto.arima) / validation_data_days
AEOF_auto.arima <- c(Error_auto.arima)
REOF_auto.arima1 <- c(paste(round(REOF_A_auto.arima, 3), "%"))
REOF_auto.arima2 <- c(paste(round(REOF_F_auto.arima, 3), "%"))
#data.frame(correlation_auto.arima,MSE_auto.arima,RMSE_auto.arima,MAPE_Mean_All.ARIMA_Model,MAD_auto.arima) # summary of correlation, MSE, RMSE, MAPE and MAD for the auto ARIMA model
#data.frame(validation_dates,Validation_day_name=validation_data_by_name,AEOF_auto.arima,REOF_A_auto.arima=REOF_auto.arima1,REOF_F_auto.arima=REOF_auto.arima2) # per-day AEOF, REOF_A and REOF_F