library(xts)
library(dplyr)
library(zoo)
library(tseries)
library(stats)
library(forecast)
library(astsa)
library(corrplot)
library(AER)
library(vars)
library(dynlm)
library(TSstudio)
library(tidyverse)
library(sarima)
library(dygraphs)
# setting time series format
NEE$Date<-as.Date(NEE$Date,"%d/%m/%Y")
# Descriptive stadistics of the dependet variable
summary(NEE$Adj.Close)
## Length Class Mode
## 0 NULL NULL
# visualizing time series data
# time series plot
NEExts<-xts(NEE$NEE_Adj_Close,order.by=NEE$Date)
dygraph(NEExts, main = "NextEra Energy Stock Price") %>%
dyOptions(colors = RColorBrewer::brewer.pal(4, "Dark2")) %>%
dyShading(from = "2019/01/01",
to = "2022/01/12",
color = "#FFE6E6")
# Decompose a time series
# 1) observed: data observations
# 2) trend: increasing / decreasing value of data observations
# 3) seasonality: repeating short-term cycle in time series
# 4) noise: random variation in time series
NEEts<-ts(NEE$NEE_Adj_Close,frequency=12,start=c(2015,1))
NEE_ts_decompose<-decompose(NEEts)
plot(NEE_ts_decompose)
Based on the graph, it is possible to see a positive trend according to the periods from 2015 to mid-2022, this tells us that the stock had a good performance over time since, unlike each year, its increase was maintained, for it shows us a positive trend behavior.
At first glance it is possible to identify the seasonality, it is possible to see that the action follows a constant pattern over time, at the beginning of 2015 it is observed that the action decreases drastically in the middle of the year, but ends with a high peak. With the information provided, we can infer that this pattern will happen again since it tends to start at the top and in the middle of the year it has a drastic decrease and by the end of the period it increases drastically.
# Stationary Test
# H0: Non-stationary and HA: Stationary. p-values < 0.05 reject the H0.
adf.test(NEE$NEE_Adj_Close)
##
## Augmented Dickey-Fuller Test
##
## data: NEE$NEE_Adj_Close
## Dickey-Fuller = -2.4972, Lag order = 4, p-value = 0.371
## alternative hypothesis: stationary
# P-Value > 0.05. Fail to reject the H0. The time series data is non-stationary.
# Serial Autocorrelation
acf(NEE$NEE_Adj_Close,main="Significant Autocorrelations")
# There is high serial autocorrelation despite the number of lags of the variable
# converting to time series format
NSRts<-ts(NEE$NonStore_Retailing,frequency=12,start=c(2015,1))
UNEMts<-ts(NEE$US_Unemployment,frequency=12,start=c(2015,1))
CONSts<-ts(NEE$US_Consumer_Confidence,frequency=12,start=c(2015,1))
MHWts<-ts(NEE$US_Min_Hour_Wage,frequency=12,start=c(2015,1))
par(mfrow=c(2,3))
plot(NEE$NEE_Adj_Close,type="l",col="blue",lwd=2,xlab="Date",ylab="NEE´s Stock Price",main="NEE´s Stock Price")
plot(NEE$NonStore_Retailing,type="l",col="blue",lwd=2,xlab="Date",ylab="Non Store Retailing",main="Non Store Retailing")
plot(NEE$US_Unemployment,type="l",col="blue",lwd=2,xlab="Date",ylab="Unemployment",main="unemployment")
plot(NEE$US_Consumer_Confidence,type="l",col="blue",lwd=2,xlab="Date",ylab="Consumer Confidence",main="Consumer Confidence")
plot(NEE$US_Min_Hour_Wage,type="l",col="blue",lwd=2,xlab="Date",ylab="Min Hour Wage",main="Min Hour Wage")
The explanatory variables that might explain NEE stock price are Non Store Retailing and Consumer confidence.
Non store Retail: This variable has a positive impact on the stock prove, we can see its direction going upwards as same as the stocks price.
Consumer Confidence: it does have an impact on the stock price, it shows a downside trend on the graph which also applies to the graph in the stock price, we can say that the consumers confidence has decreased and it resulted in a negative impact in which it did not recover from that downfall.
summary(NEE_ARMA<-arma(log(NEE$NEE_Adj_Close),order=c(1,1))) # Transforming into log to having a stationary time series
##
## Call:
## arma(x = log(NEE$NEE_Adj_Close), order = c(1, 1))
##
## Model:
## ARMA(1,1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.171119 -0.026521 0.004783 0.034091 0.127085
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## ar1 0.996556 0.006827 145.974 < 2e-16 ***
## ma1 -0.393909 0.094287 -4.178 2.94e-05 ***
## intercept 0.027168 0.025718 1.056 0.291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Fit:
## sigma^2 estimated as 0.002657, Conditional Sum-of-Squares = 0.25, AIC = -290.91
NEE_estimated_stock_price<-exp(NEE_ARMA$fitted.values) # Applying exp to make interpretations on real numbers
plot(NEE_estimated_stock_price)
### Residuals are stationary ?
NEE_ARMA_residuals<-NEE_ARMA$residuals
adf.test(na.omit(NEE_estimated_stock_price))
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(NEE_estimated_stock_price)
## Dickey-Fuller = -2.4654, Lag order = 4, p-value = 0.3842
## alternative hypothesis: stationary
# ADF test suggest that ARMA residuals are non stationary since p-value is > 0.05.
### Residuals show serial autocorrelation ?
Box.test(NEE_ARMA_residuals,lag=5,type="Ljung-Box")
##
## Box-Ljung test
##
## data: NEE_ARMA_residuals
## X-squared = 2.559, df = 5, p-value = 0.7676
# Do not reject the Ho. P-value is > 0.05 indicating that ARMA Model does not show residual serial autocorrelation.
tsmodel = Arima(log(NEE$NEE_Adj_Close),order=c(1,1,1))# Transforming into log to having a stationary time series
print(tsmodel)
## Series: log(NEE$NEE_Adj_Close)
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## -0.0997 -0.1420
## s.e. 0.2617 0.2475
##
## sigma^2 = 0.003111: log likelihood = 140.39
## AIC=-274.79 AICc=-274.53 BIC=-267.13
plot(exp(tsmodel$fitted))# Applying exp to make interpretations on real numbers
### Residuals are stationary ?
adf.test(tsmodel$residuals)
## Warning in adf.test(tsmodel$residuals): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: tsmodel$residuals
## Dickey-Fuller = -4.2776, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# ADF test suggest that ARIMA residuals are stationary since p-value is < 0.05.
### Residuals show serial autocorrelation ?
acf(tsmodel$residuals,main="ACF - ARIMA (1,1,1)")
# ACF plot displays weak or no autocorrelation.
Box.test(tsmodel$residuals,lag=3,type="Ljung-Box")
##
## Box-Ljung test
##
## data: tsmodel$residuals
## X-squared = 4.4708, df = 3, p-value = 0.2149
# P-value is > 0.05 indicating that ARIMA model does not show residual serial autocorrelation.
# Converting to a stationary time series
NEEtsE = diff(NEEts)
CONStsE = diff(CONSts)
NSRtsE = diff(NSRts)
adf.test(NEEtsE) # Check the stationarity of the series
##
## Augmented Dickey-Fuller Test
##
## data: NEEtsE
## Dickey-Fuller = -4.4819, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
VAR_ts<-cbind(NEEtsE,CONStsE,NSRtsE)
colnames(VAR_ts)<-cbind("NextEra_Energy","consumer_conf","NonStore_retailing")
# Generating the preferred lag order based on multiple iterations of the AIC.
lag_selection<-VARselect(na.omit(VAR_ts,lag.max=5,type="const", season=12))
lag_selection$selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 10 2 1 2
lag_selection$criteria
## 1 2 3 4 5
## AIC(n) 2.062979e+01 2.047521e+01 2.056263e+01 2.066464e+01 2.066491e+01
## HQ(n) 2.076850e+01 2.071795e+01 2.090939e+01 2.111544e+01 2.121973e+01
## SC(n) 2.097464e+01 2.107869e+01 2.142474e+01 2.178539e+01 2.204429e+01
## FPE(n) 9.109542e+08 7.811917e+08 8.544002e+08 9.499320e+08 9.562561e+08
## 6 7 8 9 10
## AIC(n) 2.071554e+01 2.076409e+01 2.068223e+01 2.064348e+01 2.037666e+01
## HQ(n) 2.137440e+01 2.152697e+01 2.154914e+01 2.161442e+01 2.145164e+01
## SC(n) 2.235356e+01 2.266073e+01 2.283751e+01 2.305739e+01 2.304921e+01
## FPE(n) 1.015427e+09 1.080001e+09 1.012745e+09 9.967554e+08 7.858273e+08
# We estimate the VAR model. The p option refers to the number of lags used.
VAR_ts[is.na(VAR_ts)] <- mean(VAR_ts, na.rm = TRUE)
VAR_model1<-VAR(VAR_ts,p=2,type="const",season=NULL,exog=NULL)
AIC(VAR_model1) # AIC of the model
## [1] 2681.611
summary(VAR_model1)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: NextEra_Energy, consumer_conf, NonStore_retailing
## Deterministic variables: const
## Sample size: 93
## Log Likelihood: -1319.806
## Roots of the characteristic polynomial:
## 0.5827 0.5827 0.4904 0.4904 0.4689 0.2149
## Call:
## VAR(y = VAR_ts, p = 2, type = "const", exogen = NULL)
##
##
## Estimation results for equation NextEra_Energy:
## ===============================================
## NextEra_Energy = NextEra_Energy.l1 + consumer_conf.l1 + NonStore_retailing.l1 + NextEra_Energy.l2 + consumer_conf.l2 + NonStore_retailing.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## NextEra_Energy.l1 -0.4064112 0.1017927 -3.993 0.000137 ***
## consumer_conf.l1 -0.0242895 0.0819127 -0.297 0.767541
## NonStore_retailing.l1 -0.0001757 0.0001894 -0.927 0.356282
## NextEra_Energy.l2 -0.2862548 0.1046040 -2.737 0.007542 **
## consumer_conf.l2 -0.0740753 0.0858527 -0.863 0.390637
## NonStore_retailing.l2 0.0002700 0.0001862 1.450 0.150689
## const 0.9753473 0.4138836 2.357 0.020717 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 3.218 on 86 degrees of freedom
## Multiple R-Squared: 0.2163, Adjusted R-squared: 0.1616
## F-statistic: 3.956 on 6 and 86 DF, p-value: 0.001533
##
##
## Estimation results for equation consumer_conf:
## ==============================================
## consumer_conf = NextEra_Energy.l1 + consumer_conf.l1 + NonStore_retailing.l1 + NextEra_Energy.l2 + consumer_conf.l2 + NonStore_retailing.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## NextEra_Energy.l1 2.563e-01 1.286e-01 1.994 0.0494 *
## consumer_conf.l1 -3.506e-02 1.035e-01 -0.339 0.7355
## NonStore_retailing.l1 -3.139e-05 2.392e-04 -0.131 0.8959
## NextEra_Energy.l2 2.414e-01 1.321e-01 1.827 0.0712 .
## consumer_conf.l2 -2.704e-01 1.084e-01 -2.494 0.0146 *
## NonStore_retailing.l2 -4.295e-04 2.352e-04 -1.826 0.0713 .
## const -4.498e-01 5.227e-01 -0.860 0.3919
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 4.065 on 86 degrees of freedom
## Multiple R-Squared: 0.1548, Adjusted R-squared: 0.09582
## F-statistic: 2.625 on 6 and 86 DF, p-value: 0.02194
##
##
## Estimation results for equation NonStore_retailing:
## ===================================================
## NonStore_retailing = NextEra_Energy.l1 + consumer_conf.l1 + NonStore_retailing.l1 + NextEra_Energy.l2 + consumer_conf.l2 + NonStore_retailing.l2 + const
##
## Estimate Std. Error t value Pr(>|t|)
## NextEra_Energy.l1 -55.55196 58.46406 -0.950 0.344679
## consumer_conf.l1 -153.19385 47.04607 -3.256 0.001616 **
## NonStore_retailing.l1 -0.31178 0.10880 -2.866 0.005229 **
## NextEra_Energy.l2 -9.60313 60.07872 -0.160 0.873380
## consumer_conf.l2 -68.55794 49.30902 -1.390 0.168003
## NonStore_retailing.l2 0.05533 0.10695 0.517 0.606276
## const 880.35391 237.71171 3.703 0.000375 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 1848 on 86 degrees of freedom
## Multiple R-Squared: 0.172, Adjusted R-squared: 0.1142
## F-statistic: 2.977 on 6 and 86 DF, p-value: 0.01088
##
##
##
## Covariance matrix of residuals:
## NextEra_Energy consumer_conf NonStore_retailing
## NextEra_Energy 10.3580 0.2728 -739.5
## consumer_conf 0.2728 16.5230 -1912.6
## NonStore_retailing -739.4902 -1912.5575 3416795.4
##
## Correlation matrix of residuals:
## NextEra_Energy consumer_conf NonStore_retailing
## NextEra_Energy 1.00000 0.02085 -0.1243
## consumer_conf 0.02085 1.00000 -0.2545
## NonStore_retailing -0.12430 -0.25454 1.0000
### Residuals are stationary ?
VAR_model1_residuals<-data.frame(residuals(VAR_model1))
adf.test(VAR_model1_residuals$NextEra_Energy)
##
## Augmented Dickey-Fuller Test
##
## data: VAR_model1_residuals$NextEra_Energy
## Dickey-Fuller = -4.7507, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# ADF test suggest that VAR residuals are stationary since p-value is < 0.05.
### Residuals show serial autocorrelation ?
Box.test(VAR_model1_residuals$NextEra_Energy,lag=1,type="Ljung-Box")
##
## Box-Ljung test
##
## data: VAR_model1_residuals$NextEra_Energy
## X-squared = 0.010197, df = 1, p-value = 0.9196
Box.test(tsmodel$residuals,lag=2,type="Ljung-Box")
##
## Box-Ljung test
##
## data: tsmodel$residuals
## X-squared = 3.2905, df = 2, p-value = 0.193
# P-value is > 0.05 indicating that ARIMA model does not show residual serial autocorrelation.
To decide our winning model we compared AIC from the ARMA, ARIMA and VAR models. In our model you can see a minimal difference where the ARIMA is better, has the lower AIC.
It should be mentioned that our time series is NOT stationary, however we need our time series to be stationary in order to get the best model. When our series is not stationary as was the case, the ARIMA model should be used.
Using the diagnostic tests of both models you can see if there is stationery in the residues of ARIMA and VAR models. In the residuals we can see that there is no serial autocorrelation in all of the models (so the difference transformations were applied) and the ARMA model if there.
Under the above context our winning model is ARIMA.
# Granger causality testing each variable against all the others.
# There could be a unidirectional, bidirectional, or no causality relationships between variables.
granger_diff<-causality(VAR_model1,cause="NextEra_Energy")
granger_diff
## $Granger
##
## Granger causality H0: NextEra_Energy do not Granger-cause consumer_conf
## NonStore_retailing
##
## data: VAR object VAR_model1
## F-Test = 1.4979, df1 = 4, df2 = 258, p-value = 0.2032
##
##
## $Instant
##
## H0: No instantaneous causality between: NextEra_Energy and
## consumer_conf NonStore_retailing
##
## data: VAR object VAR_model1
## Chi-squared = 1.4263, df = 2, p-value = 0.4901
# Due to a high p-value, H0 is rejected in both tests, concluding that there is no Granger or instantaneous causality with the variables of the VAR model
temp_forecast<-autoplot(forecast(exp(tsmodel$fitted)))
plot(temp_forecast)
forecast(exp(tsmodel$fitted),h=5)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 97 81.10732 76.94714 85.26751 74.74487 87.46978
## 98 81.75311 76.08905 87.41717 73.09068 90.41554
## 99 82.39890 75.46614 89.33165 71.79616 93.00163
## 100 83.04468 74.96371 91.12566 70.68590 95.40346
## 101 83.69047 74.53352 92.84742 69.68612 97.69481
# It is forecast that the share price will rise during the next 5 months, the estimate indicates that its value will be 83.69
# The estimate is not the only thing to take into account, but the possible values around it, there is some volatility in the forecast. The following month has an expected value of 81.10 with a minimum value of 74.75 and a maximum of 87.47 in a 95% confidence interval.