library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Loading required package: TTR
I- Pair Values
mySymbols <- c('LCID', 'TSLA', 'SEV', 'NIO', 'MBGYY', 'POAHY', 'RIVN'
)
myStocks <-lapply(mySymbols, function(x) {getSymbols(x,
from = "2020-01-01",
to = "2023-02-10",
periodicity = "daily",
auto.assign=FALSE)} )
names(myStocks) <- mySymbols
closePrices <- lapply(myStocks, Cl)
closePrices <- do.call(merge, closePrices)
names(closePrices)<-sub("\\.Close", "", names(closePrices))
head(closePrices) # class: 'xts'
## LCID TSLA SEV NIO MBGYY POAHY RIVN
## 2020-01-02 NA 28.68400 NA 3.72 14.07 7.54 NA
## 2020-01-03 NA 29.53400 NA 3.83 13.56 7.42 NA
## 2020-01-06 NA 30.10267 NA 3.68 13.66 7.46 NA
## 2020-01-07 NA 31.27067 NA 3.24 13.65 7.46 NA
## 2020-01-08 NA 32.80933 NA 3.39 13.74 7.52 NA
## 2020-01-09 NA 32.08933 NA 3.46 13.79 7.61 NA
library(TSstudio)
ts_info(closePrices)
## The closePrices series is a xts object with 7 variables and 783 observations
## Frequency: daily
## Start time: 2020-01-02
## End time: 2023-02-09
ts_plot(closePrices)
ts_plot(closePrices,
title = "Daily Close Prices of 7 EV stocks during 01/2020 - 02/2023 ",
Ytitle = "Close Price ($/share)")
library(tidyverse)
library(forecast)
library(tseries)
#Test if the time-series is stationary using an ADF (Augmented Dickey-Fuller Test).
##convert to vector of stocks
closePrices_vector <- as.numeric(closePrices)
##convert to vector of LUCID stock
closePrices_vector_lcid <- as.numeric(closePrices[,1])
##convert to vector of TESLA stock
closePrices_vector_tsla <- as.numeric(closePrices[,2])
##convert to vector of SONO MOTORS stock
closePrices_vector_sev <- as.numeric(closePrices[,3])
##convert to vector of NIO stock
closePrices_vector_nio <- as.numeric(closePrices[,4])
##convert to vector of Mercedes stock
closePrices_vector_mbgyy <- as.numeric(closePrices[,5])
##convert to vector of PORSCHE stock
closePrices_vector_poahy <- as.numeric(closePrices[,6])
##convert to vector of RIVIAN stock
closePrices_vector_rivn <- as.numeric(closePrices[,7])
closePrices_vector <- na.omit(closePrices_vector)
adf.test(diff(log(closePrices_vector)), alternative = "stationary", k=0)
## Warning in adf.test(diff(log(closePrices_vector)), alternative = "stationary", :
## p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(log(closePrices_vector))
## Dickey-Fuller = -62.656, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
Using hypothesis testing, we can see that the ADF gives a p-value lesser than a significance level of 5 per cent. Thus, we can reject the null hypothesis and accept the alternate hypothesis that our time-series is stationary.
# Proceed with ARIMA Forecasting
## To determine the parameters of our ARIMA model, we can take a look at the different ACF and PACF time series plots.
closePrices_vector_lcid <- na.omit(closePrices_vector_lcid)
acf(diff(log(closePrices_vector_lcid)))
pacf(diff(log(closePrices_vector_lcid)))
closePrices_vector_tsla <- na.omit(closePrices_vector_tsla)
acf(diff(log(closePrices_vector_tsla)))
pacf(diff(log(closePrices_vector_tsla)))
It seems that the lag for the ACF time series plot cuts off after lag 0 which most likely represents an MA(0) model. As such, we can build our ARIMA(0,1,1) model and also include a seasonal component.
#A period of 5 is used as the data is sampled daily as opposed to 365 as R does not support lags greater than 350 periods.
fit_lcid <- arima(log(closePrices_vector_lcid), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_tsla <- arima(log(closePrices_vector_tsla), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_sev <- arima(log(closePrices_vector_sev), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_nio <- arima(log(closePrices_vector_nio), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_mbgyy <- arima(log(closePrices_vector_mbgyy), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_poahy <- arima(log(closePrices_vector_poahy), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_rivn <- arima(log(closePrices_vector_rivn), c(0,1,1), seasonal = list(order = c(0,1,1), period = 5))
fit_lcid
##
## Call:
## arima(x = log(closePrices_vector_lcid), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## 0.0662 -1.0000
## s.e. 0.0409 0.0152
##
## sigma^2 estimated as 0.003829: log likelihood = 802.09, aic = -1598.18
fit_tsla
##
## Call:
## arima(x = log(closePrices_vector_tsla), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## -0.0115 -0.9815
## s.e. 0.0343 0.0098
##
## sigma^2 estimated as 0.002107: log likelihood = 1283.38, aic = -2560.76
fit_sev
##
## Call:
## arima(x = log(closePrices_vector_sev), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## 0.0931 -0.9589
## s.e. 0.0574 0.0278
##
## sigma^2 estimated as 0.004706: log likelihood = 374.72, aic = -743.43
fit_nio
##
## Call:
## arima(x = log(closePrices_vector_nio), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## 0.0365 -0.9741
## s.e. 0.0353 0.0128
##
## sigma^2 estimated as 0.003051: log likelihood = 1140.33, aic = -2274.65
fit_mbgyy
##
## Call:
## arima(x = log(closePrices_vector_mbgyy), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## -0.0795 -1.0000
## s.e. 0.0323 0.0094
##
## sigma^2 estimated as 0.0009302: log likelihood = 1596.61, aic = -3187.21
fit_poahy
##
## Call:
## arima(x = log(closePrices_vector_poahy), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## 0.0571 -1.0000
## s.e. 0.0337 0.0112
##
## sigma^2 estimated as 0.0008371: log likelihood = 1637.6, aic = -3269.2
fit_rivn
##
## Call:
## arima(x = log(closePrices_vector_rivn), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 5))
##
## Coefficients:
## ma1 sma1
## 0.0373 -0.9993
## s.e. 0.0528 0.0369
##
## sigma^2 estimated as 0.003405: log likelihood = 426.93, aic = -847.87
#apply forecast for the next year or approximately 232 weeks ahead (2023 - 2027) assuming there are 5 trading days per week.
#apply exp() to remove the log from the values. (Log inverse)
pred_prices_lcid <- predict(fit_lcid, n.ahead= 232*5)
pred_prices_tsla <- predict(fit_tsla, n.ahead= 232*5)
pred_prices_sev <- predict(fit_sev, n.ahead= 232*5)
pred_prices_nio <- predict(fit_nio, n.ahead= 232*5)
pred_prices_mbgyy <- predict(fit_mbgyy, n.ahead= 232*5)
pred_prices_poahy <- predict(fit_poahy, n.ahead= 232*5)
pred_prices_rivn <- predict(fit_rivn, n.ahead= 232*5)
dates <-as.POSIXct(closePrices[,0])
#lucid
ts.plot(as.ts(closePrices_vector_lcid),exp(pred_prices_lcid$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of LUCID stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h=21.06, col = "lightgrey")
text(900, 25, labels = "Mean = 21.06", col = "red", cex=.7)
text(850, c(7.5,6), labels = c("Predicted Mean = 13.98 (Min: $11.67 - Max: $16.60)","Model Errors (RMSE) = 6.6%"), col = "blue", cex=.7)
#tesla
ts.plot(as.ts(closePrices_vector_tsla),exp(pred_prices_tsla$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of TESLA stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h=204.41, col = "lightgrey")
text(900, 250, labels = "Mean = 204.41", col = "red", cex=.7)
text(850, c(80,30), labels = c("Predicted Mean = 113.04 (Min: $72.17 - Max: $167.07)","Model Errors (RMSE) = 4%"), col = "blue", cex=.7)
#Sono Motors
ts.plot(as.ts(closePrices_vector_sev),exp(pred_prices_sev$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of Sono Motors stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h=4.46, col = "lightgrey")
text(900, 6, labels = "Mean = 4.46", col = "red", cex=.7)
text(850, c(0.5,0.1), labels = c("Predicted Mean = 0.08","Model Errors (RMSE) = 6.5%"), col = "blue", cex=.7)
#NIO
ts.plot(as.ts(closePrices_vector_nio),exp(pred_prices_nio$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of NIO stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h=24.88, col = "lightgrey")
text(900, 32, labels = "Mean = 24.88", col = "red", cex=.7)
text(850, c(1,0.5), labels = c("Predicted Mean = 3.48","Model Errors (RMSE) = 5%"), col = "blue", cex=.7)
#Mercedes
ts.plot(as.ts(closePrices_vector_mbgyy),exp(pred_prices_mbgyy$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of MERCEDES stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h=16.81, col = "lightgrey")
text(900, 15, labels = "Mean = 16.81", col = "red", cex=.7)
text(850, c(20,25), labels = c("Predicted Mean = 19.36","Model Errors (RMSE) = 2.4%"), col = "blue", cex=.7)
#Porsche
ts.plot(as.ts(closePrices_vector_poahy),exp(pred_prices_poahy$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of PORSCHE stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h= 7.7, col = "lightgrey")
text(900, 8, labels = "Mean = 7.7", col = "red", cex=.7)
text(850, c(4.5,4), labels = c("Predicted Mean = 4.9","Model Errors (RMSE) = 3%"), col = "blue", cex=.7)
#RIVIAN
ts.plot(as.ts(closePrices_vector_rivn),exp(pred_prices_rivn$pred), log = "y", col=c(2,4), lty=c(1,5), xlab = "Time Period", ylab = "Share Price($)", main = "Predicted Share Prices of RIVIAN stock in 2023-2027")
#change x axis to datetimes
axis(1, at=seq(1, length(dates)), labels = format(dates, "%Y-%m"), cex.axis = 0.5, col.axis="red")
abline(h=45.8, col = "lightgrey")
text(900, 50, labels = "Mean = 45.8", col = "red", cex=.7)
text(850, c(0.8,0.4), labels = c("Predicted Mean = 2.8","Model Errors (RMSE) = 5%"), col = "blue", cex=.7)
pre_lucid <- forecast(fit_lcid, n.ahead= 232*5)
## Warning in forecast.Arima(fit_lcid, n.ahead = 232 * 5): The non-existent n.ahead
## arguments will be ignored.
plot(pre_lucid)