Crude Oil Price Modelling using Time Series Analysis

# link of the data
# https://finance.yahoo.com/quote/CL%3DF/history?period1=1577836800&period2=1638316800&interval=1mo&filter=history&frequency=1mo&includeAdjustedClose=true

library(readxl)
df = read_excel('dataset/Crude Oil.xlsx')
data = df$Close
data = ts(data, start = c(2018,1), frequency = 12)
plot.ts(data, main='Crude Oil Price from 2018 to 2021', col='blue', lwd=3)

acf(as.numeric(data), main='ACF of Crude Oil Price', col='blue', lwd=6)

pacf(as.numeric(data), main='PACF of Crude Oil Price', col='blue', lwd=6)
summary(data)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.84   50.33   58.87   57.13   66.50   83.57

# autocorrelation test
Box.test(data,lag = 12, type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  data
## X-squared = 111.45, df = 12, p-value < 2.2e-16

library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

kpss.test(data, null="Trend")

## 
##  KPSS Test for Trend Stationarity
## 
## data:  data
## KPSS Trend = 0.24002, Truncation lag parameter = 3, p-value = 0.01

adf.test(data)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  data
## Dickey-Fuller = -2.0416, Lag order = 3, p-value = 0.5575
## alternative hypothesis: stationary

# best arima model
f1 = arima(data,order = c(1,0,0))
f2 = arima(data,order = c(1,0,1))
f3 = arima(data,order = c(2,0,1))
f4 = arima(data,order = c(0,0,1))
f5 = arima(data,order = c(0,0,2))
f6 = arima(data,order = c(1,0,0), seasonal = c(0,0,1))
AIC(f1,f2,f3,f4,f5,f6)

##    df      AIC
## f1  3 332.1922
## f2  4 333.7803
## f3  5 335.1876
## f4  3 356.7054
## f5  4 348.2848
## f6  4 331.2786

library(forecast)
fit = auto.arima(data)
fit

## Series: data 
## ARIMA(1,0,0)(0,0,1)[12] with non-zero mean 
## 
## Coefficients:
##          ar1     sma1     mean
##       0.8429  -0.3008  57.9231
## s.e.  0.0781   0.1744   4.5427
## 
## sigma^2 estimated as 50.09:  log likelihood=-161.64
## AIC=331.28   AICc=332.21   BIC=338.76

# residual of model
Box.test(fit$residuals, type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  fit$residuals
## X-squared = 0.019229, df = 1, p-value = 0.8897

qqnorm(fit$residuals); qqline(fit$residuals)

shapiro.test(fit$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  fit$residuals
## W = 0.93578, p-value = 0.0112

# forecast
training = df$Close[1:36]
testing  = df$Close[37:48]
plot.ts(as.numeric(data), main='Crude Oil Price from 2018 to 2021', col='blue', lwd=3)
abline(v=36, col='green', lwd=2, lty=2)
lines(36:48,fit$fitted[36:48], lwd=3, col='red')
legend('bottomright', c('real data', 'fitted data'), col=c('blue','red'),lwd=4)

# predict
plot(forecast(fit, h=12))

# predict
predict(fit, n.ahead = 12)

## $pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2022 65.69273 64.06228 59.40232 56.50190 57.03688 56.72545 54.86079 57.36996
##           Sep      Oct      Nov      Dec
## 2022 56.23785 57.48890 57.92828 57.07171
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2022  7.077329  9.256034 10.533633 11.354337 11.903106 12.278106 12.537727
##            Aug       Sep       Oct       Nov       Dec
## 2022 12.718964 12.846177 12.935799 12.999096 13.043878

follow me for more on

RPubs
Telegram
1. Channel
2. Q & A
YouTube
Website
Aparat

Crude Oil Price Modelling using Time Series Analysis

Afshin Motavali

April, 2022

follow me for more on