# link of the data
# https://finance.yahoo.com/quote/CL%3DF/history?period1=1577836800&period2=1638316800&interval=1mo&filter=history&frequency=1mo&includeAdjustedClose=true
library(readxl)
df = read_excel('dataset/Crude Oil.xlsx')
data = df$Close
data = ts(data, start = c(2018,1), frequency = 12)
plot.ts(data, main='Crude Oil Price from 2018 to 2021', col='blue', lwd=3)

acf(as.numeric(data), main='ACF of Crude Oil Price', col='blue', lwd=6)

pacf(as.numeric(data), main='PACF of Crude Oil Price', col='blue', lwd=6)
summary(data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.84 50.33 58.87 57.13 66.50 83.57
# autocorrelation test
Box.test(data,lag = 12, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: data
## X-squared = 111.45, df = 12, p-value < 2.2e-16
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo

kpss.test(data, null="Trend")
##
## KPSS Test for Trend Stationarity
##
## data: data
## KPSS Trend = 0.24002, Truncation lag parameter = 3, p-value = 0.01
adf.test(data)
##
## Augmented Dickey-Fuller Test
##
## data: data
## Dickey-Fuller = -2.0416, Lag order = 3, p-value = 0.5575
## alternative hypothesis: stationary
# best arima model
f1 = arima(data,order = c(1,0,0))
f2 = arima(data,order = c(1,0,1))
f3 = arima(data,order = c(2,0,1))
f4 = arima(data,order = c(0,0,1))
f5 = arima(data,order = c(0,0,2))
f6 = arima(data,order = c(1,0,0), seasonal = c(0,0,1))
AIC(f1,f2,f3,f4,f5,f6)
## df AIC
## f1 3 332.1922
## f2 4 333.7803
## f3 5 335.1876
## f4 3 356.7054
## f5 4 348.2848
## f6 4 331.2786
library(forecast)
fit = auto.arima(data)
fit
## Series: data
## ARIMA(1,0,0)(0,0,1)[12] with non-zero mean
##
## Coefficients:
## ar1 sma1 mean
## 0.8429 -0.3008 57.9231
## s.e. 0.0781 0.1744 4.5427
##
## sigma^2 estimated as 50.09: log likelihood=-161.64
## AIC=331.28 AICc=332.21 BIC=338.76
# residual of model
Box.test(fit$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fit$residuals
## X-squared = 0.019229, df = 1, p-value = 0.8897
qqnorm(fit$residuals); qqline(fit$residuals)

shapiro.test(fit$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit$residuals
## W = 0.93578, p-value = 0.0112
# forecast
training = df$Close[1:36]
testing = df$Close[37:48]
plot.ts(as.numeric(data), main='Crude Oil Price from 2018 to 2021', col='blue', lwd=3)
abline(v=36, col='green', lwd=2, lty=2)
lines(36:48,fit$fitted[36:48], lwd=3, col='red')
legend('bottomright', c('real data', 'fitted data'), col=c('blue','red'),lwd=4)

# predict
plot(forecast(fit, h=12))

# predict
predict(fit, n.ahead = 12)
## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 2022 65.69273 64.06228 59.40232 56.50190 57.03688 56.72545 54.86079 57.36996
## Sep Oct Nov Dec
## 2022 56.23785 57.48890 57.92828 57.07171
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2022 7.077329 9.256034 10.533633 11.354337 11.903106 12.278106 12.537727
## Aug Sep Oct Nov Dec
## 2022 12.718964 12.846177 12.935799 12.999096 13.043878