f.1mo <- read_excel("C:/Users/yujun/Downloads/futures_series.xlsx", sheet = "Sheet1")
f.2mo <- read_excel("C:/Users/yujun/Downloads/futures_series.xlsx", sheet = "2mon")
f.3mo <- read_excel("C:/Users/yujun/Downloads/futures_series.xlsx", sheet = "3mon")
f.4mo <- read_excel("C:/Users/yujun/Downloads/futures_series.xlsx", sheet = "4mon")
f.5mo <- read_excel("C:/Users/yujun/Downloads/futures_series.xlsx", sheet = "5mon")
f.6mo <- read_excel("C:/Users/yujun/Downloads/futures_series.xlsx", sheet = "6mon")
urlfile = "https://raw.githubusercontent.com/razin11/IAQF-case-competition/main/Data%20processing/Formatted%20data%20files/crude_gas_data.csv?token=ARWWGURNJSS23UVZ2ZMW5BTAF76NK"
g.data <- read.csv(url(urlfile))
Ratio: \[\text{Ratio} = \frac{Gas - Forward \ Price}{Forward\ (1 \ lag)}\]
#turn data to weekly
f.1mo$date = as.Date(f.1mo$date)
f.1mo.xts = xts(f.1mo$price,order.by = f.1mo$date)
startpoints <- function (x, on = "weeks", k = 1) {
head(endpoints(x, on, k) + 1, -1)
}
index = startpoints(f.1mo.xts,on="weeks",k=1)
#weekly crude price
f.1mo.wts =period.apply(f.1mo.xts,index,mean)
f.1mo.wts = f.1mo.wts["1993-04-12::2021-01-08"]
f.1mo.w = fortify(f.1mo.wts)
f.1mo.w$index = rep(1:nrow(f.1mo.w))
colnames(f.1mo.w) = c("Date","Weekly_Price","Index")
g.data$Index = rep(1:nrow(g.data))
#to merge data change the date
#f.1mo.w$Date = as.Date(f.1mo.w$Date)-1
#$Date = as.Date(g.data$Date)
#merge data
merge.1mo = left_join(g.data,f.1mo.w,by="Index",copy=T)
merge.1mo = merge.1mo[complete.cases(merge.1mo),]
merge.1mo = merge.1mo %>% select(Date.y,gasoline_retail_price,Weekly_Price)
colnames(merge.1mo) = c("Date","retail_price","weekly_future_price")
lf.1mo = back(merge.1mo$weekly_future_price)
change.1mo = merge.1mo$retail_price-merge.1mo$weekly_future_price
ratio.1mo = change.1mo/lf.1mo
ratio.1mo = ratio.1mo[-1]
adf.test(ratio.1mo)
## Warning in adf.test(ratio.1mo): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ratio.1mo
## Dickey-Fuller = -10.026, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
acf(ratio.1mo,lag=12)
pacf(ratio.1mo,lag=12)
By ADF test, the ratio is stationary for 1month. * ACF plot: considering MA(4) * PACF plot : considering AR(4) Now, select order for ratio
eacf(ratio.1mo)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x o o o x o o o o x o
## 1 x o o x o o o x o o o o x o
## 2 x x o x o o o x o o o o x o
## 3 x x x x o o o x o o o o o o
## 4 o x x x o o x o o o o o o o
## 5 x o x x x o x o o o o o o o
## 6 x x x x o o o o o o o o o o
## 7 x x x x x o x o o o o o o o
bic = armasubsets(ratio.1mo,nar = 8,nma=12)
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 4 linear dependencies found
## Reordering variables and trying again:
plot(bic)
Now we try to do
ar(ratio.1mo,method = "mle")
##
## Call:
## ar(x = ratio.1mo, method = "mle")
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.1467 0.0641 0.0515 0.0955 -0.0009 -0.0210 -0.0480 0.0595
##
## Order selected 8 sigma^2 estimated as 0.002267
Let’s try fit a AR(4) model
arima(ratio.1mo,order=c(4,0,0),include.mean = T)
##
## Call:
## arima(x = ratio.1mo, order = c(4, 0, 0), include.mean = T)
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## 0.1450 0.0606 0.0463 0.0978 -0.9523
## s.e. 0.0261 0.0264 0.0264 0.0261 0.0019
##
## sigma^2 estimated as 0.002279: log likelihood = 2348.42, aic = -4686.84
ar3 not significant. Drop! limit coef3 = 0
ar4=arima(ratio.1mo,order=c(4,0,0),fixed = c(NA,NA,0,NA,NA),include.mean = T)
## Warning in stats::arima(x = x, order = order, seasonal = seasonal, xreg =
## xreg, : some AR parameters were fixed: setting transform.pars = FALSE
AIC good! Take this!
arma1.1 = arima(ratio.1mo,order=c(1,0,1),include.mean = T)
arma1.1
##
## Call:
## arima(x = ratio.1mo, order = c(1, 0, 1), include.mean = T)
##
## Coefficients:
## ar1 ma1 intercept
## 0.7574 -0.6215 -0.9523
## s.e. 0.0699 0.0842 0.0020
##
## sigma^2 estimated as 0.002292: log likelihood = 2344.3, aic = -4682.6
Let’s test unit root
abs(polyroot(c(1, -coefficients(arma1.1))))
## [1] 0.8156992 0.8156992 1.5781416
Unit roos are all outside the unit-circle.
tsdiag(arma1.1, gof=10)
Second graph, white noice checked. Third graph all are significant.
As we see in ACF, try ARMA(4,4)
auto = auto.arima(ratio.1mo)
auto
## Series: ratio.1mo
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.1354 -0.9885
## s.e. 0.0264 0.0040
##
## sigma^2 estimated as 0.002287: log likelihood=2343.67
## AIC=-4681.35 AICc=-4681.33 BIC=-4665.52
tsdiag(auto,gof=20)
arima.tstat <- function(mod) coef(mod)[coef(mod) != 0] / sqrt(diag(mod$var.coef))
round(sort(abs(arima.tstat(auto))), 2)
## ar1 ma1
## 5.13 248.28
By t.stat, all significant
nmin = ceiling(.7*length(ratio.1mo))+1
nmax = length(ratio.1mo)
fcst <- matrix(NA_real_, nmax, 3)
fixed <- rep(NA_real_, 10)
fixed[c(4,6,7)] <- 0
for (t1 in nmin : nmax) {
n= t1-1
m1 = arima(ratio.1mo[1:n],order = c(1,0,1),include.mean = T,transform.pars=FALSE)
m2 = arima(ratio.1mo[1:n],order= c(1,1,1),include.mean = T,transform.pars=FALSE)
m3 = arima(ratio.1mo[1:n],order= c(4,0,0),include.mean = T,fixed = c(NA,NA,0,NA,NA),transform.pars = FALSE)
fcst[t1,1] = c(predict(m1, n.ahead=1, newxreg=NULL, se.fit=FALSE))
fcst[t1,2] = c(predict(m2,n.ahead = 1, newxreg=NULL, se.fit=FALSE))
fcst[t1,3] = c(predict(m3,n.ahead = 1, newxreg=NULL, se.fit=FALSE))
}
rmsfe = rep(0.0,3)
mafe = rep(0.0,3)
for (j in 1:3) {
rmsfe[j]= sqrt(mean((fcst[nmin:nmax,j]-ratio.1mo[nmin:nmax])^2))
mafe[j] = mean(abs(fcst[nmin:nmax,j]-ratio.1mo[nmin:nmax]))
}
rmsfe
## [1] 0.06682364 0.06585047 0.06531633
mafe
## [1] 0.03251506 0.03235961 0.03186955
tab1 = data.frame("model" = c("ARMA(1,1)","ARIMA(1,1)","AR(4)"),
RMSFE = round(rmsfe,5),
MAFE = round(mafe,5))
knitr::kable(tab1)
| model | RMSFE | MAFE |
|---|---|---|
| ARMA(1,1) | 0.06682 | 0.03252 |
| ARIMA(1,1) | 0.06585 | 0.03236 |
| AR(4) | 0.06532 | 0.03187 |
#xts.1mo = xts(cbind(ratio.1mo[nmin:nmax],fcst[nmin:nmax,1]),
#index(f.1mo.xts)[-(1:2)][nmin:nmax])
#plot(xts.1mo,type="l",col=c("grey","red"))
par(mfrow = c(2,2))
plot(ratio.1mo[nmin:nmax],type="l",col="black")
lines(fcst[nmin:nmax,1],col="red")
plot(ratio.1mo[nmin:nmax],type="l",col="black")
lines(fcst[nmin:nmax,2],col="blue")
plot(ratio.1mo[nmin:nmax],type="l",col="black")
lines(fcst[nmin:nmax,3],col="green")
Maybe remove COVID time? ### Without Covid time {.tabset .tabset-fade .tabset-pills}
merge.remove.covid = merge.1mo[1:1403,]
lf.1mof = back(merge.remove.covid$weekly_future_price)
change.1mof = merge.remove.covid$retail_price-merge.remove.covid$weekly_future_price
ratiof.1mo = change.1mof/lf.1mof
ratiof.1mo = ratiof.1mo[-1]
adf.test(ratiof.1mo)
## Warning in adf.test(ratiof.1mo): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ratiof.1mo
## Dickey-Fuller = -8.5615, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
acf(ratiof.1mo,lag=12)
pacf(ratiof.1mo,lag=12)
* PACF:AR(3) or AR(8)
ar(ratiof.1mo,method = "mle")
##
## Call:
## ar(x = ratiof.1mo, method = "mle")
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.2796 -0.0615 0.1227 0.0108 0.0549 -0.0204 -0.0275 0.1552
##
## Order selected 8 sigma^2 estimated as 0.001401
auto.f = auto.arima(ratiof.1mo)
auto.f
## Series: ratiof.1mo
## ARIMA(1,1,2)
##
## Coefficients:
## ar1 ma1 ma2
## -0.2348 -0.5005 -0.4715
## s.e. 0.0981 0.0894 0.0855
##
## sigma^2 estimated as 0.001428: log likelihood=2601.51
## AIC=-5195.02 AICc=-5194.99 BIC=-5174.04
eacf(ratiof.1mo)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o o x x x o x x o
## 1 x x x o x o o x o o o o x o
## 2 x x o o o o o x o o o o x o
## 3 x x x x o o o x o o o o o o
## 4 x x x x o o o x x x o o o o
## 5 x x x x o x o x x o o o o o
## 6 x o x o x o o x o o o o o o
## 7 x x x x x x x x o o o o o o
bicf = armasubsets(ratiof.1mo,nar=8,nma=13)
plot(bicf)
* auto-arima suggest ARIMA(0,1,2) * eacf suggest ARMA(2,8) * BIC: AR(1),AR(3),AR(8) * AIC: AR(8) Let’s do it!
ar1 = arima(ratiof.1mo,order = c(1,0,0))
ar1
##
## Call:
## arima(x = ratiof.1mo, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.2791 -0.9524
## s.e. 0.0256 0.0014
##
## sigma^2 estimated as 0.001475: log likelihood = 2580.72, aic = -5157.43
ar3 = arima(ratiof.1mo,order = c(3,0,0),fixed = c(NA,0,NA,NA),transform.pars = FALSE)
ar3
##
## Call:
## arima(x = ratiof.1mo, order = c(3, 0, 0), transform.pars = FALSE, fixed = c(NA,
## 0, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.2717 0 0.1224 -0.9524
## s.e. 0.0255 0 0.0255 0.0017
##
## sigma^2 estimated as 0.001451: log likelihood = 2592.16, aic = -5178.32
#ar8 = arima(ratiof.1mo,order = c(8,0,0))
#ar8
#arma2.8 = arima(ratiof.1mo,order = c(2,0,8))
#arma2.8
ar(8),ARMA(2,8) both has too many insignificant coef, bad. Take ar3!
nmin = ceiling(.7*length(ratiof.1mo))+1
nmax = length(ratiof.1mo)
fcst <- matrix(NA_real_, nmax, 3)
fixed <- rep(NA_real_, 10)
fixed[c(4,6,7)] <- 0
for (t1 in nmin : nmax) {
n= t1-1
m1 = arima(ratio.1mo[1:n],order = c(1,0,0),include.mean = T,transform.pars=FALSE)
m2 = arima(ratio.1mo[1:n],order = c(3,0,0),fixed = c(NA,0,NA,NA),transform.pars = FALSE)
m3 = arima(ratio.1mo[1:n],order= c(1,1,2),include.mean = T,transform.pars=FALSE)
fcst[t1,1] = c(predict(m1, n.ahead=1, newxreg=NULL, se.fit=FALSE))
fcst[t1,2] = c(predict(m2,n.ahead = 1, newxreg=NULL, se.fit=FALSE))
fcst[t1,3] = c(predict(m3,n.ahead = 1, newxreg=NULL, se.fit=FALSE))
}
rmsfe = rep(0.0,3)
mafe = rep(0.0,3)
for (j in 1:3) {
rmsfe[j]= sqrt(mean((fcst[nmin:nmax,j]-ratio.1mo[nmin:nmax])^2))
mafe[j] = mean(abs(fcst[nmin:nmax,j]-ratio.1mo[nmin:nmax]))
}
rmsfe
## [1] 0.03357622 0.03365368 0.03377164
mafe
## [1] 0.02559810 0.02557187 0.02601874
tab2 = data.frame("model" = c("AR(1)","AR(3)","ARIMA(1,1,2)"),
RMSFE = round(rmsfe,5),
MAFE = round(mafe,5))
knitr::kable(tab2)
| model | RMSFE | MAFE |
|---|---|---|
| AR(1) | 0.03358 | 0.02560 |
| AR(3) | 0.03365 | 0.02557 |
| ARIMA(1,1,2) | 0.03377 | 0.02602 |
#xts.1mo = xts(cbind(ratio.1mo[nmin:nmax],fcst[nmin:nmax,1]),
#index(f.1mo.xts)[-(1:2)][nmin:nmax])
#plot(xts.1mo,type="l",col=c("grey","red"))
par(mfrow = c(2,2))
plot(ratio.1mo[nmin:nmax],type="l",col="black")
lines(fcst[nmin:nmax,1],col="red")
plot(ratio.1mo[nmin:nmax],type="l",col="black")
lines(fcst[nmin:nmax,2],col="blue")
plot(ratio.1mo[nmin:nmax],type="l",col="black")
lines(fcst[nmin:nmax,3],col="yellow")
good! Error drops but not a good estimation method Let’s test Other Month