Predicting Oil Prices: Time Series & Generalized Additive Models for Location, Scale and Shape

library(gamlss)
library(gamlss.data)
library(glmnet)
library(xts)
library(zoo)
library(dplyr)
library(forecast)
library(tseries)
library(corrplot)
library(ggplot2)
library(caret)

Dataset

#setwd
# Load dataset
oil <- gamlss.data::oil
attach(oil)

# help(oil) shows information about the data; data dictionary

# Take a look
# head(oil) # all the data seems to be at same scale

# Check size
dim(oil) # 25 variables and 1000 observations

## [1] 1000   25

# Check NA values
sum(is.na(oil)) # No NA values

## [1] 0

# Var types
str(oil) # all vars are numeric

## 'data.frame':    1000 obs. of  25 variables:
##  $ OILPRICE  : num  4.64 4.63 4.63 4.65 4.63 ...
##  $ CL2_log   : num  4.64 4.65 4.64 4.64 4.65 ...
##  $ CL3_log   : num  4.64 4.65 4.64 4.64 4.65 ...
##  $ CL4_log   : num  4.64 4.65 4.65 4.65 4.66 ...
##  $ CL5_log   : num  4.65 4.66 4.65 4.65 4.66 ...
##  $ CL6_log   : num  4.65 4.66 4.65 4.65 4.66 ...
##  $ CL7_log   : num  4.65 4.66 4.65 4.65 4.66 ...
##  $ CL8_log   : num  4.65 4.66 4.65 4.65 4.66 ...
##  $ CL9_log   : num  4.65 4.66 4.66 4.65 4.66 ...
##  $ CL10_log  : num  4.65 4.66 4.66 4.65 4.66 ...
##  $ CL11_log  : num  4.65 4.66 4.65 4.65 4.66 ...
##  $ CL12_log  : num  4.65 4.66 4.65 4.65 4.66 ...
##  $ CL13_log  : num  4.65 4.65 4.65 4.64 4.65 ...
##  $ CL14_log  : num  4.64 4.65 4.65 4.64 4.65 ...
##  $ CL15_log  : num  4.64 4.65 4.64 4.64 4.65 ...
##  $ BDIY_log  : num  6.85 6.85 6.88 6.88 6.9 ...
##  $ SPX_log   : num  7.22 7.24 7.22 7.22 7.24 ...
##  $ DX1_log   : num  4.39 4.38 4.39 4.38 4.38 ...
##  $ GC1_log   : num  7.41 7.42 7.42 7.41 7.4 ...
##  $ HO1_log   : num  1.14 1.15 1.16 1.14 1.14 ...
##  $ USCI_log  : num  4.11 4.12 4.12 4.1 4.11 ...
##  $ GNR_log   : num  3.92 3.94 3.92 3.93 3.94 ...
##  $ SHCOMP_log: num  7.74 7.76 7.77 7.77 7.76 ...
##  $ FTSE_log  : num  8.64 8.65 8.64 8.64 8.66 ...
##  $ respLAG   : num  4.63 4.64 4.63 4.63 4.65 ...

# Take a look on variables distribution
summary(oil) # vars have median and mean close, probably a simetric distribution

##     OILPRICE        CL2_log         CL3_log         CL4_log     
##  Min.   :3.266   Min.   :3.345   Min.   :3.391   Min.   :3.428  
##  1st Qu.:3.966   1st Qu.:3.982   1st Qu.:4.001   1st Qu.:4.017  
##  Median :4.517   Median :4.519   Median :4.519   Median :4.518  
##  Mean   :4.309   Mean   :4.317   Mean   :4.322   Mean   :4.326  
##  3rd Qu.:4.580   3rd Qu.:4.581   3rd Qu.:4.581   3rd Qu.:4.579  
##  Max.   :4.705   Max.   :4.696   Max.   :4.682   Max.   :4.672  
##     CL5_log         CL6_log         CL7_log         CL8_log     
##  Min.   :3.482   Min.   :3.501   Min.   :3.516   Min.   :3.529  
##  1st Qu.:4.032   1st Qu.:4.048   1st Qu.:4.058   1st Qu.:4.068  
##  Median :4.519   Median :4.518   Median :4.517   Median :4.514  
##  Mean   :4.329   Mean   :4.331   Mean   :4.332   Mean   :4.333  
##  3rd Qu.:4.575   3rd Qu.:4.570   3rd Qu.:4.563   3rd Qu.:4.556  
##  Max.   :4.672   Max.   :4.673   Max.   :4.673   Max.   :4.673  
##     CL9_log         CL10_log        CL11_log        CL12_log    
##  Min.   :3.542   Min.   :3.555   Min.   :3.566   Min.   :3.576  
##  1st Qu.:4.075   1st Qu.:4.082   1st Qu.:4.090   1st Qu.:4.097  
##  Median :4.512   Median :4.509   Median :4.506   Median :4.503  
##  Mean   :4.333   Mean   :4.337   Mean   :4.333   Mean   :4.332  
##  3rd Qu.:4.550   3rd Qu.:4.546   3rd Qu.:4.542   3rd Qu.:4.537  
##  Max.   :4.672   Max.   :4.670   Max.   :4.667   Max.   :4.663  
##     CL13_log        CL14_log        CL15_log        BDIY_log    
##  Min.   :3.585   Min.   :3.594   Min.   :3.603   Min.   :5.670  
##  1st Qu.:4.103   1st Qu.:4.110   1st Qu.:4.117   1st Qu.:6.596  
##  Median :4.500   Median :4.497   Median :4.493   Median :6.806  
##  Mean   :4.332   Mean   :4.331   Mean   :4.331   Mean   :6.787  
##  3rd Qu.:4.532   3rd Qu.:4.527   3rd Qu.:4.522   3rd Qu.:7.011  
##  Max.   :4.658   Max.   :4.654   Max.   :4.649   Max.   :7.757  
##     SPX_log         DX1_log         GC1_log         HO1_log       
##  Min.   :7.153   Min.   :4.369   Min.   :6.956   Min.   :-0.1442  
##  1st Qu.:7.354   1st Qu.:4.391   1st Qu.:7.089   1st Qu.: 0.6220  
##  Median :7.531   Median :4.417   Median :7.159   Median : 1.0547  
##  Mean   :7.481   Mean   :4.459   Mean   :7.192   Mean   : 0.8600  
##  3rd Qu.:7.611   3rd Qu.:4.557   3rd Qu.:7.345   3rd Qu.: 1.1013  
##  Max.   :7.664   Max.   :4.613   Max.   :7.491   Max.   : 1.1877  
##     USCI_log        GNR_log        SHCOMP_log       FTSE_log    
##  Min.   :3.650   Min.   :3.317   Min.   :7.576   Min.   :8.568  
##  1st Qu.:3.838   1st Qu.:3.787   1st Qu.:7.652   1st Qu.:8.716  
##  Median :4.021   Median :3.868   Median :7.734   Median :8.778  
##  Mean   :3.962   Mean   :3.818   Mean   :7.840   Mean   :8.760  
##  3rd Qu.:4.070   3rd Qu.:3.920   3rd Qu.:8.032   3rd Qu.:8.813  
##  Max.   :4.148   Max.   :3.985   Max.   :8.550   Max.   :8.868  
##     respLAG     
##  Min.   :3.266  
##  1st Qu.:3.966  
##  Median :4.517  
##  Mean   :4.310  
##  3rd Qu.:4.580  
##  Max.   :4.705

Data Transformation Framework

Transform Functions

First, it is defined some functions to perform transformations on the data.

# Transformation functions with tag and timestamp

## a. Rolling standard deviation (of arbitrary window)
### input: dataset, variable name (it can be a vector of names of several vars) and window size
roll_sd <- function(data, varnames, window){
  fun <- "roll_sd"              ## function tag
  x <- data.frame(1:nrow(data)) ## create an empty df
  for(i in 1:length(varnames)){ ## calculate for one var at once
    a <- data[,varnames[i]]     ## collect data for the var
    calc <- rollapply(a, window, sd) ## apply function
    calc <- as.data.frame(c((rep(NA, window-1)), ## NA fill - keep vec size
                            calc)) 
    colnames(calc) <- paste0(varnames[i], "_rollsd") ## create new var transformed name
    x <- cbind(x, calc)               ## attach it
    timestamp <- format(Sys.time(),   ## create a timestamp
                        "%m %d, %Y %H:%M") 
    track <- t(c(varnames[i], fun, window, timestamp)) ## create a track vec with metadata                                                           info
    write.table(track, file = "trackmetadata.csv", sep = "\t",
                append = TRUE, col.names = FALSE, row.names = FALSE) ## write metainfo in                                                                         a file
  }
  x[,1] <- NULL ## take off x index
  return(x)
}

## b. Rolling mean (of arbitrary window)
### input: dataset, variable name (it can be a vector of names of several vars) and window size
roll_mean <- function(data, varnames, window){
  fun <- "roll_mean" ## tag
  x <- data.frame(1:nrow(data)) ## empty df
  for(i in 1:length(varnames)){ ## for one var at once
    a <- data[,varnames[i]]     ## collect data
    calc <- rollapply(a, window, mean) ## apply transformation
    calc <- as.data.frame(c((rep(NA, window-1)), calc)) ## set vec size
    colnames(calc) <- paste0(varnames[i], "_rollmean") ## create name of new transf var
    x <- cbind(x, calc) ## attach it
    timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## get timestamp
    track <- t(c(varnames[i], fun, window, timestamp)) ## get track info
    write.table(track, file = "trackmetadata.csv", sep = "\t",
                append = TRUE, col.names = FALSE, row.names = FALSE) ## add it in the file
  }
  x[,1] <- NULL
  return(x)
}

## c. Lagging (of arbitrary order)
### input: dataset name (dataframe), varnames (string names) (it can be a vector), lag and dif values (pattern is 1)
lagging <- function(data, varnames, lag=1, dif=1){
  fun <- "lag" ## function tag
  x <- data.frame(1:nrow(data)) ## empty df
  for(i in 1:length(varnames)){ ## for each variable
    a <- data[,varnames[i]] ## collect var data
    calc <- as.data.frame(lag(a, lag=lag, dif=dif)) ## apply transformation
    colnames(calc) <- paste0(varnames[i], "_lag") ## create new var name
    x <- cbind(x, calc) ## attach it
    timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## get timestamp
    track <- t(c(varnames[i], fun, paste0(lag,"_","dif"), timestamp)) ## get track info
    write.table(track, file = "trackmetadata.csv", sep = "\t",
                append = TRUE, col.names = FALSE, row.names = FALSE) ## add it in the file
  }
  x[,1] <- NULL ## remove x indexes
  return(x)
}

## d. Leading (of arbitrary order)

leading <- function(data, varnames, k=1, dif=1){
  fun <- "lead" ## function tag
  x <- data.frame(1:nrow(data)) ## empty df
  for(i in 1:length(varnames)){ ## for var
    a <- data[,varnames[i]] ## collect var data
    calc <- dplyr::lead(a, k=k, dif=dif) ## apply transform
    calc <- as.data.frame(calc)
    colnames(calc) <- paste0(varnames[i], "_lead") ## put new var name
    x <- cbind(x, calc) ## attach it
    timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## get timestamp
    track <- t(c(varnames[i], fun, paste0(k,"_",dif), timestamp)) ## get tracker
    write.table(track, file = "trackmetadata.csv", sep = "\t",
                append = TRUE, col.names = FALSE, row.names = FALSE) ## add it in the file
  }
  x[,1] <- NULL
  return(x)
}

## e. Differencing

diffts <- function(data, varnames, lag=1, dif=1){
  fun <- "diff" ## function tag
  x <- data.frame(1:nrow(data)) ## empty df
  for(i in 1:length(varnames)){ ## for each var
    a <- data[,varnames[i]] ## het var data
    calc <- diff(a, lag=lag, dif=dif) ## apply transformation
    calc <- as.data.frame(c((rep(NA, lag)), calc))
    calc <- apply(calc, 2, as.numeric)
    colnames(calc) <- paste0(varnames[i], "_diff") # create new varname
    x <- cbind(x, calc) ## attach it
    timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## get timestamp
    track <- t(c(varnames[i], fun, window, timestamp)) ## get tracker
    write.table(track, file = "trackmetadata.csv", sep = "\t",
                append = TRUE, col.names = FALSE, row.names = FALSE) ## add it in the file
  }
  x[,1] <- NULL
  return(x)
}

## f. Spread (between two input drivers)

spread <- function(data, var1, var2){
  fun <- "spread" ## function tag
  x <- as.data.frame(data[,var1]-data[,var2]) ## calculate spread
  colnames(x) <- paste0(var1, ".", var2, "_spr") ## new var name
  timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## get timestamp
  track <- t(c(paste0(var1, "_", var2), fun, 0, timestamp)) ## get tracker
  write.table(track, file = "trackmetadata.csv", sep = "\t",
              append = TRUE, col.names = FALSE, row.names = FALSE) ## add tracker in the file
  return(x)
}

## g. Ratio (between two input drivers)

ratio <- function(data, var1, var2){
  fun <- "ratio" ## function tag
  x <- as.data.frame(data[,var1]/data[,var2]) ## get ratio values
  colnames(x) <- paste0(var1, ".", var2, "_rt") ## new var name
  timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## timestamp
  track <- t(c(paste0(var1, "_", var2), fun, 0, timestamp)) ## get tracker
  write.table(track, file = "trackmetadata.csv", sep = "\t",
              append = TRUE, col.names = FALSE, row.names = FALSE) ## add tracker in the file
  return(x)
}


## h. Product (between two input drivers)

prod <- function(data, var1, var2){
  fun <- "prod" ## function tag
  x <- as.data.frame(data[,var1]*data[,var2]) ## get the product values
  colnames(x) <- paste0(var1, ".", var2, "_prod") ## new var name
  timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## timestamp
  track <- t(c(paste0(var1, "_", var2), fun, 0, timestamp)) ## get tracker
  write.table(track, file = "trackmetadata.csv", sep = "\t",
              append = TRUE, col.names = FALSE, row.names = FALSE) ## add tracker in the file
  return(x)
}

## Between diff
betdiff <- function(data, var1, var2){
  fun <- "betdiff" ## fun tag
  x <- as.data.frame(data[,var1]-data[,var2]) ## get diff between 2 vars
  x <- x*1 ## just a transformation to format var type
  colnames(x) <- paste0(var1, ".", var2, "_betdiff") ## new var name
  timestamp <- format(Sys.time(), "%m %d, %Y %H:%M") ## time stamp
  track <- t(c(paste0(var1, "_", var2), fun, 0, timestamp)) ## get tracker
  write.table(track, file = "trackmetadata.csv", sep = "\t",
              append = TRUE, col.names = FALSE, row.names = FALSE) ## add tracker to the file
  return(x)
}

User can specify the transformation by choosing the functions and inputing the name of the variables. To bind transformation to the dataset use:

oil[,ncol(oil)+1] <-  betdiff(oil, "OILPRICE", "CL2_log")

The variables are created following a pattern in their names: VarName_TagFunctionApplied or if the function is a measure between 2 variables the pattern is VarName1.Varname2_TagFunctionApplied. User can have a composition of different functions by using “%>%” like in dplyr package. Example:

head(betdiff(oil, "OILPRICE", "CL2_log") %>%
  roll_sd("OILPRICE.CL2_log_betdiff", 2)) # Note that here it is used the name resulted by                                             previous transformation, following the pattern.

##   OILPRICE.CL2_log_betdiff_rollsd
## 1                              NA
## 2                     0.011825057
## 3                     0.006005468
## 4                     0.008329129
## 5                     0.019094196
## 6                     0.007586508

It is also possible to apply several transformations on the original dataset at once using the function composition below. It receives a list containing the function that must be applied and their arguments. It can be applied only in the variables of the original dataset, so it is not possible concatenate transformations this way at once. Only sequential.

# define function composition
composition <- function(data, list_args){
  x <- data.frame(1:nrow(data))
  for(i in seq(1,length(list_args),2)){
      result <- do.call(list_args[[i]], list_args[[i+1]]) # apply functions chosen
      x <- cbind(x, result)
  }
  x[,1] <- NULL
  data <- cbind(data, x) # attach it to the data
  return(data)
}

Define a list of transformations and their parameters - infinite transformations can be set here. Example: Difference between OILPRICE and respLAG AND lagging of OILPRICE window 2, AND roll mean with window 2 for CL3_log, CL2_log, DX1_log AND leading window 5 to CL15_log etc… etc..

lista <- list(betdiff, list(oil, "OILPRICE", "respLAG"),
              lagging, list(oil, "OILPRICE", 2),
              roll_mean, list(oil, c("CL3_log", "CL2_log", "DX1_log"), 2),
              leading, list(oil, "CL15_log", 5))

# apply a composition
head(composition(oil, lista))

##   OILPRICE  CL2_log  CL3_log  CL4_log  CL5_log  CL6_log  CL7_log  CL8_log
## 1 4.640923 4.636475 4.641116 4.644968 4.648038 4.649761 4.650908 4.651863
## 2 4.633077 4.645352 4.649857 4.653484 4.656338 4.657858 4.658711 4.659564
## 3 4.634049 4.637831 4.642466 4.646312 4.649665 4.651672 4.653103 4.654341
## 4 4.646312 4.638315 4.642562 4.646120 4.648708 4.649952 4.650621 4.651099
## 5 4.631520 4.650526 4.654722 4.658047 4.660321 4.661267 4.661740 4.662117
## 6 4.627616 4.635893 4.640344 4.644199 4.646984 4.648421 4.649378 4.650335
##    CL9_log CL10_log CL11_log CL12_log CL13_log CL14_log CL15_log BDIY_log
## 1 4.652340 4.651672 4.650621 4.648613 4.646120 4.643236 4.639765 6.850126
## 2 4.660037 4.659374 4.657952 4.655578 4.652531 4.649187 4.645256 6.850126
## 3 4.655293 4.655007 4.653865 4.651672 4.648804 4.645736 4.642081 6.879356
## 4 4.651577 4.651004 4.649570 4.647080 4.644102 4.640923 4.636960 6.882437
## 5 4.662401 4.661645 4.659942 4.656908 4.653484 4.649761 4.645544 6.896694
## 6 4.651099 4.650813 4.649570 4.646984 4.643910 4.640537 4.636475 6.913737
##    SPX_log  DX1_log  GC1_log  HO1_log USCI_log  GNR_log SHCOMP_log FTSE_log
## 1 7.221624 4.386554 7.413367 1.136197 4.108412 3.917806   7.744539 8.636699
## 2 7.235309 4.379762 7.419680 1.152564 4.120986 3.942552   7.762536 8.650062
## 3 7.222756 4.387449 7.418481 1.155182 4.115127 3.923952   7.766061 8.639729
## 4 7.222252 4.383675 7.410347 1.136743 4.103965 3.925531   7.765158 8.642292
## 5 7.237620 4.382364 7.399704 1.139946 4.107096 3.941970   7.755763 8.659907
## 6 7.233556 4.382951 7.404888 1.137256 4.097672 3.936325   7.775213 8.656137
##    respLAG OILPRICE.CL2_log_betdiff OILPRICE.respLAG_betdiff OILPRICE_lag
## 1 4.631812              0.004448320             0.0091112388           NA
## 2 4.640923             -0.012274836            -0.0078462165     4.640923
## 3 4.633077             -0.003781823             0.0009720063     4.633077
## 4 4.634049              0.007997345             0.0122629838     4.634049
## 5 4.646312             -0.019005926            -0.0147921680     4.646312
## 6 4.631520             -0.008276984            -0.0039035865     4.631520
##   CL3_log_rollmean CL2_log_rollmean DX1_log_rollmean CL15_log_lead
## 1               NA               NA               NA      4.645256
## 2         4.645487         4.640914         4.383158      4.642081
## 3         4.646161         4.641591         4.383605      4.636960
## 4         4.642514         4.638073         4.385562      4.645544
## 5         4.648642         4.644420         4.383020      4.636475
## 6         4.647533         4.643210         4.382658      4.634341

All functions writes a track information in a metadata file, including: Name of the variable, function applied to get that result and a timestamp. I preferred to write it in a file instead of an object because this information will not be accessed often, so putting it in a file, you save computer memory. Also you can access it in the future. It can be loaded in R and filtered as needed.

Getting Stats

This function get statistics of the variables and returns it here. The information can be stored in a object, but it also writes them in a file.

# Getting stats values of the tests: Stationarity (KPSS & DF); Normality (Shapiro-Wilk);
# Pearson Correlation with target variable
# input: dataset, name of the variables to be tested, name of the target variable
getStats <- function(data, varnames, target){
  statsTest <- structure(list(variable = character(),
                         test = character(),
                         value = numeric()),
                        class = "data.frame")
  for(i in 1:length(varnames)){
    dat <- replace(x = data[,varnames[i]], list = is.na(data[,varnames[i]]),
                  values = 0)
    # Kwiatkowski-Phillips-Schmidt-Shin - Stacionarity
    kpss <- suppressWarnings(kpss.test(dat)[[3]])
    statsTest <- rbind(statsTest, c(varnames[i], "kpss", kpss))
    # Dickey Fuller - Stacionarity
    dickeyfuller <- suppressWarnings(adf.test(dat))[[4]]
    statsTest <- rbind(statsTest, c(varnames[i], "dickeyfuller",
                                    dickeyfuller))
    # Shapiro-Wilk Test - Normality
    shapiro <- suppressWarnings(shapiro.test(dat))[[2]]
    statsTest <- rbind(statsTest, c(varnames[i], "shapiro", shapiro))
    # Pearson Correlation with target variable
    targetcor <- cor(data[,target], dat)
    statsTest <- rbind(statsTest, c(varnames[i], "targetcor",
                                    targetcor)) ## bind to the                                                                               object
  }
  colnames(statsTest) <- c("var", "test", "value")
  write.table(statsTest, "statsmetadata.txt", sep = "\t",
              col.names = FALSE, append = TRUE)  ## write stats in a file
  return(statsTest)
}

# apply a transformation on data
#oil[,ncol(oil)+1] <-  betdiff(oil, "OILPRICE", "CL2_log")

# get stats for variables
getStats(oil, c("OILPRICE.CL2_log_betdiff", "CL2_log", "CL3_log"), "OILPRICE")

##                         var         test                value
## 1  OILPRICE.CL2_log_betdiff         kpss                 0.01
## 2  OILPRICE.CL2_log_betdiff dickeyfuller                 0.01
## 3  OILPRICE.CL2_log_betdiff      shapiro 4.84124736744713e-23
## 4  OILPRICE.CL2_log_betdiff    targetcor     0.52789534800614
## 5                   CL2_log         kpss                 0.01
## 6                   CL2_log dickeyfuller    0.769579302872897
## 7                   CL2_log      shapiro 2.56446659912172e-32
## 8                   CL2_log    targetcor    0.998080113823744
## 9                   CL3_log         kpss                 0.01
## 10                  CL3_log dickeyfuller    0.779965219302587
## 11                  CL3_log      shapiro 1.30062861931017e-32
## 12                  CL3_log    targetcor    0.997696727558502

Model

Lagging Variables in 1 day

Considering that predictions will be of one day ahead, all the indexes (“BDIY_log”, “SPX_log”, “DX1_log”, “GC1_log”, “HO1_log”, “USCI_log”, “GNR_log”, “SHCOMP_log”, “FTSE_log”) will be lagged in 1 day.

laglist <- list(lagging, list(oil, c("BDIY_log",
                                     "SPX_log", "DX1_log", "GC1_log",
                                     "HO1_log", "USCI_log", "GNR_log",
                                     "SHCOMP_log", "FTSE_log")))

oillag <- composition(oil, laglist)
oillag <- na.omit(oillag) # exclude NA values generated by the lag
oillag[,3:24] <- NULL # excluding CL log columns and indexes not lagged

rm(oil)
invisible(gc())

Exploratory Data Analysis - Time Series

Set oil as a time serie.

oilts <- ts(oillag, frequency = 365, start = c(2018, 1, 1)) # random date just to help

autoplot(oilts[,c("OILPRICE", "CL2_log", "BDIY_log_lag", "SPX_log_lag",
                  "DX1_log_lag", "GC1_log_lag", "HO1_log_lag",
                  "USCI_log_lag", "GNR_log_lag", "SHCOMP_log_lag",
                  "FTSE_log_lag", "respLAG")]) +
ggtitle("Plot of the Oil Time-Series") +
theme(plot.title = element_text(hjust = 0.5),
      axis.title.y=element_blank())

Oil price follows a trend similar to USCI_log, but with higher values and with BDIY_log but with lower values. It also show a trend like the opposite of SHCOMP_log. GNR_log and HO1_log also shows a light drop in the end, similar to Oil Price.

Correlation with Target

#dev.off()
cormat <- cor(oillag)

corrplot(cormat, method = "color",
         type = "lower",
         tl.col = "#424242",
         tl.srt = 45,
         addCoef.col = "#ffffff",
         col = colorRampPalette(c("#4D4D4D", "white", "#5288DB"))(200),
         tl.cex = 0.5,
         number.cex = 0.5,
         number.font = 1,
         cl.cex = 0.5)

This heatmap shows that really there is a correlation between Oil Price and USCI_log of 95%. And a negative correlation of 79% with SHCOMP_log. Also shows a correlation with HO1_log of 98% and with GNR_log of 91%. CL2_log is the lag with maximum correlation, 99.8% and for this reason, I choose it to continue in the model and I will exclude the other lags.

par(mfrow=c(3,3))

for(i in 4:length(oillag)) {
  plot(oillag$OILPRICE, oillag[,i], main = paste0("Oil Price x ", colnames(oillag)[i]),
       ylab=colnames(oillag)[i], xlab = "Oil Price")
}

The dispersion graphs shows a positive trend between Oil Price and BDIY_log, H01_log, USCI_log, GNR_log and negative between Oil Price and DX1_log and SHCOMP_log.

Cross Correlation

## CCF of Oil Price and Indexes
par(mfrow=c(3,3))

for(i in 4:length(oillag)) {
  y = oillag[,i]
  ccf(oillag[,"OILPRICE"], y, main = paste0("Cross Correlation Oil Price x ",
                                           colnames(oillag)[i]), 
      ylab=colnames(oillag)[i], xlab = "Oil Price")
}

Considering all these graphs, index FTSE_log does not show influence on Oil Price. It will not be used anymore.

Seasonality

ggseasonplot(oilts[, 'OILPRICE'])

Not possible to identify a clear seasonality with this graph.

Decompose

#autoplot(decompose(oilts[,"OILPRICE"], type = "additive")) + xlab("") + ylab("") + 
#  ggtitle("Additive Decomposition of Oil Price") + 
#  theme(plot.title = element_text(hjust = 0.5))
autoplot(decompose(oilts[,"OILPRICE"], type = "multiplicative")) + xlab("") + ylab("") + 
  ggtitle("Multiplicative Decomposition of Oil Price") + 
  theme(plot.title = element_text(hjust = 0.5))

#stlRes <- stl(oilts[, 'OILPRICE'], s.window = "periodic")

Oil Price shows a trend of drop and now it is possible identify that there is a seasonal component on it.

Stationarity

Getting the Dickey-Fuller test values to identify which of these series are stationary.

stats <- getStats(oillag, c("OILPRICE", "CL2_log", "BDIY_log_lag", "SPX_log_lag",
                  "DX1_log_lag", "GC1_log_lag", "HO1_log_lag",
                  "USCI_log_lag", "GNR_log_lag", "SHCOMP_log_lag", "respLAG"), "OILPRICE")
stats %>%
    filter(test == "dickeyfuller")

##               var         test             value
## 1        OILPRICE dickeyfuller 0.805707301519076
## 2         CL2_log dickeyfuller 0.773007700419777
## 3    BDIY_log_lag dickeyfuller 0.290132480678399
## 4     SPX_log_lag dickeyfuller 0.740638833971042
## 5     DX1_log_lag dickeyfuller 0.685923645955033
## 6     GC1_log_lag dickeyfuller 0.477682133093941
## 7     HO1_log_lag dickeyfuller 0.771040951988363
## 8    USCI_log_lag dickeyfuller 0.689118870548244
## 9     GNR_log_lag dickeyfuller 0.623559284946592
## 10 SHCOMP_log_lag dickeyfuller 0.567639326687163
## 11        respLAG dickeyfuller 0.788519456385602

None of them is stationary. Then, the diffts function will be used to transform them.

difflist <- list(diffts, list(oillag, c("OILPRICE", "CL2_log", "BDIY_log_lag",
                                     "SPX_log_lag", "DX1_log_lag", "GC1_log_lag",
                                     "HO1_log_lag", "USCI_log_lag", "GNR_log_lag",
                                     "SHCOMP_log_lag", "respLAG")))

oildiff <- composition(oillag, difflist)

rm(difflist)
invisible(gc())

Testing differentiated series to check if it was enough to transform them in stationary.

stats <- getStats(oildiff, c("OILPRICE_diff", "CL2_log_diff", "BDIY_log_lag_diff",
                             "SPX_log_lag_diff", "DX1_log_lag_diff", "GC1_log_lag_diff",
                             "HO1_log_lag_diff", "USCI_log_lag_diff", "GNR_log_lag_diff",
                              "SHCOMP_log_lag_diff", "respLAG_diff"), "OILPRICE")
stats %>%
    filter(test == "dickeyfuller")

##                    var         test value
## 1        OILPRICE_diff dickeyfuller  0.01
## 2         CL2_log_diff dickeyfuller  0.01
## 3    BDIY_log_lag_diff dickeyfuller  0.01
## 4     SPX_log_lag_diff dickeyfuller  0.01
## 5     DX1_log_lag_diff dickeyfuller  0.01
## 6     GC1_log_lag_diff dickeyfuller  0.01
## 7     HO1_log_lag_diff dickeyfuller  0.01
## 8    USCI_log_lag_diff dickeyfuller  0.01
## 9     GNR_log_lag_diff dickeyfuller  0.01
## 10 SHCOMP_log_lag_diff dickeyfuller  0.01
## 11        respLAG_diff dickeyfuller  0.01

Now they are all stationary and ready to be imputed in the model.

oilstationary <-  oildiff[,c("OILPRICE_diff", "CL2_log_diff", "BDIY_log_lag_diff",
                             "SPX_log_lag_diff", "DX1_log_lag_diff", "GC1_log_lag_diff",
                             "HO1_log_lag_diff", "USCI_log_lag_diff", "GNR_log_lag_diff",
                             "SHCOMP_log_lag_diff", "respLAG_diff")]
rm(oildiff, stats)
invisible(gc())

oilstationary <- ts(oilstationary, frequency = 365, start = c(2018, 1, 1)) # random date just to help

autoplot(oilstationary[,c("OILPRICE_diff", "CL2_log_diff", "BDIY_log_lag_diff",
                             "SPX_log_lag_diff", "DX1_log_lag_diff", "GC1_log_lag_diff",
                             "HO1_log_lag_diff", "USCI_log_lag_diff", "GNR_log_lag_diff",
                             "SHCOMP_log_lag_diff", "respLAG_diff")]) +
ggtitle("Plot of the Oil Time-Series") +
theme(plot.title = element_text(hjust = 0.5),
      axis.title.y=element_blank())

Split Train & Test

# create time slices for the data
oilfinal <- na.omit(oilstationary)

timeSlices <- createTimeSlices(1:nrow(oilfinal), 
                   initialWindow = 700, horizon = 200, fixedWindow = TRUE)

trainSlices <- timeSlices[[1]]
#testSlices <- timeSlices[[2]]

traintransf = as.data.frame(oilfinal[trainSlices[[1]],])
testtransf = as.data.frame(oilfinal[-trainSlices[[1]],])


# original dataset
oilts1 <- oilts[,c("OILPRICE", "CL2_log", "BDIY_log_lag",
                                     "SPX_log_lag", "DX1_log_lag", "GC1_log_lag",
                                     "HO1_log_lag", "USCI_log_lag", "GNR_log_lag",
                                     "SHCOMP_log_lag", "respLAG")]

trainori = as.data.frame(oilts1[trainSlices[[1]],])
testori = as.data.frame(oilts1[-trainSlices[[1]],])

rm(timeSlices, oilts, oilstationary, trainSlices)
invisible(gc())

Using LASSO Regularization for Feature Selection

# Find best lambda
x_tr <- traintransf
x_ts <- testtransf
cv_model <- cv.glmnet(as.matrix(x_tr[,-1]),
                      as.matrix(x_tr[,'OILPRICE_diff']), alpha = 0, nlambda = 1000,
                      nfolds=20, standardize = TRUE, type.measure = "mae")

best_lambda <- cv_model$lambda.min

# Fit the model with best lambda
best_model <- glmnet(as.matrix(x_tr[,-1]),
                     as.matrix(x_tr[,'OILPRICE_diff']),
                     alpha = 0, lambda = best_lambda, standardize = TRUE)


# Check predictions and adjustment
#predict(best_model, s = best_lambda, newx = as.matrix(x_ts[,-ncol(x_ts)]))
best_model$dev.ratio

## [1] 0.02747424

# Create a dataset with selected vars
# Select features
df_coef <- round(as.matrix(coef(cv_model, s=cv_model$lambda.min)), 2)

# See all contributing variables
selected <- df_coef[df_coef[, 1] != 0, ]

data_sel <- x_tr[,c(which(colnames(x_tr) %in% names(selected)))]

Fitting the Model

Models will be fit considering original data and transformed data. First, I will consider the models with all variables and both datasets (transformed and original).

Base Model - original data

# Base model - AIC:     -3967.455  

model_base <- gamlss(OILPRICE ~ ., data = trainori,  
               trace = FALSE, family = BCT)
shapiro.test(model_base$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_base$residuals
## W = 0.99791, p-value = 0.5469

checkresiduals(model_base$residuals, DF=14)

summary(model_base)

## ******************************************************************
## Family:  c("BCT", "Box-Cox t") 
## 
## Call:  gamlss(formula = OILPRICE ~ ., family = BCT, data = trainori,  
##     trace = FALSE) 
## 
## Fitting method: RS() 
## 
## ------------------------------------------------------------------
## Mu link function:  identity
## Mu Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.050979   0.289920   3.625 0.000310 ***
## CL2_log         0.106439   0.153404   0.694 0.488015    
## BDIY_log_lag   -0.005750   0.002367  -2.429 0.015395 *  
## SPX_log_lag    -0.045602   0.014465  -3.153 0.001688 ** 
## DX1_log_lag    -0.001218   0.037210  -0.033 0.973905    
## GC1_log_lag    -0.058847   0.016499  -3.567 0.000386 ***
## HO1_log_lag     0.023642   0.017051   1.387 0.166024    
## USCI_log_lag    0.004198   0.020477   0.205 0.837624    
## GNR_log_lag     0.029555   0.023574   1.254 0.210378    
## SHCOMP_log_lag -0.035153   0.009985  -3.521 0.000459 ***
## respLAG         0.865374   0.146639   5.901 5.65e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## Sigma link function:  log
## Sigma Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.09474    0.03668  -166.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## Nu link function:  identity 
## Nu Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    15.61      12.67   1.232    0.218
## 
## ------------------------------------------------------------------
## Tau link function:  log 
## Tau Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2217     0.1049   11.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## No. of observations in the fit:  700 
## Degrees of Freedom for the fit:  14
##       Residual Deg. of Freedom:  686 
##                       at cycle:  10 
##  
## Global Deviance:     -3995.455 
##             AIC:     -3967.455 
##             SBC:     -3903.74 
## ******************************************************************

Residuals are normally distributed, with mean close to zero and uncorrelated.

Base Model - transformed data

# Base model transf data - -3965.401 
model_t <- gamlss(OILPRICE_diff ~ ., data = traintransf,  
               trace = FALSE, family = TF) # can not use boxcox because of negative values
shapiro.test(model_t$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_t$residuals
## W = 0.99562, p-value = 0.04592

checkresiduals(model_t$residuals, DF=13)

summary(model_t)

## ******************************************************************
## Family:  c("TF", "t Family") 
## 
## Call:  gamlss(formula = OILPRICE_diff ~ ., family = TF, data = traintransf,  
##     trace = FALSE) 
## 
## Fitting method: RS() 
## 
## ------------------------------------------------------------------
## Mu link function:  identity
## Mu Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         -0.0003739  0.0004970  -0.752   0.4521  
## CL2_log_diff        -0.3340934  0.2847331  -1.173   0.2411  
## BDIY_log_lag_diff    0.0030997  0.0202904   0.153   0.8786  
## SPX_log_lag_diff    -0.0375782  0.1216444  -0.309   0.7575  
## DX1_log_lag_diff    -0.0944434  0.1479443  -0.638   0.5234  
## GC1_log_lag_diff     0.0439346  0.0551174   0.797   0.4257  
## HO1_log_lag_diff     0.0895552  0.0510620   1.754   0.0799 .
## USCI_log_lag_diff    0.0359061  0.1118945   0.321   0.7484  
## GNR_log_lag_diff    -0.0575744  0.1117780  -0.515   0.6067  
## SHCOMP_log_lag_diff  0.0014158  0.0462602   0.031   0.9756  
## respLAG_diff         0.2150485  0.2770158   0.776   0.4378  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## Sigma link function:  log
## Sigma Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.5399     0.0481  -94.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## Nu link function:  log 
## Nu Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.3462     0.1541   8.734   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## No. of observations in the fit:  700 
## Degrees of Freedom for the fit:  13
##       Residual Deg. of Freedom:  687 
##                       at cycle:  8 
##  
## Global Deviance:     -3986.104 
##             AIC:     -3960.104 
##             SBC:     -3900.94 
## ******************************************************************

Residuals are normally distributed, with mean close to zero and uncorrelated.

Model with Original Data and Cubic Splines

# Base model Cubic Splines - AIC:     -3896.047 
model_base_cs <- gamlss(OILPRICE ~ cs(CL2_log) + cs(BDIY_log_lag) +
                     cs(SPX_log_lag) + cs(DX1_log_lag) + cs(GC1_log_lag) +
                     + cs(HO1_log_lag) + cs(USCI_log_lag) + cs(GNR_log_lag) +
                     cs(SHCOMP_log_lag) + cs(respLAG),
              data = trainori,  
              trace = FALSE, family = BCCG)
shapiro.test(model_base_cs$residuals)
checkresiduals(model_base_cs$residuals, DF = 43)

summary(model_base_cs)

## Important vars: SPX, DX1, GC1, SHCOMP, resplag

Residuals are not normally distributed, the mean is close to zero but the residuals seems to be slightly correlated.

Model with Transformed Data and Cubic Splines on Covariables

# Base model transf data cubic splines - -3974.824  
model_t_cs <- gamlss(OILPRICE_diff ~ cs(CL2_log_diff) + BDIY_log_lag_diff +
                     SPX_log_lag_diff + DX1_log_lag_diff + GC1_log_lag_diff +
                     + cs(HO1_log_lag_diff) + USCI_log_lag_diff + GNR_log_lag_diff +
                     SHCOMP_log_lag_diff + cs(respLAG_diff), data = traintransf,  
               trace = FALSE, family = TF) # can not use boxcox because of negative values
shapiro.test(model_t_cs$residuals)
checkresiduals(model_t_cs$residuals, DF = 42)

summary(model_t_cs)

## Important vars: SPX, GNR, SHCOMP

Residuals are normally distributed by Shapiro Test, with mean close to zero and uncorrelated in most of the cases.

Variable Selection

# Var Selection - base model
stepGAIC(model_base)

#Step:  AIC= -3973.58 
#formula = OILPRICE ~ BDIY_log_lag + SPX_log_lag +  
#    GC1_log_lag + HO1_log_lag + GNR_log_lag + SHCOMP_log_lag +  
#    respLAG, family = BCT, data = trainori, trace = FALSE

# Var Selection - transf data model
stepGAIC(model_t)
#Step:  AIC= -3974.05 
#formula = OILPRICE_diff ~ CL2_log_diff + HO1_log_lag_diff,  
#    family = TF, data = traintransf, trace = FALSE

Models with Selected Variables

model_sel <- gamlss(OILPRICE ~ BDIY_log_lag   +
                    SPX_log_lag + GC1_log_lag +
                    HO1_log_lag + GNR_log_lag +
                    SHCOMP_log_lag + respLAG,
                    data = trainori,  
                    trace = FALSE, family = BCCG)

model_sel_t <- gamlss(OILPRICE_diff ~ CL2_log_diff + 
                      HO1_log_lag_diff, data = traintransf,  
                      trace = FALSE, family = TF)

Models with Selected Variables - original data and cs covariables

# Base with selected vars and cs - -3844.406  
model_sel_cs <- gamlss(OILPRICE ~ BDIY_log_lag   +
                       SPX_log_lag + GC1_log_lag +
                       HO1_log_lag + GNR_log_lag +
                       SHCOMP_log_lag + cs(respLAG),
              data = trainori,  
              trace = FALSE, family = BCCG)

shapiro.test(model_sel_cs$residuals)
checkresiduals(model_sel_cs$residuals, DF = 42)

summary(model_sel_cs)

Models with Selected Variables - transformed data and cs covariables

# Model transformed with sel vars and cs AIC -3987.72  
model_sel_t_cs <- gamlss(OILPRICE_diff ~ cs(CL2_log_diff) + 
                         cs(HO1_log_lag_diff), data = traintransf,  
                         trace = FALSE, family = TF)

shapiro.test(model_sel_t_cs$residuals)
checkresiduals(model_sel_t_cs$residuals, DF = 42)

summary(model_sel_t_cs)

Residuals are not normally distributed by Shapiro Test, with mean close to zero and uncorrelated in most of the cases.

Model with Selected Variables Residuals

plot(model_sel)

## ******************************************************************
##        Summary of the Quantile Residuals
##                            mean   =  -5.203918e-06 
##                        variance   =  1.001431 
##                coef. of skewness  =  0.0577843 
##                coef. of kurtosis  =  9.062829 
## Filliben correlation coefficient  =  0.9640789 
## ******************************************************************

wp(model_sel)

## Warning in wp(model_sel): Some points are missed out 
## increase the y limits using ylim.all

Validation

base_pred <- predict(model_base, newdata = testori, type = "response")
base_cs_pred <- predict(model_base_cs, newdata = testori, type = "response")

## new prediction

sel_pred <- predict(model_sel, newdata = testori, type = "response")
sel_cs_pred <- predict(model_sel_cs, newdata = testori, type = "response")

## new prediction

sel_t_pred <- predict(model_sel_t, newdata = testtransf, type = "response")
sel_t_cs_pred <- predict(model_sel_t_cs, newdata = testtransf, type = "response")

## new prediction

t_pred <- predict(model_t, newdata = testtransf, type = "response")
t_cs_pred <- predict(model_t_cs, newdata = testtransf, type = "response")

## new prediction

accuracy_values <- rbind(
                         accuracy(base_pred, testori[,"OILPRICE"]),
                         accuracy(base_cs_pred, testori[,"OILPRICE"]),
                         accuracy(sel_pred, testori[,"OILPRICE"]),
                         accuracy(sel_cs_pred, testori[,"OILPRICE"]),
                         accuracy(sel_t_pred, testtransf[,"OILPRICE_diff"]),
                         accuracy(sel_t_cs_pred, testtransf[,"OILPRICE_diff"]),
                         accuracy(t_pred, testtransf[,"OILPRICE_diff"]),
                         accuracy(t_cs_pred, testtransf[,"OILPRICE_diff"]))
        
row.names(accuracy_values) <- c("base_pred", "base_cs_pred", "sel_pred",
                                "sel_cs_pred", "sel_t_pred", "sel_t_cs_pred",
                                "t_pred", "t_cs_pred")
accuracy_values

##                          ME       RMSE        MAE        MPE        MAPE
## base_pred      0.0120680335 0.03528933 0.02771349  0.3062927   0.7352158
## base_cs_pred   0.0377049649 0.08824671 0.06854488  0.8922033   1.7822138
## sel_pred       0.0112645887 0.03538602 0.02802574  0.2827468   0.7426300
## sel_cs_pred   -0.0066787269 0.04265551 0.03333156 -0.2213765   0.8948030
## sel_t_pred    -0.0001800693 0.03291055 0.02513054 97.1018507 114.6079746
## sel_t_cs_pred  0.0017576171 0.03344413 0.02546063 94.4076601 124.0153016
## t_pred        -0.0002693446 0.03300739 0.02517636 99.2155333 113.4286559
## t_cs_pred      0.0020826050 0.03349348 0.02549447 97.6230640 124.6598126

Considering these measures of errors, the transformation of the data had not good results. Three models shows good performance, with less than 1% of errors. The Base Model, Model with only the Selected Variables and the Model with Selected Variables & CS. The Model with Selected Variables is chosen to make new predictions because it has a smaller number of terms and it is more generalisable and simple.

Confidence Interval

# Limits for 99% of confindence
critval <-  2.80
upr <- sel_pred + (critval * (sel_pred-testori$OILPRICE))
lwr <- sel_pred - (critval * (sel_pred-testori$OILPRICE))

Probabilistic Forecast

sel_predts <- ts(sel_pred, frequency = 365, start = c(2019,336))
trainorits <- ts(trainori, frequency = 365, start = c(2018,1,1))
testorits <- ts(testori, frequency = 365, start = c(2019,336))
upper <- ts(upr, frequency = 365, start = c(2019,336))
lower <- ts(lwr, frequency = 365, start = c(2019,336))
upr <- c(trainori$OILPRICE, upr)
lwr <- c(trainori$OILPRICE, lwr)
g <- autoplot(oilts1[,"OILPRICE"]) +
  autolayer(sel_predts, color="red") +
#  autolayer(upper) +
#  autolayer(lower) +
  xlab("Year") + ylab("Oil Price") +
  ggtitle("Probabilistic Forecast with Confidence of 99%") +
  guides(colour=guide_legend(title="Probabilistic Forecast for Oil Price"))

g + geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "gray", alpha = 0.6)

# OILPRICE ~ BDIY_log_lag + SPX_log_lag + GC1_log_lag +
#            HO1_log_lag + GNR_log_lag +
#            SHCOMP_log_lag + respLAG,
testing <- data.frame("BDIY_log_lag"=6.579251, 
                        "SPX_log_lag"=7.626482,
                        "GC1_log_lag"=7.166073,
                        "HO1_log_lag"=0.4987733,
                        "GNR_log_lag"=3.771841,
                        "SHCOMP_log_lag"=8.117241,
                        "respLAG"=3.809990)

#calculate the number of model parameters - 1
k <- length(model_sel$coefficients)-1

#calculate sum of squared residuals
SSE <- sum(model_sel$residuals**2)

#calculate total observations in dataset
n <- length(model_sel$residuals)

#calculate residual standard error
stderror <- sqrt(SSE/(n-(1+k)))
conf <- round(1.96*stderror, 2)

print(paste0("The Oil Price tomorrow will be ", round(predict(model_sel, newdata = testing, type = "response"),2), "+/-", conf, " with 95% of confindence. Considering the Mean Absolute Percentage Error, the prediction are different from the real value less than 1%. Then, it will probably be between ", round(predict(model_sel, newdata = testing, type = "response"),2)-round(predict(model_sel, newdata = testing, type = "response"),2)*0.01, " and ",  round(predict(model_sel, newdata = testing, type = "response"),2)+round(predict(model_sel, newdata = testing, type = "response"),2)*0.01))

## [1] "The Oil Price tomorrow will be 3.8+/-1.96 with 95% of confindence. Considering the Mean Absolute Percentage Error, the prediction are different from the real value less than 1%. Then, it will probably be between 3.762 and 3.838"