# install.packages("Quandl")
# install.packages("dygraphs")
library(Quandl)
library(tidyverse)
library(zoo)
library(xts)
# library(dygraphs)
library(psych)
Quandl.api_key("TWp9zrWTMziHW_hbugiN")
# Load necessary EIA data from Quandl
f1 <- Quandl("EIA/PET_RCLC1_M", type="xts")
f2 <- Quandl("EIA/PET_RCLC2_M", type="xts")
f3 <- Quandl("EIA/PET_RCLC3_M", type="xts")
f4 <- Quandl("EIA/PET_RCLC4_M", type="xts")
wti_stock <- Quandl("EIA/STEO_COSXPUS_M", type="xts")
gas_stock <- Quandl("EIA/STEO_MGTSPUS_M", type="xts")
# Calculate monthly, 3-month, 6-month, and annual changes in both oil and gasoline inventory data
wstkd <- diff(log(wti_stock))
wstkd3 <- diff(log(wti_stock),lag=3)
wstkd6 <- diff(log(wti_stock),lag=6)
wstkd12 <- diff(log(wti_stock),lag=12)
gstkd <- diff(log(gas_stock))
gstkd3 <- diff(log(gas_stock),lag=3)
gstkd6 <- diff(log(gas_stock),lag=6)
gstkd12 <- diff(log(gas_stock),lag=12)
# Lag the independent variables by 1 month since we want to use them as leading indicators
wstkdL1 <- lag(wstkd,1)
wstkd3L1 <- lag(wstkd3,1)
wstkd6L1 <- lag(wstkd6,1)
wstkd12L1 <- lag(wstkd12,1)
gstkdL1 <- lag(gstkd,1)
gstkd3L1 <- lag(gstkd3,1)
gstkd6L1 <- lag(gstkd6,1)
gstkd12L1 <- lag(gstkd12,1)
# Create calendar spreads and return sign. Positive means curve is in contango, and negative means curve is in backwardation. Since all the variables are of XTS data type, the dates are automatically taken into account.
f1_2 <- sign(f2-f1)
f1_3 <- sign(f3-f1)
f1_4 <- sign(f4-f1)
f2_3 <- sign(f3-f2)
f2_4 <- sign(f4-f2)
f3_4 <- sign(f4-f3)
# Combine all the data:
df <- na.omit(cbind(f1_2,f1_3,f1_4,f2_3,f2_4,f3_4,wstkdL1,wstkd3L1,wstkd6L1,wstkd12L1,gstkdL1,gstkd3L1,gstkd6L1,gstkd12L1))
colnames(df) <- c('f1_2','f1_3','f1_4','f2_3','f2_4','f3_4','wstkdL1','wstkd3L1','wstkd6L1','wstkd12L1','gstkdL1','gstkd3L1','gstkd6L1','gstkd12L1')
Are changes in WTI and gasoline inventory predictive of an upward sloping (contango) or downward sloping (backwardation) oil curve 2-,3-,4-months out?
What are the cases, and how many are there?
Each case represents a monthly observation of a futures contract and inventory data. There are 320 observations in the given data set.
Describe the method of data collection.
Data is reported by the U.S. Energy Information Administration (EIA) and available via Quandl.
What type of study is this (observational/experiment)?
This is an observational study.
If you collected the data, state self-collected. If not, provide a citation/link.
Data is collected by the EIA and is available online here: http://quandl.com/eia/ For this project, data was extracted using the Quandl R package.
What is the response variable, and what type is it (numerical/categorical)?
The response variable is the shape of the oil futures curve and is binary: +1 for upward sloping (contango), or -1 for downward sloping (backwardation).
What is the explanatory variable, and what type is it (numerical/categorival)?
The explanatory variables are numerical - percent change in oil and gasoline inventory data.
Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.
describe(df[,1:6])
## vars n mean sd median trimmed mad min max range skew kurtosis
## f1_2 1 320 0.23 0.97 1 0.29 0 -1 1 2 -0.48 -1.77
## f1_3 2 320 0.19 0.98 1 0.24 0 -1 1 2 -0.39 -1.84
## f1_4 3 320 0.13 0.99 1 0.16 0 -1 1 2 -0.26 -1.94
## f2_3 4 320 0.12 0.99 1 0.15 0 -1 1 2 -0.24 -1.94
## f2_4 5 320 0.08 1.00 1 0.09 0 -1 1 2 -0.15 -1.98
## f3_4 6 320 0.07 1.00 1 0.08 0 -1 1 2 -0.13 -1.99
## se
## f1_2 0.05
## f1_3 0.05
## f1_4 0.06
## f2_3 0.06
## f2_4 0.06
## f3_4 0.06
describe(df[,7:14]*100)
## vars n mean sd median trimmed mad min max range skew
## wstkdL1 1 320 0.11 3.14 0.17 0.09 3.35 -9.03 8.83 17.86 0.04
## wstkd3L1 2 320 0.34 5.91 0.08 0.13 6.43 -15.98 20.55 36.53 0.30
## wstkd6L1 3 320 0.68 7.59 0.21 0.45 6.63 -22.75 28.84 51.59 0.37
## wstkd12L1 4 320 1.25 8.43 0.59 0.97 7.75 -19.14 25.79 44.93 0.32
## gstkdL1 5 320 0.01 3.61 0.03 -0.03 3.43 -7.90 10.54 18.44 0.14
## gstkd3L1 6 320 0.06 6.22 -0.40 -0.26 6.21 -14.40 18.55 32.95 0.45
## gstkd6L1 7 320 0.15 7.06 0.42 0.12 7.71 -19.16 19.67 38.83 0.04
## gstkd12L1 8 320 0.25 4.83 -0.41 0.16 5.05 -12.94 12.67 25.61 0.15
## kurtosis se
## wstkdL1 -0.32 0.18
## wstkd3L1 -0.08 0.33
## wstkd6L1 0.89 0.42
## wstkd12L1 0.28 0.47
## gstkdL1 -0.23 0.20
## gstkd3L1 -0.07 0.35
## gstkd6L1 -0.21 0.39
## gstkd12L1 -0.47 0.27
table(df$f1_2)
##
## -1 0 1
## 122 1 197
prop.table(table(df$f1_2)) * 100
##
## -1 0 1
## 38.1250 0.3125 61.5625
table(df$f1_3)
##
## -1 0 1
## 127 4 189
prop.table(table(df$f1_3)) * 100
##
## -1 0 1
## 39.6875 1.2500 59.0625
table(df$f1_4)
##
## -1 0 1
## 139 1 180
prop.table(table(df$f1_4)) * 100
##
## -1 0 1
## 43.4375 0.3125 56.2500
table(df$f2_3)
##
## -1 0 1
## 140 1 179
prop.table(table(df$f2_3)) * 100
##
## -1 0 1
## 43.7500 0.3125 55.9375
table(df$f2_4)
##
## -1 1
## 148 172
prop.table(table(df$f2_4)) * 100
##
## -1 1
## 46.25 53.75
boxplot(coredata(df[,7:14]*100),col=c('powderblue'),main="Boxplots of the Explanatory Variables",ylab="%",las=2)