This R script goes to the EIA website and scrapes the daily WTI prices. A weekly average is calculated, then 13-,26-, and 52-week averages are calculated for a simple moving avreage crossover strategy.
Then a long/short signal is created if the weekly average is above/below the 52-week average +/- 1 standard deviation.
# Get packages
#-----------------------------------------------------
# install.packages('rvest')
# install.packages('zoo')
# install.packages('dygraphs')
library(rvest)
## Warning: package 'rvest' was built under R version 3.2.5
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.3.1
library(stringr)
## Warning: package 'stringr' was built under R version 3.2.5
library(zoo)
## Warning: package 'zoo' was built under R version 3.2.5
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dygraphs)
## Warning: package 'dygraphs' was built under R version 3.2.5
library(xts)
#-----------------------------------------------------
# Define function
#-----------------------------------------------------
mon_convert <- function(x){
counter = 1
limit =13
mon_az <- c("jan","feb","mar","apr","may","jun","jul","aug",
"sep","oct","nov","dec")
while (counter < limit){
if (tolower(x) == mon_az[counter]) break
counter = counter + 1
}
if (counter > 12) return(0)
return(counter)
}
#-----------------------------------------------------
Get HTML for EIA website and extract the table headers in the ‘B6’ CSS class, as well as the WTI spot prices in the ‘B3’ CSS class.
# Website URL
url <- 'https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=RCLC1&f=D'
# Read website HTML
wb <- read_html(url)
# Extract B6 CSS class
wti_html <- html_nodes(wb,'.B6')
wti_text <- html_text(wti_html)
# Extract B3 CSS class
price_html <- html_nodes(wb,'.B3')
price_text <- html_text(price_html)
Strip whitespace surrounding text extracted from HTML code. Then convert numbers from characters to numbers, and split year and week into two buckets for 2 columns.
wti_text <- str_trim(wti_text)
price_text <- as.numeric(price_text)
year_week <- str_split_fixed(wti_text," ",2)
Convert extracted data into a dataframe
df <- as.data.frame(matrix(price_text,ncol = 5,byrow = T))
df <- cbind(year_week[,1],year_week[,2],df)
names(df) <- c('year','week','mon','tues','wed','thurs','fri')
head(df)
## year week mon tues wed thurs fri
## 1 1983 Apr- 4 to Apr- 8 29.44 29.71 29.92 30.17 30.38
## 2 1983 Apr-11 to Apr-15 30.26 30.83 30.82 30.67 30.48
## 3 1983 Apr-18 to Apr-22 30.75 30.75 30.70 30.68 30.75
## 4 1983 Apr-25 to Apr-29 30.84 30.71 30.78 30.74 30.63
## 5 1983 May- 2 to May- 6 30.61 30.50 30.42 30.20 30.45
## 6 1983 May- 9 to May-13 30.10 30.31 29.97 29.72 29.80
Extract latest date for prices and create a vector of weekly dates to use for plotting, etc.
date <- df$week[dim(df)[1]]
date <- str_split_fixed(date," ",3)[3]
year <- str_trim(df$year[dim(df)[1]])
mon <- str_extract_all(date,"[[:alpha:]]+")
day <- str_extract_all(date,"[0-9]+")
# Use mon_convert function to convert 3 character month into an integer
mon_n <- mon_convert(mon)
date_str <- paste0(year,"/",mon_n,"/",day)
print(paste0('Latest price as of ', date_str))
## [1] "Latest price as of 2017/9/22"
# Create a vector of weekly dates (each Friday)
idx <- seq(as.Date('1983/4/8'), as.Date(date_str),"week")
head(idx)
## [1] "1983-04-08" "1983-04-15" "1983-04-22" "1983-04-29" "1983-05-06"
## [6] "1983-05-13"
Calculate moving averages for WTI prices and 52 week standard deviation
df$wkavg <- apply(df[,3:7],1,mean,na.rm = TRUE)
df$wkavg13 <- rollmean(df$wkavg,13,fill=NA,align='right')
df$wkavg26 <- rollmean(df$wkavg,26,fill=NA,align='right')
df$wkavg52 <- rollmean(df$wkavg,52,fill=NA,align='right')
df$wkstd52 <- rollapply(df$wkavg,52,FUN=sd,fill=NA,align='right')
# Show top of dataframe
head(df)
## year week mon tues wed thurs fri wkavg wkavg13
## 1 1983 Apr- 4 to Apr- 8 29.44 29.71 29.92 30.17 30.38 29.924 NA
## 2 1983 Apr-11 to Apr-15 30.26 30.83 30.82 30.67 30.48 30.612 NA
## 3 1983 Apr-18 to Apr-22 30.75 30.75 30.70 30.68 30.75 30.726 NA
## 4 1983 Apr-25 to Apr-29 30.84 30.71 30.78 30.74 30.63 30.740 NA
## 5 1983 May- 2 to May- 6 30.61 30.50 30.42 30.20 30.45 30.436 NA
## 6 1983 May- 9 to May-13 30.10 30.31 29.97 29.72 29.80 29.980 NA
## wkavg26 wkavg52 wkstd52
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
# Show bottom of dataframe
tail(df)
## year week mon tues wed thurs fri wkavg wkavg13
## 1794 2017 Aug-14 to Aug-18 47.59 47.55 46.78 47.09 48.51 47.5040 46.96073
## 1795 2017 Aug-21 to Aug-25 47.37 47.64 48.41 47.43 47.87 47.7440 46.75242
## 1796 2017 Aug-28 to Sep- 1 46.57 46.44 45.96 47.23 47.29 46.6980 46.61381
## 1797 2017 Sep- 4 to Sep- 8 NA 48.66 49.16 49.09 47.48 48.5975 46.77085
## 1798 2017 Sep-11 to Sep-15 48.07 48.23 49.30 49.89 49.89 49.0760 47.06177
## 1799 2017 Sep-18 to Sep-22 49.91 49.48 NA NA NA 49.6950 47.56585
## wkavg26 wkavg52 wkstd52
## 1794 48.57825 49.01327 3.036840
## 1795 48.33677 49.02031 3.033398
## 1796 48.07262 49.05054 3.001491
## 1797 47.98490 49.10131 2.970313
## 1798 48.00698 49.19235 2.893127
## 1799 48.07779 49.29079 2.819114
Calculate the log difference of the weekly WTI price aka returns. Then calculate the position (long or short, +1 or -1) based on whether the weekly average price is above/below the 52-week avg +/- 1 standard deviation.
df$wkret <- c(0,diff(log(df$wkavg),lag=1))
df$post <- ifelse(df$wkavg > (df$wkavg52+df$wkstd52),1, ifelse(df$wkavg < (df$wkavg52-df$wkstd52), -1, 0))
Make a new dataframe with two columns: weekly returns and long/short signal. Calculate the strategy’s equity curve (the growth of a $1 investment) and plot it.
port <- xts(cbind(df$wkavg,df$wkavg13,df$wkavg26,df$wkavg52,df$post,df$wkret),order.by=as.Date(idx))
names(port) <- c('wkavg','13wkavg','26wkavg','52wkavg','signal','ret')
port$signal <- lag(port$signal,1)
port$signal[is.na(port$signal)] <- 0
port$pnl <- port$ret * port$signal
port$temp <- 0
port$temp <- port$pnl + 1
equity_curve <- cumprod(port$temp)
dygraph(equity_curve,main='Equity Curve') %>% dyRangeSelector()
annRet <- (prod(port$temp)^(1/length(port$temp)))**52*100-100
annStd <- sd(port$temp) * sqrt(52) * 100
print(paste0('The annualized return is ',toString(round(annRet,3)),'%.'))
## [1] "The annualized return is 5.139%."
print(paste0('The annualized standard deviation is ',toString(round(annStd,3)),'%.'))
## [1] "The annualized standard deviation is 23.735%."
print(paste0('The Sharpe ratio is ',toString(round(annRet/annStd,3)),'.'))
## [1] "The Sharpe ratio is 0.217."
Create a plot with weekly averages, 13-week, 26-week, and 52-week averages.
Export necessary data as CSV file.
port <- cbind(port,equity_curve)
names(port)[9] <- 'equity_curve'
write.csv(as.data.frame(port),file="wti_portfolio.csv",row.names = TRUE)