CFTC

library

library(rvest)
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

set date

ded <- c("083005", "010207", "070406", "010108", "122507", "111009", "122308",
    "010113", "122512", "070417", "010119", "122518", "122220")
ded <- as.Date(ded, "%m%d%y")

add <- c("010307", "070306", "123107", "122407", "110909", "122208", 
    "123112","122412", "070317", "123118", "122418", "122120")
add <- as.Date(add, "%m%d%y")

day <- seq(date("2005-01-04"), Sys.Date(), by = 7) 
day <- c(day, add)
day <- day[! day %in% ded]

day <- sort(day)

year <- day %>% 
    format("%Y")
day <- day %>%
    format("%m%d%y")

adjust

year[which(day == "123113")] <- "2014"
day[which(day == "120214")] <- "120614"
day[which(day == "112514")] <- "112914"
day[which(day == "122419")] <- "123019"

reset

df <- NULL
r_day <- NULL

scraping loop

for (i in 1:length(day)){

try({

url <- str_c("https://www.cftc.gov/sites/default/files/files/dea/cotarchives/", year[i], "/futures/deanymelf", day[i], ".htm")

v <- read_html(url) %>%
    html_nodes("pre") %>%
    html_text %>%
    str_split(pattern = "\r\n|\n") %>% 
    .[[1]]

index <- str_detect(v, "NATURAL GAS|NAT GAS NYME") & !(str_detect(v, "E-MINY")) 

for (j in 1:length(index)){
    if(index[j]) gas <- v[j:(j+35)]}

n <- gas[11] %>%
    str_split(" ") %>%
    unlist

n <- n[!(n %in% c("All", ":", ""))] %>%
    str_replace_all("[:,]", "") %>%
    as.numeric

r_day <- c(r_day, day[i])
df <- rbind(df, n)

})
}
## Error in open.connection(x, "rb") : HTTP error 404.
dt <- cbind(r_day, df) %>%
    as.data.frame()

colnames(dt) <- c("date", "total_open_interest", 
    "no-comercial_long", "no-comercial_short", "no-comercial_spreading", 
    "comercial-long", "comercial-short", "total_long", "total_short",
    "nonreportable_positions_long", "nonreportable_positions_short")

dt$date <- as.Date(dt$date, "%m%d%y")

# write.csv(dt, row.names = FALSE,
#   file = "C:/Users/PC622R/Desktop/CFTC_Natural_Gas.csv")