CFTC
library
library(rvest)
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
set date
ded <- c("083005", "010207", "070406", "010108", "122507", "111009", "122308",
"010113", "122512", "070417", "010119", "122518", "122220")
ded <- as.Date(ded, "%m%d%y")
add <- c("010307", "070306", "123107", "122407", "110909", "122208",
"123112","122412", "070317", "123118", "122418", "122120")
add <- as.Date(add, "%m%d%y")
day <- seq(date("2005-01-04"), Sys.Date(), by = 7)
day <- c(day, add)
day <- day[! day %in% ded]
day <- sort(day)
year <- day %>%
format("%Y")
day <- day %>%
format("%m%d%y")
adjust
year[which(day == "123113")] <- "2014"
day[which(day == "120214")] <- "120614"
day[which(day == "112514")] <- "112914"
day[which(day == "122419")] <- "123019"
reset
df <- NULL
r_day <- NULL
scraping loop
for (i in 1:length(day)){
try({
url <- str_c("https://www.cftc.gov/sites/default/files/files/dea/cotarchives/", year[i], "/futures/deanymelf", day[i], ".htm")
v <- read_html(url) %>%
html_nodes("pre") %>%
html_text %>%
str_split(pattern = "\r\n|\n") %>%
.[[1]]
index <- str_detect(v, "NATURAL GAS|NAT GAS NYME") & !(str_detect(v, "E-MINY"))
for (j in 1:length(index)){
if(index[j]) gas <- v[j:(j+35)]}
n <- gas[11] %>%
str_split(" ") %>%
unlist
n <- n[!(n %in% c("All", ":", ""))] %>%
str_replace_all("[:,]", "") %>%
as.numeric
r_day <- c(r_day, day[i])
df <- rbind(df, n)
})
}
## Error in open.connection(x, "rb") : HTTP error 404.
dt <- cbind(r_day, df) %>%
as.data.frame()
colnames(dt) <- c("date", "total_open_interest",
"no-comercial_long", "no-comercial_short", "no-comercial_spreading",
"comercial-long", "comercial-short", "total_long", "total_short",
"nonreportable_positions_long", "nonreportable_positions_short")
dt$date <- as.Date(dt$date, "%m%d%y")
# write.csv(dt, row.names = FALSE,
# file = "C:/Users/PC622R/Desktop/CFTC_Natural_Gas.csv")