# https://www.sec.gov/edgar/sec-api-documentation
# load packages
require("jsonlite"); require("data.table");require("httr");require("pbapply");require("stringr");require("plyr")
## Loading required package: jsonlite
## Loading required package: data.table
## Loading required package: httr
## Loading required package: pbapply
## Loading required package: stringr
## Loading required package: plyr
# assign user agent
PASS <- new.env()
assign("usrAgent","CNN han.vu@warnermedia.com",env=PASS)
# ****************************************************************************************************************
# Read in CIK Codes :
# ****************************************************************************************************************
# read in list of CIK codes
INFO <- read_json("https://www.sec.gov/files/company_tickers.json")
INFO <- rbindlist(INFO)
# CIK numbers are 10 digits - we have to fill in with zeros
INFO$CIK = do.call(rbind, lapply(as.list(1:nrow(INFO)), function(ii){
ZEROS = 10-as.numeric(str_count(INFO$cik_str[ii]))
paste0(c(rep(0,ZEROS),INFO$cik_str[ii]), collapse = "")
}))
INFO <- as.data.frame(INFO)
# *****************************************************************************************************************
# *****************************************************************************************************************
# function to lookup CIK number by ticker
getCIK = function(symbol){
subset(INFO, INFO$ticker == paste(symbol))$CIK
}
# *****************************************************************************************************************
# *****************************************************************************************************************
# gets all filing values for a specific ticker
getAllEDGAR = function(ticker)
{
# get CIK # for ticker
CIK = getCIK(ticker)
# get data by passing in url & headers
pg <- GET(url = paste0("https://data.sec.gov/api/xbrl/companyfacts/CIK",CIK,".json"),
config = httr::add_headers(`User-Agent` = PASS$usrAgent,
`Accept-Encoding` = 'gzip, deflate'))
# raw data
data_raw <- try(content(pg, as="text", encoding="UTF-8") %>% fromJSON(pg, flatten=FALSE),silent = TRUE)
# ********************************************************************************************************
# DEI
# ********************************************************************************************************
N = length(data_raw$facts$dei)
if(N >= 1)
{
DEI = rbindlist(lapply(as.list(1:N), function(ii){
# extract data
tmp = as.data.frame(rbindlist(data_raw$facts$dei[[ii]]$units[[1]],use.names = TRUE, fill = TRUE))
# add description column
tmp$desc <- names(data_raw$facts$dei)[ii]
# delete duplicates
tmp <- tmp[!duplicated(tmp$end),]
# add ticker column
tmp$symbol <- ticker
# return df
tmp
}),use.names=TRUE, fill=TRUE)
}else{
DEI = NULL
}
# ********************************************************************************************************
# INVEST
# ********************************************************************************************************
N = length(data_raw$facts$invest)
if(N >= 1)
{
INVEST = rbindlist(lapply(as.list(1:N), function(ii){
# extract data
tmp = as.data.frame(rbindlist(data_raw$facts$invest[[ii]]$units[[1]],use.names = TRUE, fill = TRUE))
# add description column
tmp$desc <- names(data_raw$facts$invest)[ii]
# delete duplicates
tmp <- tmp[!duplicated(tmp$end),]
# add ticker column
tmp$symbol <- ticker
# return df
tmp
}),use.names=TRUE, fill=TRUE)
}else{
INVEST = NULL
}
# ********************************************************************************************************
# SRT
# ********************************************************************************************************
N = length(data_raw$facts$srt)
if(N >= 1)
{
SRT = rbindlist(lapply(as.list(1:N), function(ii){
# extract data
tmp = as.data.frame(rbindlist(data_raw$facts$srt[[ii]]$units[[1]],use.names = TRUE, fill = TRUE))
# add description column
tmp$desc <- names(data_raw$facts$srt)[ii]
# delete duplicates
tmp <- tmp[!duplicated(tmp$end),]
# add ticker column
tmp$symbol <- ticker
# return df
tmp
}),use.names=TRUE, fill=TRUE)
}else{
SRT = NULL
}
# ********************************************************************************************************
# US-GAAP
# ********************************************************************************************************
N = length(data_raw$facts$`us-gaap`)
if(N >= 1)
{
GAAP = rbindlist(lapply(as.list(1:N), function(ii){
# extract data
tmp = as.data.frame(rbindlist(data_raw$facts$`us-gaap`[[ii]]$units[[1]],use.names = TRUE, fill = TRUE))
# add description column
tmp$desc <- names(data_raw$facts$`us-gaap`)[ii]
# delete duplicates
tmp <- tmp[!duplicated(tmp$end),]
# add ticker column
tmp$symbol <- ticker
# return df
tmp
}),use.names=TRUE, fill=TRUE)
# re-order
GAAP = GAAP[,c("start","end","val","accn","fy","fp","form","filed","frame","desc","symbol" )]
}else{
GAAP = NULL
}
# combine ALL data
ALL <- rbind.fill(GAAP,DEI,SRT,INVEST)
# return data frame
ALL
}
# *****************************************************************************************************************
# *****************************************************************************************************************
# Get TUP
df = getAllEDGAR(ticker="TUP")
df_10k <- df[grepl("10-K", df$form), ]
df_subset <- df_10k[grepl("SalesRevenueNet|ComprehensiveIncomeNetOfTax", df$desc), ]
write.csv(df, "tup.csv", row.names = FALSE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ forcats 1.0.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange() masks plyr::arrange()
## ✖ dplyr::between() masks data.table::between()
## ✖ purrr::compact() masks plyr::compact()
## ✖ dplyr::count() masks plyr::count()
## ✖ dplyr::desc() masks plyr::desc()
## ✖ dplyr::failwith() masks plyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::id() masks plyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ dplyr::mutate() masks plyr::mutate()
## ✖ dplyr::rename() masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ✖ purrr::transpose() masks data.table::transpose()
df_subset1 <- df_10k %>%
filter(str_detect(desc, "RevenueFromContractWithCustomerExcludingAssessedTax|NetIncomeLoss"))
df_subset2 <- df_10k %>%
filter(str_detect(desc, "RevenueFromContractWithCustomerExcludingAssessedTax|NetIncomeLoss|SalesRevenueNet"))
write.csv(df_subset2, "tup2.csv", row.names = FALSE)
write.csv(df, "tup_all.csv", row.names = FALSE)
#try quarterly data
df_both <- df %>%
filter(str_detect(desc, "RevenueFromContractWithCustomerExcludingAssessedTax|NetIncomeLoss|SalesRevenueNet"))
write.csv(df_both, "tupper_quarterly.csv", row.names = FALSE)
#try gross profit
df_profit <- df %>%
filter(str_detect(desc, "GrossProfit"))
write.csv(df_profit, "tupper_grossprofit.csv", row.names = FALSE)
#Debt
df_debt <- df %>%
filter(str_detect(desc, "DebtAndCapitalLeaseObligations"))
write.csv(df_debt, "tupper_debt.csv", row.names = FALSE)