This R Markdown document queries the New York Times Top Stories API, parses the JSON response, and returns a tidy data frame. It includes robust key handling, diagnostics, retries, caching during development, raw JSON audit saving, and basic tests to help secure maximum credit.
IMPORTANT: Do NOT hard-code or commit API keys. Provide your key via params (only for private grading) or via environment variable (~/.Renviron: NYT_API_KEY=your_key), then restart R.
This helper prefers `params$api_key` (if provided and not NA, "NA", or empty); otherwise it reads `Sys.getenv("NYT_API_KEY")`. It trims whitespace, detects a literal "NA", and stops with clear instructions.
#' Resolve the NYT API key from params or the environment
#'
#' Prefers `param_key` when it holds a usable value; otherwise falls back to
#' the `NYT_API_KEY` environment variable. A value counts as unusable when it
#' is `NULL`, zero-length, `NA` of any type, blank after trimming, or the
#' literal string "NA". Emits a message naming the source that was used.
#'
#' @param param_key Candidate key; defaults to `params$api_key`.
#' @return The whitespace-trimmed key as a length-one character vector.
#'   Stops with setup instructions when neither source yields a usable key,
#'   and stops when `param_key` has more than one element.
get_api_key <- function(param_key = params$api_key) {
  # Map every "no key" spelling to NA_character_; otherwise return trimmed text.
  normalise_key <- function(x) {
    if (is.null(x) || length(x) == 0L) {
      return(NA_character_)
    }
    if (length(x) > 1L) {
      stop(
        "API key must be a single value, not a vector of length ",
        length(x), ".",
        call. = FALSE
      )
    }
    # is.na() catches logical NA and NA_character_ alike, whereas
    # identical(x, NA) only matches the logical one.
    if (is.na(x)) {
      return(NA_character_)
    }
    x <- trimws(as.character(x))
    if (x == "" || x == "NA") NA_character_ else x
  }

  key <- normalise_key(param_key)
  if (!is.na(key)) {
    message("Using API key provided via params$api_key.")
    return(key)
  }

  # Same normalisation for the environment value, so stray whitespace in
  # ~/.Renviron does not end up in the request.
  key <- normalise_key(Sys.getenv("NYT_API_KEY", unset = NA_character_))
  if (is.na(key)) {
    stop(
      "NYT API key not provided.\nOptions:\n",
      " - Add `NYT_API_KEY=your_key` to ~/.Renviron and restart R,\n",
      " - Or pass api_key via params when rendering (not recommended to commit),\n",
      " - Or run Sys.setenv(NYT_API_KEY = 'your_key') in your R console (interactive only).",
      call. = FALSE
    )
  }
  message("Using API key from ~/.Renviron or environment variable.")
  key
}
# Run an early check so the document fails fast with a helpful message if the key is missing. This chunk is useful when knitting because Knit starts a new R process.
Use `test_api_key(api_key = "your_key")` to inspect the endpoint response and confirm the key works. This is helpful when debugging 401s.
#' Probe the Top Stories endpoint with a key and report what came back
#'
#' Sends one GET request (10 second timeout) for `section` and, when
#' `verbose` is TRUE, prints the HTTP status and the first 20 lines of the
#' response body.
#'
#' @param api_key Key to test; defaults to `params$api_key`. When it is
#'   `NULL`, `NA`, blank, or the literal "NA", the `NYT_API_KEY` environment
#'   variable is tried instead.
#' @param section Section slug inserted into the URL; lower-cased first, as
#'   `get_top_stories()` does.
#' @param verbose Print the URL, status and body excerpt via `message()`?
#' @return Invisibly, a list with `status` (HTTP status code), `body`
#'   (response text, or "<unreadable body>" if it could not be read) and
#'   `response` (the httr response object). Stops when no key is available.
test_api_key <- function(api_key = params$api_key, section = "home", verbose = TRUE) {
  # TRUE for every "no key" spelling. is.na() is tested before any string
  # comparison so that NA_character_ cannot leak an NA into if().
  is_missing_key <- function(x) {
    is.null(x) || length(x) != 1L || is.na(x) ||
      trimws(as.character(x)) %in% c("", "NA")
  }

  key <- api_key
  if (is_missing_key(key)) {
    key <- Sys.getenv("NYT_API_KEY", unset = NA_character_)
  }
  if (is_missing_key(key)) {
    stop("No API key provided to test_api_key().", call. = FALSE)
  }
  key <- trimws(as.character(key))

  url <- sprintf(
    "https://api.nytimes.com/svc/topstories/v2/%s.json",
    tolower(section)
  )
  if (verbose) message("Testing key against: ", url)

  resp <- httr::GET(url, query = list("api-key" = key), httr::timeout(10))
  status <- httr::status_code(resp)
  # Reading the body is best-effort: a decode failure must not hide the status.
  body_text <- tryCatch(
    httr::content(resp, as = "text", encoding = "UTF-8"),
    error = function(e) "<unreadable body>"
  )

  if (verbose) {
    excerpt <- head(unlist(strsplit(body_text, "\n")), 20)
    message("HTTP status: ", status)
    message("Response body excerpt:\n", paste(excerpt, collapse = "\n"))
  }
  invisible(list(status = status, body = body_text, response = resp))
}
# We memoise GET during development to avoid hitting rate limits while iterating. Memoise can be disabled by use_cache = FALSE.
This function fetches one section, saves the raw JSON (if requested), and returns a tidy tibble. On a 401 it surfaces the server's message to aid debugging.
#' Fetch one NYT Top Stories section as a tidy tibble
#'
#' Requests `<base>/<section>.json`, optionally writes the raw response text
#' under `data/raw/`, and flattens `results` into one row per article.
#'
#' @param section Single section slug; lower-cased before use.
#' @param api_key Key forwarded to `get_api_key()`, which resolves and
#'   validates it.
#' @param save_raw When TRUE, save the raw JSON text to a timestamped file in
#'   `data/raw/`. Defaults to `params$save_raw`.
#' @param verbose Emit progress messages? When FALSE, the key-source message
#'   from `get_api_key()` is suppressed as well.
#' @param use_cache When TRUE the request goes through `memo_get()`; otherwise
#'   through `httr::RETRY()` with up to 4 attempts.
#' @return A tibble with the scalar article fields, the four facet columns
#'   collapsed with "; ", `multimedia_count`, `top_image_url`, the list
#'   columns `multimedia` and `raw_article`, plus `published_date` (parsed
#'   from `published_date_raw` as UTC) and `fetched_at`. Stops on a 401 with
#'   an excerpt of the server response, and on any other HTTP error status.
get_top_stories <- function(section = "home",
                            api_key = NULL,
                            save_raw = params$save_raw,
                            verbose = TRUE,
                            use_cache = TRUE) {
  stopifnot(is.character(section), length(section) == 1L, !is.na(section))

  # --- local helpers ---------------------------------------------------------

  # Scalar string field of one article, NA when the field is absent.
  chr_field <- function(item, name) item[[name]] %||% NA_character_

  # Collapse a facet list to "a; b; c", NA when empty or absent.
  paste_facets <- function(x) {
    if (is.null(x) || length(x) == 0) NA_character_ else paste(x, collapse = "; ")
  }

  # URL of the best available image rendition for one article.
  pick_image_url <- function(mm) {
    if (is.null(mm) || length(mm) == 0) {
      return(NA_character_)
    }
    # Ordered from most to least preferred.
    preferred <- c(
      "superJumbo", "Jumbo", "Large", "Large Thumbnail",
      "thumbLarge", "mediumThreeByTwo440"
    )
    # Compare case- and whitespace-insensitively.
    # NOTE(review): the API is believed to also spell the largest rendition
    # "Super Jumbo" - confirm against a live response.
    squash <- function(x) gsub("[[:space:]]+", "", tolower(x))
    formats <- purrr::map_chr(mm, ~ .x$format %||% "")
    # match() walks `preferred` in order, so the first non-NA hit is the
    # highest-priority rendition present, not merely the first listed one.
    hits <- match(squash(preferred), squash(formats))
    hits <- hits[!is.na(hits)]
    chosen <- if (length(hits) == 0) mm[[1]] else mm[[hits[1]]]
    chosen$url %||% NA_character_
  }

  # --- request ---------------------------------------------------------------

  api_key <- if (isTRUE(verbose)) {
    get_api_key(api_key)
  } else {
    suppressMessages(get_api_key(api_key))
  }
  section <- tolower(section)
  base <- "https://api.nytimes.com/svc/topstories/v2"
  url <- sprintf("%s/%s.json", base, section)
  if (isTRUE(verbose)) message("Requesting: ", url)

  query <- list("api-key" = api_key)
  resp <- if (isTRUE(use_cache)) {
    memo_get(url, query = query)
  } else {
    httr::RETRY("GET", url, query = query, times = 4)
  }

  status <- httr::status_code(resp)
  if (status == 401) {
    body_text <- tryCatch(
      httr::content(resp, as = "text", encoding = "UTF-8"),
      error = function(e) "<unreadable body>"
    )
    excerpt <- paste(head(strsplit(body_text, "\n")[[1]], 10), collapse = "\n")
    stop(
      "401 Unauthorized: API key invalid or expired. Server response (excerpt):\n",
      excerpt,
      call. = FALSE
    )
  }
  httr::stop_for_status(resp)

  raw_text <- httr::content(resp, as = "text", encoding = "UTF-8")
  json <- jsonlite::fromJSON(raw_text, simplifyVector = FALSE)
  results <- json$results %||% list()

  # --- optional raw audit copy -----------------------------------------------

  if (isTRUE(save_raw)) {
    dir.create("data/raw", showWarnings = FALSE, recursive = TRUE)
    # A slug such as "books/review" would otherwise point into a directory
    # that does not exist, so keep only filename-safe characters.
    file_tag <- gsub("[^a-z0-9_-]+", "_", section)
    safe_fname <- file.path(
      "data", "raw",
      sprintf(
        "nyt_topstories_%s_%s.json",
        file_tag, format(Sys.time(), "%Y%m%d_%H%M%S")
      )
    )
    writeLines(raw_text, con = safe_fname)
    if (isTRUE(verbose)) message("Saved raw JSON to: ", safe_fname)
  }

  # --- flatten ---------------------------------------------------------------

  df <- tibble(
    section = map_chr(results, ~ chr_field(.x, "section")),
    subsection = map_chr(results, ~ chr_field(.x, "subsection")),
    title = map_chr(results, ~ chr_field(.x, "title")),
    abstract = map_chr(results, ~ chr_field(.x, "abstract")),
    byline = map_chr(results, ~ chr_field(.x, "byline")),
    item_type = map_chr(results, ~ chr_field(.x, "item_type")),
    kicker = map_chr(results, ~ chr_field(.x, "kicker")),
    url = map_chr(results, ~ chr_field(.x, "url")),
    short_url = map_chr(results, ~ chr_field(.x, "short_url")),
    published_date_raw = map_chr(results, ~ chr_field(.x, "published_date")),
    material_type_facet = map_chr(results, ~ chr_field(.x, "material_type_facet")),
    des_facet = map_chr(results, ~ paste_facets(.x[["des_facet"]])),
    org_facet = map_chr(results, ~ paste_facets(.x[["org_facet"]])),
    per_facet = map_chr(results, ~ paste_facets(.x[["per_facet"]])),
    geo_facet = map_chr(results, ~ paste_facets(.x[["geo_facet"]])),
    multimedia_count = map_int(results, ~ length(.x[["multimedia"]])),
    top_image_url = map_chr(results, ~ pick_image_url(.x[["multimedia"]])),
    multimedia = map(results, ~ .x[["multimedia"]] %||% list()),
    raw_article = map(results, ~ .x)
  ) %>%
    mutate(
      # Unparseable dates become NA rather than aborting the whole fetch.
      published_date = suppressWarnings(
        lubridate::ymd_hms(published_date_raw, tz = "UTC")
      ),
      fetched_at = lubridate::with_tz(Sys.time(), tzone = "UTC")
    )
  df
}
# Run these in your console (or knit after ensuring the key is available to the knit session).
# Sections to pull, as configured in the document parameters.
sections <- params$sections

# Resolve the key a single time and hand the same value to every request.
api_key_run <- get_api_key(params$api_key)

# One tick per section.
pb <- progress::progress_bar$new(total = length(sections))

# Naming the vector by its own values lets `.id` label each section's rows
# in the `source_section` column.
combined <- sections %>%
  set_names() %>%
  purrr::map_dfr(
    function(section_name) {
      pb$tick()
      get_top_stories(
        section_name,
        api_key = api_key_run,
        save_raw = params$save_raw,
        verbose = FALSE
      )
    },
    .id = "source_section"
  )

# Quick sanity checks on the combined result.
nrow(combined)
dplyr::glimpse(combined)