The assignment for week 9 is to choose one of the New York Times APIs, construct an interface in R to read in the JSON data, and transform it into an R data frame. I chose the Most Popular API, which retrieves the most viewed articles over the most recent day (24 hours).
library(httr)
library(jsonlite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Construct the request URL for the NYT Most Popular API
# (most-viewed articles, 1-day period).
# The API key is read from the NYT_API_KEY environment variable (for example,
# set in ~/.Renviron) so the credential is never stored in the source file.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your NYT API key.",
    call. = FALSE
  )
}
api_mpurl <- paste0(
  "https://api.nytimes.com/svc/mostpopular/v2/viewed/1.json?api-key=",
  nyt_api_key
)
# Fetch the raw data
rawdata <- GET(api_mpurl)
# Stop with an informative error if the server returned an HTTP error status,
# instead of carrying a failed response into the parsing steps below
stop_for_status(rawdata)
# Inspect the HTTP status code stored on the response object.
# A value of 200 means the request succeeded.
rawdata[["status_code"]]
## [1] 200
# Overview of the components that make up the response object
rawdata %>% summary()
## Length Class Mode
## url 1 -none- character
## status_code 1 -none- numeric
## headers 25 insensitive list
## all_headers 1 -none- list
## cookies 7 data.frame list
## content 34845 -none- raw
## date 1 POSIXct numeric
## times 6 -none- numeric
## request 7 request list
## handle 1 curl_handle externalptr
# Extract the response body as a character string. The encoding is stated
# explicitly so httr does not have to guess it (or fall back with a message).
extract_rawdata <- content(rawdata, as = "text", encoding = "UTF-8")
# Parse the JSON text and convert the parsed result to a data frame.
# The top-level fields (status, copyright, num_results) end up as columns
# alongside the per-article fields, which are prefixed with "results.".
all_data <- extract_rawdata %>%
  fromJSON() %>%
  as.data.frame()
# List the column names available in the parsed data frame
names(all_data)
## [1] "status" "copyright" "num_results"
## [4] "results.uri" "results.url" "results.id"
## [7] "results.asset_id" "results.source" "results.published_date"
## [10] "results.updated" "results.section" "results.subsection"
## [13] "results.nytdsection" "results.adx_keywords" "results.column"
## [16] "results.byline" "results.type" "results.title"
## [19] "results.abstract" "results.des_facet" "results.org_facet"
## [22] "results.per_facet" "results.geo_facet" "results.media"
## [25] "results.eta_id"
# Keep only the publication date, title, and section of each article
popular_articles <- select(
  all_data,
  results.published_date,
  results.title,
  results.nytdsection
)
Rename the columns to more readable names.
# Replace the API field names with human-readable column headers
names(popular_articles) <- c("Published Date", "Popular Articles", "Section")
# Print a compact overview of the resulting data frame
glimpse(popular_articles)
## Rows: 20
## Columns: 3
## $ `Published Date` <chr> "2024-03-22", "2024-03-23", "2024-03-23", "2024-03-…
## $ `Popular Articles` <chr> "Kate and the King", "James Carville, the Cajun Who…
## $ Section <chr> "opinion", "opinion", "u.s.", "books", "world", "br…