Introduction

The assignment for week 9 is to choose one of the New York Times APIs, construct an interface in R to read in the JSON data, and transform it into an R data frame. I chose the Most Popular API, which returns the most-viewed articles for the most recent day (24 hours).

Load libraries

library(httr)
library(jsonlite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()  masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Get data

# Read the NYT API key from the environment rather than hard-coding it, so the
# secret is never committed with the script or printed in the knitted report.
# Define it once, e.g. add a line NYT_API_KEY=<your key> to ~/.Renviron.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your NYT API key.",
    call. = FALSE
  )
}

# Construct the request URL: Most Popular API, most-viewed articles, 1-day period
api_mpurl <- paste0(
  "https://api.nytimes.com/svc/mostpopular/v2/viewed/1.json?api-key=",
  nyt_api_key
)

# Fetch the raw data
rawdata <- GET(api_mpurl)

# Abort with an informative error on any 4xx/5xx response, so an error payload
# is never passed on to the JSON parsing step
stop_for_status(rawdata)

# Inspect the HTTP status code of the response (200 means the request
# succeeded), then print an overview of the components of the response object
status_code(rawdata)
## [1] 200
summary(rawdata)
##             Length Class       Mode       
## url             1  -none-      character  
## status_code     1  -none-      numeric    
## headers        25  insensitive list       
## all_headers     1  -none-      list       
## cookies         7  data.frame  list       
## content     34845  -none-      raw        
## date            1  POSIXct     numeric    
## times           6  -none-      numeric    
## request         7  request     list       
## handle          1  curl_handle externalptr
# Extract the response body as a single character string. JSON is UTF-8 by
# specification, so state the encoding explicitly instead of letting httr
# guess it from the response headers.
extract_rawdata <- content(rawdata, as = "text", encoding = "UTF-8")

Convert the JSON response to a data frame

# Parse the JSON text and coerce the parsed object to a data frame. The
# top-level fields (status, copyright, num_results) sit alongside the article
# fields, which are prefixed with "results.".
all_data <- as.data.frame(fromJSON(extract_rawdata))

# List the available columns
names(all_data)
##  [1] "status"                 "copyright"              "num_results"           
##  [4] "results.uri"            "results.url"            "results.id"            
##  [7] "results.asset_id"       "results.source"         "results.published_date"
## [10] "results.updated"        "results.section"        "results.subsection"    
## [13] "results.nytdsection"    "results.adx_keywords"   "results.column"        
## [16] "results.byline"         "results.type"           "results.title"         
## [19] "results.abstract"       "results.des_facet"      "results.org_facet"     
## [22] "results.per_facet"      "results.geo_facet"      "results.media"         
## [25] "results.eta_id"

Create a second data frame containing only the selected columns

# Keep only the publication date, title, and section of each article
popular_articles <- select(
  all_data,
  results.published_date,
  results.title,
  results.nytdsection
)

Rename the columns

# Give the three columns display-friendly names, in column order:
# publication date, article title, section
names(popular_articles) <- c("Published Date", "Popular Articles", "Section")

# Preview the structure of the final data frame
glimpse(popular_articles)
## Rows: 20
## Columns: 3
## $ `Published Date`   <chr> "2024-03-22", "2024-03-23", "2024-03-23", "2024-03-…
## $ `Popular Articles` <chr> "Kate and the King", "James Carville, the Cajun Who…
## $ Section            <chr> "opinion", "opinion", "u.s.", "books", "world", "br…