Create a request to the NY Times Top Stories API
# Read the NY Times API key from the environment instead of embedding the
# secret in the source; set NYT_API_KEY (e.g. in ~/.Renviron) before running.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop("Environment variable NYT_API_KEY is not set.", call. = FALSE)
}
# Build (but do not yet send) a GET request for the science top stories,
# attaching the key as the `api-key` query parameter.
req <- request("https://api.nytimes.com/svc/topstories/v2/science.json") %>%
  req_url_query(`api-key` = nyt_api_key)
# Print the request to inspect it.
req
## <httr2_request>
## GET
## https://api.nytimes.com/svc/topstories/v2/science.json?api-key=4ZFLCUM2HxGjmPqm5PvB5InkBUcM7n2I
## Body: empty
# Preview the request with an explicit Accept header. The result is only
# printed, not assigned, so `req` itself is left unchanged.
req_headers(req, Accept = "application/json")
## <httr2_request>
## GET
## https://api.nytimes.com/svc/topstories/v2/science.json?api-key=4ZFLCUM2HxGjmPqm5PvB5InkBUcM7n2I
## Headers:
## • Accept: 'application/json'
## Body: empty
# Preview the request with a JSON-encoded body attached. The result is only
# printed, not assigned, so `req` itself is left unchanged.
req_body_json(req, data = list(x = 1, y = 2))
## <httr2_request>
## POST
## https://api.nytimes.com/svc/topstories/v2/science.json?api-key=4ZFLCUM2HxGjmPqm5PvB5InkBUcM7n2I
## Body: json encoded data
Perform the request and read the response data into a data frame
# Send the request and keep the response object.
resp <- req %>% req_perform()
# Print the content type reported by the response.
resp_content_type(resp)
## [1] "application/json"
# Overwrite the response object with its parsed JSON body.
resp <- resp_body_json(resp)
Select rows and columns we are interested in and create a new df
top_stories
# Keep only the columns of interest.
# NOTE(review): `df` is not defined in the visible code; presumably it is the
# data frame built from the parsed response. Confirm where it is created.
top_stories <- select(
  df,
  section, title, abstract, url, byline, published_date
)
# Drop the first two rows.
top_stories <- top_stories[-(1:2), ]
# Print a compact overview of the resulting columns.
glimpse(top_stories)
## Rows: 25
## Columns: 6
## $ section <chr> "health", "health", "travel", "magazine", "health", "he…
## $ title <chr> "CUNY Halts Investigation of Alzheimer’s Researcher", "…
## $ abstract <chr> "Citing questions about the integrity of the process, t…
## $ url <chr> "https://www.nytimes.com/2023/10/28/health/cassava-cuny…
## $ byline <chr> "By Apoorva Mandavilli", "By Apoorva Mandavilli", "By S…
## $ published_date <chr> "2023-10-28T09:40:45-04:00", "2023-10-27T15:10:55-04:00…
Clean and tidy byline column
# Tidy the byline column into one row per author with separate first and
# last name columns.
top_stories <- top_stories %>%
  # Strip only the leading "By " prefix and surrounding white space. Bylines
  # that do not start with "By" are kept whole, and empty bylines become NA.
  mutate(
    byline = byline %>%
      str_remove("^By\\s+") %>%
      str_trim() %>%
      na_if("")
  ) %>%
  # Lengthen: one row per author. Handles "A and B", "A, B and C" and
  # "A, B, and C" style author lists.
  separate_longer_delim(
    cols = byline,
    delim = regex(",\\s*and\\s+|,\\s+|\\s+and\\s+")
  ) %>%
  # Widen: split each author into first name and last name. Extra words are
  # merged into the last name; single-word names leave the last name NA
  # instead of raising an error.
  separate_wider_delim(
    cols = byline,
    delim = " ",
    names = c("author_fname", "author_lname"),
    too_many = "merge",
    too_few = "align_start"
  )
Data Visualization: The number of articles from each section of the
newspaper that make up the top stories
# Bar chart of the number of distinct articles per newspaper section.
top_stories %>%
  # The byline step yields one row per author, so reduce to unique
  # section/title pairs before counting articles.
  select(section, title) %>%
  distinct() %>%
  ggplot(aes(x = section, fill = section)) +
  geom_bar() +
  # Label each bar with its count; after_stat(count) replaces the
  # `..count..` notation deprecated in ggplot2 3.4.0.
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = 3,
    color = "white"
  ) +
  ggtitle("Top Stories By Section") +
  xlab("Section") +
  # The fill colour duplicates the x axis, so the legend is hidden.
  theme(
    legend.position = "none",
    axis.title.x = element_text(color = "black", size = 10),
    axis.text.x = element_text(size = 8),
    plot.title = element_text(color = "black", size = 16)
  )
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
