Processing NYT Most Popular Articles

Load Packages

library(httr)
library(jsonlite)
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(utils)

Retrieve and Process NYT Most Popular Articles.

This analysis looks at the most popular articles from the past 7 days.

# Request settings for the NYT Most Popular API.
#
# SECURITY: an API key is a credential and should not live in source control.
# Set the NYT_API_KEY environment variable (for example in ~/.Renviron) to
# supply your own key. The literal below is kept only as a fallback so the
# script keeps running exactly as before when the variable is not set.
api_key <- Sys.getenv("NYT_API_KEY", unset = "")
if (!nzchar(api_key)) {
  api_key <- "81r2GPHEk7BZUIsANaNjDZVMyOWwoLXT"
}
category <- "viewed" # most-popular list to request (part of the URL path)
period <- 7          # time window in days (part of the URL path)

# Build the request URL from the variables above. The key is taken from
# `api_key` rather than repeated as a second literal, so changing the key in
# one place is enough.
url <- paste0(
  "https://api.nytimes.com/svc/mostpopular/v2/",
  category, "/", period, ".json?api-key=", api_key
)

#JSON Data Retrieval and Parsing

# Send the GET request to the URL built above.
response <- GET(url)

if (status_code(response) == 200) {
  # Decode the body explicitly as UTF-8 so httr does not have to guess the
  # encoding (and does not emit its "No encoding supplied" message).
  data_json <- content(response, as = "text", encoding = "UTF-8")
  # flatten = TRUE asks jsonlite to flatten nested data frames into columns.
  data_list <- fromJSON(data_json, flatten = TRUE)
  # Keep only the columns used by the rest of the analysis.
  keep_cols <- c("title", "abstract", "url", "byline", "published_date")
  articles_df <- data_list$results[, keep_cols]
  print(head(articles_df))
} else {
  # Fail here instead of only printing: every later step needs `articles_df`,
  # so continuing would just produce a less helpful "object not found" error.
  stop(
    "Unable to fetch data from the API (HTTP status ",
    status_code(response), ")",
    call. = FALSE
  )
}

##                                                                    title
## 1                        Hillary Clinton: How Much Dumber Will This Get?
## 2                                      The Leaked Signal Chat, Annotated
## 3                                 Trump Has Had Enough. He Is Not Alone.
## 4 Trump Administration Abruptly Cuts Billions From State Health Services
## 5             Columbia Student Hunted by ICE Sues to Prevent Deportation
## 6                        This Is Why Young People Really Voted for Trump
##                                                                                                                                                                                                          abstract
## 1                                                                                                        The Signal group chat is only the latest in a string of self-inflicted wounds by the new administration.
## 2 The chat’s contents, which were obtained by The Atlantic after its editor in chief was added to the Signal group, provide a revealing look at private conversations between top Trump administration officials.
## 3                                                                                                            This is certainly an administration that reminds us why the framers decided on separation of powers.
## 4                                                                                States have been told that they can no longer use grants that were funding infectious disease management and addiction services.
## 5                      Yunseo Chung, a legal permanent resident who has lived in the U.S. since she was 7, participated in pro-Palestinian demonstrations. Immigration agents visited residences looking for her.
## 6                                                                                                                                                           Feeling empowered is different from numerical growth.
##                                                                                         url
## 1                 https://www.nytimes.com/2025/03/28/opinion/trump-hegseth-signal-chat.html
## 2 https://www.nytimes.com/interactive/2025/03/25/us/signal-group-chat-text-annotations.html
## 3           https://www.nytimes.com/2025/03/24/opinion/trump-columbia-courts-law-firms.html
## 4             https://www.nytimes.com/2025/03/26/health/trump-state-health-grants-cuts.html
## 5   https://www.nytimes.com/2025/03/24/nyregion/columbia-student-ice-suit-yunseo-chung.html
## 6                     https://www.nytimes.com/2025/03/26/opinion/young-maga-trump-vote.html
##                                                      byline published_date
## 1                                        By Hillary Clinton     2025-03-28
## 2                                                               2025-03-25
## 3                         By Gail Collins and Bret Stephens     2025-03-24
## 4 By Apoorva Mandavilli, Margot Sanger-Katz and Jan Hoffman     2025-03-26
## 5                    By Jonah E. Bromwich and Hamed Aleaziz     2025-03-24
## 6                                             By Neil Gross     2025-03-26

# Guard: halt the script unless the API answered with HTTP status 200.
if (!(status_code(response) == 200)) {
  stop("Failed to fetch data: ", status_code(response))
}

Exploring the Data

# Compact overview of the data frame: dimensions, column types, first values.
str(object = articles_df)

## 'data.frame':    20 obs. of  5 variables:
##  $ title         : chr  "Hillary Clinton: How Much Dumber Will This Get?" "The Leaked Signal Chat, Annotated" "Trump Has Had Enough. He Is Not Alone." "Trump Administration Abruptly Cuts Billions From State Health Services" ...
##  $ abstract      : chr  "The Signal group chat is only the latest in a string of self-inflicted wounds by the new administration." "The chat’s contents, which were obtained by The Atlantic after its editor in chief was added to the Signal grou"| __truncated__ "This is certainly an administration that reminds us why the framers decided on separation of powers." "States have been told that they can no longer use grants that were funding infectious disease management and ad"| __truncated__ ...
##  $ url           : chr  "https://www.nytimes.com/2025/03/28/opinion/trump-hegseth-signal-chat.html" "https://www.nytimes.com/interactive/2025/03/25/us/signal-group-chat-text-annotations.html" "https://www.nytimes.com/2025/03/24/opinion/trump-columbia-courts-law-firms.html" "https://www.nytimes.com/2025/03/26/health/trump-state-health-grants-cuts.html" ...
##  $ byline        : chr  "By Hillary Clinton" "" "By Gail Collins and Bret Stephens" "By Apoorva Mandavilli, Margot Sanger-Katz and Jan Hoffman" ...
##  $ published_date: chr  "2025-03-28" "2025-03-25" "2025-03-24" "2025-03-26" ...

# Per-column summary of the data frame.
summary(object = articles_df)

##     title             abstract             url               byline         
##  Length:20          Length:20          Length:20          Length:20         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  published_date    
##  Length:20         
##  Class :character  
##  Mode  :character

Data Visualization: Title Length

# Number of characters in each article title.
articles_df$title_length <- nchar(articles_df$title)

# The (up to) 10 longest article titles, longest first.
# head() is used instead of `[1:10, ]` because indexing rows 1:10 of a data
# frame with fewer than 10 rows pads the result with all-NA rows, which would
# then show up as an "NA" bar in the chart.
top_articles <- head(articles_df[order(-articles_df$title_length), ], 10)

# Horizontal bar chart of title length for the selected articles; titles are
# ordered by their length so the longest ends up at the top after the flip.
ggplot(
  data = top_articles,
  mapping = aes(x = reorder(title, title_length), y = title_length)
) +
  geom_col(fill = "turquoise") +
  coord_flip() + # titles go on the vertical axis so they stay readable
  theme_minimal() +
  labs(
    x = "Article Title",
    y = "Title Length (Characters)",
    title = "Top 10 NYT Most Popular Articles by Title Length"
  )

#Comparing Most Read (Manual Data)

# Manually entered comparison data: ten article titles and a read count for
# each. The two vectors are positional, so entry i of one belongs to entry i
# of the other.
most_read_articles <- local({
  titles <- c(
    "An Interview With Elon Musk’s Daughter",
    "Columbia Student Hunted by ICE Sues",
    "George Foreman Dies at 76",
    "Hillary Clinton: How Much Dumber Will This Get?",
    "If Pete Hegseth Had Any Honor, He Would Resign",
    "Inside Pete Hegseth’s Rocky First Months",
    "It’s Trump vs. the Courts",
    "Now Europe Knows What Trump’s Team Calls It",
    "Online ‘Pedophile Hunters’ Growing More Violent",
    "Signal Chat Leak Angers U.S. Military Pilots"
  )
  view_counts <- c(63, 58, 64, 47, 46, 56, 58, 71, 69, 44)
  data.frame(title = titles, views = view_counts)
})

# Sort the manual data from most to fewest views.
most_read_articles <- arrange(most_read_articles, desc(views))

# Horizontal bar chart of read counts; bars are ordered by views and also
# shaded by views (light blue = fewer, dark blue = more).
ggplot(
  data = most_read_articles,
  mapping = aes(x = reorder(title, views), y = views, fill = views)
) +
  geom_col() +
  coord_flip() + # titles go on the vertical axis so they stay readable
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  theme_minimal() +
  labs(
    x = "Article Title",
    y = "Read Count",
    title = "Top 10 Most Read NYT Articles (Last 30 Days)"
  )

# Looking at the Number of Articles Published Over Time

# Turn the character 'published_date' column into Date values so it can be
# grouped and plotted on a date axis.
articles_df[["published_date"]] <- as.Date(articles_df[["published_date"]])

# One row per publication date, with the number of articles on that date.
articles_per_day <- summarise(
  group_by(articles_df, published_date),
  article_count = n()
)

# Line chart of the per-date article counts computed above.
ggplot(
  data = articles_per_day,
  mapping = aes(x = published_date, y = article_count)
) +
  geom_line(colour = "blue") +
  theme_minimal() +
  labs(
    x = "Date",
    y = "Number of Articles",
    title = "Number of NYT Articles Published Over Time"
  )

#Extending the visualization with a smoothed (LOESS) trend line so the overall trend is easier to see.

# Same line chart as before, with a LOESS smoother drawn on top in dark red
# (no confidence band, since se = FALSE).
ggplot(
  data = articles_per_day,
  mapping = aes(x = published_date, y = article_count)
) +
  geom_line(colour = "blue") +
  geom_smooth(colour = "darkred", se = FALSE, method = "loess") +
  theme_minimal() +
  labs(
    x = "Date",
    y = "Number of Articles",
    title = "Number of NYT Articles Published Over Time (Smoothed)"
  )

## `geom_smooth()` using formula = 'y ~ x'

Export Data

# Save the articles data frame (including the columns added above) as a CSV
# file in the working directory, without a row-name column.
write.csv(x = articles_df, file = "articles_data.csv", row.names = FALSE)

Woodelyne_Web_APIs

2025-03-29