library(httr)
library(jsonlite)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(utils)
Looking at the most popular (most viewed) articles from the last 7 days.
# Request settings for the NYT "most popular" endpoint.
#
# The key is taken from the NYT_API_KEY environment variable when that is set
# and non-empty; otherwise the literal below is used so the script keeps
# working exactly as before.
# NOTE(review): a key committed to source should be treated as leaked --
# consider rotating it and removing the fallback.
api_key <- Sys.getenv("NYT_API_KEY", unset = "")
if (!nzchar(api_key)) {
  api_key <- "81r2GPHEk7BZUIsANaNjDZVMyOWwoLXT"
}
category <- "viewed" # path segment selecting which popularity list to query
period <- 7 # path segment for the look-back window (7 days of articles)

# Build the URL from the variables above, so the key is defined in one place
# and changing `api_key` actually changes the request.
url <- paste0(
  "https://api.nytimes.com/svc/mostpopular/v2/", category, "/", period,
  ".json?api-key=", api_key
)
# JSON data retrieval and parsing
#
# Sends the GET request to `url`, and on HTTP 200 parses the JSON body and
# keeps the five article columns in `articles_df`. Any other status aborts
# the script, because `articles_df` would otherwise not be created here.
response <- GET(url)
if (status_code(response) == 200) {
  # Decode the body explicitly as UTF-8 so httr does not have to guess the
  # encoding (the titles and abstracts contain non-ASCII punctuation).
  data_json <- content(response, as = "text", encoding = "UTF-8")
  # flatten = TRUE collapses nested JSON objects into plain columns.
  data_list <- fromJSON(data_json, flatten = TRUE)
  articles_df <- data_list$results[
    ,
    c("title", "abstract", "url", "byline", "published_date")
  ]
  print(head(articles_df))
} else {
  # Fail loudly and report the status code instead of printing and carrying on.
  stop(
    "Unable to fetch data from the API (HTTP status ",
    status_code(response), ")",
    call. = FALSE
  )
}
## title
## 1 Hillary Clinton: How Much Dumber Will This Get?
## 2 The Leaked Signal Chat, Annotated
## 3 Trump Has Had Enough. He Is Not Alone.
## 4 Trump Administration Abruptly Cuts Billions From State Health Services
## 5 Columbia Student Hunted by ICE Sues to Prevent Deportation
## 6 This Is Why Young People Really Voted for Trump
## abstract
## 1 The Signal group chat is only the latest in a string of self-inflicted wounds by the new administration.
## 2 The chat’s contents, which were obtained by The Atlantic after its editor in chief was added to the Signal group, provide a revealing look at private conversations between top Trump administration officials.
## 3 This is certainly an administration that reminds us why the framers decided on separation of powers.
## 4 States have been told that they can no longer use grants that were funding infectious disease management and addiction services.
## 5 Yunseo Chung, a legal permanent resident who has lived in the U.S. since she was 7, participated in pro-Palestinian demonstrations. Immigration agents visited residences looking for her.
## 6 Feeling empowered is different from numerical growth.
## url
## 1 https://www.nytimes.com/2025/03/28/opinion/trump-hegseth-signal-chat.html
## 2 https://www.nytimes.com/interactive/2025/03/25/us/signal-group-chat-text-annotations.html
## 3 https://www.nytimes.com/2025/03/24/opinion/trump-columbia-courts-law-firms.html
## 4 https://www.nytimes.com/2025/03/26/health/trump-state-health-grants-cuts.html
## 5 https://www.nytimes.com/2025/03/24/nyregion/columbia-student-ice-suit-yunseo-chung.html
## 6 https://www.nytimes.com/2025/03/26/opinion/young-maga-trump-vote.html
## byline published_date
## 1 By Hillary Clinton 2025-03-28
## 2 2025-03-25
## 3 By Gail Collins and Bret Stephens 2025-03-24
## 4 By Apoorva Mandavilli, Margot Sanger-Katz and Jan Hoffman 2025-03-26
## 5 By Jonah E. Bromwich and Hamed Aleaziz 2025-03-24
## 6 By Neil Gross 2025-03-26
# Abort when the request did not come back with HTTP 200, reporting the
# status code that was received.
if (!(status_code(response) == 200)) {
  stop("Failed to fetch data: ", status_code(response))
}

# Compact overview of the columns and their types.
str(object = articles_df)
## 'data.frame': 20 obs. of 5 variables:
## $ title : chr "Hillary Clinton: How Much Dumber Will This Get?" "The Leaked Signal Chat, Annotated" "Trump Has Had Enough. He Is Not Alone." "Trump Administration Abruptly Cuts Billions From State Health Services" ...
## $ abstract : chr "The Signal group chat is only the latest in a string of self-inflicted wounds by the new administration." "The chat’s contents, which were obtained by The Atlantic after its editor in chief was added to the Signal grou"| __truncated__ "This is certainly an administration that reminds us why the framers decided on separation of powers." "States have been told that they can no longer use grants that were funding infectious disease management and ad"| __truncated__ ...
## $ url : chr "https://www.nytimes.com/2025/03/28/opinion/trump-hegseth-signal-chat.html" "https://www.nytimes.com/interactive/2025/03/25/us/signal-group-chat-text-annotations.html" "https://www.nytimes.com/2025/03/24/opinion/trump-columbia-courts-law-firms.html" "https://www.nytimes.com/2025/03/26/health/trump-state-health-grants-cuts.html" ...
## $ byline : chr "By Hillary Clinton" "" "By Gail Collins and Bret Stephens" "By Apoorva Mandavilli, Margot Sanger-Katz and Jan Hoffman" ...
## $ published_date: chr "2025-03-28" "2025-03-25" "2025-03-24" "2025-03-26" ...
# Per-column summary of the article table.
summary(object = articles_df)
## title abstract url byline
## Length:20 Length:20 Length:20 Length:20
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## published_date
## Length:20
## Class :character
## Mode :character
# Title length, in characters, for every article.
articles_df$title_length <- nchar(articles_df$title)

# The top 10 longest article titles.
# head() takes at most 10 rows; indexing with `[1:10, ]` would pad the result
# with all-NA rows whenever fewer than 10 articles were returned.
top_articles <- head(articles_df[order(-articles_df$title_length), ], 10)

# Horizontal bar chart: one bar per title, ordered by title length.
ggplot(top_articles, aes(x = reorder(title, title_length), y = title_length)) +
  geom_col(fill = "turquoise") + # geom_col() is geom_bar(stat = "identity")
  coord_flip() + # Flip the chart for better readability
  labs(
    title = "Top 10 NYT Most Popular Articles by Title Length",
    x = "Article Title",
    y = "Title Length (Characters)"
  ) +
  theme_minimal()
# Comparing the most-read articles (data entered manually)
#
# The table is built and sorted by descending view count in a single step.
most_read_articles <- arrange(
  data.frame(
    title = c(
      "An Interview With Elon Musk’s Daughter",
      "Columbia Student Hunted by ICE Sues",
      "George Foreman Dies at 76",
      "Hillary Clinton: How Much Dumber Will This Get?",
      "If Pete Hegseth Had Any Honor, He Would Resign",
      "Inside Pete Hegseth’s Rocky First Months",
      "It’s Trump vs. the Courts",
      "Now Europe Knows What Trump’s Team Calls It",
      "Online ‘Pedophile Hunters’ Growing More Violent",
      "Signal Chat Leak Angers U.S. Military Pilots"
    ),
    views = c(63, 58, 64, 47, 46, 56, 58, 71, 69, 44)
  ),
  desc(views)
)

# Horizontal bars ordered by view count; bar colour also encodes the count.
ggplot(
  data = most_read_articles,
  mapping = aes(x = reorder(title, views), y = views, fill = views)
) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  coord_flip() + # horizontal bars keep the long titles readable
  labs(
    title = "Top 10 Most Read NYT Articles (Last 30 Days)",
    x = "Article Title",
    y = "Read Count"
  ) +
  theme_minimal()
# Number of articles by publication date

# Turn the 'published_date' strings into Date objects.
articles_df[["published_date"]] <- as.Date(articles_df[["published_date"]])

# One row per publication date, with the number of articles on that date.
articles_per_day <- summarise(
  group_by(articles_df, published_date),
  article_count = n()
)

# Line chart of the daily article counts.
ggplot(
  data = articles_per_day,
  mapping = aes(x = published_date, y = article_count)
) +
  geom_line(color = "blue") +
  labs(
    title = "Number of NYT Articles Published Over Time",
    x = "Date",
    y = "Number of Articles"
  ) +
  theme_minimal()
# Same line chart with a loess smoothing curve overlaid (no confidence band),
# so the overall trend is easier to see.
ggplot(
  data = articles_per_day,
  mapping = aes(x = published_date, y = article_count)
) +
  geom_line(color = "blue") +
  geom_smooth(color = "darkred", se = FALSE, method = "loess") +
  labs(
    title = "Number of NYT Articles Published Over Time (Smoothed)",
    x = "Date",
    y = "Number of Articles"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Save the article table to a CSV file in the working directory, without
# the row-name column.
write.csv(x = articles_df, file = "articles_data.csv", row.names = FALSE)