Load Packages

library(httr)
library(jsonlite)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(utils)

Exploring the Data

# Print the structure of articles_df: its dimensions plus each column's
# type and first few values.
# NOTE(review): articles_df is not created anywhere in the visible source;
# presumably it is built earlier from an API response (httr and jsonlite are
# loaded above) -- confirm before running this section on its own.
str(articles_df)
## 'data.frame':    20 obs. of  5 variables:
##  $ title         : chr  "Hillary Clinton: How Much Dumber Will This Get?" "The Leaked Signal Chat, Annotated" "Trump Has Had Enough. He Is Not Alone." "Trump Administration Abruptly Cuts Billions From State Health Services" ...
##  $ abstract      : chr  "The Signal group chat is only the latest in a string of self-inflicted wounds by the new administration." "The chat’s contents, which were obtained by The Atlantic after its editor in chief was added to the Signal grou"| __truncated__ "This is certainly an administration that reminds us why the framers decided on separation of powers." "States have been told that they can no longer use grants that were funding infectious disease management and ad"| __truncated__ ...
##  $ url           : chr  "https://www.nytimes.com/2025/03/28/opinion/trump-hegseth-signal-chat.html" "https://www.nytimes.com/interactive/2025/03/25/us/signal-group-chat-text-annotations.html" "https://www.nytimes.com/2025/03/24/opinion/trump-columbia-courts-law-firms.html" "https://www.nytimes.com/2025/03/26/health/trump-state-health-grants-cuts.html" ...
##  $ byline        : chr  "By Hillary Clinton" "" "By Gail Collins and Bret Stephens" "By Apoorva Mandavilli, Margot Sanger-Katz and Jan Hoffman" ...
##  $ published_date: chr  "2025-03-28" "2025-03-25" "2025-03-24" "2025-03-26" ...
# Print a per-column summary of articles_df (for character columns this is
# just length, class and mode).
summary(articles_df)
##     title             abstract             url               byline         
##  Length:20          Length:20          Length:20          Length:20         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  published_date    
##  Length:20         
##  Class :character  
##  Mode  :character

Data Visualization: Title Length

# Number of characters in each headline.
articles_df$title_length <- nchar(articles_df$title)

# The (up to) 10 longest article titles, longest first.
# head() stops at the number of rows actually available, whereas indexing
# with `[1:10, ]` pads the result with all-NA rows whenever articles_df has
# fewer than 10 rows, which would add a spurious NA bar to the chart.
top_articles <- head(articles_df[order(-articles_df$title_length), ], 10)

# Horizontal bar chart of those titles, ordered by length.
# geom_col() is the direct form of geom_bar(stat = "identity").
ggplot(top_articles, aes(x = reorder(title, title_length), y = title_length)) +
  geom_col(fill = "turquoise") +
  coord_flip() +  # Flip so the long titles are readable on the axis
  labs(title = "Top 10 NYT Most Popular Articles by Title Length",
       x = "Article Title",
       y = "Title Length (Characters)") +
  theme_minimal()

# Comparing the Most-Read Articles (Manual Data)

# Hand-entered headlines with their view counts, sorted from most to least
# viewed in a single pipeline.
most_read_articles <- data.frame(
  title = c(
    "An Interview With Elon Musk’s Daughter",
    "Columbia Student Hunted by ICE Sues",
    "George Foreman Dies at 76",
    "Hillary Clinton: How Much Dumber Will This Get?",
    "If Pete Hegseth Had Any Honor, He Would Resign",
    "Inside Pete Hegseth’s Rocky First Months",
    "It’s Trump vs. the Courts",
    "Now Europe Knows What Trump’s Team Calls It",
    "Online ‘Pedophile Hunters’ Growing More Violent",
    "Signal Chat Leak Angers U.S. Military Pilots"
  ),
  views = c(63, 58, 64, 47, 46, 56, 58, 71, 69, 44)
) %>%
  arrange(desc(views))

# Horizontal bars ordered by view count; the fill shade also encodes views,
# running from light blue (fewest) to dark blue (most).
ggplot(
  data = most_read_articles,
  mapping = aes(x = reorder(title, views), y = views, fill = views)
) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  coord_flip() +
  labs(
    title = "Top 10 Most Read NYT Articles (Last 30 Days)",
    x = "Article Title",
    y = "Read Count"
  ) +
  theme_minimal()

# Looking at the Number of Articles Published Over Time

# Turn the 'published_date' column into Date values so it can be used as a
# time axis.
articles_df[["published_date"]] <- as.Date(articles_df[["published_date"]])

# Tally articles per publication date: one row per date, with the tally
# stored in 'article_count'.
articles_per_day <- summarise(
  group_by(articles_df, published_date),
  article_count = n()
)

# Line chart of the daily article counts.
ggplot(
  data = articles_per_day,
  mapping = aes(x = published_date, y = article_count)
) +
  geom_line(color = "blue") +
  theme_minimal() +
  labs(
    title = "Number of NYT Articles Published Over Time",
    x = "Date",
    y = "Number of Articles"
  )

# Extending the visualization with a smoothed (LOESS) trend line so the overall trend is easier to see.

# Same daily-count line chart, overlaid with a LOESS curve drawn without its
# confidence band (se = FALSE).
ggplot(
  data = articles_per_day,
  mapping = aes(x = published_date, y = article_count)
) +
  geom_line(color = "blue") +
  geom_smooth(method = "loess", se = FALSE, color = "darkred") +
  theme_minimal() +
  labs(
    title = "Number of NYT Articles Published Over Time (Smoothed)",
    x = "Date",
    y = "Number of Articles"
  )
## `geom_smooth()` using formula = 'y ~ x'

Export Data

# Write articles_df to "articles_data.csv" (relative to the working
# directory) without a row-name column. By this point the data frame also
# holds the derived title_length column and published_date as Date values.
write.csv(
  x = articles_df,
  file = "articles_data.csv",
  row.names = FALSE
)