Step 1: Load libraries
library(httr)
library(jsonlite)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
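The masking messages above are normal: dplyr deliberately overrides filter(), lag(), and a few base set operations. If that noise is unwanted in the knitted report, the packages can be attached quietly; a minimal sketch:
# Optional: attach quietly to keep startup messages out of the report
suppressPackageStartupMessages(library(dplyr))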
Step 2: Set the New York Times API key
source("config.R") # The key lives in config.R, which defines NYT_API_KEY; substitute your own key there
api_key <- NYT_API_KEY
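If you would rather not keep a config.R at all, an environment variable works too. A minimal sketch, assuming you have added an NYT_API_KEY entry to ~/.Renviron or exported it in your shell:
# Alternative: read the key from an environment variable
api_key <- Sys.getenv("NYT_API_KEY")
if (api_key == "") stop("NYT_API_KEY is not set") # Sys.getenv() returns "" when unset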
Step 3: Make the API request
# Request the Technology top stories feed
response <- GET(
  "https://api.nytimes.com/svc/topstories/v2/technology.json",
  query = list("api-key" = api_key)
)
cat("Status:", response$status_code, "\n")
## Status: 200
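Printing the status code is fine for a demo, but a script should abort on a non-2xx response before trying to parse the body. httr ships a helper for exactly this; one line is enough:
# Raise an informative R error on any 4xx/5xx response
stop_for_status(response)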
Step 4: Parse the JSON into a data frame
data <- fromJSON(content(response, "text", encoding = "UTF-8")) # explicit encoding avoids the "No encoding supplied" warning
cat("Top level names:", names(data), "\n")
## Top level names: status copyright section last_updated num_results results
cat("Has results:", !is.null(data$results), "\n")
## Has results: TRUE
if (!is.null(data$results)) {
  articles <- data$results
  cat("Results type:", class(articles), "\n")
  cat("Results dimensions:", dim(articles), "\n")
  cat("Column names:", names(articles), "\n")
  if (nrow(articles) > 0) {
    cat("\nFirst article title:", articles$title[1], "\n")
    cat("First article section:", articles$section[1], "\n")
    cat("First article date:", articles$published_date[1], "\n")
  }
}
## Results type: data.frame
## Results dimensions: 29 19
## Column names: section subsection title abstract url uri byline item_type updated_date created_date published_date material_type_facet kicker des_facet org_facet per_facet geo_facet multimedia short_url
##
## First article title: Meta Layoffs Included Employees Who Monitored Risks to User Privacy
## First article section: technology
## First article date: 2025-10-23T19:03:46-04:00
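Note that jsonlite returns nested JSON objects as nested data frames and JSON arrays as list columns (des_facet, multimedia, and friends above). If you want the nested data frames unnested into ordinary top-level columns, fromJSON() accepts a flatten argument; a minimal sketch:
# flatten = TRUE unnests nested data frames into top-level columns;
# true list columns such as des_facet stay lists either way
data_flat <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE)
articles_flat <- data_flat$results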
Step 5: Build a data frame with the main columns
# Keep only the main columns; parse published_date into Date
df <- data.frame(
  title = articles$title,
  section = articles$section,
  abstract = articles$abstract,
  url = articles$url,
  byline = articles$byline,
  published_date = as.Date(articles$published_date),
  item_type = articles$item_type,
  stringsAsFactors = FALSE
)
cat("✅ Created simplified DataFrame with", ncol(df), "columns\n")
## ✅ Created simplified DataFrame with 7 columns
cat("📊 Dimensions:", nrow(df), "rows ×", ncol(df), "columns\n\n")
## 📊 Dimensions: 29 rows × 7 columns
# Show structure
str(df)
## 'data.frame': 29 obs. of 7 variables:
## $ title : chr "Meta Layoffs Included Employees Who Monitored Risks to User Privacy" "Trump Pardons Founder of the Crypto Exchange Binance" "Google’s Quantum Computer Makes a Big Technical Leap" "Reddit Accuses ‘Data Scraper’ Companies of Stealing Its Information" ...
## $ section : chr "technology" "technology" "technology" "technology" ...
## $ abstract : chr "While the company announced job cuts in artificial intelligence, it also expanded plans to replace privacy and "| __truncated__ "Changpeng Zhao, the richest man in crypto, had admitted to money-laundering violations that allowed terrorists "| __truncated__ "Designed to accelerate advances in medicine and other fields, the tech giant’s quantum algorithm runs 13,000 ti"| __truncated__ "In a lawsuit, Reddit pulled back the curtain on an ecosystem of start-ups that scrape Google’s search results a"| __truncated__ ...
## $ url : chr "https://www.nytimes.com/2025/10/23/technology/meta-layoffs-user-privacy.html" "https://www.nytimes.com/2025/10/23/technology/trump-pardons-cz-binance.html" "https://www.nytimes.com/2025/10/22/technology/googles-quantum-computer-leap.html" "https://www.nytimes.com/2025/10/22/technology/reddit-data-scrapers-perplexity-theft.html" ...
## $ byline : chr "By Mike Isaac and Eli Tan" "By David Yaffe-Bellany and Kenneth P. Vogel" "By Cade Metz" "By Mike Isaac" ...
## $ published_date: Date, format: "2025-10-23" "2025-10-23" ...
## $ item_type : chr "Article" "Article" "Article" "Article" ...
# Save
write.csv(df, "nyt_technology_data.csv", row.names = FALSE)
cat("💾 Saved to: nyt_technology_data.csv\n")
## 💾 Saved to: nyt_technology_data.csv
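CSV is handy for sharing, but it drops column classes: published_date will come back as character after read.csv(). If the file is only destined for later R sessions, RDS preserves the types; a small sketch:
# RDS keeps column classes (e.g. Date) intact across sessions
saveRDS(df, "nyt_technology_data.rds")
# restore later with: df <- readRDS("nyt_technology_data.rds")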
Step 6: Query the data frame
# Summary statistics
cat("\n=== SUMMARY STATISTICS ===\n")
##
## === SUMMARY STATISTICS ===
cat("Total articles:", nrow(df), "\n")
## Total articles: 29
cat("Date range:", as.character(min(df$published_date)), "to",
as.character(max(df$published_date)), "\n")
## Date range: 2025-10-16 to 2025-10-24
cat("Unique sections:", length(unique(df$section)), "\n")
## Unique sections: 7
# Articles by section
cat("\n=== ARTICLES BY SECTION ===\n")
##
## === ARTICLES BY SECTION ===
section_counts <- table(df$section)
print(section_counts)
##
## arts business health magazine podcasts science technology
## 1 8 1 2 2 2 13
cat("Most frequent section:", names(which.max(section_counts)), "\n")
## Most frequent section: technology
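Since dplyr is already attached, the same tally reads naturally in pipe style; an equivalent sketch:
# dplyr equivalent of table(df$section), sorted by count
df %>%
  count(section, sort = TRUE)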
# Title length analysis
cat("\n=== TITLE ANALYSIS ===\n")
##
## === TITLE ANALYSIS ===
df$title_length <- nchar(df$title)
cat("Average title length:", round(mean(df$title_length)), "characters\n")
## Average title length: 62 characters
cat("Longest title:", max(df$title_length), "characters\n")
## Longest title: 101 characters
cat("Shortest title:", min(df$title_length), "characters\n")
## Shortest title: 38 characters
# Find articles containing specific keywords
cat("\n=== ARTICLES ABOUT AI ===\n")
##
## === ARTICLES ABOUT AI ===
ai_articles <- df[grepl("AI|artificial intelligence", df$title, ignore.case = TRUE), ]
cat("Found", nrow(ai_articles), "articles about AI:\n")
## Found 4 articles about AI:
kable(ai_articles[, c("title", "published_date")])
|    | title                                                                                                 | published_date |
|:---|:------------------------------------------------------------------------------------------------------|:---------------|
| 13 | OpenAI Unveils Web Browser Built for Artificial Intelligence                                            | 2025-10-21     |
| 14 | G.M. Raises Profit Forecast on Strong Demand and Lower Tariff Costs                                     | 2025-10-21     |
| 25 | California Regulates A.I. Companions + OpenAI Investigates Its Critics + The Hard Fork Review of Slop   | 2025-10-17     |
| 29 | China’s Rare Earth Restrictions Aim to Beat U.S. at Its Own Game                                        | 2025-10-16     |
# Latest articles
cat("\n=== LATEST 5 ARTICLES ===\n")
##
## === LATEST 5 ARTICLES ===
latest_articles <- df[order(df$published_date, decreasing = TRUE), ]
kable(head(latest_articles[, c("title", "published_date", "section")], 5))
|   | title                                                                               | published_date | section    |
|:--|:-------------------------------------------------------------------------------------|:---------------|:-----------|
| 5 | Celebrities Fight Sora + Amazon’s Secret Automation Plans + ChatGPT Gets a Browser    | 2025-10-24     | podcasts   |
| 6 | A Teen in Love With a Chatbot Killed Himself. Can the Chatbot Be Held Responsible?    | 2025-10-24     | magazine   |
| 1 | Meta Layoffs Included Employees Who Monitored Risks to User Privacy                   | 2025-10-23     | technology |
| 2 | Trump Pardons Founder of the Crypto Exchange Binance                                  | 2025-10-23     | technology |
| 7 | Ford’s Profit Jumps on Strong Sales but Company Lowers its Outlook                    | 2025-10-23     | business   |
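The same "latest five" query can be written in pipe style with the already-loaded dplyr; an equivalent sketch (slice_head() needs dplyr 1.0+):
# dplyr equivalent: sort by date, keep three display columns, take the top 5
df %>%
  arrange(desc(published_date)) %>%
  select(title, published_date, section) %>%
  slice_head(n = 5) %>%
  kable()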
# Check for missing values
cat("\n=== MISSING VALUES ===\n")
##
## === MISSING VALUES ===
missing_summary <- sapply(df, function(x) sum(is.na(x)))
print(missing_summary)
## title section abstract url byline
## 0 0 0 0 0
## published_date item_type title_length
## 0 0 0
# Check for empty strings
cat("\n=== EMPTY STRINGS ===\n")
##
## === EMPTY STRINGS ===
empty_strings <- sapply(df, function(x) {
  # comparing a Date column to "" yields NA, so only test character columns
  if (is.character(x)) sum(x == "" | is.na(x)) else sum(is.na(x))
})
print(empty_strings)
## title section abstract url byline
## 0 0 0 0 0
## published_date item_type title_length
## 0 0 0
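With dplyr, both checks collapse into single pipelines; an equivalent sketch for the NA counts:
# NA count for every column in one summarise() call
df %>%
  summarise(across(everything(), ~ sum(is.na(.x))))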