Step 1: Import libraries

library(httr)
library(jsonlite)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)

Step 2: Load your New York Times API key

# Load the New York Times API key. config.R is expected to define NYT_API_KEY;
# to run this yourself, create a config.R that assigns your own key to it.
source("config.R")
api_key <- NYT_API_KEY

# Fail early with a clear message instead of sending a request without a
# usable key.
if (!is.character(api_key) || length(api_key) != 1L || is.na(api_key) ||
    !nzchar(api_key)) {
  stop("NYT_API_KEY in config.R must be a single non-empty string.",
       call. = FALSE)
}

Step 3: Make the API request to fetch the data

# Request the Top Stories feed for the technology section; the API key is
# sent as the `api-key` query parameter.
response <- GET(
  "https://api.nytimes.com/svc/topstories/v2/technology.json",
  query = list("api-key" = api_key)
)

cat("Status:", status_code(response), "\n")
## Status: 200

# Stop on an HTTP error (4xx/5xx) rather than parsing an error body as if it
# were article data.
stop_for_status(response)

Step 4: Parse the JSON and create a data frame

# Decode the response body explicitly as UTF-8 so httr does not have to guess
# the encoding (and report that it is defaulting), then parse the JSON text.
data <- fromJSON(content(response, as = "text", encoding = "UTF-8"))
cat("Top level names:", names(data), "\n")
## Top level names: status copyright section last_updated num_results results
cat("Has results:", !is.null(data$results), "\n")
## Has results: TRUE
# Everything after this point needs a non-empty data frame of articles.
# jsonlite parses an empty `results` array to an empty list, for which nrow()
# is NULL, so check the type explicitly before counting rows and stop with a
# clear message when there is nothing to analyse.
if (!is.data.frame(data$results) || nrow(data$results) == 0) {
  stop("The NYT response contains no articles in `results`.", call. = FALSE)
}

articles <- data$results
cat("Results type:", class(articles), "\n")
cat("Results dimensions:", dim(articles), "\n")
cat("Column names:", names(articles), "\n")

# Preview the first article as a quick sanity check of the parsed fields.
cat("\nFirst article title:", articles$title[1], "\n")
cat("First article section:", articles$section[1], "\n")
cat("First article date:", articles$published_date[1], "\n")
## Results type: data.frame 
## Results dimensions: 29 19 
## Column names: section subsection title abstract url uri byline item_type updated_date created_date published_date material_type_facet kicker des_facet org_facet per_facet geo_facet multimedia short_url 
## 
## First article title: Meta Layoffs Included Employees Who Monitored Risks to User Privacy 
## First article section: technology 
## First article date: 2025-10-23T19:03:46-04:00

Step 5: Build a data frame with only the main columns

# Keep only the main descriptive columns, in display order, and store the
# publication timestamp as a Date (calendar day only).
main_cols <- c(
  "title", "section", "abstract", "url", "byline",
  "published_date", "item_type"
)
df <- articles[main_cols]
df$published_date <- as.Date(df$published_date)

# Report the shape of the simplified data frame, then print its structure.
n_cols <- ncol(df)
n_rows <- nrow(df)
cat("✅ Created simplified DataFrame with", n_cols, "columns\n")
## ✅ Created simplified DataFrame with 7 columns
cat("📊 Dimensions:", n_rows, "rows ×", n_cols, "columns\n\n")
## 📊 Dimensions: 29 rows × 7 columns
# Column-by-column overview (type and first values)
str(df)
## 'data.frame':    29 obs. of  7 variables:
##  $ title         : chr  "Meta Layoffs Included Employees Who Monitored Risks to User Privacy" "Trump Pardons Founder of the Crypto Exchange Binance" "Google’s Quantum Computer Makes a Big Technical Leap" "Reddit Accuses ‘Data Scraper’ Companies of Stealing Its Information" ...
##  $ section       : chr  "technology" "technology" "technology" "technology" ...
##  $ abstract      : chr  "While the company announced job cuts in artificial intelligence, it also expanded plans to replace privacy and "| __truncated__ "Changpeng Zhao, the richest man in crypto, had admitted to money-laundering violations that allowed terrorists "| __truncated__ "Designed to accelerate advances in medicine and other fields, the tech giant’s quantum algorithm runs 13,000 ti"| __truncated__ "In a lawsuit, Reddit pulled back the curtain on an ecosystem of start-ups that scrape Google’s search results a"| __truncated__ ...
##  $ url           : chr  "https://www.nytimes.com/2025/10/23/technology/meta-layoffs-user-privacy.html" "https://www.nytimes.com/2025/10/23/technology/trump-pardons-cz-binance.html" "https://www.nytimes.com/2025/10/22/technology/googles-quantum-computer-leap.html" "https://www.nytimes.com/2025/10/22/technology/reddit-data-scrapers-perplexity-theft.html" ...
##  $ byline        : chr  "By Mike Isaac and Eli Tan" "By David Yaffe-Bellany and Kenneth P. Vogel" "By Cade Metz" "By Mike Isaac" ...
##  $ published_date: Date, format: "2025-10-23" "2025-10-23" ...
##  $ item_type     : chr  "Article" "Article" "Article" "Article" ...
# Write the simplified data frame to a CSV file (path relative to the working
# directory), without row names, then confirm the file name.
write.csv(x = df, file = "nyt_technology_data.csv", row.names = FALSE)
cat("💾 Saved to: nyt_technology_data.csv\n")
## 💾 Saved to: nyt_technology_data.csv

Step 6: Query the data frame

# Headline numbers: article count, span of publication dates, and the number
# of distinct sections.
cat("\n=== SUMMARY STATISTICS ===\n")
## 
## === SUMMARY STATISTICS ===
n_articles <- nrow(df)
cat("Total articles:", n_articles, "\n")
## Total articles: 29
# range() gives the earliest and latest date in one pass
date_span <- format(range(df$published_date))
cat("Date range:", date_span[1], "to", date_span[2], "\n")
## Date range: 2025-10-16 to 2025-10-24
n_sections <- length(unique(df$section))
cat("Unique sections:", n_sections, "\n")
## Unique sections: 7
# Tabulate how many articles fall in each section and name the most common one.
cat("\n=== ARTICLES BY SECTION ===\n")
## 
## === ARTICLES BY SECTION ===
section_counts <- table(df$section)
print(section_counts)
## 
##       arts   business     health   magazine   podcasts    science technology 
##          1          8          1          2          2          2         13
# which.max() returns the position of the first maximum; look up its label
top_section <- names(section_counts)[which.max(section_counts)]
cat("Most frequent section:", top_section, "\n")
## Most frequent section: technology
# Measure each title in characters (stored as a new column) and summarise the
# average, longest, and shortest lengths.
cat("\n=== TITLE ANALYSIS ===\n")
## 
## === TITLE ANALYSIS ===
df$title_length <- nchar(df$title)
title_lengths <- df$title_length
cat("Average title length:", round(mean(title_lengths)), "characters\n")
## Average title length: 62 characters
# range() returns c(shortest, longest)
length_span <- range(title_lengths)
cat("Longest title:", length_span[2], "characters\n")
## Longest title: 101 characters
cat("Shortest title:", length_span[1], "characters\n")
## Shortest title: 38 characters
# Find articles whose title mentions AI.
# The abbreviation is matched only as a standalone token, with or without
# periods ("AI", "A.I."), so it does not fire on letters inside ordinary words
# such as "Raises" or "Aim". "OpenAI" is listed explicitly because the
# word-boundary rule would otherwise exclude it.
cat("\n=== ARTICLES ABOUT AI ===\n")
## 
## === ARTICLES ABOUT AI ===
ai_pattern <- "\\bA\\.?I\\b|\\bOpenAI\\b|artificial intelligence"
is_ai_title <- grepl(ai_pattern, df$title, ignore.case = TRUE, perl = TRUE)
ai_articles <- df[is_ai_title, ]
cat("Found", nrow(ai_articles), "articles about AI:\n")
## Found 4 articles about AI:
# NOTE(review): the count above and the table below were rendered with the
# earlier, looser pattern; re-knit the document to refresh them.
kable(ai_articles[, c("title", "published_date")])
title published_date
13 OpenAI Unveils Web Browser Built for Artificial Intelligence 2025-10-21
14 G.M. Raises Profit Forecast on Strong Demand and Lower Tariff Costs 2025-10-21
25 California Regulates A.I. Companions + OpenAI Investigates Its Critics + The Hard Fork Review of Slop 2025-10-17
29 China’s Rare Earth Restrictions Aim to Beat U.S. at Its Own Game 2025-10-16
# Show the five most recently published articles, newest first.
cat("\n=== LATEST 5 ARTICLES ===\n")
## 
## === LATEST 5 ARTICLES ===
newest_first <- order(df$published_date, decreasing = TRUE)
latest_articles <- df[newest_first, ]
latest_cols <- c("title", "published_date", "section")
kable(head(latest_articles[, latest_cols], n = 5))
title published_date section
5 Celebrities Fight Sora + Amazon’s Secret Automation Plans + ChatGPT Gets a Browser 2025-10-24 podcasts
6 A Teen in Love With a Chatbot Killed Himself. Can the Chatbot Be Held Responsible? 2025-10-24 magazine
1 Meta Layoffs Included Employees Who Monitored Risks to User Privacy 2025-10-23 technology
2 Trump Pardons Founder of the Crypto Exchange Binance 2025-10-23 technology
7 Ford’s Profit Jumps on Strong Sales but Company Lowers its Outlook 2025-10-23 business
# Find articles whose title mentions AI.
# NOTE(review): this chunk repeats the AI keyword search that appears earlier
# in the document.
# The abbreviation is matched only as a standalone token, with or without
# periods ("AI", "A.I."), so it does not fire on letters inside ordinary words
# such as "Raises" or "Aim". "OpenAI" is listed explicitly because the
# word-boundary rule would otherwise exclude it.
cat("\n=== ARTICLES ABOUT AI ===\n")
## 
## === ARTICLES ABOUT AI ===
ai_pattern <- "\\bA\\.?I\\b|\\bOpenAI\\b|artificial intelligence"
is_ai_title <- grepl(ai_pattern, df$title, ignore.case = TRUE, perl = TRUE)
ai_articles <- df[is_ai_title, ]
cat("Found", nrow(ai_articles), "articles about AI:\n")
## Found 4 articles about AI:
# NOTE(review): the count above and the table below were rendered with the
# earlier, looser pattern; re-knit the document to refresh them.
kable(ai_articles[, c("title", "published_date")])
title published_date
13 OpenAI Unveils Web Browser Built for Artificial Intelligence 2025-10-21
14 G.M. Raises Profit Forecast on Strong Demand and Lower Tariff Costs 2025-10-21
25 California Regulates A.I. Companions + OpenAI Investigates Its Critics + The Hard Fork Review of Slop 2025-10-17
29 China’s Rare Earth Restrictions Aim to Beat U.S. at Its Own Game 2025-10-16
# Show the five most recently published articles, newest first.
# NOTE(review): this chunk repeats the "latest articles" listing that appears
# earlier in the document.
cat("\n=== LATEST 5 ARTICLES ===\n")
## 
## === LATEST 5 ARTICLES ===
newest_first <- order(df$published_date, decreasing = TRUE)
latest_articles <- df[newest_first, ]
latest_cols <- c("title", "published_date", "section")
kable(head(latest_articles[, latest_cols], n = 5))
title published_date section
5 Celebrities Fight Sora + Amazon’s Secret Automation Plans + ChatGPT Gets a Browser 2025-10-24 podcasts
6 A Teen in Love With a Chatbot Killed Himself. Can the Chatbot Be Held Responsible? 2025-10-24 magazine
1 Meta Layoffs Included Employees Who Monitored Risks to User Privacy 2025-10-23 technology
2 Trump Pardons Founder of the Crypto Exchange Binance 2025-10-23 technology
7 Ford’s Profit Jumps on Strong Sales but Company Lowers its Outlook 2025-10-23 business
# Count NA values per column. vapply() pins the result to exactly one integer
# per column, so the summary is always a named integer vector regardless of
# the shape of the input.
cat("\n=== MISSING VALUES ===\n")
## 
## === MISSING VALUES ===
missing_summary <- vapply(df, function(x) sum(is.na(x)), integer(1))
print(missing_summary)
##          title        section       abstract            url         byline 
##              0              0              0              0              0 
## published_date      item_type   title_length 
##              0              0              0
# Count blank values per column: either NA or an empty string.
# Each column is converted to character before the emptiness test, because
# comparing the Date column published_date with "" directly yields NA and
# would turn that column's count into NA.
cat("\n=== EMPTY STRINGS ===\n")
## 
## === EMPTY STRINGS ===
empty_strings <- vapply(
  df,
  function(x) sum(is.na(x) | !nzchar(as.character(x))),
  integer(1)
)
print(empty_strings)
##          title        section       abstract            url         byline 
##              0              0              0              0              0 
## published_date      item_type   title_length 
##              0              0              0