In this assignment, I was asked to connect to the NYT API and import data into an R dataframe. I decided to import data from three different NYT API sections: the live news feed, the most viewed articles, and the most shared articles (via Facebook).
After signing up for an NYT API key, the first step was to load the libraries and download the data from each API endpoint.
library(jsonlite)
library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks jsonlite::flatten()
## x dplyr::lag() masks stats::lag()
key <- "2rgJQycgUYqUYaZOWbMGtfjtK7ZzUkNh"
live_url <- str_c("https://api.nytimes.com/svc/news/v3/content/all/all.json?api-key=",key)
most_viewed_url <- str_c("https://api.nytimes.com/svc/mostpopular/v2/viewed/1.json?api-key=",key)
most_shared_url <- str_c("https://api.nytimes.com/svc/mostpopular/v2/shared/1/facebook.json?api-key=",key)
live_import <- fromJSON(live_url)
nyt_live_feed <- live_import$results
mv_import <- fromJSON(most_viewed_url)
nyt_most_viewed <- mv_import$results
ms_import <- fromJSON(most_shared_url)
nyt_most_shared <- ms_import$results
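One side note: the API key is hard-coded above for simplicity. A slightly safer pattern (just a sketch, assuming the key has been saved in a hypothetical NYT_API_KEY environment variable) is to read it at run time instead:
key <- Sys.getenv("NYT_API_KEY") # returns "" if the variable is not set
if (key == "") stop("Please set the NYT_API_KEY environment variable")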
Next, I compared the three downloads to see whether there was any overlap in the articles. It turns out that the only overlap was between the most viewed and most shared articles.
live_and_viewed <- inner_join(nyt_live_feed, nyt_most_viewed, by = "url")
live_and_shared <- inner_join(nyt_live_feed, nyt_most_shared, by = "url")
shared_and_viewed <- inner_join(nyt_most_shared, nyt_most_viewed, by = "url")
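Since the joins above only show their contents when printed, a quick way to check the overlap is to count the rows each join returns (a small sketch; the comments reflect the result described above):
nrow(live_and_viewed)   # 0 - no overlap between the live feed and most viewed
nrow(live_and_shared)   # 0 - no overlap between the live feed and most shared
nrow(shared_and_viewed) # > 0 - the only overlap, between most shared and most viewed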
I was curious about which keywords appeared in these two datasets, so I did some data wrangling to pull the most frequently used keywords from each download. I then plotted the results to see which keywords were most prevalent in each dataset.
adx_keywords <- str_split(nyt_most_shared$adx_keywords,";")
phrases_shared <- c()
for(i in 1:length(adx_keywords)){
  for(j in 1:length(adx_keywords[[i]])){
    phrases_shared <- rbind(phrases_shared, adx_keywords[[i]][j])
  }
}
keywords_shared <- data.frame(phrases_shared) %>%
  count(phrases_shared) %>%
  filter(n > 2)
names(keywords_shared) <- c("Phrases", "Counts")
ggplot(keywords_shared, aes(x = reorder(Phrases, Counts), y = Counts)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(x = "Keyword",
       y = "Occurrences") +
  coord_flip()
adx_keywords <- str_split(nyt_most_viewed$adx_keywords,";")
phrases_viewed <- c()
for(i in 1:length(adx_keywords)){
  for(j in 1:length(adx_keywords[[i]])){
    phrases_viewed <- rbind(phrases_viewed, adx_keywords[[i]][j])
  }
}
keywords_viewed <- data.frame(phrases_viewed) %>%
  count(phrases_viewed) %>%
  filter(n > 2)
names(keywords_viewed) <- c("Phrases", "Counts")
ggplot(keywords_viewed, aes(x = reorder(Phrases, Counts), y = Counts)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(x = "Keyword",
       y = "Occurrences") +
  coord_flip()
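As an aside, the looped rbind() approach above works, but the same counts can be produced without explicit loops by unlisting the split keywords directly. Here is a sketch for the most shared articles (keywords_shared_alt is just an illustrative name):
keywords_shared_alt <- tibble(Phrases = unlist(str_split(nyt_most_shared$adx_keywords, ";"))) %>%
  count(Phrases) %>%
  filter(n > 2) %>%
  rename(Counts = n)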
Finally, I wanted to combine these datasets and show how the keywords overlapped between the two. This took a little data cleaning, but it was a quick process.
all_keywords <- full_join(keywords_shared, keywords_viewed, by = "Phrases")
## Warning: Column `Phrases` joining factors with different levels, coercing to
## character vector
names(all_keywords) <- c("Phrases","Shared","Viewed")
all_keywords$Shared[is.na(all_keywords$Shared)] = 0
all_keywords$Viewed[is.na(all_keywords$Viewed)] = 0
all_keywords$Total <- all_keywords$Shared + all_keywords$Viewed
all_keywords <- pivot_longer(all_keywords,
                             c("Shared","Viewed","Total"),
                             names_to = "Type",
                             values_to = "Count")
ggplot(all_keywords, aes(x = reorder(Phrases, Count), y = Count,
                         fill = Type)) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_brewer(palette = "Dark2") +
  labs(x = "Keyword",
       y = "Occurrences",
       fill = "Type") +
  coord_flip()
At this point (03/23/20), coronavirus is clearly the topic of interest (that isn’t a surprise). The only surprise (and maybe I’m just naive here) is how completely the coronavirus news dominates: it has something to do with EVERY. SINGLE. ONE. of the top 12 keywords in both the most viewed and most shared NYT articles. Combined with the steep drop-off in counts after the coronavirus-related keywords, there is one clear topic of interest. This is amazing in my mind.