For this assignment, we'll be using the NYTimes API key to obtain data on the top emailed NYTimes articles (for time intervals of 7 and 30 days). We will then compare that data to Facebook engagement to answer our primary question: How do emailed articles compare with Facebook engagement?
Here we'll import the data into a list:
method = c("emailed", "shared")
vec.days = c("7", "30")
lst.data = list()
for (i in vec.days) {
json = paste0('https://api.nytimes.com/svc/mostpopular/v2/shared/',i, "/facebook.json?api-key=",api_key)
lst.data[[paste0("facebook.", i)]] = jsonlite::fromJSON(json)
json = paste0('https://api.nytimes.com/svc/mostpopular/v2/emailed/',i, ".json?api-key=",api_key)
lst.data[[paste0("emailed.", i)]] = jsonlite::fromJSON(json)
}
The results portion of the JSON file will be of interest here. We'll use another loop to convert the data to data frames:
lst.data.tables = list()
for (i in 1:length(lst.data)) {
lst.data.tables[[names(lst.data)[i]]] = lst.data[[i]]$results %>% as.data.table()
}
And we can preview them here:
lst.data.tables$facebook.7[, 1:6] %>% head(2) %>% kable() %>% kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| uri | url | id | asset_id | source | published_date |
|---|---|---|---|---|---|
| nyt://article/6db68e52-ca3f-51be-83d6-672aa1b985aa | https://www.nytimes.com/2022/03/27/business/media/chris-wallace-cnn-fox-news.html | 1e+14 | 1e+14 | New York Times | 2022-03-27 |
| nyt://article/b3771ccd-aa28-56df-af1f-ffe2bdd16cdf | https://www.nytimes.com/2022/03/30/well/live/ba2-omicron-covid.html | 1e+14 | 1e+14 | New York Times | 2022-03-30 |
lst.data.tables$facebook.30[, 1:6] %>% head(2) %>% kable() %>% kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| uri | url | id | asset_id | source | published_date |
|---|---|---|---|---|---|
| nyt://article/be93acde-db33-560e-8704-0b319d98986a | https://www.nytimes.com/2022/03/23/nyregion/trump-investigation-felony-resignation-pomerantz.html | 1e+14 | 1e+14 | New York Times | 2022-03-23 |
| nyt://article/04ff0b39-a00d-5d18-9bdc-831f1d5a056d | https://www.nytimes.com/2022/03/11/us/west-point-cadets-fentanyl-overdose.html | 1e+14 | 1e+14 | New York Times | 2022-03-11 |
lst.data.tables$emailed.7[, 1:6] %>% head(2) %>% kable() %>% kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| uri | url | id | asset_id | source | published_date |
|---|---|---|---|---|---|
| nyt://article/4a9e2250-78c0-50f0-9735-19c0de1c3862 | https://www.nytimes.com/2022/03/29/opinion/ukraine-war-putin.html | 1e+14 | 1e+14 | New York Times | 2022-03-29 |
| nyt://article/593b8fa1-a0ae-5707-9eb5-a56de5a77fc0 | https://www.nytimes.com/2022/03/29/opinion/how-to-defeat-putin-and-save-the-planet.html | 1e+14 | 1e+14 | New York Times | 2022-03-29 |
lst.data.tables$emailed.30[, 1:6] %>% head(2) %>% kable() %>% kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| uri | url | id | asset_id | source | published_date |
|---|---|---|---|---|---|
| nyt://article/d8a34056-bf95-572f-a46f-f106600a198f | https://www.nytimes.com/2022/02/17/opinion/liberalism-democracy-russia-ukraine.html | 1e+14 | 1e+14 | New York Times | 2022-02-17 |
| nyt://article/38478cfb-a092-5902-8925-31cd44d8c7f4 | https://www.nytimes.com/2022/03/11/dining/induction-cooking.html | 1e+14 | 1e+14 | New York Times | 2022-03-11 |
We can start with some basic word clouds:
data(stop_words)
fb.words <- lst.data.tables$emailed.7%>% unnest_tokens(word, title) %>% select(word)
dt.fb.7 = fb.words[,.N, by = "word"][order(N, decreasing= T)]
dt.fb.7 = dt.fb.7[!(word %in% stop_words$word),]
wordcloud(words = dt.fb.7$word, freq = dt.fb.7$N, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35, colors = brewer.pal(8, "Dark2"))
data(stop_words)
fb.words <- lst.data.tables$emailed.30%>% unnest_tokens(word, title) %>% select(word)
dt.fb.7 = fb.words[,.N, by = "word"][order(N, decreasing= T)]
dt.fb.7 = dt.fb.7[!(word %in% stop_words$word),]
wordcloud(words = dt.fb.7$word, freq = dt.fb.7$N, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35, colors = brewer.pal(8, "Dark2"))
The next step will be directly comparing article categories between mediums (Facebook/Email) between time periods ## Facebook vs Email
dt.30days = merge(lst.data.tables$emailed.30[,.N, by = .(section)], lst.data.tables$facebook.30[,.N, by = .(section)], by = "section", all.x = T, all.y = T)
dt.7days = merge(lst.data.tables$emailed.7[,.N, by = .(section)], lst.data.tables$facebook.7[,.N, by = .(section)], by = "section", all.x = T, all.y = T)
dt.alluvial = merge(dt.30days, dt.7days, by = "section")
setnames(dt.alluvial, c("N.x.x", "N.y.x", "N.x.y", "N.y.y"), c("Email_30", "FB_30", "Email_7", "FB_7"))
mlt.alluvial = melt(dt.alluvial)
mlt.alluvial[is.na(value), value := 0]
ggplot(data = mlt.alluvial[grepl("30", variable)],
aes(x = variable, stratum = section, alluvium = section,
y = value, label = section)) +
geom_alluvium(aes(fill = section)) +
geom_stratum() + geom_text(stat = "stratum") +
labs(x = "Medium/Days", y = "Frequency", title = "(30 Days) Titles") + scale_fill_manual(values = brewer.pal(8, "Dark2")) + theme_minimal()
ggplot(data = mlt.alluvial[grepl("7", variable)],
aes(x = variable, stratum = section, alluvium = section,
y = value, label = section)) +
geom_alluvium(aes(fill = section)) +
geom_stratum() + geom_text(stat = "stratum") +
labs(x = "Medium/Days", y = "Frequency", title = "(7 Days) Titles") + scale_fill_manual(values = brewer.pal(8, "Dark2")) + theme_minimal()