# Build the base request URL for the NYT Article Search API.
#
# NOTE(review): a literal API key is committed in this file. Prefer setting the
# NYTIMES_API_KEY environment variable (and rotating the key below); the
# literal is kept only as a fallback so existing runs keep working.
nytimes <- Sys.getenv(
  "NYTIMES_API_KEY",
  unset = "05be843d4019437796937f7185fbf3bc"
)

# Search parameters. Dates are YYYYMMDD strings.
term <- "global+warming" # Need to use + to string together separate words
begin_date <- "20170101"
end_date <- "20180901"

# Use https so the API key is not sent in clear text. paste0() already joins
# without a separator, so no `sep` argument is needed.
baseurl <- paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", term,
  "&begin_date=", begin_date,
  "&end_date=", end_date,
  "&facet_filter=true&api-key=", nytimes
)
The API returns 10 results per request, so jsonlite is used to collect every page that meets the search criteria. This analysis loops through all of the pages instead of stopping at the first 10 results. The method for determining the number of pages (one page for every 10 hits) was taken from a Storybench article. Pages are numbered starting at 0, not 1, so the last page to query is one less than the number of pages; for example, 100 hits would mean querying pages 0 through 9.
# Fetch the first page once to learn how many articles match the query.
initialQuery <- fromJSON(baseurl)
totalHits <- initialQuery$response$meta$hits[1]
stopifnot(is.numeric(totalHits), length(totalHits) == 1L, !is.na(totalHits))

# Results come 10 per page and pages are numbered from 0, so the last page
# index is ceiling(hits / 10) - 1. ceiling() keeps a final, partially filled
# page that round() would drop (e.g. 1884 hits -> last page 188, not 187).
maxPages <- ceiling(totalHits / 10) - 1

# Preallocate one slot per page instead of growing the list in the loop.
pages <- vector("list", maxPages + 1)

# seq_len() yields an empty sequence when there are no hits, so the loop is
# skipped instead of requesting pages 0 and -1 as `0:maxPages` would.
for (i in seq_len(maxPages + 1) - 1) {
  nytSearch <- fromJSON(paste0(baseurl, "&page=", i), flatten = TRUE) %>%
    data.frame()
  message("Retrieving page", i)
  pages[[i + 1]] <- nytSearch
  # Pause between queries so the API does not time out.
  Sys.sleep(1)
}
## Retrieving page0
## Retrieving page1
## Retrieving page2
## Retrieving page3
## Retrieving page4
## Retrieving page5
## Retrieving page6
## Retrieving page7
## Retrieving page8
## Retrieving page9
## Retrieving page10
## Retrieving page11
## Retrieving page12
## Retrieving page13
## Retrieving page14
## Retrieving page15
## Retrieving page16
## Retrieving page17
## Retrieving page18
## Retrieving page19
## Retrieving page20
## Retrieving page21
## Retrieving page22
## Retrieving page23
## Retrieving page24
## Retrieving page25
## Retrieving page26
## Retrieving page27
## Retrieving page28
## Retrieving page29
## Retrieving page30
## Retrieving page31
## Retrieving page32
## Retrieving page33
## Retrieving page34
## Retrieving page35
## Retrieving page36
## Retrieving page37
## Retrieving page38
## Retrieving page39
## Retrieving page40
## Retrieving page41
## Retrieving page42
## Retrieving page43
## Retrieving page44
## Retrieving page45
## Retrieving page46
## Retrieving page47
## Retrieving page48
## Retrieving page49
## Retrieving page50
## Retrieving page51
## Retrieving page52
## Retrieving page53
## Retrieving page54
## Retrieving page55
## Retrieving page56
## Retrieving page57
## Retrieving page58
## Retrieving page59
## Retrieving page60
## Retrieving page61
## Retrieving page62
## Retrieving page63
## Retrieving page64
## Retrieving page65
## Retrieving page66
## Retrieving page67
## Retrieving page68
## Retrieving page69
## Retrieving page70
## Retrieving page71
## Retrieving page72
## Retrieving page73
## Retrieving page74
## Retrieving page75
## Retrieving page76
## Retrieving page77
## Retrieving page78
## Retrieving page79
## Retrieving page80
## Retrieving page81
## Retrieving page82
## Retrieving page83
## Retrieving page84
## Retrieving page85
## Retrieving page86
## Retrieving page87
## Retrieving page88
## Retrieving page89
## Retrieving page90
## Retrieving page91
## Retrieving page92
## Retrieving page93
## Retrieving page94
## Retrieving page95
## Retrieving page96
## Retrieving page97
## Retrieving page98
## Retrieving page99
## Retrieving page100
## Retrieving page101
## Retrieving page102
## Retrieving page103
## Retrieving page104
## Retrieving page105
## Retrieving page106
## Retrieving page107
## Retrieving page108
## Retrieving page109
## Retrieving page110
## Retrieving page111
## Retrieving page112
## Retrieving page113
## Retrieving page114
## Retrieving page115
## Retrieving page116
## Retrieving page117
## Retrieving page118
## Retrieving page119
## Retrieving page120
## Retrieving page121
## Retrieving page122
## Retrieving page123
## Retrieving page124
## Retrieving page125
## Retrieving page126
## Retrieving page127
## Retrieving page128
## Retrieving page129
## Retrieving page130
## Retrieving page131
## Retrieving page132
## Retrieving page133
## Retrieving page134
## Retrieving page135
## Retrieving page136
## Retrieving page137
## Retrieving page138
## Retrieving page139
## Retrieving page140
## Retrieving page141
## Retrieving page142
## Retrieving page143
## Retrieving page144
## Retrieving page145
## Retrieving page146
## Retrieving page147
## Retrieving page148
## Retrieving page149
## Retrieving page150
## Retrieving page151
## Retrieving page152
## Retrieving page153
## Retrieving page154
## Retrieving page155
## Retrieving page156
## Retrieving page157
## Retrieving page158
## Retrieving page159
## Retrieving page160
## Retrieving page161
## Retrieving page162
## Retrieving page163
## Retrieving page164
## Retrieving page165
## Retrieving page166
## Retrieving page167
## Retrieving page168
## Retrieving page169
## Retrieving page170
## Retrieving page171
## Retrieving page172
## Retrieving page173
## Retrieving page174
## Retrieving page175
## Retrieving page176
## Retrieving page177
## Retrieving page178
## Retrieving page179
## Retrieving page180
## Retrieving page181
## Retrieving page182
## Retrieving page183
## Retrieving page184
## Retrieving page185
## Retrieving page186
## Retrieving page187
## Retrieving page188
# Stack the per-page data frames into a single table of retrieved articles.
allNYTSearch <- rbind_pages(pages)

# Percentage of articles per type of material, drawn as horizontal bars.
allNYTSearch %>%
  group_by(response.docs.type_of_material) %>%
  summarise(count = n()) %>%
  mutate(percent = (count / sum(count)) * 100) %>%
  ggplot(
    aes(
      x = response.docs.type_of_material,
      y = percent,
      fill = response.docs.type_of_material
    )
  ) +
  geom_bar(stat = "identity") +
  coord_flip()