Assignment – Web APIs
For this assignment I will use the New York Times Top Stories API, focusing on the World section.
Libraries
# install packages if you don't have them
# install.packages("jsonlite")
# load libraries
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library("httr")
library("stringr")
library("ggplot2")
library("dplyr")
Connection to API
# API key
# Prefer the NYT_API_KEY environment variable so the credential does not have
# to live in the script; the embedded key is kept only as a fallback.
# NOTE(review): a key committed to source should be rotated and removed.
apikey <- Sys.getenv("NYT_API_KEY", unset = "x3mOLM3nRnwjYVOuE89tloE62oC8NofC")
# Build the request URL
# paste0() is needed here: paste() separates its arguments with a space, which
# would put a stray space between "api-key=" and the key itself.
theURL <- paste0("https://api.nytimes.com/svc/topstories/v2/world.json?api-key=", apikey)
topstories <- GET(theURL)
# Fail early with an informative error if the request was not successful
stop_for_status(topstories)
# Get status code
topstories$status_code
## [1] 200
## Length Class Mode
## url 1 -none- character
## status_code 1 -none- numeric
## headers 19 insensitive list
## all_headers 1 -none- list
## cookies 7 data.frame list
## content 107041 -none- raw
## date 1 POSIXct numeric
## times 6 -none- numeric
## request 7 request list
## handle 1 curl_handle externalptr
# Extract the response body as a single text string. The encoding is named
# explicitly so httr does not have to guess (and emit a "No encoding supplied"
# message).
top_stories <- content(topstories, as = "text", encoding = "UTF-8")
Data Frame Conversion
# Option 1: parse the JSON text and coerce the entire parsed response
# (top-level metadata fields plus the results table) into one data frame.
top_storiesdf <- as.data.frame(fromJSON(top_stories))
# List the resulting column names
colnames(top_storiesdf)
## [1] "status" "copyright"
## [3] "section" "last_updated"
## [5] "num_results" "results.section"
## [7] "results.subsection" "results.title"
## [9] "results.abstract" "results.url"
## [11] "results.uri" "results.byline"
## [13] "results.item_type" "results.updated_date"
## [15] "results.created_date" "results.published_date"
## [17] "results.material_type_facet" "results.kicker"
## [19] "results.des_facet" "results.org_facet"
## [21] "results.per_facet" "results.geo_facet"
## [23] "results.multimedia" "results.short_url"
# Option 2: parse with flatten = TRUE and keep only the `results` element,
# so the data frame holds just the per-article fields.
top_storiesdf2 <- data.frame(
  fromJSON(top_stories, flatten = TRUE)$results,
  stringsAsFactors = FALSE
)
# List the resulting column names
colnames(top_storiesdf2)
## [1] "section" "subsection" "title"
## [4] "abstract" "url" "uri"
## [7] "byline" "item_type" "updated_date"
## [10] "created_date" "published_date" "material_type_facet"
## [13] "kicker" "des_facet" "org_facet"
## [16] "per_facet" "geo_facet" "multimedia"
## [19] "short_url"
# Rename columns
# Map each API field name to its display name. Renaming by name (rather than
# by position) keeps the labels correct even if the API returns the fields in
# a different order or adds new ones; unknown fields keep their original name.
column_labels <- c(
  section = "Section",
  subsection = "Subsection",
  title = "Title",
  abstract = "Abstract",
  url = "URL",
  uri = "URI",
  byline = "Byline",
  item_type = "Item_Type",
  updated_date = "Updated_Date",
  created_date = "Created_Date",
  published_date = "Published_Date",
  material_type_facet = "Material_Type_Facet",
  kicker = "Kicker",
  des_facet = "Des_Facet",
  org_facet = "Org_Facet",
  per_facet = "Per_Facet",
  geo_facet = "Geo_Facet",
  multimedia = "Multimedia",
  short_url = "Short_Url"
)
known_fields <- names(top_storiesdf2) %in% names(column_labels)
names(top_storiesdf2)[known_fields] <-
  unname(column_labels[names(top_storiesdf2)[known_fields]])
# Drop columns not needed
# Columns are dropped by name instead of by index (previously 12:18), so a
# change in column order cannot cause the wrong fields to be removed.
unused_columns <- c(
  "Material_Type_Facet", "Kicker", "Des_Facet", "Org_Facet",
  "Per_Facet", "Geo_Facet", "Multimedia"
)
top_storiesdf3 <- top_storiesdf2[
  , !(names(top_storiesdf2) %in% unused_columns),
  drop = FALSE
]
Data Analysis
# Number of articles per section, largest count first
Section <- arrange(
  summarise(group_by(top_storiesdf3, Section), num = n()),
  desc(num)
)
# Show the first rows of the count table
head(Section)
## # A tibble: 6 × 2
## Section num
## <chr> <int>
## 1 world 32
## 2 briefing 1
## 3 climate 1
## 4 insider 1
## 5 style 1
## 6 technology 1
# Number of articles per subsection, largest count first
Subsection <- arrange(
  summarise(group_by(top_storiesdf3, Subsection), num = n()),
  desc(num)
)
# Show the first rows of the count table
head(Subsection)
## # A tibble: 6 × 2
## Subsection num
## <chr> <int>
## 1 "" 13
## 2 "asia" 8
## 3 "europe" 6
## 4 "americas" 4
## 5 "africa" 2
## 6 "middleeast" 2
Plots
The bar plot below shows that most of the articles belong to the world section; each of the remaining sections, such as briefing and climate, contributes only a single article.
# Bar chart of the number of articles in each section
ggplot(data = top_storiesdf3, mapping = aes(x = Section)) +
  geom_bar()

In the next bar plot, the highest count belongs to articles with an empty (unlabelled) subsection, followed by Asia and Europe.
# Bar chart of the number of articles in each subsection
ggplot(data = top_storiesdf3, mapping = aes(x = Subsection)) +
  geom_bar()
