Alexis Mekueko
10/23/2020
library(tidyverse) #loading all library needed for this assignment
#library(openintro)
#head(fastfood)
library(readxl)
#library(data.table)
library(DT)
library(knitr)
library(readr)
#library(plyr)
library(dplyr)
library(stringr)
library(XML)
library(RCurl)
library(jsonlite)
library(httr)
#library(maps)
#library(dice)
# #library(VennDiagram)
# #library(help = "dice")
#library(DBI)
#library(dbplyr)
# library(rstudioapi)
# library(RJDBC)
# library(odbc)
# library(RSQLite)
# #library(rvest)
#library(readtext)
#library(ggpubr)
#library(fitdistrplus)
#library(ggplot2)
#library(moments)
#library(qualityTools)
#library(normalp)
#library(utils)
#library(MASS)
#library(qqplotr)
#library(DATA606)
GitHub link: https://github.com/asmozo24/DATA607_Assignment9.git
Web link:
This week 9 assignment is about practicing web API calls, i.e. how to call a web API from R. Approach: I signed up with The New York Times to get an API key. The available APIs are grouped into several categories. I chose the “Most Popular” API and played around to see what news I could get. I used GET(), one of the httr request functions, to fetch the news most shared on Facebook within the last 7 days. glimpse(NyTimes_newsJ) displays all 20 observations of 22 variables. Next, I want to print out a link to one of the news articles.
# Request the articles most shared on Facebook within the last 7 days from the
# NYT Most Popular API.
#
# The API key is read from the NYT_API_KEY environment variable (e.g. set in
# ~/.Renviron) instead of being hard-coded in the source.
# NOTE(review): the key that was previously committed here should be rotated.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your NYT API key.",
    call. = FALSE
  )
}
NyTimes_newsRaw <- GET(
  "https://api.nytimes.com/svc/mostpopular/v2/shared/7/facebook.json",
  query = list(`api-key` = nyt_api_key)
)
# Fail fast on an HTTP error instead of parsing an error payload further down.
stop_for_status(NyTimes_newsRaw)
# Print the response summary.
# NOTE(review): the printed summary includes the full request URL, API key
# included -- avoid publishing this output as-is.
NyTimes_newsRaw ## Response [https://api.nytimes.com/svc/mostpopular/v2/shared/7/facebook.json?api-key=REDACTED]
## Date: 2020-10-24 19:32
## Status: 200
## Content-Type: application/json; charset=utf-8
## Size: 37.2 kB
## [1] "Response object from GET(), status: 200 means the GET() call was successful"
## $date
## [1] "Sat, 24 Oct 2020 19:32:14 GMT"
##
## $`content-type`
## [1] "application/json; charset=utf-8"
##
## $`transfer-encoding`
## [1] "chunked"
##
## $connection
## [1] "keep-alive"
##
## $`cache-control`
## [1] "max-age=60"
##
## $`x-nyt-most-popular-values`
## [1] "FACEBOOK 7"
##
## $`x-request-id`
## [1] "1603567934299675562"
##
## $`content-encoding`
## [1] "gzip"
##
## $`x-cloud-trace-context`
## [1] "68c42989fc1468230d8509327c4221d8;o=1"
##
## $server
## [1] "Google Frontend"
##
## $`accept-ranges`
## [1] "bytes"
##
## $via
## [1] "1.1 varnish"
##
## $age
## [1] "0"
##
## $`x-served-by`
## [1] "cache-sjc10037-SJC"
##
## $`x-cache`
## [1] "MISS"
##
## $`x-cache-hits`
## [1] "0"
##
## $`x-timer`
## [1] "S1603567934.903202,VS0,VE496"
##
## $vary
## [1] "Accept-Encoding"
##
## $`access-control-allow-origin`
## [1] "*"
##
## $`access-control-allow-headers`
## [1] "Accept, Content-Type, X-Forwarded-For, X-Prototype-Version, X-Requested-With"
##
## $`access-control-expose-headers`
## [1] "Content-Length, X-JSON"
##
## $`access-control-allow-methods`
## [1] "GET, OPTIONS"
##
## attr(,"class")
## [1] "insensitive" "list"
# Extract the response body as a single JSON string. The encoding is passed
# explicitly (the response's Content-Type header declares charset=utf-8) so
# httr does not have to guess it.
# str(NyTimes_newsC)
NyTimes_newsC <- content(NyTimes_newsRaw, as = "text", encoding = "UTF-8")
# Parse the JSON text into an R list; the "results" element comes back as a
# data frame with one row per article.
NyTimes_newsJ <- fromJSON(NyTimes_newsC)
# Overview of the parsed content.
glimpse(NyTimes_newsJ) ## List of 4
## $ status : chr "OK"
## $ copyright : chr "Copyright (c) 2020 The New York Times Company. All Rights Reserved."
## $ num_results: int 20
## $ results :'data.frame': 20 obs. of 22 variables:
## ..$ uri : chr [1:20] "nyt://article/056561cc-ff8f-5ba7-a363-797857695dd0" "nyt://article/7e02bd6e-67a6-572a-b04d-fa3d35d67a00" "nyt://article/c1b25e90-e5d8-536a-b46e-ac92b957b249" "nyt://article/9034a996-a9e1-52fa-b238-934652719145" ...
## ..$ url : chr [1:20] "https://www.nytimes.com/2020/10/21/world/europe/pope-francis-same-sex-civil-unions.html" "https://www.nytimes.com/2020/10/19/us/elections/trump-fauci.html" "https://www.nytimes.com/2020/10/20/us/trump-taxes-china.html" "https://www.nytimes.com/2020/10/17/us/coronavirus-pandemic-fatigue.html" ...
## ..$ id : num [1:20] 1e+14 1e+14 1e+14 1e+14 1e+14 ...
## ..$ asset_id : num [1:20] 1e+14 1e+14 1e+14 1e+14 1e+14 ...
## ..$ source : chr [1:20] "New York Times" "New York Times" "New York Times" "New York Times" ...
## ..$ published_date: chr [1:20] "2020-10-21" "2020-10-19" "2020-10-20" "2020-10-17" ...
## ..$ updated : chr [1:20] "2020-10-23 09:19:15" "2020-10-19 23:36:47" "2020-10-23 02:53:22" "2020-10-23 05:10:59" ...
## ..$ section : chr [1:20] "World" "U.S." "U.S." "U.S." ...
## ..$ subsection : chr [1:20] "Europe" "Elections" "" "" ...
## ..$ nytdsection : chr [1:20] "world" "u.s." "u.s." "u.s." ...
## ..$ adx_keywords : chr [1:20] "Same-Sex Marriage, Civil Unions and Domestic Partnerships;Documentary Films and Programs;Homosexuality and Bise"| __truncated__ "" "Presidential Election of 2020;Trump Tax Returns;Conflicts of Interest;United States Politics and Government;Tru"| __truncated__ "Coronavirus (2019-nCoV);Disease Rates;Contact Tracing (Public Health);Masks;Anxiety and Stress;United States;Europe" ...
## ..$ column : logi [1:20] NA NA NA NA NA NA ...
## ..$ byline : chr [1:20] "By Jason Horowitz" "By Maggie Haberman and Michael Crowley" "By Mike McIntire, Russ Buettner and Susanne Craig" "By Julie Bosman, Sarah Mervosh and Marc Santora" ...
## ..$ type : chr [1:20] "Article" "Article" "Article" "Article" ...
## ..$ title : chr [1:20] "In Shift for Church, Pope Francis Voices Support for Same-Sex Civil Unions" "Trump calls Fauci ‘a disaster’ and says Americans are tired of virus warnings from ‘these idiots.’" "Trump Records Shed New Light on Chinese Business Pursuits" "As the Coronavirus Surges, a New Culprit Emerges: Pandemic Fatigue" ...
## ..$ abstract : chr [1:20] "The comments, shown in a new documentary, are the strongest yet from a pontificate that has taken a more tolera"| __truncated__ "President Trump told his campaign staff that Dr. Anthony S. Fauci was “a disaster” and said, “People are tired "| __truncated__ "As he raises questions about his opponent’s standing with China, President Trump’s taxes reveal details about h"| __truncated__ "Exhaustion and impatience are creating new risks as cases soar in parts of the world. “They have had enough,” o"| __truncated__ ...
## ..$ des_facet :List of 20
## .. ..$ : chr [1:3] "Same-Sex Marriage, Civil Unions and Domestic Partnerships" "Documentary Films and Programs" "Homosexuality and Bisexuality"
## .. ..$ : chr(0)
## .. ..$ : chr [1:4] "Presidential Election of 2020" "Trump Tax Returns" "Conflicts of Interest" "United States Politics and Government"
## .. ..$ : chr [1:5] "Coronavirus (2019-nCoV)" "Disease Rates" "Contact Tracing (Public Health)" "Masks" ...
## .. ..$ : chr [1:3] "Elections, Senate" "Presidential Election of 2020" "United States Politics and Government"
## .. ..$ : chr [1:5] "Jazz" "Classical Music" "Content Type: Personal Profile" "Pianos" ...
## .. ..$ : chr [1:3] "Elections, Senate" "Presidential Election of 2020" "United States Politics and Government"
## .. ..$ : chr [1:5] "Love (Emotion)" "Marriages" "Lying" "Dating and Relationships" ...
## .. ..$ : chr [1:2] "Presidential Election of 2020" "Debates (Political)"
## .. ..$ : chr "Videophones and Videoconferencing"
## .. ..$ : chr [1:4] "Capital Punishment" "Murders, Attempted Murders and Homicides" "Women and Girls" "Kidnapping and Hostages"
## .. ..$ : chr [1:4] "Coronavirus (2019-nCoV)" "Masks" "Presidential Election of 2020" "United States Politics and Government"
## .. ..$ : chr [1:7] "Antitrust Laws and Competition Issues" "Computers and the Internet" "Online Advertising" "Suits and Litigation (Civil)" ...
## .. ..$ : chr(0)
## .. ..$ : chr [1:3] "Wildfires" "Disasters and Emergencies" "Recession and Depression"
## .. ..$ : chr [1:6] "United States Politics and Government" "Coronavirus Aid, Relief, and Economic Security Act (2020)" "Stimulus (Economic)" "United States Economy" ...
## .. ..$ : chr [1:3] "Coronavirus (2019-nCoV)" "Disease Rates" "Shutdowns (Institutional)"
## .. ..$ : chr [1:5] "Prostitution" "Human Trafficking" "Suits and Litigation (Civil)" "Sex Crimes" ...
## .. ..$ : chr [1:5] "Presidential Election of 2020" "Suits and Litigation (Civil)" "Libel and Slander" "Immunity from Prosecution" ...
## .. ..$ : chr [1:3] "Presidential Election of 2020" "News and News Media" "Conflicts of Interest"
## ..$ org_facet :List of 20
## .. ..$ : chr "Roman Catholic Church"
## .. ..$ : chr(0)
## .. ..$ : chr [1:4] "Federal Bureau of Investigation" "Senate" "Trump Organization" "Trump Tower (Manhattan, NY)"
## .. ..$ : chr(0)
## .. ..$ : chr [1:2] "Republican Party" "Senate"
## .. ..$ : chr "ECM (Record Label)"
## .. ..$ : chr [1:2] "Republican Party" "Senate"
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr [1:3] "New Yorker" "CNN" "Zoom Video Communications"
## .. ..$ : chr(0)
## .. ..$ : chr "Democratic Party"
## .. ..$ : chr [1:2] "Google Inc" "Justice Department"
## .. ..$ : chr(0)
## .. ..$ : chr [1:2] "Cal Fire" "Federal Emergency Management Agency"
## .. ..$ : chr [1:3] "Senate" "Republican Party" "House of Representatives"
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr [1:2] "Burisma Holdings Ltd" "New York Post"
## ..$ per_facet :List of 20
## .. ..$ : chr [1:2] "Francis" "Afineevsky, Evgeny (1972- )"
## .. ..$ : chr(0)
## .. ..$ : chr [1:4] "Trump, Donald J" "Biden, Joseph R Jr" "Biden, Hunter" "Ruffin, Phil"
## .. ..$ : chr(0)
## .. ..$ : chr "Trump, Donald J"
## .. ..$ : chr "Jarrett, Keith"
## .. ..$ : chr [1:2] "Sasse, Benjamin E" "Trump, Donald J"
## .. ..$ : chr(0)
## .. ..$ : chr [1:3] "Biden, Joseph R Jr" "Trump, Donald J" "Welker, Kristen (1976- )"
## .. ..$ : chr "Toobin, Jeffrey"
## .. ..$ : chr [1:2] "Montgomery, Lisa M" "Stinnett, Bobbie Jo"
## .. ..$ : chr [1:2] "Trump, Donald J" "Biden, Joseph R Jr"
## .. ..$ : chr "Barr, William P"
## .. ..$ : chr(0)
## .. ..$ : chr [1:2] "Newsom, Gavin" "Trump, Donald J"
## .. ..$ : chr [1:4] "McConnell, Mitch" "Pelosi, Nancy" "Mnuchin, Steven T" "Trump, Donald J"
## .. ..$ : chr(0)
## .. ..$ : chr [1:3] "Giuffre, Virginia Roberts" "Maxwell, Ghislaine" "Epstein, Jeffrey E (1953- )"
## .. ..$ : chr [1:3] "Carroll, E Jean" "Trump, Donald J" "Barr, William P"
## .. ..$ : chr [1:7] "Bannon, Stephen K" "Biden, Hunter" "Biden, Joseph R Jr" "Giuliani, Rudolph W" ...
## ..$ geo_facet :List of 20
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr [1:2] "United States" "China"
## .. ..$ : chr [1:2] "United States" "Europe"
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr "Nebraska"
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr "Terre Haute (Ind)"
## .. ..$ : chr "United States"
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## .. ..$ : chr [1:6] "California" "Fresno (Calif)" "Mendocino (Calif)" "San Bernardino (Calif)" ...
## .. ..$ : chr(0)
## .. ..$ : chr [1:6] "United States" "Ann Arbor (Mich)" "Michigan" "Kansas" ...
## .. ..$ : chr [1:2] "Manhattan (NYC)" "Palm Beach (Fla)"
## .. ..$ : chr(0)
## .. ..$ : chr(0)
## ..$ media :List of 20
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 0 obs. of 0 variables
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## .. ..$ :'data.frame': 1 obs. of 6 variables:
## ..$ eta_id : int [1:20] 0 0 0 0 0 0 0 0 0 0 ...
#names(NyTimes_newsJ)
# URL of the first article in the "most shared on Facebook" results.
sampl1 <- NyTimes_newsJ$results$url[1]
#cat("The most popular article from New York Times shared on facebook is: ", sampl1)

# Most viewed articles on The New York Times.
# NOTE(review): the "1" in the path is presumably the period in days, by
# analogy with the 7-day query above -- confirm against the API docs.
#
# The API key is read from the NYT_API_KEY environment variable (e.g. set in
# ~/.Renviron) instead of being hard-coded in the source.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your NYT API key.",
    call. = FALSE
  )
}
NyTimes_newsRaw2 <- GET(
  "https://api.nytimes.com/svc/mostpopular/v2/viewed/1.json",
  query = list(`api-key` = nyt_api_key)
)
# Fail fast on an HTTP error instead of parsing an error payload below.
stop_for_status(NyTimes_newsRaw2)
#print("Response object from GET(), status: 200 means the GET() call was successful")
#headers(NyTimes_newsRaw2)
# Extract the body as text; UTF-8 is assumed here, matching the charset the
# API declared for the previous response.
NyTimes_newsC2 <- content(NyTimes_newsRaw2, as = "text", encoding = "UTF-8")
#str(NyTimes_newsC2)
# Parse the JSON text and keep the URL of the first article in the results.
NyTimes_newsJ2 <- fromJSON(NyTimes_newsC2)
sampl2 <- NyTimes_newsJ2$results$url[1]
#cat("The most viewed article on The New York Times is :")
The most popular article from The New York Times shared on Facebook: [click here]
The most viewed article on The New York Times: [click here]
#Conclusion: The New York Times API is fairly easy to use for retrieving articles published on their website. However, I think the returned data is pretty nested, and working with it requires a better understanding of how The New York Times structures its content.