library(RCurl)
library(kableExtra) # manipulate table styles
library(stringr)
library(httr)
library(rvest)
url <- "https://www.elections.ny.gov/2016ElectionResults.html"
page <- read_html(url)
links <- html_attr(html_nodes(page, "a"), "href")
xls_links <- links[str_detect(links,"2016.+\\.xls")]
names <- unlist(str_extract_all(xls_links, "2016.+\\.xls"))
names
## [1] "2016/General/2016President.xls" "2016/General/2016Congress.xls"
## [3] "2016/general/2016USSenator.xls" "2016/General/2016Senate.xls"
## [5] "2016/General/2016Assembly.xls" "2016/General/2016SupremeCourt.xls"
# Split the extracted paths on "/" to isolate the file names (a tighter regex is used for the PDF links below)
split_name <- str_split(names,"/")
split_name
## [[1]]
## [1] "2016" "General" "2016President.xls"
##
## [[2]]
## [1] "2016" "General" "2016Congress.xls"
##
## [[3]]
## [1] "2016" "general" "2016USSenator.xls"
##
## [[4]]
## [1] "2016" "General" "2016Senate.xls"
##
## [[5]]
## [1] "2016" "General" "2016Assembly.xls"
##
## [[6]]
## [1] "2016" "General" "2016SupremeCourt.xls"
fnames <- sapply(split_name, function(v) v[3])
fnames
## [1] "2016President.xls" "2016Congress.xls" "2016USSenator.xls"
## [4] "2016Senate.xls" "2016Assembly.xls" "2016SupremeCourt.xls"
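The same names can also be pulled out in a single step, either with basename() or with a regex anchored to the end of the path (the approach used for the PDF links further down). A quick equivalent sketch, with fnames_alt as an illustrative name:
# One-step alternatives to the split-and-index approach above
fnames_alt <- basename(xls_links)                      # drop everything up to the last "/"
fnames_alt2 <- str_extract(xls_links, "[^/]+\\.xls$")  # same idea with an end-anchored regex
identical(fnames, fnames_alt)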
download_xls <- function(links, fnames, baseurl) {
  n <- length(links)
  for (i in 1:n) {
    # Build the full URL from the site root and the scraped relative path
    fileurl <- str_c(baseurl, links[i])
    # .xls files are binary, so write in binary mode
    download.file(fileurl, destfile = fnames[i], mode = "wb")
    Sys.sleep(1)  # pause between requests to be polite to the server
  }
}
download_xls(xls_links, fnames, baseurl = "https://www.elections.ny.gov")
Some of the links start with “/NYSBOE/elections/2016/” and others with “/NYSBOE/download/law/”. We are interested in the former, and these links are divided into two categories, “General” and “Primary”.
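A quick, optional check of how the 2016 election links split between the two categories (the site mixes “General” and “general”, so the match is made case-insensitive):
# Tally the 2016 election links by folder name
cats <- str_extract(links[str_detect(links, "NYSBOE/elections/2016")],
                    regex("general|primary", ignore_case = TRUE))
table(tolower(cats))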
pdf_links <- links[str_detect(links,"NYSBOE/elections/2016.+\\.pdf")]
pdf_links
## [1] "/NYSBOE/elections/2016/General/2016President.pdf"
## [2] "/NYSBOE/elections/2016/General/2016Congress.pdf"
## [3] "/NYSBOE/elections/2016/General/2016USSenator.pdf"
## [4] "/NYSBOE/elections/2016/General/2016Senate.pdf"
## [5] "/NYSBOE/elections/2016/General/2016Assembly.pdf"
## [6] "/NYSBOE/elections/2016/General/2016SupremeCourt.pdf"
## [7] "/NYSBOE/elections/2016/General/2016GeneralElectionCandidateList.pdf"
## [8] "/NYSBOE/elections/2016/Primary/FederalPrimaryResults.pdf"
## [9] "/NYSBOE/elections/2016/Primary/FilingsRecdFedPrimary2016.pdf"
## [10] "/NYSBOE/elections/2016/Primary/DemocraticPresPrimaryResults.pdf"
## [11] "/NYSBOE/elections/2016/Primary/RepublicanPresPrimaryResults.pdf"
## [12] "/NYSBOE/elections/2016/Primary/2016StateLocalPrimaryElectionResults.pdf"
# Notice the difference between the above regex and the one below
pdf_names <- unlist(str_extract_all(pdf_links, "[^/]*\\.pdf$"))
pdf_names
## [1] "2016President.pdf"
## [2] "2016Congress.pdf"
## [3] "2016USSenator.pdf"
## [4] "2016Senate.pdf"
## [5] "2016Assembly.pdf"
## [6] "2016SupremeCourt.pdf"
## [7] "2016GeneralElectionCandidateList.pdf"
## [8] "FederalPrimaryResults.pdf"
## [9] "FilingsRecdFedPrimary2016.pdf"
## [10] "DemocraticPresPrimaryResults.pdf"
## [11] "RepublicanPresPrimaryResults.pdf"
## [12] "2016StateLocalPrimaryElectionResults.pdf"
download_pdf <- function(fnames, baseurl1, baseurl2) {
  n <- length(fnames)
  for (i in 1:n) {
    content <- raw(0)  # reset so a failed request cannot reuse the previous file's bytes
    # Try the "General" folder first
    fileurl <- str_c(baseurl1, fnames[i])
    try(content <- getBinaryURL(fileurl), silent = TRUE)
    if (length(content) > 1) {
      writeBin(content, fnames[i])
    } else {
      # Fall back to the "Primary" folder
      fileurl <- str_c(baseurl2, fnames[i])
      content <- getBinaryURL(fileurl)
      writeBin(content, fnames[i])
    }
  }
}
baseurl1 <- "https://www.elections.ny.gov/NYSBOE/elections/2016/General/"
baseurl2 <- "https://www.elections.ny.gov/NYSBOE/elections/2016/Primary/"
download_pdf(pdf_names,baseurl1,baseurl2)
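Since pdf_links already contain the full paths relative to the site root, an alternative is to build each URL directly from the scraped path instead of guessing between the General and Primary folders. A minimal sketch (download_pdf2 is just an illustrative name):
download_pdf2 <- function(links, fnames, baseurl = "https://www.elections.ny.gov") {
  for (i in seq_along(links)) {
    # PDFs are binary, so write in binary mode
    download.file(str_c(baseurl, links[i]), destfile = fnames[i], mode = "wb")
    Sys.sleep(1)  # pause between requests
  }
}
# download_pdf2(pdf_links, pdf_names)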
The code below is adapted from the httr vignette at https://cran.r-project.org/web/packages/httr/vignettes/api-packages.html.
github_api <- function(path) {
  url <- modify_url("https://api.github.com", path = path)
  GET(url)
}
path <- "/repos/bsosnovski/project3"
resp <- github_api(path)
resp
## Response [https://api.github.com/repos/bsosnovski/project3]
## Date: 2018-11-03 18:15
## Status: 200
## Content-Type: application/json; charset=utf-8
## Size: 5.26 kB
## {
## "id": 152161547,
## "node_id": "MDEwOlJlcG9zaXRvcnkxNTIxNjE1NDc=",
## "name": "Project3",
## "full_name": "bsosnovski/Project3",
## "private": false,
## "owner": {
## "login": "bsosnovski",
## "id": 31420908,
## "node_id": "MDQ6VXNlcjMxNDIwOTA4",
## ...
# Verify the API returned JSON before attempting to parse it
if (http_type(resp) != "application/json") {
  stop("API did not return json", call. = FALSE)
}
http_type(resp)
## [1] "application/json"
parsed <- jsonlite::fromJSON(content(resp, "text"), simplifyVector = FALSE)
head(parsed)
## $id
## [1] 152161547
##
## $node_id
## [1] "MDEwOlJlcG9zaXRvcnkxNTIxNjE1NDc="
##
## $name
## [1] "Project3"
##
## $full_name
## [1] "bsosnovski/Project3"
##
## $private
## [1] FALSE
##
## $owner
## $owner$login
## [1] "bsosnovski"
##
## $owner$id
## [1] 31420908
##
## $owner$node_id
## [1] "MDQ6VXNlcjMxNDIwOTA4"
##
## $owner$avatar_url
## [1] "https://avatars2.githubusercontent.com/u/31420908?v=4"
##
## $owner$gravatar_id
## [1] ""
##
## $owner$url
## [1] "https://api.github.com/users/bsosnovski"
##
## $owner$html_url
## [1] "https://github.com/bsosnovski"
##
## $owner$followers_url
## [1] "https://api.github.com/users/bsosnovski/followers"
##
## $owner$following_url
## [1] "https://api.github.com/users/bsosnovski/following{/other_user}"
##
## $owner$gists_url
## [1] "https://api.github.com/users/bsosnovski/gists{/gist_id}"
##
## $owner$starred_url
## [1] "https://api.github.com/users/bsosnovski/starred{/owner}{/repo}"
##
## $owner$subscriptions_url
## [1] "https://api.github.com/users/bsosnovski/subscriptions"
##
## $owner$organizations_url
## [1] "https://api.github.com/users/bsosnovski/orgs"
##
## $owner$repos_url
## [1] "https://api.github.com/users/bsosnovski/repos"
##
## $owner$events_url
## [1] "https://api.github.com/users/bsosnovski/events{/privacy}"
##
## $owner$received_events_url
## [1] "https://api.github.com/users/bsosnovski/received_events"
##
## $owner$type
## [1] "User"
##
## $owner$site_admin
## [1] FALSE
In the vignette, the parsed content, the request path, and the raw response are bundled into an object of class "github_api" (the commented-out structure() call below), so that the custom print method that follows displays a compact summary instead of dumping the whole list.
# structure(
#   list(
#     content = parsed,
#     path = path,
#     response = resp
#   ),
#   class = "github_api"
# )
print.github_api <- function(x, ...) {
  cat("<GitHub ", x$path, ">\n", sep = "")
  str(x$content)
  invisible(x)
}
github_api("/users/bsosnovski")
## Response [https://api.github.com/users/bsosnovski]
## Date: 2018-11-03 18:15
## Status: 200
## Content-Type: application/json; charset=utf-8
## Size: 1.39 kB
## {
## "login": "bsosnovski",
## "id": 31420908,
## "node_id": "MDQ6VXNlcjMxNDIwOTA4",
## "avatar_url": "https://avatars2.githubusercontent.com/u/31420908?v=4",
## "gravatar_id": "",
## "url": "https://api.github.com/users/bsosnovski",
## "html_url": "https://github.com/bsosnovski",
## "followers_url": "https://api.github.com/users/bsosnovski/followers",
## "following_url": "https://api.github.com/users/bsosnovski/following{/o...
## ...
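Folding the commented-out structure() call back into the request function, as the vignette does, makes the custom print method actually get dispatched. A minimal sketch (github_api2 is just an illustrative name):
github_api2 <- function(path) {
  url <- modify_url("https://api.github.com", path = path)
  resp <- GET(url)
  parsed <- jsonlite::fromJSON(content(resp, "text"), simplifyVector = FALSE)
  structure(
    list(content = parsed, path = path, response = resp),
    class = "github_api"
  )
}
# github_api2("/users/bsosnovski") would now be printed by print.github_api()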
Since many APIs are rate-limited, we can implement a rate_limit() function that tells us how many calls against the GitHub API are still available to us.
rate_limit <- function() {
  github_api("/rate_limit")
}
rate_limit()
## Response [https://api.github.com/rate_limit]
## Date: 2018-11-03 18:15
## Status: 200
## Content-Type: application/json; charset=utf-8
## Size: 373 B
## {
## "resources": {
## "core": {
## "limit": 60,
## "remaining": 58,
## "reset": 1541272517
## },
## "search": {
## "limit": 10,
## "remaining": 10,
## ...
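To use these numbers programmatically rather than just printing them, the JSON body can be parsed like any other response. A small sketch, assuming the structure shown above:
rl <- rate_limit()
rl_parsed <- jsonlite::fromJSON(content(rl, "text"))
rl_parsed$resources$core$remaining  # requests left in the current window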
url <- "http://httpbin.org/get"
r <- GET(url, add_headers(Name = "Sosnovski", Test = "Example 2"))
r
## Response [http://httpbin.org/get]
## Date: 2018-11-03 18:15
## Status: 200
## Content-Type: application/json
## Size: 377 B
## {
## "args": {},
## "headers": {
## "Accept": "application/json, text/xml, application/xml, */*",
## "Accept-Encoding": "gzip, deflate",
## "Connection": "close",
## "Host": "httpbin.org",
## "Name": "Sosnovski",
## "Test": "Example 2",
## "User-Agent": "libcurl/7.54.0 r-curl/3.2 httr/1.3.1"
## ...
http_status(r)
## $category
## [1] "Success"
##
## $reason
## [1] "OK"
##
## $message
## [1] "Success: (200) OK"
content(r, "raw") # Raw bytes
## [1] 7b 0a 20 20 22 61 72 67 73 22 3a 20 7b 7d 2c 20 0a 20 20 22 68 65 61
## [24] 64 65 72 73 22 3a 20 7b 0a 20 20 20 20 22 41 63 63 65 70 74 22 3a 20
## [47] 22 61 70 70 6c 69 63 61 74 69 6f 6e 2f 6a 73 6f 6e 2c 20 74 65 78 74
## [70] 2f 78 6d 6c 2c 20 61 70 70 6c 69 63 61 74 69 6f 6e 2f 78 6d 6c 2c 20
## [93] 2a 2f 2a 22 2c 20 0a 20 20 20 20 22 41 63 63 65 70 74 2d 45 6e 63 6f
## [116] 64 69 6e 67 22 3a 20 22 67 7a 69 70 2c 20 64 65 66 6c 61 74 65 22 2c
## [139] 20 0a 20 20 20 20 22 43 6f 6e 6e 65 63 74 69 6f 6e 22 3a 20 22 63 6c
## [162] 6f 73 65 22 2c 20 0a 20 20 20 20 22 48 6f 73 74 22 3a 20 22 68 74 74
## [185] 70 62 69 6e 2e 6f 72 67 22 2c 20 0a 20 20 20 20 22 4e 61 6d 65 22 3a
## [208] 20 22 53 6f 73 6e 6f 76 73 6b 69 22 2c 20 0a 20 20 20 20 22 54 65 73
## [231] 74 22 3a 20 22 45 78 61 6d 70 6c 65 20 32 22 2c 20 0a 20 20 20 20 22
## [254] 55 73 65 72 2d 41 67 65 6e 74 22 3a 20 22 6c 69 62 63 75 72 6c 2f 37
## [277] 2e 35 34 2e 30 20 72 2d 63 75 72 6c 2f 33 2e 32 20 68 74 74 72 2f 31
## [300] 2e 33 2e 31 22 0a 20 20 7d 2c 20 0a 20 20 22 6f 72 69 67 69 6e 22 3a
## [323] 20 22 37 34 2e 39 30 2e 31 32 30 2e 31 34 39 22 2c 20 0a 20 20 22 75
## [346] 72 6c 22 3a 20 22 68 74 74 70 3a 2f 2f 68 74 74 70 62 69 6e 2e 6f 72
## [369] 67 2f 67 65 74 22 0a 7d 0a
content(r, "text") # No encoding supplied: defaulting to UTF-8
## No encoding supplied: defaulting to UTF-8.
## [1] "{\n \"args\": {}, \n \"headers\": {\n \"Accept\": \"application/json, text/xml, application/xml, */*\", \n \"Accept-Encoding\": \"gzip, deflate\", \n \"Connection\": \"close\", \n \"Host\": \"httpbin.org\", \n \"Name\": \"Sosnovski\", \n \"Test\": \"Example 2\", \n \"User-Agent\": \"libcurl/7.54.0 r-curl/3.2 httr/1.3.1\"\n }, \n \"origin\": \"74.90.120.149\", \n \"url\": \"http://httpbin.org/get\"\n}\n"
content(r, "parse")
## $args
## named list()
##
## $headers
## $headers$Accept
## [1] "application/json, text/xml, application/xml, */*"
##
## $headers$`Accept-Encoding`
## [1] "gzip, deflate"
##
## $headers$Connection
## [1] "close"
##
## $headers$Host
## [1] "httpbin.org"
##
## $headers$Name
## [1] "Sosnovski"
##
## $headers$Test
## [1] "Example 2"
##
## $headers$`User-Agent`
## [1] "libcurl/7.54.0 r-curl/3.2 httr/1.3.1"
##
##
## $origin
## [1] "74.90.120.149"
##
## $url
## [1] "http://httpbin.org/get"
The link below accesses the Yahoo Weather API through a YQL query, with New York City as the location.
url <- "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20weather.forecast%20where%20woeid%20in%20(select%20woeid%20from%20geo.places(1)%20where%20text%3D%22newyorkcity%22)&format=json&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys"
info <- read.table("yahoo_api_key.txt", header = TRUE, stringsAsFactors = FALSE)
names(info)
## [1] "App_ID" "Consumer_Key" "Consumer_Secret"
dim(info)
## [1] 1 3
usr <- info$Consumer_Key
psw <- info$Consumer_Secret
resp <- GET(url, authenticate(usr, psw))
http_status(resp)
## $category
## [1] "Success"
##
## $reason
## [1] "OK"
##
## $message
## [1] "Success: (200) OK"
parsed <- jsonlite::fromJSON(content(resp, "text"), simplifyVector = FALSE)
head(parsed)
## $query
## $query$count
## [1] 1
##
## $query$created
## [1] "2018-11-03T18:15:17Z"
##
## $query$lang
## [1] "en-US"
##
## $query$results
## $query$results$channel
## $query$results$channel$units
## $query$results$channel$units$distance
## [1] "mi"
##
## $query$results$channel$units$pressure
## [1] "in"
##
## $query$results$channel$units$speed
## [1] "mph"
##
## $query$results$channel$units$temperature
## [1] "F"
##
##
## $query$results$channel$title
## [1] "Yahoo! Weather - New York, NY, US"
##
## $query$results$channel$link
## [1] "http://us.rd.yahoo.com/dailynews/rss/weather/Country__Country/*https://weather.yahoo.com/country/state/city-2459115/"
##
## $query$results$channel$description
## [1] "Yahoo! Weather for New York, NY, US"
##
## $query$results$channel$language
## [1] "en-us"
##
## $query$results$channel$lastBuildDate
## [1] "Sat, 03 Nov 2018 02:15 PM EDT"
##
## $query$results$channel$ttl
## [1] "60"
##
## $query$results$channel$location
## $query$results$channel$location$city
## [1] "New York"
##
## $query$results$channel$location$country
## [1] "United States"
##
## $query$results$channel$location$region
## [1] " NY"
##
##
## $query$results$channel$wind
## $query$results$channel$wind$chill
## [1] "50"
##
## $query$results$channel$wind$direction
## [1] "265"
##
## $query$results$channel$wind$speed
## [1] "36"
##
##
## $query$results$channel$atmosphere
## $query$results$channel$atmosphere$humidity
## [1] "55"
##
## $query$results$channel$atmosphere$pressure
## [1] "1008.0"
##
## $query$results$channel$atmosphere$rising
## [1] "0"
##
## $query$results$channel$atmosphere$visibility
## [1] "16.1"
##
##
## $query$results$channel$astronomy
## $query$results$channel$astronomy$sunrise
## [1] "7:28 am"
##
## $query$results$channel$astronomy$sunset
## [1] "5:50 pm"
##
##
## $query$results$channel$image
## $query$results$channel$image$title
## [1] "Yahoo! Weather"
##
## $query$results$channel$image$width
## [1] "142"
##
## $query$results$channel$image$height
## [1] "18"
##
## $query$results$channel$image$link
## [1] "http://weather.yahoo.com"
##
## $query$results$channel$image$url
## [1] "http://l.yimg.com/a/i/brand/purplelogo//uh/us/news-wea.gif"
##
##
## $query$results$channel$item
## $query$results$channel$item$title
## [1] "Conditions for New York, NY, US at 01:00 PM EDT"
##
## $query$results$channel$item$lat
## [1] "40.71455"
##
## $query$results$channel$item$long
## [1] "-74.007118"
##
## $query$results$channel$item$link
## [1] "http://us.rd.yahoo.com/dailynews/rss/weather/Country__Country/*https://weather.yahoo.com/country/state/city-2459115/"
##
## $query$results$channel$item$pubDate
## [1] "Sat, 03 Nov 2018 01:00 PM EDT"
##
## $query$results$channel$item$condition
## $query$results$channel$item$condition$code
## [1] "24"
##
## $query$results$channel$item$condition$date
## [1] "Sat, 03 Nov 2018 01:00 PM EDT"
##
## $query$results$channel$item$condition$temp
## [1] "56"
##
## $query$results$channel$item$condition$text
## [1] "Windy"
##
##
## $query$results$channel$item$forecast
## $query$results$channel$item$forecast[[1]]
## $query$results$channel$item$forecast[[1]]$code
## [1] "12"
##
## $query$results$channel$item$forecast[[1]]$date
## [1] "03 Nov 2018"
##
## $query$results$channel$item$forecast[[1]]$day
## [1] "Sat"
##
## $query$results$channel$item$forecast[[1]]$high
## [1] "65"
##
## $query$results$channel$item$forecast[[1]]$low
## [1] "48"
##
## $query$results$channel$item$forecast[[1]]$text
## [1] "Rain"
##
##
## $query$results$channel$item$forecast[[2]]
## $query$results$channel$item$forecast[[2]]$code
## [1] "34"
##
## $query$results$channel$item$forecast[[2]]$date
## [1] "04 Nov 2018"
##
## $query$results$channel$item$forecast[[2]]$day
## [1] "Sun"
##
## $query$results$channel$item$forecast[[2]]$high
## [1] "53"
##
## $query$results$channel$item$forecast[[2]]$low
## [1] "42"
##
## $query$results$channel$item$forecast[[2]]$text
## [1] "Mostly Sunny"
##
##
## $query$results$channel$item$forecast[[3]]
## $query$results$channel$item$forecast[[3]]$code
## [1] "39"
##
## $query$results$channel$item$forecast[[3]]$date
## [1] "05 Nov 2018"
##
## $query$results$channel$item$forecast[[3]]$day
## [1] "Mon"
##
## $query$results$channel$item$forecast[[3]]$high
## [1] "55"
##
## $query$results$channel$item$forecast[[3]]$low
## [1] "45"
##
## $query$results$channel$item$forecast[[3]]$text
## [1] "Scattered Showers"
##
##
## $query$results$channel$item$forecast[[4]]
## $query$results$channel$item$forecast[[4]]$code
## [1] "11"
##
## $query$results$channel$item$forecast[[4]]$date
## [1] "06 Nov 2018"
##
## $query$results$channel$item$forecast[[4]]$day
## [1] "Tue"
##
## $query$results$channel$item$forecast[[4]]$high
## [1] "65"
##
## $query$results$channel$item$forecast[[4]]$low
## [1] "53"
##
## $query$results$channel$item$forecast[[4]]$text
## [1] "Showers"
##
##
## $query$results$channel$item$forecast[[5]]
## $query$results$channel$item$forecast[[5]]$code
## [1] "12"
##
## $query$results$channel$item$forecast[[5]]$date
## [1] "07 Nov 2018"
##
## $query$results$channel$item$forecast[[5]]$day
## [1] "Wed"
##
## $query$results$channel$item$forecast[[5]]$high
## [1] "63"
##
## $query$results$channel$item$forecast[[5]]$low
## [1] "50"
##
## $query$results$channel$item$forecast[[5]]$text
## [1] "Rain"
##
##
## $query$results$channel$item$forecast[[6]]
## $query$results$channel$item$forecast[[6]]$code
## [1] "34"
##
## $query$results$channel$item$forecast[[6]]$date
## [1] "08 Nov 2018"
##
## $query$results$channel$item$forecast[[6]]$day
## [1] "Thu"
##
## $query$results$channel$item$forecast[[6]]$high
## [1] "55"
##
## $query$results$channel$item$forecast[[6]]$low
## [1] "45"
##
## $query$results$channel$item$forecast[[6]]$text
## [1] "Mostly Sunny"
##
##
## $query$results$channel$item$forecast[[7]]
## $query$results$channel$item$forecast[[7]]$code
## [1] "47"
##
## $query$results$channel$item$forecast[[7]]$date
## [1] "09 Nov 2018"
##
## $query$results$channel$item$forecast[[7]]$day
## [1] "Fri"
##
## $query$results$channel$item$forecast[[7]]$high
## [1] "56"
##
## $query$results$channel$item$forecast[[7]]$low
## [1] "45"
##
## $query$results$channel$item$forecast[[7]]$text
## [1] "Scattered Thunderstorms"
##
##
## $query$results$channel$item$forecast[[8]]
## $query$results$channel$item$forecast[[8]]$code
## [1] "47"
##
## $query$results$channel$item$forecast[[8]]$date
## [1] "10 Nov 2018"
##
## $query$results$channel$item$forecast[[8]]$day
## [1] "Sat"
##
## $query$results$channel$item$forecast[[8]]$high
## [1] "48"
##
## $query$results$channel$item$forecast[[8]]$low
## [1] "42"
##
## $query$results$channel$item$forecast[[8]]$text
## [1] "Scattered Thunderstorms"
##
##
## $query$results$channel$item$forecast[[9]]
## $query$results$channel$item$forecast[[9]]$code
## [1] "30"
##
## $query$results$channel$item$forecast[[9]]$date
## [1] "11 Nov 2018"
##
## $query$results$channel$item$forecast[[9]]$day
## [1] "Sun"
##
## $query$results$channel$item$forecast[[9]]$high
## [1] "48"
##
## $query$results$channel$item$forecast[[9]]$low
## [1] "37"
##
## $query$results$channel$item$forecast[[9]]$text
## [1] "Partly Cloudy"
##
##
## $query$results$channel$item$forecast[[10]]
## $query$results$channel$item$forecast[[10]]$code
## [1] "30"
##
## $query$results$channel$item$forecast[[10]]$date
## [1] "12 Nov 2018"
##
## $query$results$channel$item$forecast[[10]]$day
## [1] "Mon"
##
## $query$results$channel$item$forecast[[10]]$high
## [1] "48"
##
## $query$results$channel$item$forecast[[10]]$low
## [1] "38"
##
## $query$results$channel$item$forecast[[10]]$text
## [1] "Partly Cloudy"
##
##
##
## $query$results$channel$item$description
## [1] "<![CDATA[<img src=\"http://l.yimg.com/a/i/us/we/52/24.gif\"/>\n<BR />\n<b>Current Conditions:</b>\n<BR />Windy\n<BR />\n<BR />\n<b>Forecast:</b>\n<BR /> Sat - Rain. High: 65Low: 48\n<BR /> Sun - Mostly Sunny. High: 53Low: 42\n<BR /> Mon - Scattered Showers. High: 55Low: 45\n<BR /> Tue - Showers. High: 65Low: 53\n<BR /> Wed - Rain. High: 63Low: 50\n<BR />\n<BR />\n<a href=\"http://us.rd.yahoo.com/dailynews/rss/weather/Country__Country/*https://weather.yahoo.com/country/state/city-2459115/\">Full Forecast at Yahoo! Weather</a>\n<BR />\n<BR />\n<BR />\n]]>"
##
## $query$results$channel$item$guid
## $query$results$channel$item$guid$isPermaLink
## [1] "false"