See https://www.jmarshall.com/easy/http/
HTTP is stateless and follows a client server model. The client opens a connection, sends a request and the server returns a response.
We will use the following packages.
library(httr)
library(openssl)
library(jsonlite)
library(curl)
library(digest)
We use httr
to interact with a website via its API. Before we start it is good to have this model in your head:
This first section is meant to serve as a quick reminder of httr
. Mostly taken directly from the httr
vignette but has been abbreviated. The main action verbs are GET
and POST
(for forms); the others are HEAD
, PATCH
, PUT
and DELETE.
The httr
understands some of the content types as described in Content-Type:
and can automatically parse them. If a parser for the particular content type is not available, read the raw data stream and parse it manually.
r <- GET("http://httpbin.org/get")
r
## Response [http://httpbin.org/get]
## Date: 2021-02-15 08:45
## Status: 200
## Content-Type: application/json
## Size: 370 B
## {
## "args": {},
## "headers": {
## "Accept": "application/json, text/xml, application/xml, */*",
## "Accept-Encoding": "deflate, gzip, br",
## "Host": "httpbin.org",
## "User-Agent": "libcurl/7.68.0 r-curl/4.3 httr/1.4.2",
## "X-Amzn-Trace-Id": "Root=1-602a3498-2634b61658f9fd8c636f5494"
## },
## "origin": "43.250.205.183",
## ...
The response contains status, headers and body.
status_code(r) # 200 is success
## [1] 200
headers(r)
## $date
## [1] "Mon, 15 Feb 2021 08:45:12 GMT"
##
## $`content-type`
## [1] "application/json"
##
## $`content-length`
## [1] "370"
##
## $connection
## [1] "keep-alive"
##
## $server
## [1] "gunicorn/19.9.0"
##
## $`access-control-allow-origin`
## [1] "*"
##
## $`access-control-allow-credentials`
## [1] "true"
##
## attr(,"class")
## [1] "insensitive" "list"
warn_for_status(r)
stop_for_status(r)
Access the body via content
.
# character vector
content(r, "text")
## No encoding supplied: defaulting to UTF-8.
## [1] "{\n \"args\": {}, \n \"headers\": {\n \"Accept\": \"application/json, text/xml, application/xml, */*\", \n \"Accept-Encoding\": \"deflate, gzip, br\", \n \"Host\": \"httpbin.org\", \n \"User-Agent\": \"libcurl/7.68.0 r-curl/4.3 httr/1.4.2\", \n \"X-Amzn-Trace-Id\": \"Root=1-602a3498-2634b61658f9fd8c636f5494\"\n }, \n \"origin\": \"43.250.205.183\", \n \"url\": \"http://httpbin.org/get\"\n}\n"
# use a specific encoding
content(r, "text", encoding = "ISO-8859-1")
## [1] "{\n \"args\": {}, \n \"headers\": {\n \"Accept\": \"application/json, text/xml, application/xml, */*\", \n \"Accept-Encoding\": \"deflate, gzip, br\", \n \"Host\": \"httpbin.org\", \n \"User-Agent\": \"libcurl/7.68.0 r-curl/4.3 httr/1.4.2\", \n \"X-Amzn-Trace-Id\": \"Root=1-602a3498-2634b61658f9fd8c636f5494\"\n }, \n \"origin\": \"43.250.205.183\", \n \"url\": \"http://httpbin.org/get\"\n}\n"
# attempt to identify encoding
stringi::stri_enc_detect(content(r, "raw"))
## [[1]]
## Encoding Language Confidence
## 1 ISO-8859-1 en 0.37
## 2 UTF-8 0.15
## 3 ISO-8859-2 ro 0.15
## 4 UTF-16BE 0.10
## 5 UTF-16LE 0.10
## 6 ISO-8859-9 tr 0.10
## 7 Shift_JIS ja 0.10
## 8 GB18030 zh 0.10
## 9 EUC-JP ja 0.10
## 10 EUC-KR ko 0.10
## 11 Big5 zh 0.10
Binary content.
content(r, "raw")
## [1] 7b 0a 20 20 22 61 72 67 73 22 3a 20 7b 7d 2c 20 0a 20 20 22 68 65 61 64 65
## [26] 72 73 22 3a 20 7b 0a 20 20 20 20 22 41 63 63 65 70 74 22 3a 20 22 61 70 70
## [51] 6c 69 63 61 74 69 6f 6e 2f 6a 73 6f 6e 2c 20 74 65 78 74 2f 78 6d 6c 2c 20
## [76] 61 70 70 6c 69 63 61 74 69 6f 6e 2f 78 6d 6c 2c 20 2a 2f 2a 22 2c 20 0a 20
## [101] 20 20 20 22 41 63 63 65 70 74 2d 45 6e 63 6f 64 69 6e 67 22 3a 20 22 64 65
## [126] 66 6c 61 74 65 2c 20 67 7a 69 70 2c 20 62 72 22 2c 20 0a 20 20 20 20 22 48
## [151] 6f 73 74 22 3a 20 22 68 74 74 70 62 69 6e 2e 6f 72 67 22 2c 20 0a 20 20 20
## [176] 20 22 55 73 65 72 2d 41 67 65 6e 74 22 3a 20 22 6c 69 62 63 75 72 6c 2f 37
## [201] 2e 36 38 2e 30 20 72 2d 63 75 72 6c 2f 34 2e 33 20 68 74 74 72 2f 31 2e 34
## [226] 2e 32 22 2c 20 0a 20 20 20 20 22 58 2d 41 6d 7a 6e 2d 54 72 61 63 65 2d 49
## [251] 64 22 3a 20 22 52 6f 6f 74 3d 31 2d 36 30 32 61 33 34 39 38 2d 32 36 33 34
## [276] 62 36 31 36 35 38 66 39 66 64 38 63 36 33 36 66 35 34 39 34 22 0a 20 20 7d
## [301] 2c 20 0a 20 20 22 6f 72 69 67 69 6e 22 3a 20 22 34 33 2e 32 35 30 2e 32 30
## [326] 35 2e 31 38 33 22 2c 20 0a 20 20 22 75 72 6c 22 3a 20 22 68 74 74 70 3a 2f
## [351] 2f 68 74 74 70 62 69 6e 2e 6f 72 67 2f 67 65 74 22 0a 7d 0a
bin <- content(r, "raw")
writeBin(bin, "myfile.txt")
Automatic parsing.
content(r, "parsed") #json by default
## $args
## named list()
##
## $headers
## $headers$Accept
## [1] "application/json, text/xml, application/xml, */*"
##
## $headers$`Accept-Encoding`
## [1] "deflate, gzip, br"
##
## $headers$Host
## [1] "httpbin.org"
##
## $headers$`User-Agent`
## [1] "libcurl/7.68.0 r-curl/4.3 httr/1.4.2"
##
## $headers$`X-Amzn-Trace-Id`
## [1] "Root=1-602a3498-2634b61658f9fd8c636f5494"
##
##
## $origin
## [1] "43.250.205.183"
##
## $url
## [1] "http://httpbin.org/get"
Accessing cookies.
r <- GET("http://httpbin.org/cookies/set",
query = list(a = 1))
cookies(r)
## domain flag path secure expiration name value
## 1 httpbin.org FALSE / FALSE <NA> a 1
Add a new value to the existing cookies. Cookies persist across multiple requests in a session (they are sent back and forth).
r <- GET("http://httpbin.org/cookies/set",
query = list(b = 1))
cookies(r)
## domain flag path secure expiration name value
## 1 httpbin.org FALSE / FALSE <NA> a 1
## 2 httpbin.org FALSE / FALSE <NA> b 1
The request also has status, header and body with the status defining the http method and the url. Additional data can be embedded into the url or put in the headers and the body (if it is a POST
request).
Query string.
r <- GET("http://httpbin.org/get",
query = list(key1 = "value1",
key2 = "value2")
)
content(r)$args
## $key1
## [1] "value1"
##
## $key2
## [1] "value2"
Custom header. Note - content(r)$headers
is whatever was sent and headers(r)
is the headers that were sent back in the response.
r <- GET("http://httpbin.org/get",
config = add_headers(Name = "Hadley"))
str(content(r)$headers)
## List of 7
## $ Accept : chr "application/json, text/xml, application/xml, */*"
## $ Accept-Encoding: chr "deflate, gzip, br"
## $ Cookie : chr "b=1; a=1"
## $ Host : chr "httpbin.org"
## $ Name : chr "Hadley"
## $ User-Agent : chr "libcurl/7.68.0 r-curl/4.3 httr/1.4.2"
## $ X-Amzn-Trace-Id: chr "Root=1-602a349a-01e58a835937086a1fd8a62d"
headers(r)
## $date
## [1] "Mon, 15 Feb 2021 08:45:14 GMT"
##
## $`content-type`
## [1] "application/json"
##
## $`content-length`
## [1] "420"
##
## $connection
## [1] "keep-alive"
##
## $server
## [1] "gunicorn/19.9.0"
##
## $`access-control-allow-origin`
## [1] "*"
##
## $`access-control-allow-credentials`
## [1] "true"
##
## attr(,"class")
## [1] "insensitive" "list"
POST
can contain data in body
of request.
r <- POST("http://httpbin.org/post",
body = list(a = 1, b = 2, c = 3))
Use encode
to be specific about the encoding style.
url <- "http://httpbin.org/post"
body <- list(a = 1, b = 2, c = 3)
# Form encoded
r <- POST(url, body = body, encode = "form")
# Multipart encoded
r <- POST(url, body = body, encode = "multipart")
# JSON encoded
r <- POST(url, body = body, encode = "json")
Pro-tip - use the following at the console to see what is going on.
POST(url,
body = body,
encode = "json", verbose())
## Response [http://httpbin.org/post]
## Date: 2021-02-15 08:45
## Status: 200
## Content-Type: application/json
## Size: 594 B
## {
## "args": {},
## "data": "{\"a\":1,\"b\":2,\"c\":3}",
## "files": {},
## "form": {},
## "headers": {
## "Accept": "application/json, text/xml, application/xml, */*",
## "Accept-Encoding": "deflate, gzip, br",
## "Content-Length": "19",
## "Content-Type": "application/json",
## ...
You can send files.
POST(url, body = upload_file("mypath.txt"))
As a general rule, change your password before and after you share it with me or anyone else, even via cryptography.
Passwords used for interacting with an account via an API are clearly secret, but there is often additional secret information that needs to be passed between agents. You do not want to mistakenly share your secret data via some public store (such as RPubs or GitHub). It is a very bad idea to hard-code your secrets into the R
code.
A simple way to manage secrets is via environment variables. To create environment variables use the .Renviron
file stored in your home dir. Note that I intentionally set VAR2
to be empty.
# dummy contents of .Renviron
# quotes around strings not necessary
VAR1 = abc123
VAR2 =
Retrieve via (I only set VAR1).
Sys.getenv("VAR1")
## [1] "abc123"
Sys.getenv("VAR2")
## [1] ""
The keyring
package is another alternative, refer to that vignette for more detail.
To share secrets with other agents use public key crypto. Your public key can be transmitted freely: if I go and get your public key then I can encrypt my data with it, and the result can only be unlocked with your private key.
Here we go to Hadley Wickham’s github site, pick up his public key and encrypt our secret using that public key.
#' Encrypt a secret with a GitHub user's public RSA key
#'
#' Fetches the public keys listed for `username` by the GitHub API, picks the
#' first RSA key among them and uses it to encrypt `secret`.
#'
#' @param secret Character string to encrypt.
#' @param username GitHub login whose public key is used.
#' @return The ciphertext as a base64-encoded character string.
encrypt <- function(secret, username) {
  # This site stores the public key
  url <- paste("https://api.github.com/users",
               username, "keys", sep = "/")
  resp <- httr::GET(url)
  httr::stop_for_status(resp)
  # Each entry of the parsed response carries the key text in `$key`
  pubkeys <- vapply(httr::content(resp), function(entry) entry$key,
                    character(1))
  # rsa_encrypt() needs an RSA key, so skip other key types (e.g. ed25519)
  # instead of blindly taking the first entry
  rsa_keys <- pubkeys[startsWith(pubkeys, "ssh-rsa ")]
  if (length(rsa_keys) == 0L) {
    stop("No public RSA key found on GitHub for user '", username, "'.",
         call. = FALSE)
  }
  opubkey <- openssl::read_pubkey(rsa_keys[[1]])
  # Create the encrypted sequence using the public key
  cipher <- openssl::rsa_encrypt(charToRaw(secret),
                                 opubkey)
  # bundle into base64 encoder
  jsonlite::base64_enc(cipher)
}
cipher <- encrypt(secret = "<username>\n<password>",
username = "hadley")
cat(cipher)
## lVA6EwerwXu6lGzNjXsMo7t7CEO8fjTvUbNgLwOF1LKYGzdrQnfbl0j+teGXwaZ+U0bt5ElI
## lWkCWoXpjk5sszMevK0CUIjx68TAnBE0S794xWVlyZQvemeDLy2EKHWrx94kSQMWzRq3Nv8a
## jYKZgjRR7Hsfp7+rQ/i11iEwyKM00qmfkg9QOy02UGHwAqlmv5pJjCscSIkwjKHbgZtrsZts
## p1VyWee+aQE8jTPv6voAVad5r45EDNOqROEEl7kld7cybxJZ+lO5+luVc+5l7oeCjqKvFz9H
## FSCfRH5YzlEWOAsYRQA7BlnaATZ60H8wjBhngtMjIubxFGDbptV3Uw==
Decryption reverses the process.
#' Decrypt a base64-encoded RSA ciphertext
#'
#' @param cipher Base64 string holding the RSA-encrypted bytes.
#' @param key Private key used for decryption; defaults to the key returned
#'   by `openssl::my_key()`.
#' @return The decrypted content as a character string.
decrypt <- function(cipher, key = openssl::my_key()) {
  # undo the base64 wrapping to get back the raw encrypted bytes
  encrypted_bytes <- jsonlite::base64_dec(cipher)
  plain_bytes <- openssl::rsa_decrypt(encrypted_bytes, key = key)
  rawToChar(plain_bytes)
}
A minimal example using some test keys. In practice you would be using someone’s public key to do this bit.
key <- rsa_keygen()
pubkey <- key$pubkey
cipher <- openssl::rsa_encrypt(charToRaw("This is a test secret"),
pubkey)
# ready to be bundled
cipher <- jsonlite::base64_enc(cipher)
cat(cipher)
## WXjeKbKZwUfZwC4X0UEMvgsH4f0Mfybcuo7JVmRnwSdfI10kwVona+YL5PbzGrxHCXv+bNGW
## bzLemBcFvbx3HwL0jqYjzZWgIfPuNzvGOuXNYCI8MkVM0qwvpfaA1y3mlCEcHYk73YyWfy/m
## j6iMpEbI+M6JQvQqt3i45p8TWrV8FHVcW/swpQ33CDtnKCLE3O0tzry+oNYPNnhslg59CmTr
## qUeNbhwImNDxXtfmS0omc0D20QoaJWu/fvf1shw/+7ATDJrwsM1FPb4AWZVcuip+bGp37QNk
## 46xC6Pp1HyYY4kKHYbb195fmGgmFeVBSCBKG5M6D0a2ZPPnHbIMc9Q==
The receiver decrypts using the private key from their public/private key pair.
cipherraw <- jsonlite::base64_dec(cipher)
message <- rsa_decrypt(cipherraw, key)
out <- rawToChar(message)
cat(out)
## This is a test secret
APIs define the structure of requests and responses, which comprise the URL (including any query string, e.g. ?foo=bar), the headers and the body.
Authentication is generally required so let’s deal with that first. One way to do this is via basic authentication with an API key. Alternatively, OAuth2 can be used for generating a user or session specific token.
OAuth2 automates the key exchange process by providing a standard way for a client to get a key from a server by walking the user through a set of steps. The Getting Started with OAuth2.0 text by Boyd seems quite good. The protocol is defined under RFC6749 (https://tools.ietf.org/html/rfc6749). However, for a more friendly intro on OAuth2 see (https://www.joyofdata.de/blog/oauth2-google-api-python-google-analytics/) and (https://zapier.com/learn/apis/chapter-5-authentication-part-2/). It is useful to think of OAuth2 in terms of the actors involved:
In a nutshell:
From there on in the client is free to access the server on the user's behalf. The tokens generally expire after a bit.
An API Key is much simpler and known as basic authentication; you create a key and a secret and these are passed in the header of the request.
There are some good demos here but some additional examples follow.
Go to:
https://developer.domain.com.au/docs/v2/getting-started/creating-first-project and set up a project
Start with the API key as this is simplest, see: https://developer.domain.com.au/docs/v2/authentication/apikey/using-api-key
Create the key and then go to API access and select the “properties and locations API”, which is free up to 500 calls per day.
See https://developer.domain.com.au/docs/latest/apis/pkg_properties_locations for the API docs
Store your key in your .Renviron
file (you will need to restart R to pick this up).
# Query the Domain suburb performance endpoint, authenticating with an API key
# read from the environment.
key <- Sys.getenv("DOMAIN_API_KEY")
# Sys.getenv() returns "" for an unset variable; fail fast rather than send
# a request with an empty key
if (!nzchar(key)) {
  stop("DOMAIN_API_KEY is not set; add it to .Renviron and restart R.",
       call. = FALSE)
}
# query-string parameters
params <- list(
  propertyCategory = "house",
  bedrooms = 3,
  periodSize = "Years",
  startingPeriodRelativeToCurrent = 1,
  totalPeriods = 4
)
# base url and endpoint
qq <- list(
  base = "https://api.domain.com.au",
  # the endpoint state/suburb/postcode
  path = "/v2/suburbPerformanceStatistics/NSW/Ashfield/2131"
)
# a generic query function can be set up and you would pass the http
# action that you want to use via text, e.g.
METHOD <- getFromNamespace("GET", ns = "httr")
# strictly, this should be wrapped in tryCatch
r <- METHOD(url = qq$base,
            # the key is sent in the X-API-Key request header
            config = add_headers("X-API-Key" = key),
            path = qq$path,
            query = params,
            body = NULL,
            # NOTE(review): verbose() prints the outgoing request headers,
            # which here include the API key - avoid in shared output
            verbose()
)
warn_for_status(r)
cc <- content(r, "parsed")
# just show header, series is there as well
cc$header
## $suburb
## [1] "Ashfield"
##
## $state
## [1] "NSW"
##
## $propertyCategory
## [1] "House"
cc$series$seriesInfo[[1]]
## $year
## [1] 2017
##
## $month
## [1] 11
##
## $values
## $values$medianSoldPrice
## [1] 1485000
##
## $values$numberSold
## [1] 39
##
## $values$highestSoldPrice
## [1] 1950000
##
## $values$lowestSoldPrice
## [1] 795000
##
## $values$`5thPercentileSoldPrice`
## [1] 850000
##
## $values$`25thPercentileSoldPrice`
## [1] 1201000
##
## $values$`75thPercentileSoldPrice`
## [1] 1710000
##
## $values$`95thPercentileSoldPrice`
## [1] 1935000
##
## $values$medianSaleListingPrice
## [1] 1500000
##
## $values$numberSaleListing
## [1] 46
##
## $values$highestSaleListingPrice
## [1] 2800000
##
## $values$lowestSaleListingPrice
## [1] 869000
##
## $values$auctionNumberAuctioned
## [1] 36
##
## $values$auctionNumberSold
## [1] 28
##
## $values$auctionNumberWithdrawn
## NULL
##
## $values$daysOnMarket
## [1] 37
##
## $values$discountPercentage
## NULL
##
## $values$medianRentListingPrice
## [1] 700
##
## $values$numberRentListing
## [1] 76
##
## $values$highestRentListingPrice
## [1] 1300
##
## $values$lowestRentListingPrice
## [1] 500
For signing a request, you would also add
# Signing the request: add a timestamp, then an HMAC-SHA256 signature computed
# over all parameters joined as "name=value&name=value...".
# NOTE(review): timestamp() yields the decorated stamp shown below - confirm
# the target API accepts this format.
params$timestamp <- timestamp()
## ##------ Mon Feb 15 16:45:18 2021 ------##
params$signature <- local({
  # one "name=value" entry per parameter, in list order
  name_value_pairs <- mapply(
    function(nm, val) paste(nm, val, sep = "="),
    names(params),
    params,
    USE.NAMES = FALSE
  )
  signing_string <- paste(name_value_pairs, collapse = "&")
  digest::hmac(key = key, object = signing_string, algo = "sha256")
})
For the domain API, on instances where you do not identify a user or owner, you can set things up using a client credentials grant. The process involves only a single step action of obtaining access token by providing clientId and secret. An example using the github api is below, this is just a transcription from Hadley’s repository for httr
. The docs are here http://developer.github.com/v3/oauth/
For this to work, you will need to make your own application. Go to https://github.com/settings/developers and register.
For the homepage URL use http://github.com For the callback URL use http://localhost:1410/
# OAuth2 flow against GitHub: describe the endpoints, register the app
# credentials, obtain a token, then call the API with it.
oep <- oauth_endpoint(base_url = "https://github.com/login/oauth",
                      request = NULL,
                      authorize = "authorize",
                      access = "access_token")
# app credentials are read from the environment rather than hard-coded
mykey <- Sys.getenv("GITHUB_OAUTH_ID")
mysec <- Sys.getenv("GITHUB_OAUTH_SEC")
myapp <- oauth_app("github",
                   key = mykey,
                   # The secret isn't secret. A user still has to authenticate when redirected.
                   secret = mysec
)
# Retrieve the token
token <- oauth2.0_token(oep, myapp)
# Now Use API
gtoken <- config(token = token)
req <- GET("https://api.github.com/user/following", config = gtoken)
stop_for_status(req)
l <- content(req)
# vapply() always returns a character vector, even when the list is empty
# (sapply() would return list() in that case)
vapply(l, function(z) z$login, character(1))
## [1] "kjhealy" "eschulte" "bbolker" "jrnold"
## [5] "tharte" "gavinsimpson" "harrelfe" "cscherrer"
## [9] "dchudz" "betanalpha" "helske" "tomwallis"
## [13] "jacobrousseau" "kholsinger" "mailund" "rmcelreath"
## [17] "AustinRochford" "miklevin" "ellisp" "kgoldfeld"
## [21] "mpatacchiola" "ASKurz"
Trying to sort out a non-standard OAuth2 authentication seems obfuscated given the solution that I finally arrived at. The insight came from a stackoverflow post and the Boyd textbook mentioned earlier. After that, I posted this solution at stackoverflow https://stackoverflow.com/questions/66202868/convert-a-curl-oauth2-token-request-to-httr
# Client-credentials grant: exchange the client id and secret for an access
# token in a single form-encoded POST.
mykey <- Sys.getenv("DOMAIN_OAUTH_ID")
mysec <- Sys.getenv("DOMAIN_OAUTH_SEC")
r <- POST("https://auth.domain.com.au/v1/connect/token",
          body = list(
            grant_type = "client_credentials",
            client_id = mykey,
            client_secret = mysec,
            scope = "api_suburbperformance_read"
          ),
          encode = "form"
)
# A failed token request must halt here: with only a warning, `tok` below
# would be NULL and later requests would go out without a usable token.
stop_for_status(r)
cr <- content(r)
tok <- cr$access_token
# hiding the token
cr$access_token <- "the_token_was_here"
cr
## $access_token
## [1] "the_token_was_here"
##
## $expires_in
## [1] 43200
##
## $token_type
## [1] "Bearer"
# Call the suburb statistics endpoint, sending the access token as a
# Bearer Authorization header.
rg <- GET(
  url = "https://api.domain.com.au/v2/suburbPerformanceStatistics/NSW/Pyrmont/2009",
  config = add_headers(.headers = c(
    "Content-Type" = "application/x-www-form-urlencoded",
    "Accept" = "text/plain",
    "Authorization" = paste("Bearer", tok)
  ))
)
warn_for_status(rg)
# only the header element is shown; the series is in the response too
content(rg)$header
## $suburb
## [1] "Pyrmont"
##
## $state
## [1] "NSW"
##
## $propertyCategory
## [1] "House"
When authentication is required you will need to use an alternative access approach. This was taking too much time and I couldn’t figure this out and so have left it for now.