HTTP

See https://www.jmarshall.com/easy/http/

HTTP is stateless and follows a client server model. The client opens a connection, sends a request and the server returns a response.

HTTR

We will use the following packages.

library(httr)
library(openssl)
library(jsonlite)
library(curl)
library(digest)

We use httr to interact with a website via its API. Before we start it is good to have this model in your head:

This first section is meant to serve as a quick reminder of httr. It is mostly taken directly from the httr vignette but has been abbreviated. The main action verbs are GET and POST (for forms); the others are HEAD, PATCH, PUT and DELETE. httr understands some of the content types described in the Content-Type header and can parse them automatically. If a parser for a particular content type is not available, read the raw data stream and parse it manually.

r <- GET("http://httpbin.org/get")
r
## Response [http://httpbin.org/get]
##   Date: 2021-02-15 08:45
##   Status: 200
##   Content-Type: application/json
##   Size: 370 B
## {
##   "args": {}, 
##   "headers": {
##     "Accept": "application/json, text/xml, application/xml, */*", 
##     "Accept-Encoding": "deflate, gzip, br", 
##     "Host": "httpbin.org", 
##     "User-Agent": "libcurl/7.68.0 r-curl/4.3 httr/1.4.2", 
##     "X-Amzn-Trace-Id": "Root=1-602a3498-2634b61658f9fd8c636f5494"
##   }, 
##   "origin": "43.250.205.183", 
## ...

The response contains status, headers and body.

status_code(r) # 200 is success
## [1] 200
headers(r)
## $date
## [1] "Mon, 15 Feb 2021 08:45:12 GMT"
## 
## $`content-type`
## [1] "application/json"
## 
## $`content-length`
## [1] "370"
## 
## $connection
## [1] "keep-alive"
## 
## $server
## [1] "gunicorn/19.9.0"
## 
## $`access-control-allow-origin`
## [1] "*"
## 
## $`access-control-allow-credentials`
## [1] "true"
## 
## attr(,"class")
## [1] "insensitive" "list"
warn_for_status(r)
stop_for_status(r)

Access the body via content.

# character vector
content(r, "text") 
## No encoding supplied: defaulting to UTF-8.
## [1] "{\n  \"args\": {}, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"deflate, gzip, br\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"libcurl/7.68.0 r-curl/4.3 httr/1.4.2\", \n    \"X-Amzn-Trace-Id\": \"Root=1-602a3498-2634b61658f9fd8c636f5494\"\n  }, \n  \"origin\": \"43.250.205.183\", \n  \"url\": \"http://httpbin.org/get\"\n}\n"
# use a specific encoding
content(r, "text", encoding = "ISO-8859-1") 
## [1] "{\n  \"args\": {}, \n  \"headers\": {\n    \"Accept\": \"application/json, text/xml, application/xml, */*\", \n    \"Accept-Encoding\": \"deflate, gzip, br\", \n    \"Host\": \"httpbin.org\", \n    \"User-Agent\": \"libcurl/7.68.0 r-curl/4.3 httr/1.4.2\", \n    \"X-Amzn-Trace-Id\": \"Root=1-602a3498-2634b61658f9fd8c636f5494\"\n  }, \n  \"origin\": \"43.250.205.183\", \n  \"url\": \"http://httpbin.org/get\"\n}\n"
# attempt to identify encoding
stringi::stri_enc_detect(content(r, "raw"))
## [[1]]
##      Encoding Language Confidence
## 1  ISO-8859-1       en       0.37
## 2       UTF-8                0.15
## 3  ISO-8859-2       ro       0.15
## 4    UTF-16BE                0.10
## 5    UTF-16LE                0.10
## 6  ISO-8859-9       tr       0.10
## 7   Shift_JIS       ja       0.10
## 8     GB18030       zh       0.10
## 9      EUC-JP       ja       0.10
## 10     EUC-KR       ko       0.10
## 11       Big5       zh       0.10

Binary content.

content(r, "raw")
##   [1] 7b 0a 20 20 22 61 72 67 73 22 3a 20 7b 7d 2c 20 0a 20 20 22 68 65 61 64 65
##  [26] 72 73 22 3a 20 7b 0a 20 20 20 20 22 41 63 63 65 70 74 22 3a 20 22 61 70 70
##  [51] 6c 69 63 61 74 69 6f 6e 2f 6a 73 6f 6e 2c 20 74 65 78 74 2f 78 6d 6c 2c 20
##  [76] 61 70 70 6c 69 63 61 74 69 6f 6e 2f 78 6d 6c 2c 20 2a 2f 2a 22 2c 20 0a 20
## [101] 20 20 20 22 41 63 63 65 70 74 2d 45 6e 63 6f 64 69 6e 67 22 3a 20 22 64 65
## [126] 66 6c 61 74 65 2c 20 67 7a 69 70 2c 20 62 72 22 2c 20 0a 20 20 20 20 22 48
## [151] 6f 73 74 22 3a 20 22 68 74 74 70 62 69 6e 2e 6f 72 67 22 2c 20 0a 20 20 20
## [176] 20 22 55 73 65 72 2d 41 67 65 6e 74 22 3a 20 22 6c 69 62 63 75 72 6c 2f 37
## [201] 2e 36 38 2e 30 20 72 2d 63 75 72 6c 2f 34 2e 33 20 68 74 74 72 2f 31 2e 34
## [226] 2e 32 22 2c 20 0a 20 20 20 20 22 58 2d 41 6d 7a 6e 2d 54 72 61 63 65 2d 49
## [251] 64 22 3a 20 22 52 6f 6f 74 3d 31 2d 36 30 32 61 33 34 39 38 2d 32 36 33 34
## [276] 62 36 31 36 35 38 66 39 66 64 38 63 36 33 36 66 35 34 39 34 22 0a 20 20 7d
## [301] 2c 20 0a 20 20 22 6f 72 69 67 69 6e 22 3a 20 22 34 33 2e 32 35 30 2e 32 30
## [326] 35 2e 31 38 33 22 2c 20 0a 20 20 22 75 72 6c 22 3a 20 22 68 74 74 70 3a 2f
## [351] 2f 68 74 74 70 62 69 6e 2e 6f 72 67 2f 67 65 74 22 0a 7d 0a
bin <- content(r, "raw")
writeBin(bin, "myfile.txt")

Automatic parsing.

content(r, "parsed") #json by default
## $args
## named list()
## 
## $headers
## $headers$Accept
## [1] "application/json, text/xml, application/xml, */*"
## 
## $headers$`Accept-Encoding`
## [1] "deflate, gzip, br"
## 
## $headers$Host
## [1] "httpbin.org"
## 
## $headers$`User-Agent`
## [1] "libcurl/7.68.0 r-curl/4.3 httr/1.4.2"
## 
## $headers$`X-Amzn-Trace-Id`
## [1] "Root=1-602a3498-2634b61658f9fd8c636f5494"
## 
## 
## $origin
## [1] "43.250.205.183"
## 
## $url
## [1] "http://httpbin.org/get"

Accessing cookies.

r <- GET("http://httpbin.org/cookies/set", 
         query = list(a = 1))
cookies(r)
##        domain  flag path secure expiration name value
## 1 httpbin.org FALSE    /  FALSE       <NA>    a     1

Add a new value to the existing cookies. Cookies persist across multiple requests in a session (they are sent back and forth).

r <- GET("http://httpbin.org/cookies/set", 
         query = list(b = 1))
cookies(r)
##        domain  flag path secure expiration name value
## 1 httpbin.org FALSE    /  FALSE       <NA>    a     1
## 2 httpbin.org FALSE    /  FALSE       <NA>    b     1

The request also has three parts: a request line, headers and a body, with the request line defining the HTTP method and the URL. Additional data can be embedded into the URL or put in the headers and the body (if it is a POST request).

Query string.

r <- GET("http://httpbin.org/get", 
         query = list(key1 = "value1",
                      key2 = "value2")
)
content(r)$args
## $key1
## [1] "value1"
## 
## $key2
## [1] "value2"

Custom header. Note: content(r)$headers shows the request headers that were sent (httpbin echoes them back in the response body), whereas headers(r) gives the headers of the response itself.

r <- GET("http://httpbin.org/get",
         config = add_headers(Name = "Hadley"))
str(content(r)$headers)
## List of 7
##  $ Accept         : chr "application/json, text/xml, application/xml, */*"
##  $ Accept-Encoding: chr "deflate, gzip, br"
##  $ Cookie         : chr "b=1; a=1"
##  $ Host           : chr "httpbin.org"
##  $ Name           : chr "Hadley"
##  $ User-Agent     : chr "libcurl/7.68.0 r-curl/4.3 httr/1.4.2"
##  $ X-Amzn-Trace-Id: chr "Root=1-602a349a-01e58a835937086a1fd8a62d"
headers(r)
## $date
## [1] "Mon, 15 Feb 2021 08:45:14 GMT"
## 
## $`content-type`
## [1] "application/json"
## 
## $`content-length`
## [1] "420"
## 
## $connection
## [1] "keep-alive"
## 
## $server
## [1] "gunicorn/19.9.0"
## 
## $`access-control-allow-origin`
## [1] "*"
## 
## $`access-control-allow-credentials`
## [1] "true"
## 
## attr(,"class")
## [1] "insensitive" "list"

POST can contain data in body of request.

r <- POST("http://httpbin.org/post", 
          body = list(a = 1, b = 2, c = 3))

Use encode to be specific about the encoding style.

url <- "http://httpbin.org/post"
body <- list(a = 1, b = 2, c = 3)

# Form encoded
r <- POST(url, body = body, encode = "form")
# Multipart encoded
r <- POST(url, body = body, encode = "multipart")
# JSON encoded
r <- POST(url, body = body, encode = "json")

Pro-tip - use the following at the console to see what is going on.

POST(url, 
     body = body, 
     encode = "json", verbose())
## Response [http://httpbin.org/post]
##   Date: 2021-02-15 08:45
##   Status: 200
##   Content-Type: application/json
##   Size: 594 B
## {
##   "args": {}, 
##   "data": "{\"a\":1,\"b\":2,\"c\":3}", 
##   "files": {}, 
##   "form": {}, 
##   "headers": {
##     "Accept": "application/json, text/xml, application/xml, */*", 
##     "Accept-Encoding": "deflate, gzip, br", 
##     "Content-Length": "19", 
##     "Content-Type": "application/json", 
## ...

You can send files.

POST(url, body = upload_file("mypath.txt"))

Managing Secrets

As a general rule, change your password before and after you share it with me or anyone else, even via cryptography.

Passwords used for interacting with an account via an API are clearly secret, but there is additional secret information that needs to be passed between agents. You do not want to mistakenly share your secret data via some public store (such as RPubs or GitHub). It is a very bad idea to hard-code your secrets into the R code.

A simple way to manage secrets is via environment variables. To create environment variables use the .Renviron file stored in your home directory. Note that I intentionally set VAR2 to be empty.

# dummy contents of .Renviron
# quotes around strings not necessary
VAR1 = abc123
VAR2 = 

Retrieve the values via Sys.getenv() (only VAR1 was given a value, so VAR2 comes back as an empty string).

Sys.getenv("VAR1")
## [1] "abc123"
Sys.getenv("VAR2")
## [1] ""

The keyring package is another alternative, refer to that vignette for more detail.

To share secrets with other agents use public key crypto. Your public key can be transmitted freely, so if I go and get your public key I can encrypt my data with it; the data can then only be unlocked with your private key.

Here we go to Hadley Wickham’s github site, pick up his public key and encrypt our secret using that public key.

# Encrypt a secret with a GitHub user's public RSA key.
#
# Fetches the public SSH keys that GitHub publishes for `username`,
# RSA-encrypts `secret` with the first RSA key found and returns the
# ciphertext base64-encoded so that it can be shared as plain text.
#
# secret:   a single character string to encrypt (RSA can only encrypt
#           messages smaller than the key, so keep it short)
# username: GitHub user name whose public key is used
#
# Returns a base64-encoded character string. Stops with an error if the
# HTTP request fails or if the user has no public RSA key.
encrypt <- function(secret, username) {

  # This site stores the public keys
  url <- paste("https://api.github.com/users",
               username, "keys", sep = "/")
  resp <- httr::GET(url)
  httr::stop_for_status(resp)

  # The response is a list with one entry per registered key. A user may
  # have no keys at all, or keys of a type RSA encryption cannot use
  # (e.g. ed25519), so pick the first "ssh-rsa" key explicitly.
  keys <- httr::content(resp)
  key_strings <- vapply(keys, function(k) k$key, character(1))
  rsa_keys <- key_strings[startsWith(key_strings, "ssh-rsa ")]
  if (length(rsa_keys) == 0L) {
    stop("No public RSA key found on GitHub for user '", username, "'.",
         call. = FALSE)
  }
  opubkey <- openssl::read_pubkey(rsa_keys[[1]])

  # Create the encrypted sequence using the public key
  cipher <- openssl::rsa_encrypt(charToRaw(secret),
                                 opubkey)
  # bundle into base64 encoder
  jsonlite::base64_enc(cipher)
}
cipher <- encrypt(secret = "<username>\n<password>", 
                  username = "hadley")
cat(cipher)
## lVA6EwerwXu6lGzNjXsMo7t7CEO8fjTvUbNgLwOF1LKYGzdrQnfbl0j+teGXwaZ+U0bt5ElI
## lWkCWoXpjk5sszMevK0CUIjx68TAnBE0S794xWVlyZQvemeDLy2EKHWrx94kSQMWzRq3Nv8a
## jYKZgjRR7Hsfp7+rQ/i11iEwyKM00qmfkg9QOy02UGHwAqlmv5pJjCscSIkwjKHbgZtrsZts
## p1VyWee+aQE8jTPv6voAVad5r45EDNOqROEEl7kld7cybxJZ+lO5+luVc+5l7oeCjqKvFz9H
## FSCfRH5YzlEWOAsYRQA7BlnaATZ60H8wjBhngtMjIubxFGDbptV3Uw==

Decryption reverses the process.

# Reverse of encrypt(): base64-decode `cipher`, RSA-decrypt the bytes with
# `key` (by default whatever openssl::my_key() returns) and hand back the
# recovered secret as a character string.
decrypt <- function(cipher, key = openssl::my_key()) {
  encrypted_bytes <- jsonlite::base64_dec(cipher)
  plain_bytes <- openssl::rsa_decrypt(encrypted_bytes, key = key)
  rawToChar(plain_bytes)
}

A minimal example using some test keys. In practice you would be using someone’s public key to do this bit.

# Generate a throwaway RSA key pair; its public half does the encrypting.
key <- openssl::rsa_keygen()
pubkey <- key$pubkey

# Encrypt the raw bytes of the message, then base64-encode the ciphertext
# so that it is ready to be bundled and passed around as plain text.
secret_bytes <- charToRaw("This is a test secret")
cipher <- jsonlite::base64_enc(openssl::rsa_encrypt(secret_bytes, pubkey))
cat(cipher)
## WXjeKbKZwUfZwC4X0UEMvgsH4f0Mfybcuo7JVmRnwSdfI10kwVona+YL5PbzGrxHCXv+bNGW
## bzLemBcFvbx3HwL0jqYjzZWgIfPuNzvGOuXNYCI8MkVM0qwvpfaA1y3mlCEcHYk73YyWfy/m
## j6iMpEbI+M6JQvQqt3i45p8TWrV8FHVcW/swpQ33CDtnKCLE3O0tzry+oNYPNnhslg59CmTr
## qUeNbhwImNDxXtfmS0omc0D20QoaJWu/fvf1shw/+7ATDJrwsM1FPb4AWZVcuip+bGp37QNk
## 46xC6Pp1HyYY4kKHYbb195fmGgmFeVBSCBKG5M6D0a2ZPPnHbIMc9Q==

The receiver decrypts using the private key of their public/private key pair.

cipherraw <- jsonlite::base64_dec(cipher)
message <- rsa_decrypt(cipherraw, key)
out <- rawToChar(message)
cat(out)
## This is a test secret

API

APIs define the structure of request and responses, which comprise:

Authentication is generally required so let’s deal with that first. One way to do this is via basic authentication with an API key. Alternatively, OAuth2 can be used for generating a user or session specific token.

OAuth2 automates the key exchange process by providing a standard way for a client to get a key from a server by walking the user through a set of steps. The Getting Started with OAuth2.0 text by Boyd seems quite good. The protocol is defined under RFC6749 (https://tools.ietf.org/html/rfc6749). However, for a more friendly intro on OAuth2 see (https://www.joyofdata.de/blog/oauth2-google-api-python-google-analytics/) and (https://zapier.com/learn/apis/chapter-5-authentication-part-2/). It is useful to think of OAuth2 in terms of the actors involved:

In a nutshell:

  1. the user goes to a website (client) and tries to access something
  2. the client redirects to the server where the user logs in as per usual
  3. the server sends the user back to the client with an authorisation code for the client
  4. the client exchanges code and its secret key for access token
  5. if all ok, the server responds back with an access token, which the client needs to include with every request

From there on in the client is free to access the server on the user's behalf. The tokens generally expire after a bit.

An API Key is much simpler and known as basic authentication; you create a key and a secret and these are passed in the header of the request.

There are some good demos here but some additional examples follow.

API Key example

Go to:

https://developer.domain.com.au/docs/v2/getting-started/creating-first-project and set up a project

Start with the API key as this is simplest, see: https://developer.domain.com.au/docs/v2/authentication/apikey/using-api-key

Create the key and then go to API access and select the “properties and locations API”, which is free up to 500 calls per day.

See https://developer.domain.com.au/docs/latest/apis/pkg_properties_locations for the API docs

Store your key in your .Renviron file (you will need to restart R to pick this up).

# Read the API key from the environment and fail early with a clear message
# if it is missing, rather than sending a request with an empty key.
key <- Sys.getenv("DOMAIN_API_KEY")
if (!nzchar(key)) {
  stop("DOMAIN_API_KEY is not set; add it to .Renviron and restart R.",
       call. = FALSE)
}

# Query-string parameters for the endpoint
params <- list(
  propertyCategory = "house",
  bedrooms = 3,
  periodSize = "Years",
  startingPeriodRelativeToCurrent = 1,
  totalPeriods = 4
)

# Base URL and endpoint path
qq <- list(
  base = "https://api.domain.com.au",
  # the endpoint state/suburb/postcode
  path = "/v2/suburbPerformanceStatistics/NSW/Ashfield/2131"
)

# a generic query function can be set up and you would pass the http
# action that you want to use via text, e.g.
METHOD <- getFromNamespace("GET", ns = "httr")

# The API key travels in the X-API-Key request header. verbose() is
# deliberately not passed here: it echoes the outgoing request headers,
# which would print the key to the console. Add it back temporarily only
# when debugging interactively.
# strictly, this should be wrapped in tryCatch
r <- METHOD(url = qq$base,
            config = add_headers("X-API-Key" = key),
            path = qq$path,
            query = params,
            body = NULL)
warn_for_status(r)
cc <- content(r, "parsed")
# just show header, series is there as well
cc$header
## $suburb
## [1] "Ashfield"
## 
## $state
## [1] "NSW"
## 
## $propertyCategory
## [1] "House"
cc$series$seriesInfo[[1]]
## $year
## [1] 2017
## 
## $month
## [1] 11
## 
## $values
## $values$medianSoldPrice
## [1] 1485000
## 
## $values$numberSold
## [1] 39
## 
## $values$highestSoldPrice
## [1] 1950000
## 
## $values$lowestSoldPrice
## [1] 795000
## 
## $values$`5thPercentileSoldPrice`
## [1] 850000
## 
## $values$`25thPercentileSoldPrice`
## [1] 1201000
## 
## $values$`75thPercentileSoldPrice`
## [1] 1710000
## 
## $values$`95thPercentileSoldPrice`
## [1] 1935000
## 
## $values$medianSaleListingPrice
## [1] 1500000
## 
## $values$numberSaleListing
## [1] 46
## 
## $values$highestSaleListingPrice
## [1] 2800000
## 
## $values$lowestSaleListingPrice
## [1] 869000
## 
## $values$auctionNumberAuctioned
## [1] 36
## 
## $values$auctionNumberSold
## [1] 28
## 
## $values$auctionNumberWithdrawn
## NULL
## 
## $values$daysOnMarket
## [1] 37
## 
## $values$discountPercentage
## NULL
## 
## $values$medianRentListingPrice
## [1] 700
## 
## $values$numberRentListing
## [1] 76
## 
## $values$highestRentListingPrice
## [1] 1300
## 
## $values$lowestRentListingPrice
## [1] 500

For signing a request, you would also add

# for signing the request
# Use an explicit epoch timestamp (milliseconds). utils::timestamp() is not
# suitable here: it writes a stamp to the session history and returns a
# decorated string such as "##------ Mon Feb 15 16:45:18 2021 ------##".
# NOTE(review): the timestamp format is API-specific - confirm against the
# provider's signing documentation.
params$timestamp <- sprintf("%.0f", as.numeric(Sys.time()) * 1000)

# Build the "name=value&name=value" string from all parameters (each value
# must be a scalar) and sign it with HMAC-SHA256.
# NOTE(review): signing schemes usually key the HMAC with the API secret
# rather than the API key - confirm which one the provider expects.
payload <- paste(names(params),
                 vapply(params, as.character, character(1)),
                 sep = "=",
                 collapse = "&")
params$signature <- digest::hmac(
  key = key,
  object = payload,
  algo = "sha256")

OAuth2 example 1

For the domain API, on instances where you do not identify a user or owner, you can set things up using a client credentials grant. The process involves only a single step action of obtaining access token by providing clientId and secret. An example using the github api is below, this is just a transcription from Hadley’s repository for httr. The docs are here http://developer.github.com/v3/oauth/

For this to work, you will need to make your own application. Go to https://github.com/settings/developers and register.

For the homepage URL use http://github.com and for the callback URL use http://localhost:1410/

# Describe the OAuth2 endpoints: authorize and access paths are given
# relative to the base URL.
oep <- oauth_endpoint(base_url = "https://github.com/login/oauth",
                      request = NULL,
                      authorize = "authorize",
                      access = "access_token")

# Client id and secret of the registered application, read from the
# environment so that they are not hard-coded. Fail early if either is
# missing instead of starting the OAuth dance with empty credentials.
mykey <- Sys.getenv("GITHUB_OAUTH_ID")
mysec <- Sys.getenv("GITHUB_OAUTH_SEC")
if (!nzchar(mykey) || !nzchar(mysec)) {
  stop("GITHUB_OAUTH_ID and GITHUB_OAUTH_SEC must both be set in .Renviron.",
       call. = FALSE)
}

myapp <- oauth_app("github",
  key = mykey,
  # The secret isn't really secret: a user still has to authenticate when
  # redirected.
  secret = mysec
)

# Retrieve the token
token <- oauth2.0_token(oep, myapp)

# Now use the API
gtoken <- config(token = token)
req <- GET("https://api.github.com/user/following", config = gtoken)
stop_for_status(req)
l <- content(req)
# vapply() always returns a character vector, even when the list is empty
vapply(l, function(z) z$login, character(1))
##  [1] "kjhealy"        "eschulte"       "bbolker"        "jrnold"        
##  [5] "tharte"         "gavinsimpson"   "harrelfe"       "cscherrer"     
##  [9] "dchudz"         "betanalpha"     "helske"         "tomwallis"     
## [13] "jacobrousseau"  "kholsinger"     "mailund"        "rmcelreath"    
## [17] "AustinRochford" "miklevin"       "ellisp"         "kgoldfeld"     
## [21] "mpatacchiola"   "ASKurz"

OAuth2 example 2

Trying to sort out a non-standard OAuth2 authentication seems obfuscated given the solution that I finally arrived at. The insight came from a stackoverflow post and the Boyd textbook mentioned earlier. After that, I posted this solution at stackoverflow https://stackoverflow.com/questions/66202868/convert-a-curl-oauth2-token-request-to-httr

# Client id and secret for the client-credentials grant, read from the
# environment.
mykey <- Sys.getenv("DOMAIN_OAUTH_ID")
mysec <- Sys.getenv("DOMAIN_OAUTH_SEC")

# Request an access token: the credentials and the requested scope are
# posted as a form-encoded body.
r <- POST("https://auth.domain.com.au/v1/connect/token",
          body = list(
            grant_type = "client_credentials",
            client_id = mykey,
            client_secret = mysec,
            scope = "api_suburbperformance_read"
          ),
          encode = "form"
)
# Stop here if the token request failed; otherwise `tok` below would be
# NULL and later requests would be sent with an empty bearer token.
stop_for_status(r)
cr <- content(r)
tok <- cr$access_token
# hiding the token
cr$access_token <- "the_token_was_here"
cr
## $access_token
## [1] "the_token_was_here"
## 
## $expires_in
## [1] 43200
## 
## $token_type
## [1] "Bearer"
# Call the API, presenting the access token as a bearer token in the
# Authorization header.
request_headers <- add_headers(
  "Content-Type" = "application/x-www-form-urlencoded",
  Accept = "text/plain",
  "Authorization" = paste("Bearer", tok)
)
rg <- GET(
  "https://api.domain.com.au/v2/suburbPerformanceStatistics/NSW/Pyrmont/2009",
  request_headers
)

warn_for_status(rg)
# the parsed body holds the series as well; only the header is shown here
rg_body <- content(rg)
rg_body$header
## $suburb
## [1] "Pyrmont"
## 
## $state
## [1] "NSW"
## 
## $propertyCategory
## [1] "House"

OAuth2 example 3

When authentication is required you will need to use an alternative access approach. This was taking too much time and I couldn’t figure this out and so have left it for now.