First load some libraries:
library(dplyr) # data manipulation
library(xml2) # xml parsing
library(purrr) # data manipulation
library(lubridate) # date formatting
library(ggplot2) # plotting
library(tidyr) # data manipulation
library(readr) # data input/output
Set up a function to read one page of user plays from the XML API and parse it into a data frame.
read_page <- function(pagenum, username){
Sys.sleep(2) # time delay to play nice with BGG
# create URL from username and page number
page_string <- paste0('https://www.boardgamegeek.com/xmlapi2/plays?username=',
username,
'&page=',
pagenum)
plays <- read_xml(page_string) # read the XML
# now parse the required fields from the XML
date <- plays %>% xml_find_all('//play') %>% xml_attr('date') %>% as.Date()
quantity <- plays %>% xml_find_all('//play') %>% xml_attr('quantity') %>% as.integer()
id <- plays %>% xml_find_all('//play/item') %>% xml_attr('objectid')
name <- plays %>% xml_find_all('//play/item') %>% xml_attr('name')
# and put them in a data frame to return
data_frame(date, quantity, id, name)
}
Now get the first page in order to determine how many total pages there are for the user, and then call the read_page function for each of those pages. Takes a while!
username <- 'qwertymartin' # set target username here
page_string <- paste0('https://www.boardgamegeek.com/xmlapi2/plays?username=', username)
# get total plays and convert to total number of pages to retrieve
tot_plays <- read_xml(page_string) %>% xml_attr('total') %>% as.integer()
pages <- ceiling(tot_plays / 100)
#read all plays pages into one data frame using purrr::map_df to iterate
plays_df <- map_df(1:pages, read_page, username)
Do some tidying up - get rid of plays with ‘weird’ dates or quantities of zero. Then calculate the running total of plays for each game (cum_play) and the sum (normal and log) of the running totals where more than one play logged in the same item.
plays_df <- plays_df %>%
arrange(date) %>% # sort by date
filter(date > '2000-01-01', quantity > 0) %>% # get rid of weird items
group_by(id) %>%
mutate(cum_play = cumsum(quantity)) %>% # find running total per game
ungroup() %>%
mutate(sum_play = quantity * (2 * cum_play - quantity + 1) / 2, # sum of running total
sum_log = map2_dbl(cum_play, quantity, ~sum(log(seq(.x, .x+1-.y, -1)))))
# sum of running total of logs
plays_df
Now calculate means over monthly periods. You can change ‘month’ to ‘week’/‘quarter’/‘year’ if desired.
qq_df <- plays_df %>%
group_by(month = floor_date(date, "month")) %>%
summarise(qq = mean(sum_play),
logqq = exp(sum(sum_log)/sum(quantity))) %>%
ungroup()
qq_df
And finally plot the results. The normal and log measures are separated out in to separate rows as this is easier for ggplot.
qq_df %>%
gather(measure, value, -month) %>% # make separate rows for normal and log qq
filter(measure == 'logqq') %>% # only plot log
ggplot(aes(x = month, y = value, color = measure)) +
geom_point() +
theme_minimal() +
theme(legend.position = 'none') +
labs (x = 'date', y = 'log-qq')

LS0tCnRpdGxlOiAiQ2FsY3VsYXRpbmcgdGhlIFF3ZXJ0eSBRdW90aWVudCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBoaWdobGlnaHQ6IHplbmIKLS0tCgpGaXJzdCBsb2FkIHNvbWUgbGlicmFyaWVzOgoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KCmxpYnJhcnkoZHBseXIpICMgZGF0YSBtYW5pcHVsYXRpb24KbGlicmFyeSh4bWwyKSAjIHhtbCBwYXJzaW5nCmxpYnJhcnkocHVycnIpICMgZGF0YSBtYW5pcHVsYXRpb24KbGlicmFyeShsdWJyaWRhdGUpICMgZGF0ZSBmb3JtYXR0aW5nCmxpYnJhcnkoZ2dwbG90MikgIyBwbG90dGluZwpsaWJyYXJ5KHRpZHlyKSAjIGRhdGEgbWFuaXB1bGF0aW9uCmxpYnJhcnkocmVhZHIpICMgZGF0YSBpbnB1dC9vdXRwdXQKCmBgYAoKU2V0IHVwIGEgZnVuY3Rpb24gdG8gcmVhZCBvbmUgcGFnZSBvZiB1c2VyIHBsYXlzIGZyb20gdGhlIFhNTCBBUEkgYW5kIHBhcnNlIGl0IGludG8gYSBkYXRhIGZyYW1lLgoKYGBge3J9CnJlYWRfcGFnZSA8LSBmdW5jdGlvbihwYWdlbnVtLCB1c2VybmFtZSl7CiAgU3lzLnNsZWVwKDIpICMgdGltZSBkZWxheSB0byBwbGF5IG5pY2Ugd2l0aCBCR0cKCiAgIyBjcmVhdGUgVVJMIGZyb20gdXNlcm5hbWUgYW5kIHBhZ2UgbnVtYmVyCiAgcGFnZV9zdHJpbmcgPC0gcGFzdGUwKCdodHRwczovL3d3dy5ib2FyZGdhbWVnZWVrLmNvbS94bWxhcGkyL3BsYXlzP3VzZXJuYW1lPScsCiAgICAgICAgICAgICAgICAgICAgICAgIHVzZXJuYW1lLAogICAgICAgICAgICAgICAgICAgICAgICAnJnBhZ2U9JywKICAgICAgICAgICAgICAgICAgICAgICAgcGFnZW51bSkKICAKICBwbGF5cyA8LSByZWFkX3htbChwYWdlX3N0cmluZykgIyByZWFkIHRoZSBYTUwKICAKICAjIG5vdyBwYXJzZSB0aGUgcmVxdWlyZWQgZmllbGRzIGZyb20gdGhlIFhNTAogIGRhdGUgPC0gcGxheXMgJT4lIHhtbF9maW5kX2FsbCgnLy9wbGF5JykgJT4lIHhtbF9hdHRyKCdkYXRlJykgJT4lIGFzLkRhdGUoKQogIHF1YW50aXR5IDwtIHBsYXlzICU+JSB4bWxfZmluZF9hbGwoJy8vcGxheScpICU+JSB4bWxfYXR0cigncXVhbnRpdHknKSAlPiUgYXMuaW50ZWdlcigpCiAgaWQgPC0gcGxheXMgJT4lIHhtbF9maW5kX2FsbCgnLy9wbGF5L2l0ZW0nKSAlPiUgeG1sX2F0dHIoJ29iamVjdGlkJykKICBuYW1lIDwtIHBsYXlzICU+JSB4bWxfZmluZF9hbGwoJy8vcGxheS9pdGVtJykgJT4lIHhtbF9hdHRyKCduYW1lJykKICAKICAjIGFuZCBwdXQgdGhlbSBpbiBhIGRhdGEgZnJhbWUgdG8gcmV0dXJuCiAgZGF0YV9mcmFtZShkYXRlLCBxdWFudGl0eSwgaWQsIG5hbWUpCn0KCmBgYAoKTm93IGdldCB0aGUgZmlyc3QgcGFnZSBpbiBvcmRlciB0byBkZXRlcm1pbmUgaG93IG1hbnkgdG90YWwgcGFnZXMgdGhlcmUgYXJlIGZvciB0aGUgdXNlciwgYW5kIHRoZW4gY2FsbCB0aGUgcmVhZF9wYWdlIGZ1bmN0aW9uIGZvciBlYWNoIG9mIHRob3NlIHBhZ2VzLiBUYWtlcyBhIHdoaWxlIQoKYGBge3J9CnVzZXJuYW1lIDwtICdxd2VydHltYXJ0aW4nICMgc2V0IHRhcmdldCB1c2VybmFtZSBoZXJlCnBhZ2Vfc3RyaW5nIDwtIHBhc3RlMCgnaHR0cHM6Ly93d3cuYm9hcmRnYW1lZ2Vlay5jb20veG1sYXBpMi9wbGF5cz91c2VybmFtZT0nLCB1c2VybmFtZSkKCiMgZ2V0IHRvdGFsIHBsYXlzIGFuZCBjb252ZXJ0IHRvIHRvdGFsIG51bWJlciBvZiBwYWdlcyB0byByZXRyaWV2ZQp0b3RfcGxheXMgPC0gcmVhZF94bWwocGFnZV9zdHJpbmcpICU+JSB4bWxfYXR0cigndG90YWwnKSAlPiUgYXMuaW50ZWdlcigpCnBhZ2VzIDwtIGNlaWxpbmcodG90X3BsYXlzIC8gMTAwKQoKI3JlYWQgYWxsIHBsYXlzIHBhZ2VzIGludG8gb25lIGRhdGEgZnJhbWUgdXNpbmcgcHVycnI6Om1hcF9kZiB0byBpdGVyYXRlCnBsYXlzX2RmIDwtIG1hcF9kZigxOnBhZ2VzLCByZWFkX3BhZ2UsIHVzZXJuYW1lKQoKYGBgCgpEbyBzb21lIHRpZHlpbmcgdXAgLSBnZXQgcmlkIG9mIHBsYXlzIHdpdGggJ3dlaXJkJyBkYXRlcyBvciBxdWFudGl0aWVzIG9mIHplcm8uIFRoZW4gY2FsY3VsYXRlIHRoZSBydW5uaW5nIHRvdGFsIG9mIHBsYXlzIGZvciBlYWNoIGdhbWUgKGN1bV9wbGF5KSBhbmQgdGhlIHN1bSAobm9ybWFsIGFuZCBsb2cpIG9mIHRoZSBydW5uaW5nIHRvdGFscyB3aGVyZSBtb3JlIHRoYW4gb25lIHBsYXkgbG9nZ2VkIGluIHRoZSBzYW1lIGl0ZW0uCgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpwbGF5c19kZiA8LSBwbGF5c19kZiAlPiUKICBhcnJhbmdlKGRhdGUpICU+JSAjIHNvcnQgYnkgZGF0ZQogIGZpbHRlcihkYXRlID4gJzIwMDAtMDEtMDEnLCBxdWFudGl0eSA+IDApICU+JSAjIGdldCByaWQgb2Ygd2VpcmQgaXRlbXMKICBncm91cF9ieShpZCkgJT4lIAogIG11dGF0ZShjdW1fcGxheSA9IGN1bXN1bShxdWFudGl0eSkpICU+JSAjIGZpbmQgcnVubmluZyB0b3RhbCBwZXIgZ2FtZQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUoc3VtX3BsYXkgPSBxdWFudGl0eSAqICgyICogY3VtX3BsYXkgLSBxdWFudGl0eSArIDEpIC8gMiwgIyBzdW0gb2YgcnVubmluZyB0b3RhbAogICAgICAgICBzdW1fbG9nID0gbWFwMl9kYmwoY3VtX3BsYXksIHF1YW50aXR5LCB+c3VtKGxvZyhzZXEoLngsIC54KzEtLnksIC0xKSkpKSkgCiAgICAgICAgICMgc3VtIG9mIHJ1bm5pbmcgdG90YWwgb2YgbG9ncwoKcGxheXNfZGYKYGBgCgpOb3cgY2FsY3VsYXRlIG1lYW5zIG92ZXIgbW9udGhseSBwZXJpb2RzLiBZb3UgY2FuIGNoYW5nZSAnbW9udGgnIHRvICd3ZWVrJy8ncXVhcnRlcicvJ3llYXInIGlmIGRlc2lyZWQuCgpgYGB7cn0KcXFfZGYgPC0gcGxheXNfZGYgJT4lCiAgZ3JvdXBfYnkobW9udGggPSBmbG9vcl9kYXRlKGRhdGUsICJtb250aCIpKSAlPiUKICBzdW1tYXJpc2UocXEgPSBtZWFuKHN1bV9wbGF5KSwKICAgICAgICAgICAgbG9ncXEgPSBleHAoc3VtKHN1bV9sb2cpL3N1bShxdWFudGl0eSkpKSAlPiUKICB1bmdyb3VwKCkKCnFxX2RmCmBgYAoKQW5kIGZpbmFsbHkgcGxvdCB0aGUgcmVzdWx0cy4gVGhlIG5vcm1hbCBhbmQgbG9nIG1lYXN1cmVzIGFyZSBzZXBhcmF0ZWQgb3V0IGluIHRvIHNlcGFyYXRlIHJvd3MgYXMgdGhpcyBpcyBlYXNpZXIgZm9yIGdncGxvdC4KCmBgYHtyfQpxcV9kZiAlPiUKICBnYXRoZXIobWVhc3VyZSwgdmFsdWUsIC1tb250aCkgJT4lICMgbWFrZSBzZXBhcmF0ZSByb3dzIGZvciBub3JtYWwgYW5kIGxvZyBxcQogIGZpbHRlcihtZWFzdXJlID09ICdsb2dxcScpICU+JSAjIG9ubHkgcGxvdCBsb2cgCiAgZ2dwbG90KGFlcyh4ID0gbW9udGgsIHkgPSB2YWx1ZSwgY29sb3IgPSBtZWFzdXJlKSkgKwogIGdlb21fcG9pbnQoKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAnbm9uZScpICsKICBsYWJzICh4ID0gJ2RhdGUnLCB5ID0gJ2xvZy1xcScpCmBgYA==