Get all the ranked trick-takers into a data frame.

# Run an advanced search and keep the result as `trick_df`.
# NOTE(review): the parameters presumably mean "at least 30 voters, no
# expansions, property id 2009 (trick-taking?)" -- confirm against run_search().
trick_df <- run_search(
  list(
    advsearch = '1',
    'range[numvoters][min]' = '30',
    'nosubtypes[]' = 'boardgameexpansion',
    'propertyids[]' = '2009'
  )
)
[1] 3
[1] 1
[1] 2
[1] 3
# Display the search results.
trick_df

Now get the XML for all these IDs in a single request.

# Request the `thing` endpoint of the XML API once, passing every id from
# `trick_df` as a comma-separated list.
# NOTE(review): 'stats' = '1' presumably asks for the statistics/ratings
# elements that are read further down -- confirm against the API docs.
tricks_xml <- GET('https://boardgamegeek.com',
                  accept_xml(),
                  path = '/xmlapi2/thing',
                  query = list('id' = paste(trick_df$id, collapse = ','),
                               'stats' = '1'))

# Stop here on an HTTP error instead of letting an error page flow into the
# XML parsing below, where the failure would be much harder to diagnose.
stop_for_status(tricks_xml)

Then start picking out the properties we are interested in.

# One <item> node per game in the response.
items <- content(tricks_xml) %>% 
  xml_find_all('item')

# XPaths, relative to each <item>, of the elements whose `value` attribute is
# read as a number.
attrs <- c('yearpublished',
           'minplayers',
           'maxplayers',
           'playingtime',
           'minplaytime',
           'maxplaytime',
           'minage',
           'statistics/ratings/usersrated',
           'statistics/ratings/average',
           'statistics/ratings/bayesaverage',
           'statistics/ratings/stddev',
           'statistics/ratings/owned',
           'statistics/ratings/numweights',
           'statistics/ratings/averageweight',
           'statistics/ratings/ranks/rank[@name="boardgame"]'
           )

# Build one numeric column per entry of `attrs`.
#
# The lookup is relative to each item and uses xml_find_first(), which yields
# exactly one result per item (a missing node, hence NA, when the element is
# absent). The previous "//"-prefixed xml_find_all() searched the whole
# document from every item, so a single missing element would have shifted all
# later values onto the wrong game.
#
# Values that are not numbers are turned into NA by as.numeric(), which emits
# a coercion warning; `rank` is filtered with !is.na() further down.
#
# NOTE(review): `id` and `name` are taken from `trick_df` by position, which
# assumes the items come back in the same order as the requested ids -- confirm.
trick_data <- map(attrs, ~items %>% 
                    xml_find_first(.x) %>% 
                    xml_attr('value') %>% 
                    as.numeric()) %>% 
  set_names(str_replace(attrs, 'statistics/ratings/','')) %>% 
  bind_cols(id = trick_df$id, name = trick_df$ordtitle, .) %>%
  rename(rank = `ranks/rank[@name="boardgame"]`)
NAs introduced by coercion

Add some more complex properties

# list columns
# links (already written this for Knizia networks)

# Add a list column called `link_type` to `data`. For every node in the global
# `items`, the column holds the `value` attributes of its <link> descendants
# whose type is "boardgame" followed by `link_type`.
#
# data:      data frame with one row per element of `items`.
# link_type: suffix of the link type, e.g. 'designer'.
# Returns `data` with the extra list column.
add_linkcol <- function(data, link_type) {
  link_xpath <- sprintf(".//link[@type='boardgame%s']", link_type)
  data[[link_type]] <- map(
    items,
    function(item) xml_attr(xml_find_all(item, link_xpath), 'value')
  )
  data
}
# Link types to attach; add_linkcol() creates one list column for each.
link_types <- c(
  'category', 'mechanic', 'family', 'expansion',
  'designer', 'artist', 'publisher'
)
for (type in link_types) {
  trick_data <- add_linkcol(trick_data, type)
}
# poll for best number of players (already written this at home)
# Collect the "suggested_numplayers" <poll> children of the items.
poll_list <- xml_find_all(items, 'poll[@name="suggested_numplayers"]')
# Convert one <poll> node into a long data frame with one row per <result>.
#
# poll: an xml node containing <results numplayers="..."> elements, each with
#       <result value="..." numvotes="..."> children.
# Returns a data frame with columns `numplayers` (character, taken from the
# parent <results>), `quality` (the result's `value`) and `votes` (integer).
get_poll_df <- function(poll){

  # One <results> element per player-count option.
  results_nodes <- xml_find_all(poll, './/results')
  numplayers <- xml_attr(results_nodes, 'numplayers')

  # Number of <result> children under each option. Counting them, rather than
  # assuming exactly three per option, keeps the columns aligned when an
  # option has a different number of results (including none at all).
  n_results <- map_int(results_nodes, ~length(xml_find_all(.x, 'result')))

  # All <result> nodes in document order, i.e. grouped by their parent option.
  result_nodes <- xml_find_all(poll, './/results/result')
  quality <- xml_attr(result_nodes, 'value')
  votes <- as.integer(xml_attr(result_nodes, 'numvotes'))

  # NOTE(review): data_frame() is deprecated in recent tibble releases;
  # consider tibble() once the minimum package versions are known.
  data_frame(numplayers = rep(numplayers, times = n_results), quality, votes)
}
# Total number of poll votes for each game, read from the `totalvotes`
# attribute of the poll nodes.
trick_data$poll_votes <- as.integer(xml_attr(poll_list, 'totalvotes'))
# One data frame of poll results per game, stored as a list column.
trick_data$poll <- map(poll_list, get_poll_df)

Let’s have a look at the most common mechanisms.

# How many games list each mechanic, most frequent first.
trick_data %>%
  unnest(mechanic) %>%
  count(mechanic, sort = TRUE)
package ‘bindrcpp’ was built under R version 3.3.3

And designers:

# Designers credited on more than one game, most prolific first.
trick_data %>%
  unnest(designer) %>%
  count(designer, sort = TRUE) %>%
  filter(n > 1)

And publishers:

# Publishers appearing on more than five games, most frequent first.
trick_data %>%
  unnest(publisher) %>%
  count(publisher, sort = TRUE) %>%
  filter(n > 5)

And artists:

# Artists credited on more than one game, most frequent first.
trick_data %>%
  unnest(artist) %>%
  count(artist, sort = TRUE) %>%
  filter(n > 1)

And category:

# Categories attached to more than five games, most frequent first.
trick_data %>%
  unnest(category) %>%
  count(category, sort = TRUE) %>%
  filter(n > 5)

And family:

# Families containing more than one game, most frequent first.
trick_data %>%
  unnest(family) %>%
  count(family, sort = TRUE) %>%
  filter(n > 1)

simple plots - year

# Bar chart of the number of games per publication year, for years after 1980.
ggplot(filter(trick_data, yearpublished > 1980), aes(x = yearpublished)) +
  geom_bar()

minimum players

# Bar chart of the number of games per minimum player count.
ggplot(trick_data, aes(x = minplayers)) +
  geom_bar()

maximum players

# Bar chart of the number of games per maximum player count, keeping only
# games whose maximum is below 12.
ggplot(filter(trick_data, maxplayers < 12), aes(x = maxplayers)) +
  geom_bar()

Best with…

# For games whose poll has at least five votes, derive:
#   poll_freq - the poll with `freq`, each row's share of the votes cast for
#               its player count;
#   best      - the player counts where 'Best' received more than half of the
#               votes (as integers);
#   nbest     - how many such player counts there are.
best_data <- trick_data %>%
  filter(poll_votes >= 5) %>%
  mutate(
    poll_freq = map(poll, function(poll_df) {
      poll_df %>%
        group_by(numplayers) %>%
        mutate(freq = votes / sum(votes)) %>%
        ungroup()
    }),
    best = map(poll_freq, function(freq_df) {
      freq_df %>%
        filter(quality == 'Best', freq > 0.5) %>%
        pull(numplayers) %>%
        as.integer()
    }),
    nbest = lengths(best)
  )
# Games with exactly one 'Best' player count, ordered by that count and then
# by rank. With one entry per row the list column converts to a plain integer.
filter(best_data, nbest == 1) %>%
  mutate(best = as.integer(best)) %>%
  arrange(best, rank) %>%
  select(name, best)

Range of best with:

# Number of games for each count of 'Best' player numbers.
count(best_data, nbest)

Best with (single count):

# Games per 'Best' player count, restricted to games with a single such count.
filter(best_data, nbest == 1) %>%
  mutate(best = as.integer(best)) %>%
  count(best)

# Games per 'Best' player count, counting a game once for each of its counts.
count(unnest(best_data, best), best)

# Games that are 'Best' with five players, by rank.
unnest(best_data, best) %>%
  filter(best == 5) %>%
  arrange(rank) %>%
  select(name, best)

Trickster nominees:

# Games published before 2009 that have at least one designer, none of them
# '(Uncredited)', are not in the 'Climbing Games' family and have a non-missing
# rank; listed from the highest average rating down.
trick_data %>%
  filter(
    yearpublished < 2009,
    lengths(designer) > 0,
    !map_lgl(designer, ~ '(Uncredited)' %in% .),
    !map_lgl(family, ~ 'Climbing Games' %in% .),
    !is.na(rank)
  ) %>%
  arrange(desc(average)) %>%
  select(name, rank)

By year:

# For each publication year, show the game with the lowest rank number among
# games that have at least one designer, none of them '(Uncredited)', are not
# in the 'Climbing Games' family and have a non-missing rank.
trick_data %>% 
  filter(map_lgl(designer, ~ length(.) > 0),
         map_lgl(designer, ~ !('(Uncredited)' %in% .)),
         map_lgl(family, ~ !('Climbing Games' %in% .)),
         !is.na(rank)) %>%
  group_by(yearpublished) %>%
  # A negative n makes top_n() keep the rows with the smallest `rank`.
  top_n(-1, rank) %>%
  # Drop the grouping once the per-year pick is done, so the result is a plain
  # table (the poll frequency code in this file does the same after grouping).
  ungroup() %>%
  arrange(yearpublished) %>%
  select(yearpublished, name)
