Get all the ranked trick-takers into a data frame.

# Advanced search with a minimum of 30 voters, expansions excluded, and
# restricted to property id 2009 (presumably the trick-taking mechanic --
# TODO confirm against the site).
trick_df <- run_search(
  list(
    "advsearch" = "1",
    "range[numvoters][min]" = "30",
    "nosubtypes[]" = "boardgameexpansion",
    "propertyids[]" = "2009"
  )
)
[1] 3
[1] 1
[1] 2
[1] 3
trick_df

Now get XML for all these IDs in a single page.

# One request for every game: the ids go out as a single comma-separated
# value, together with stats = "1".
tricks_xml <- GET(
  "https://boardgamegeek.com",
  accept_xml(),
  path = "/xmlapi2/thing",
  query = list(
    "id" = paste(trick_df$id, collapse = ","),
    "stats" = "1"
  )
)

Then start picking out the properties we are interested in.

# One <item> node per game in the response.
items <- content(tricks_xml) %>% 
  xml_find_all('item')

# Elements (paths relative to an item) whose 'value' attribute is numeric.
attrs <- c('yearpublished',
           'minplayers',
           'maxplayers',
           'playingtime',
           'minplaytime',
           'maxplaytime',
           'minage',
           'statistics/ratings/usersrated',
           'statistics/ratings/average',
           'statistics/ratings/bayesaverage',
           'statistics/ratings/stddev',
           'statistics/ratings/owned',
           'statistics/ratings/numweights',
           'statistics/ratings/averageweight',
           'statistics/ratings/ranks/rank[@name="boardgame"]'
           )

# xml_find_first() with a path relative to each item returns exactly one
# result per item (a missing node, hence NA, when the element is absent).
# A document-wide '//' search would instead drop absent elements and shift
# every later value onto the wrong game.
# Non-numeric values become NA with a coercion warning.
# NOTE(review): id/name are taken from trick_df by position, which assumes
# the response lists items in the order the ids were requested -- confirm.
trick_data <- map(attrs, ~items %>% 
                    xml_find_first(paste0("./", .x)) %>% 
                    xml_attr('value') %>% 
                    as.numeric()) %>% 
  set_names(str_replace(attrs, 'statistics/ratings/','')) %>% 
  bind_cols(id = trick_df$id, name = trick_df$ordtitle, .) %>%
  rename(rank = `ranks/rank[@name="boardgame"]`)
NAs introduced by coercion

Add some more complex properties

# list columns
# links (already written this for Knizia networks)
# Add a list column holding the link values of one type for every game.
#
# data:      data frame with one row per node in `nodes`, in the same order.
# link_type: suffix of the link type; e.g. 'mechanic' selects <link> nodes
#            whose type attribute is 'boardgamemechanic'.
# nodes:     nodes to read the links from. Defaults to the global `items`,
#            so existing two-argument calls behave as before.
# Returns `data` with a new list column named after `link_type`; each entry
# is the character vector of 'value' attributes of the matching links.
add_linkcol <- function(data, link_type, nodes = items){
  xpath <- paste0(".//link[@type='boardgame", link_type, "']")

  data[[link_type]] <- map(nodes, ~xml_find_all(.x, xpath) %>% 
                              xml_attr('value'))
  
  data
}
# Link types to attach as list columns, one column per type.
link_types <- c(
  'category', 'mechanic', 'family', 'expansion',
  'designer', 'artist', 'publisher'
)

# Fold the types over the data frame: each step adds one list column.
trick_data <- reduce(link_types, add_linkcol, .init = trick_data)
# poll for best number of players (already written this at home)
# One poll node per item. xml_find_first() keeps a (missing) placeholder for
# an item without the poll, so poll_list stays aligned with the rows of
# trick_data instead of silently getting shorter.
poll_list <- items %>% xml_find_first('poll[@name="suggested_numplayers"]')
# Turn one suggested-number-of-players poll node into a long data frame.
#
# poll: a single poll node.
# Returns a tibble with one row per <result> node and the columns
#   numplayers - 'numplayers' attribute of the enclosing <results> node
#   quality    - 'value' attribute of the <result>
#   votes      - 'numvotes' attribute of the <result>, as integer
# A poll without any <result> nodes gives a zero-row tibble.
get_poll_df <- function(poll){

  result_nodes <- xml_find_all(poll, './/results/result')

  # Read the player count from each result's own parent rather than
  # repeating every <results> value a fixed three times; that stays correct
  # when a <results> node holds fewer or more than three <result> children.
  numplayers <- result_nodes %>%
    xml_find_first('parent::results') %>%
    xml_attr('numplayers')

  tibble(numplayers = numplayers,
         quality = xml_attr(result_nodes, 'value'),
         votes = as.integer(xml_attr(result_nodes, 'numvotes')))
}
# Total number of poll votes per game.
trick_data$poll_votes <- as.integer(xml_attr(poll_list, 'totalvotes'))

# Per-game poll breakdown, stored as a list column of data frames.
trick_data$poll <- map(poll_list, get_poll_df)

Let’s have a look at the most common mechanisms.

# Number of games per mechanic, most frequent first.
trick_data %>%
  unnest(mechanic) %>%
  count(mechanic, sort = TRUE)
package ‘bindrcpp’ was built under R version 3.3.3

And designers:

# Designers credited on more than one game, most frequent first.
trick_data %>%
  unnest(designer) %>%
  count(designer, sort = TRUE) %>%
  filter(n > 1)

And publishers:

# Publishers with more than five games, most frequent first.
trick_data %>%
  unnest(publisher) %>%
  count(publisher, sort = TRUE) %>%
  filter(n > 5)

And artists:

# Artists credited on more than one game, most frequent first.
trick_data %>%
  unnest(artist) %>%
  count(artist, sort = TRUE) %>%
  filter(n > 1)

And category:

# Categories attached to more than five games, most frequent first.
trick_data %>%
  unnest(category) %>%
  count(category, sort = TRUE) %>%
  filter(n > 5)

And family:

# Families attached to more than one game, most frequent first.
trick_data %>%
  unnest(family) %>%
  count(family, sort = TRUE) %>%
  filter(n > 1)

simple plots - year

# Games per publication year, for years after 1980 only.
ggplot(filter(trick_data, yearpublished > 1980), aes(x = yearpublished)) +
  geom_bar()

minimum players

# Games per minimum player count.
ggplot(trick_data, aes(x = minplayers)) +
  geom_bar()

maximum players

# Games per maximum player count, for maxima below 12 only.
ggplot(filter(trick_data, maxplayers < 12), aes(x = maxplayers)) +
  geom_bar()

Best with…

# Keep games whose poll has at least five votes, then work out which player
# counts a majority of voters called 'Best'.
best_data <- trick_data %>%
  filter(poll_votes >= 5) %>%
  mutate(
    # share of the votes each quality level received, per player count
    poll_freq = map(poll, function(poll_df) {
      poll_df %>%
        group_by(numplayers) %>%
        mutate(freq = votes / sum(votes)) %>%
        ungroup()
    }),
    # player counts where 'Best' got more than half of the votes
    best = map(poll_freq, function(freq_df) {
      majority_best <- filter(freq_df, quality == 'Best', freq > 0.5)
      as.integer(pull(majority_best, numplayers))
    }),
    nbest = map_int(best, length)
  )

# Games with exactly one 'Best' player count, ordered by that count and rank.
best_data %>%
  filter(nbest == 1) %>%
  transmute(name, best = as.integer(best), rank) %>%
  arrange(best, rank) %>%
  select(name, best)

Range of best with:

# How many games have 0, 1, 2, ... 'Best' player counts.
count(best_data, nbest)

Best with (single count):

# Games per 'Best' player count, restricted to games with exactly one.
best_data %>%
  filter(nbest == 1) %>%
  transmute(best = as.integer(best)) %>%
  count(best)

# Games per 'Best' player count, counting a game once for each of its counts.
count(unnest(best_data, best), best)

# Games that are 'Best' with five players, ordered by rank.
unnest(best_data, best) %>%
  filter(best == 5L) %>%
  arrange(rank) %>%
  select(name, best)

Trickster nominees:

# Candidates: published before 2009, at least one designer listed, none of
# them '(Uncredited)', not in the 'Climbing Games' family, and with a rank.
# Ordered by average rating, highest first.
trick_data %>%
  filter(
    yearpublished < 2009,
    lengths(designer) > 0,
    !map_lgl(designer, ~ '(Uncredited)' %in% .),
    !map_lgl(family, ~ 'Climbing Games' %in% .),
    !is.na(rank)
  ) %>%
  arrange(desc(average)) %>%
  select(name, rank)

By year:

# Same designer/family/rank filters as for the nominees, without the year
# cut-off; then keep, per publication year, the game with the smallest rank
# value (top_n with n = -1).
trick_data %>%
  filter(
    lengths(designer) > 0,
    !map_lgl(designer, ~ '(Uncredited)' %in% .),
    !map_lgl(family, ~ 'Climbing Games' %in% .),
    !is.na(rank)
  ) %>%
  group_by(yearpublished) %>%
  top_n(n = -1, wt = rank) %>%
  arrange(yearpublished) %>%
  select(yearpublished, name)
---
title: "Tricktaker analysis"
output: html_notebook
---


```{r, include=FALSE}

library(tidyverse)
library(httr)
library(rvest)
library(stringr)
# xml_find_all(), xml_find_first() and xml_attr() are xml2 functions; attach
# xml2 explicitly instead of relying on another package to attach it.
library(xml2)

# Run a BoardGameGeek board game search and collect every page of results.
#
# query: named list of URL query parameters, forwarded unchanged to GET().
# Returns the rows of all JSON result pages combined into one data frame.
# Stops with an error when the page count cannot be read from the first
# page, or when a page still is not JSON after several attempts.
run_search <- function(query){
  #get HTML page of first page of search
  search_html <- GET('https://boardgamegeek.com',
                     accept_xml(), 
                     path = '/search/boardgame/page/1',
                     query = query)

  #pagination links of the first page; parsed once and reused below
  page_links <- search_html %>%
    read_html %>%
    html_nodes('.fr a')

  #get number of pages
  npages <- length(page_links)

  if(npages == 0){ # only one page so no links
    npages <- 1L
  } else if(npages == 6){ # collapsed link list so need to extract last one
    npages <- page_links[[npages]] %>%
      html_attr('href') %>%
      str_replace('/search/boardgame/page/', '') %>%
      str_replace('\\?.*', '') %>%
      as.integer() # keep the page count numeric, not a string
    if(is.na(npages)){
      stop('Could not read the number of result pages from the search page',
           call. = FALSE)
    }
  }
  print(npages)

  #get one page of json search results, retrying a bounded number of times
  get_json_page <- function(pagenum, query, max_tries = 10){
    for(attempt in seq_len(max_tries)){
      Sys.sleep(2) # pause before every request, retries included
      print(pagenum)
      page <- GET('https://boardgamegeek.com',
          accept_json(), 
          path = paste0('/search/boardgame/page/', pagenum),
          query = query)

      #make sure it worked; a missing header counts as a failed attempt
      content_type <- headers(page)$`content-type`
      if(!is.null(content_type) &&
         isTRUE(grepl('^application/json', content_type, ignore.case = TRUE))){
        return(page)
      }
    }
    stop('No JSON response for search page ', pagenum, ' after ',
         max_tries, ' attempts', call. = FALSE)
  }
  
  #get all pages of json search results and then convert to df
  search_json <- map(seq_len(npages), get_json_page, query)
  map_df(search_json, content) %>% map_df(bind_rows)
}
```

Get all the ranked trick-takers into a data frame.

```{r}
# Advanced search with a minimum of 30 voters, expansions excluded, and
# restricted to property id 2009 (presumably the trick-taking mechanic --
# TODO confirm against the site).
trick_df <- run_search(
  list(
    "advsearch" = "1",
    "range[numvoters][min]" = "30",
    "nosubtypes[]" = "boardgameexpansion",
    "propertyids[]" = "2009"
  )
)

trick_df

```

Now get XML for all these IDs in a single page.

```{r}
# One request for every game: the ids go out as a single comma-separated
# value, together with stats = "1".
tricks_xml <- GET(
  "https://boardgamegeek.com",
  accept_xml(),
  path = "/xmlapi2/thing",
  query = list(
    "id" = paste(trick_df$id, collapse = ","),
    "stats" = "1"
  )
)

```

Then start picking out the properties we are interested in.

```{r}

# One <item> node per game in the response.
items <- content(tricks_xml) %>% 
  xml_find_all('item')

# Elements (paths relative to an item) whose 'value' attribute is numeric.
attrs <- c('yearpublished',
           'minplayers',
           'maxplayers',
           'playingtime',
           'minplaytime',
           'maxplaytime',
           'minage',
           'statistics/ratings/usersrated',
           'statistics/ratings/average',
           'statistics/ratings/bayesaverage',
           'statistics/ratings/stddev',
           'statistics/ratings/owned',
           'statistics/ratings/numweights',
           'statistics/ratings/averageweight',
           'statistics/ratings/ranks/rank[@name="boardgame"]'
           )

# xml_find_first() with a path relative to each item returns exactly one
# result per item (a missing node, hence NA, when the element is absent).
# A document-wide '//' search would instead drop absent elements and shift
# every later value onto the wrong game.
# Non-numeric values become NA with a coercion warning.
# NOTE(review): id/name are taken from trick_df by position, which assumes
# the response lists items in the order the ids were requested -- confirm.
trick_data <- map(attrs, ~items %>% 
                    xml_find_first(paste0("./", .x)) %>% 
                    xml_attr('value') %>% 
                    as.numeric()) %>% 
  set_names(str_replace(attrs, 'statistics/ratings/','')) %>% 
  bind_cols(id = trick_df$id, name = trick_df$ordtitle, .) %>%
  rename(rank = `ranks/rank[@name="boardgame"]`)

```

Add some more complex properties

```{r}
# list columns

# links (already written this for Knizia networks)
# Add a list column holding the link values of one type for every game.
#
# data:      data frame with one row per node in `nodes`, in the same order.
# link_type: suffix of the link type; e.g. 'mechanic' selects <link> nodes
#            whose type attribute is 'boardgamemechanic'.
# nodes:     nodes to read the links from. Defaults to the global `items`,
#            so existing two-argument calls behave as before.
# Returns `data` with a new list column named after `link_type`; each entry
# is the character vector of 'value' attributes of the matching links.
add_linkcol <- function(data, link_type, nodes = items){
  xpath <- paste0(".//link[@type='boardgame", link_type, "']")

  data[[link_type]] <- map(nodes, ~xml_find_all(.x, xpath) %>% 
                              xml_attr('value'))
  
  data
}

# Link types to attach as list columns, one column per type.
link_types <- c(
  'category', 'mechanic', 'family', 'expansion',
  'designer', 'artist', 'publisher'
)

# Fold the types over the data frame: each step adds one list column.
trick_data <- reduce(link_types, add_linkcol, .init = trick_data)

# poll for best number of players (already written this at home)


# One poll node per item. xml_find_first() keeps a (missing) placeholder for
# an item without the poll, so poll_list stays aligned with the rows of
# trick_data instead of silently getting shorter.
poll_list <- items %>% xml_find_first('poll[@name="suggested_numplayers"]')



# Turn one suggested-number-of-players poll node into a long data frame.
#
# poll: a single poll node.
# Returns a tibble with one row per <result> node and the columns
#   numplayers - 'numplayers' attribute of the enclosing <results> node
#   quality    - 'value' attribute of the <result>
#   votes      - 'numvotes' attribute of the <result>, as integer
# A poll without any <result> nodes gives a zero-row tibble.
get_poll_df <- function(poll){

  result_nodes <- xml_find_all(poll, './/results/result')

  # Read the player count from each result's own parent rather than
  # repeating every <results> value a fixed three times; that stays correct
  # when a <results> node holds fewer or more than three <result> children.
  numplayers <- result_nodes %>%
    xml_find_first('parent::results') %>%
    xml_attr('numplayers')

  tibble(numplayers = numplayers,
         quality = xml_attr(result_nodes, 'value'),
         votes = as.integer(xml_attr(result_nodes, 'numvotes')))
}

# Total number of poll votes per game.
trick_data$poll_votes <- as.integer(xml_attr(poll_list, 'totalvotes'))

# Per-game poll breakdown, stored as a list column of data frames.
trick_data$poll <- map(poll_list, get_poll_df)

```

Let's have a look at the most common mechanisms.

```{r}
# Number of games per mechanic, most frequent first.
trick_data %>%
  unnest(mechanic) %>%
  count(mechanic, sort = TRUE)

```

And designers:

```{r}
# Designers credited on more than one game, most frequent first.
trick_data %>%
  unnest(designer) %>%
  count(designer, sort = TRUE) %>%
  filter(n > 1)

```

And publishers:

```{r}
# Publishers with more than five games, most frequent first.
trick_data %>%
  unnest(publisher) %>%
  count(publisher, sort = TRUE) %>%
  filter(n > 5)

```

And artists:

```{r}
# Artists credited on more than one game, most frequent first.
trick_data %>%
  unnest(artist) %>%
  count(artist, sort = TRUE) %>%
  filter(n > 1)

```

And category:

```{r}
# Categories attached to more than five games, most frequent first.
trick_data %>%
  unnest(category) %>%
  count(category, sort = TRUE) %>%
  filter(n > 5)

```

And family:

```{r}
# Families attached to more than one game, most frequent first.
trick_data %>%
  unnest(family) %>%
  count(family, sort = TRUE) %>%
  filter(n > 1)

```

simple plots - year

```{r}
# Games per publication year, for years after 1980 only.
ggplot(filter(trick_data, yearpublished > 1980), aes(x = yearpublished)) +
  geom_bar()
```

minimum players

```{r}
# Games per minimum player count.
ggplot(trick_data, aes(x = minplayers)) +
  geom_bar()
```

maximum players

```{r}
# Games per maximum player count, for maxima below 12 only.
ggplot(filter(trick_data, maxplayers < 12), aes(x = maxplayers)) +
  geom_bar()
```

Best with...

```{r}
# Keep games whose poll has at least five votes, then work out which player
# counts a majority of voters called 'Best'.
best_data <- trick_data %>%
  filter(poll_votes >= 5) %>%
  mutate(
    # share of the votes each quality level received, per player count
    poll_freq = map(poll, function(poll_df) {
      poll_df %>%
        group_by(numplayers) %>%
        mutate(freq = votes / sum(votes)) %>%
        ungroup()
    }),
    # player counts where 'Best' got more than half of the votes
    best = map(poll_freq, function(freq_df) {
      majority_best <- filter(freq_df, quality == 'Best', freq > 0.5)
      as.integer(pull(majority_best, numplayers))
    }),
    nbest = map_int(best, length)
  )

# Games with exactly one 'Best' player count, ordered by that count and rank.
best_data %>%
  filter(nbest == 1) %>%
  transmute(name, best = as.integer(best), rank) %>%
  arrange(best, rank) %>%
  select(name, best)
```

Range of best with:

```{r}
# How many games have 0, 1, 2, ... 'Best' player counts.
count(best_data, nbest)
```

Best with (single count):

```{r}
# Games per 'Best' player count, restricted to games with exactly one.
best_data %>%
  filter(nbest == 1) %>%
  transmute(best = as.integer(best)) %>%
  count(best)
```


```{r}
# Games per 'Best' player count, counting a game once for each of its counts.
count(unnest(best_data, best), best)

```

```{r}
# Games that are 'Best' with five players, ordered by rank.
unnest(best_data, best) %>%
  filter(best == 5L) %>%
  arrange(rank) %>%
  select(name, best)
```

Trickster nominees:

```{r}
# Candidates: published before 2009, at least one designer listed, none of
# them '(Uncredited)', not in the 'Climbing Games' family, and with a rank.
# Ordered by average rating, highest first.
trick_data %>%
  filter(
    yearpublished < 2009,
    lengths(designer) > 0,
    !map_lgl(designer, ~ '(Uncredited)' %in% .),
    !map_lgl(family, ~ 'Climbing Games' %in% .),
    !is.na(rank)
  ) %>%
  arrange(desc(average)) %>%
  select(name, rank)
```

By year:

```{r}
# Same designer/family/rank filters as for the nominees, without the year
# cut-off; then keep, per publication year, the game with the smallest rank
# value (top_n with n = -1).
trick_data %>%
  filter(
    lengths(designer) > 0,
    !map_lgl(designer, ~ '(Uncredited)' %in% .),
    !map_lgl(family, ~ 'Climbing Games' %in% .),
    !is.na(rank)
  ) %>%
  group_by(yearpublished) %>%
  top_n(n = -1, wt = rank) %>%
  arrange(yearpublished) %>%
  select(yearpublished, name)

```