ndata <- readRDS("C:/RRRR/stat_software/ndata.rds")
par_year1234 <- readRDS("C:/RRRR/stat_software/par_year1234.rds")

Introduction

Outline

Questions

Data: Source

https://boardgamegeek.com/boardgame/174430/gloomhaven bgg

Data

head(ndata,1)
##        id      type     name yearpublished minplayers maxplayers playingtime
## 486 36235 boardgame The Duke          2013          2          2          30
##     minplaytime maxplaytime minage users_rated average_rating
## 486          30          30     13        3526         7.4375
##     bayes_average_rating total_owners total_traders total_wanters total_wishers
## 486               6.8684         5864           188           310          1498
##     total_comments total_weights average_weight               types
## 486            850           120         2.5167 boardgame,abstracts
##                     categories                    mechanics
## 486 Abstract Strategy,Medieval Grid Movement,Tile Placement
##                                              family
## 486 Crowdfunding: Kickstarter,The Duke,Mensa Select
##                             designers
## 486 Jeremy Holcomb,Stephen McLaughlin
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      description
## 486 Levy. Maneuver. Conquer.&#10;&#10;The Duke is a dynamic, tile-based strategy game with an old-world, feudal theme, high-quality wooden playing pieces, and an innovative game mechanism in its double-sided tiles. Each side represents a different posture &ndash; often considered to be defensive or offensive &ndash; and demonstrates exactly what the piece can do within the turn. At the end of a move (or after the use of a special ability), the tile is flipped to its other side, displaying a new offensive or defensive posture.&#10;&#10;Each posture conveys different options for maneuver and attack. The full circle is a standard Move, the hollow circle the Jump, the arrow provides for the Slide, the star a special Strike ability and so on. Each turn a player may select any tile to maneuver, attempting to defend his own troops while positioning himself to capture his opponent's tiles. If you end your movement in a square occupied by an opponent's tile, you capture that tile. Capture your opponent's Duke to win!&#10;&#10;Players start the game by placing their Duke in one of the two middle squares on their side of the game board. Two Footman are then placed next to the Duke. Each turn a player may choose to either move a single tile or randomly draw a new tile from the bag. With fifteen different Troop Tiles, all double-sided, and nineteen total pieces for each player (plus special optional tiles), the variety of game play is limitless.&#10;&#10;Beyond the endless variety of the basic game, Terrain Tiles introduce a variety of game play options, altering the game board. These rules also include several alternate objectives, such as the challenging Dark Rider game which pits five Pikeman against a lone Knight.&#10;&#10;

load library

library(ggplot2)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggrepel)

str(ndata)
## 'data.frame':    976 obs. of  26 variables:
##  $ id                  : chr  "36235" "197405" "19427" "215613" ...
##  $ type                : chr  "boardgame" "boardgame" "boardgame" "boardgame" ...
##  $ name                : chr  "The Duke" "Tak" "Gemblo" "Tao Long: The Way of the Dragon" ...
##  $ yearpublished       : int  2013 2017 2003 2017 2016 2017 2012 2015 2013 2019 ...
##  $ minplayers          : int  2 2 2 2 1 2 2 2 2 2 ...
##  $ maxplayers          : int  2 2 6 2 4 4 7 2 2 4 ...
##  $ playingtime         : int  30 60 30 30 30 60 45 45 15 30 ...
##  $ minplaytime         : int  30 20 30 10 30 15 45 30 15 15 ...
##  $ maxplaytime         : int  30 60 30 30 30 60 45 45 15 30 ...
##  $ minage              : int  13 12 6 14 13 12 8 12 0 14 ...
##  $ users_rated         : int  3526 1708 1335 1040 927 534 743 1705 1730 271 ...
##  $ average_rating      : num  7.44 7.73 6.83 7.03 6.8 ...
##  $ bayes_average_rating: num  6.87 6.63 6.14 6.12 5.98 ...
##  $ total_owners        : int  5864 3329 1478 3239 1948 1252 1218 3607 4785 512 ...
##  $ total_traders       : int  188 41 41 144 168 21 74 191 362 11 ...
##  $ total_wanters       : int  310 199 148 64 45 44 45 88 19 25 ...
##  $ total_wishers       : int  1498 1089 588 492 235 165 191 650 255 105 ...
##  $ total_comments      : num  850 475 466 353 325 154 222 536 736 104 ...
##  $ total_weights       : num  120 46 128 22 24 16 46 51 122 12 ...
##  $ average_weight      : num  2.52 2.46 1.9 2.68 1.96 ...
##  $ types               : chr  "boardgame,abstracts" "boardgame,abstracts" "boardgame,abstracts" "boardgame,abstracts" ...
##  $ categories          : chr  "Abstract Strategy,Medieval" "Abstract Strategy" "Abstract Strategy" "Abstract Strategy" ...
##  $ mechanics           : chr  "Grid Movement,Tile Placement" "Grid Movement,Network and Route Building" "" "Grid Movement,Modular Board,Point to Point Movement" ...
##  $ family              : chr  "Crowdfunding: Kickstarter,The Duke,Mensa Select" "5x5 grid,Combinatorial,Connection Games,Crowdfunding: Kickstarter,Fictional Games" "Combinatorial,Crowdfunding: Kickstarter,Gemblo" "Crowdfunding: Kickstarter" ...
##  $ designers           : chr  "Jeremy Holcomb,Stephen McLaughlin" "James Ernest,Patrick Rothfuss" "Justin Oh" "Dox Lucchin,Pedro Latro" ...
##  $ description         : chr  "Levy. Maneuver. Conquer.&#10;&#10;The Duke is a dynamic, tile-based strategy game with an old-world, feudal the"| __truncated__ "&quot;My next several hours were spent learning how to play tak. Even if I had not been nearly mad with idlenes"| __truncated__ "Gemblo is an abstract strategy game with translucent, colored pieces, each of which is made up of one to five h"| __truncated__ "&quot;There was something formless and perfect before the Universe was born. For lack of a better name, I call "| __truncated__ ...
#makes options
options(scipen = 999)
mytheme <-
  theme(
    axis.text.x = element_text(
      angle = 90,
      size = 8,
      vjust = 0.4
    ),
    plot.title = element_text(
      size = 12,
      vjust = 2,
      family = "Georgia",
      face = "bold",
      margin = margin(b = 20)
    ),
    axis.title.y = element_text(margin = margin(r = 20)),
    axis.title.x = element_text(
      size = 12,
      vjust = -0.35,
      margin = margin(t = 20)
    ),
    plot.background = element_rect(fill = "#EDEFF7"),
    panel.background = element_rect(fill = "#EDEFF7"),
    legend.background = element_rect(fill = "#EDEFF7"),
    legend.title = element_text(
      size = 10,
      family = "Arial",
      face = "bold"
    ),
    legend.text = element_text(size = 8, family = "Arial"),
    panel.grid.major = element_line(
      size = 0.4,
      linetype = "solid",
      color = "#cccccc"
    ),
    panel.grid.minor = element_line(size = 0),
    axis.ticks = element_blank(),
    plot.margin = unit(c(0.5, 1, 1, 1), "cm")
  )
colors = c(
  "#9E0142",
  "#D53E4F",
  "#F46D43" ,
  "#FDAE61",
  "#FEE08B",
  "#e7fe8b",
  "#bcfe8b",
  "#8bfeb1",
  "#8bc4fe",
  "#8b96fe",
  "#ad8bfe",
  "#d98bfe",
  "#fe8bd1",
  "#fe8b96",
  "#fb6761"
)

treemap of mechanics

library(treemap)
library(viridis)
## Loading required package: viridisLite
library(splitstackshape)
ndata$mechanics <- as.character(ndata$mechanics)
tags <-
  cSplit(ndata, splitCols = "mechanics", direction = "long")
tags.table <- as.data.frame(table(tags$mechanics))

treemap(
  tags.table,
  index = "Var1",
  vSize = "Freq",
  vColor = "#58ACFA",
  palette = viridis(6)
)
## Warning in if (class(try(col2rgb(palette), silent = TRUE)) == "try-error")
## stop("color palette is not correct"): 條件的長度 > 1,因此只能用其第一元素

distribution of weight

options(repr.plot.width = 3, repr.plot.height = 3)
ndata %>% ggplot(aes(x = average_weight)) + geom_density(fill = "#FDAE61", alpha =
                                                                0.4) + mytheme + labs(title = "Distribution of weight")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

distribution of ages

options(repr.plot.width = 4, repr.plot.height = 4)
ndata %>% ggplot(aes(x = minage)) + geom_histogram(aes(y = ..density..), fill =
                                                          "#9E0142", alpha = 0.4) + mytheme + geom_density(col = "#F46D43") + labs(title =
                                                                                                                                     "Distribution of Age")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

category

options(repr.plot.width = 12, repr.plot.height = 20)
category <-
  as.data.frame(table(str_trim(unlist(
    strsplit(str_trim(as.character(ndata$types)), ", ")
  ))))
category %>% arrange(desc(Freq)) %>% ggplot(aes(x = Var1, y = Freq, fill =
                                                  Var1)) + geom_bar(stat = "identity") + coord_flip() + theme(legend.position = "") +
  labs(x = "categories", y = "count", title = "Games Per Category")

distribution of different rating

options(repr.plot.width = 10, repr.plot.height = 4)

g3 <-
  ndata %>% ggplot(aes(x = bayes_average_rating)) + geom_density(fill = "#FEE08B", alpha =
                                                                        0.4) + mytheme + labs(title = "Distribution of Bayes Average Rating")

grid.arrange(g3, nrow = 1, ncol = 1)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

options(repr.plot.width = 10, repr.plot.height = 4)
g2 <-
  ndata %>% ggplot(aes(x = average_rating)) + geom_density(fill = "#FDAE61", alpha =
                                                                  0.4) + mytheme + labs(title = "Distribution of Average Rating")
grid.arrange(g2, nrow = 1, ncol = 1)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

linear model:

m1. <-
  lm(
    average_rating ~ types + maxplayers + minage + total_wanters + yearpublished + playingtime + users_rated + average_weight + total_owners,
    data = par_year1234
  )
summary(m1.)
## 
## Call:
## lm(formula = average_rating ~ types + maxplayers + minage + total_wanters + 
##     yearpublished + playingtime + users_rated + average_weight + 
##     total_owners, data = par_year1234)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.35232 -0.24534  0.01443  0.25114  2.17439 
## 
## Coefficients:
##                                 Estimate  Std. Error t value
## (Intercept)                   5.87067363  0.11374194  51.614
## typesboardgame,strategygames  0.03273491  0.05892177   0.556
## maxplayers                    0.01001343  0.00483840   2.070
## minage                       -0.00623653  0.00839849  -0.743
## total_wanters                 0.00111332  0.00014275   7.799
## yearpublished2                0.16490067  0.04966829   3.320
## yearpublished3                0.33804976  0.05739377   5.890
## yearpublished4                0.67470240  0.06218698  10.850
## playingtime                   0.00048708  0.00041633   1.170
## users_rated                  -0.00006134  0.00002301  -2.666
## average_weight                0.29092989  0.04547164   6.398
## total_owners                  0.00003987  0.00001445   2.760
##                                          Pr(>|t|)    
## (Intercept)                  < 0.0000000000000002 ***
## typesboardgame,strategygames             0.578707    
## maxplayers                               0.038903 *  
## minage                                   0.458017    
## total_wanters                  0.0000000000000263 ***
## yearpublished2                           0.000952 ***
## yearpublished3                 0.0000000063147203 ***
## yearpublished4               < 0.0000000000000002 ***
## playingtime                              0.242478    
## users_rated                              0.007869 ** 
## average_weight                 0.0000000003090201 ***
## total_owners                             0.005952 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4908 on 624 degrees of freedom
## Multiple R-squared:  0.5157, Adjusted R-squared:  0.5072 
## F-statistic: 60.42 on 11 and 624 DF,  p-value: < 0.00000000000000022
par(mfrow = c(2,2))
plot(m1.)

Polynomial regression

quadratic model:

m2s. <-
  lm(
    average_rating ~ types+ yearpublished+poly(maxplayers, 2) + poly(minage, 2) +
      poly(total_wanters, 2)+ poly(playingtime, 2) + poly(users_rated, 2) + poly(average_weight, 2) + poly(total_owners, 2),
    data = par_year1234
  )
summary(m2s.)
## 
## Call:
## lm(formula = average_rating ~ types + yearpublished + poly(maxplayers, 
##     2) + poly(minage, 2) + poly(total_wanters, 2) + poly(playingtime, 
##     2) + poly(users_rated, 2) + poly(average_weight, 2) + poly(total_owners, 
##     2), data = par_year1234)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24115 -0.22880 -0.00857  0.23810  2.26954 
## 
## Coefficients:
##                              Estimate Std. Error t value             Pr(>|t|)
## (Intercept)                   6.78441    0.04968 136.560 < 0.0000000000000002
## typesboardgame,strategygames  0.03594    0.05835   0.616             0.538214
## yearpublished2                0.17312    0.04772   3.628             0.000310
## yearpublished3                0.31212    0.05518   5.657  0.00000002362118310
## yearpublished4                0.60649    0.06055  10.016 < 0.0000000000000002
## poly(maxplayers, 2)1          1.34974    0.51326   2.630             0.008759
## poly(maxplayers, 2)2         -0.23954    0.49702  -0.482             0.630013
## poly(minage, 2)1             -0.41506    0.53601  -0.774             0.439017
## poly(minage, 2)2             -0.54438    0.48860  -1.114             0.265643
## poly(total_wanters, 2)1       7.43181    0.91520   8.120  0.00000000000000253
## poly(total_wanters, 2)2      -5.17844    0.63806  -8.116  0.00000000000000262
## poly(playingtime, 2)1         0.71776    0.72261   0.993             0.320963
## poly(playingtime, 2)2         0.24389    0.60269   0.405             0.685867
## poly(users_rated, 2)1        -6.36362    2.73241  -2.329             0.020184
## poly(users_rated, 2)2         4.68346    1.38830   3.374             0.000789
## poly(average_weight, 2)1      5.05295    0.96378   5.243  0.00000021752684085
## poly(average_weight, 2)2     -0.31244    0.51904  -0.602             0.547426
## poly(total_owners, 2)1        5.91401    2.43880   2.425             0.015596
## poly(total_owners, 2)2       -1.76605    1.35128  -1.307             0.191717
##                                 
## (Intercept)                  ***
## typesboardgame,strategygames    
## yearpublished2               ***
## yearpublished3               ***
## yearpublished4               ***
## poly(maxplayers, 2)1         ** 
## poly(maxplayers, 2)2            
## poly(minage, 2)1                
## poly(minage, 2)2                
## poly(total_wanters, 2)1      ***
## poly(total_wanters, 2)2      ***
## poly(playingtime, 2)1           
## poly(playingtime, 2)2           
## poly(users_rated, 2)1        *  
## poly(users_rated, 2)2        ***
## poly(average_weight, 2)1     ***
## poly(average_weight, 2)2        
## poly(total_owners, 2)1       *  
## poly(total_owners, 2)2          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4671 on 617 degrees of freedom
## Multiple R-squared:  0.5664, Adjusted R-squared:  0.5538 
## F-statistic: 44.78 on 18 and 617 DF,  p-value: < 0.00000000000000022
par(mfrow = c(2,2))
plot(m2s.)
## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs

## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs

three-dimensional models:

m3s. <-
  lm(
    average_rating ~ types  + yearpublished +poly(maxplayers, 3)  +
      poly(minage, 3) + poly(total_wanters, 3) + poly(playingtime, 3) + poly(users_rated, 3) + poly(average_weight, 3) + poly(total_owners, 3),
    data = par_year1234
  )
summary(m3s.)
## 
## Call:
## lm(formula = average_rating ~ types + yearpublished + poly(maxplayers, 
##     3) + poly(minage, 3) + poly(total_wanters, 3) + poly(playingtime, 
##     3) + poly(users_rated, 3) + poly(average_weight, 3) + poly(total_owners, 
##     3), data = par_year1234)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.13758 -0.21537  0.00214  0.23257  2.30184 
## 
## Coefficients:
##                              Estimate Std. Error t value             Pr(>|t|)
## (Intercept)                   6.78748    0.04916 138.063 < 0.0000000000000002
## typesboardgame,strategygames  0.03956    0.05934   0.667             0.505293
## yearpublished2                0.17642    0.04682   3.768             0.000181
## yearpublished3                0.31182    0.05424   5.749         0.0000000142
## yearpublished4                0.56972    0.06006   9.486 < 0.0000000000000002
## poly(maxplayers, 3)1          1.43614    0.50578   2.839             0.004670
## poly(maxplayers, 3)2         -0.25292    0.48733  -0.519             0.603950
## poly(maxplayers, 3)3         -0.45076    0.47532  -0.948             0.343342
## poly(minage, 3)1             -0.17062    0.53241  -0.320             0.748717
## poly(minage, 3)2             -0.52461    0.49185  -1.067             0.286571
## poly(minage, 3)3             -0.40435    0.50265  -0.804             0.421452
## poly(total_wanters, 3)1       9.29756    1.01430   9.166 < 0.0000000000000002
## poly(total_wanters, 3)2      -3.60053    0.72168  -4.989         0.0000007921
## poly(total_wanters, 3)3       3.74396    0.74363   5.035         0.0000006310
## poly(playingtime, 3)1         0.57211    0.72082   0.794             0.427682
## poly(playingtime, 3)2         0.45521    0.61624   0.739             0.460384
## poly(playingtime, 3)3        -1.53273    0.52647  -2.911             0.003730
## poly(users_rated, 3)1        -7.20371    2.78451  -2.587             0.009910
## poly(users_rated, 3)2         2.11544    1.75839   1.203             0.229422
## poly(users_rated, 3)3        -4.47925    1.16471  -3.846             0.000133
## poly(average_weight, 3)1      4.73463    0.97591   4.851         0.0000015578
## poly(average_weight, 3)2     -0.68095    0.54516  -1.249             0.212109
## poly(average_weight, 3)3      0.19682    0.50824   0.387             0.698706
## poly(total_owners, 3)1        4.74592    2.45059   1.937             0.053250
## poly(total_owners, 3)2       -1.90774    1.49979  -1.272             0.203857
## poly(total_owners, 3)3        2.00152    1.09931   1.821             0.069142
##                                 
## (Intercept)                  ***
## typesboardgame,strategygames    
## yearpublished2               ***
## yearpublished3               ***
## yearpublished4               ***
## poly(maxplayers, 3)1         ** 
## poly(maxplayers, 3)2            
## poly(maxplayers, 3)3            
## poly(minage, 3)1                
## poly(minage, 3)2                
## poly(minage, 3)3                
## poly(total_wanters, 3)1      ***
## poly(total_wanters, 3)2      ***
## poly(total_wanters, 3)3      ***
## poly(playingtime, 3)1           
## poly(playingtime, 3)2           
## poly(playingtime, 3)3        ** 
## poly(users_rated, 3)1        ** 
## poly(users_rated, 3)2           
## poly(users_rated, 3)3        ***
## poly(average_weight, 3)1     ***
## poly(average_weight, 3)2        
## poly(average_weight, 3)3        
## poly(total_owners, 3)1       .  
## poly(total_owners, 3)2          
## poly(total_owners, 3)3       .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4545 on 610 degrees of freedom
## Multiple R-squared:  0.5942, Adjusted R-squared:  0.5775 
## F-statistic: 35.72 on 25 and 610 DF,  p-value: < 0.00000000000000022
par(mfrow = c(2,2))
plot(m3s.)
## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs

## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs

machine learning:

ndata$average_rating.high <- ndata$average_rating > 7
ndata$average_rating.high <-  1 * (ndata$average_rating.high)



dataset <- ndata
library(caret)
## Loading required package: lattice
library(lattice)
library(ggplot2)

set.seed(509)   # Set random seed
# Shuffle rows
rows <- sample(nrow(dataset))
dataset <- dataset[rows, ]

# create a list of 80% of the rows in the original dataset we can use for training
train_index <-
  createDataPartition(dataset$average_rating.high, p = 0.80, list = FALSE)
# Randomly select 80% of data as the training data set
data_tr <- dataset[train_index, ]
# Assign the remaining 20% of as the validation data set
data_va <- dataset[-train_index, ]

head(data_tr, 3)
##          id      type                name yearpublished minplayers maxplayers
## 5198 108665 boardgame          Zpocalypse          2013          1          4
## 4913  43365 boardgame      Kansas Pacific          2009          3          6
## 2244 228328 boardgame Rurik: Dawn of Kiev          2019          1          4
##      playingtime minplaytime maxplaytime minage users_rated average_rating
## 5198         100         100         100     13         602        6.09884
## 4913         100         100         100     14         237        6.60156
## 2244         120          60         120     13         465        7.81744
##      bayes_average_rating total_owners total_traders total_wanters
## 5198              5.65564         1684           130            33
## 4913              5.67332          688            41            24
## 2244              6.02451          955            16           134
##      total_wishers total_comments total_weights average_weight
## 5198           298            244            37         3.3784
## 4913            67            116            22         2.6364
## 2244           574            189            30         2.9667
##                        types
## 5198      boardgame,thematic
## 4913 boardgame,strategygames
## 2244 boardgame,strategygames
##                                               categories
## 5198                 Exploration,Fighting,Horror,Zombies
## 4913                                     Economic,Trains
## 2244 Civilization,Medieval,Miniatures,Territory Building
##                                                                                                                       mechanics
## 5198 Card Drafting,Cooperative Game,Dice Rolling,Grid Movement,Modular Board,Role Playing,Tile Placement,Variable Player Powers
## 4913                                                                   Auction/Bidding,Network and Route Building,Stock Holding
## 2244                                             Area Majority / Influence,Area Movement,Auction/Bidding,Variable Player Powers
##                                                                                                             family
## 5198                                              Components: Miniatures,Crowdfunding: Kickstarter,Solitaire Games
## 4913 Crowdfunding: Kickstarter,Cube Rails,Historic Railroads System,Queen Iron Horse Collection,Winsome Essen Sets
## 2244                   Cities: Kiev (Ukraine),Country: Ukraine,Crowdfunding: Kickstarter,Madison Game Design Cabal
##                    designers
## 5198 Julie Ahern,Jeff Gracia
## 4913             John Bohrer
## 2244         Stan Kordonskiy
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        description
## 5198 Zpocalypse is a survival board game set in a post-apocalyptic wasteland. Two to four players start in a basement/fallout shelter which can lead out to several base locations or even through the tunnels to sewer grates throughout the transformable board. The survivors are trying to make it day to day in a world torn apart from the walking dead.&#10;&#10;As in any survival game, one needs food, guns, ammo, and more. Players go out into the wasteland and scavenge for supplies, or perhaps to find a new gun or melee weapon to fend off the next zombie attack. In addition to gathering supplies, each player forms and controls his own squad &ndash; and each squad works to fortify the bases. Your walls and barbed wire won&rsquo;t hold the dead for long, however, and they&rsquo;re getting hungry.&#10;&#10;Characters receive victory points for killing zombies, building defenses, and successfully completing goals. Victory points show who is soldiering on the best in this wasteland. The final tally of points determines the winner. Victory points also act as experience points for leveling up a character.&#10;&#10;The game combines combat, tactics, resource collecting, character development, team play, and strategy towards the goal of surviving in the Zpocalypse!&#10;&#10;
## 4913                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    David V. H. Peters' Kansas Pacific is a no-luck tactical train game that was released as one of the games in the Winsome Games 2009 Essen Set.  In this game, players are purchasing auctioned shares to run or share in the profits of one of six railroads that begin on the Eastern edge of Kansas. The railroads gain income by passing through a variety of Kansas cities and towns, with sizable income available for those that go through Wichita or Topeka.  There is an additional mechanism and incentive for furthering the railroads westward: the Western quarter of the state is a 'land grant' area where the railroads can gain additional track cubes, and rail lines that reach the Colorado border receive a sizable income bonus.  When the game ends, the player with the most cash on hand is the winner.&#10;&#10;
## 2244                                                                                                                                              Rurik: Dawn of Kiev is a euro-style realm building game set in an 11th century Eastern European Kingdom. It features area control, resource management, and a new game mechanic - &quot;auction programming.&quot;&#10;&#10;You play as a potential successor to the throne following the death of your father, Vladimir the Great, in 1015. The people value a well-rounded leader, so you must establish your legacy by building, taxing, fighting, and accomplishing great deeds. Will you win over the hearts of the people to become the next ruler of Kievan Rus?&#10;&#10;Rurik brings to life the ancient culture of Kievan Rus with game design by Russian designer Stanislav Kordonskiy and illustrations by Ukrainian artist Yaroslav Radeckyi.&#10;&#10;In Rurik, players openly bid for actions with their advisors. Stronger advisors earn greater benefits at the cost of performing their action later than other players. Conversely, weaker advisors earn lesser benefits but perform their action quickly. This planning mechanism (&quot;auction programming&quot;) adds a fun tension to the game.&#10;&#10;&mdash;description from the publisher&#10;&#10;
##      average_rating.high
## 5198                   0
## 4913                   0
## 2244                   1
# scatterplot matrix
library(ellipse)    # package ellipse is required
## 
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
## 
##     pairs
data_tr$types <- as.character(data_tr$types)
data_tr$average_rating.high <-
  as.factor(data_tr$average_rating.high)
x <- c(4:7,10:20)
featurePlot(
  x = data_tr[, x],
  y = data_tr$average_rating.high,
  plot = "ellipse",
  ## Add a key at the top
  auto.key = list(columns = 2)
)

# summarize the class distribution
table.class <- table(data_tr$average_rating.high)
percentage <- prop.table(table.class) * 100
print(cbind(counts = table.class, percentage = percentage))
##   counts percentage
## 0    340   43.53393
## 1    441   56.46607
# Run algorithms using 10-fold cross validation
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"


dataset$average_rating.high <-
  as.factor(dataset$average_rating.high)
# a) linear algorithms
set.seed(7)
fit.lda <-
  train(
    average_rating.high ~ yearpublished + minplayers + maxplayers + playingtime + minage + users_rated + total_owners + total_traders + total_wanters + total_wishers + total_comments + total_weights + average_weight,
    data = dataset,
    method = "lda",
    metric = metric,
    trControl = control
  )

# b) nonlinear algorithms
# CART
set.seed(7)
fit.cart <-
  train(
    average_rating.high ~ yearpublished + minplayers + maxplayers + playingtime + minage + users_rated + total_owners + total_traders + total_wanters + total_wishers + total_comments + total_weights + average_weight,
    data = dataset,
    method = "rpart",
    metric = metric,
    trControl = control
  )

# kNN
set.seed(7)
fit.knn <-
  train(
    average_rating.high ~ yearpublished + minplayers + maxplayers + playingtime + minage + users_rated + total_owners + total_traders + total_wanters + total_wishers + total_comments + total_weights + average_weight,
    data = dataset,
    method = "knn",
    metric = metric,
    trControl = control
  )

# c) advanced algorithms
# SVM
set.seed(7)
fit.svm <-
  train(
    average_rating.high ~ yearpublished + minplayers + maxplayers + playingtime + minage + users_rated + total_owners + total_traders + total_wanters + total_wishers + total_comments + total_weights + average_weight,
    data = dataset,
    method = "svmRadial",
    metric = metric,
    trControl = control
  )

# Random Forest
set.seed(7)
fit.rf <-
  train(
    average_rating.high ~ yearpublished + minplayers + maxplayers + playingtime + minage + users_rated + total_owners + total_traders + total_wanters + total_wishers + total_comments + total_weights + average_weight,
    data = dataset,
    method = "rf",
    metric = metric,
    trControl = control
  )

# d) Special guests
# XGBoost: LightGBM, etc. Friend in high places :)
set.seed(7)
fit.xgb <-
  train(
    average_rating.high ~ yearpublished + minplayers + maxplayers + playingtime + minage + users_rated + total_owners + total_traders + total_wanters + total_wishers + total_comments + total_weights + average_weight,
    data = dataset,
    method = "xgbTree",
    metric = metric,
    trControl = control
  )

# summarize accuracy of models
results <- resamples(list(lda=fit.lda, cart=fit.cart, knn=fit.knn, svm=fit.svm, rf=fit.rf, xgb=fit.xgb))
summary(results)
## 
## Call:
## summary.resamples(object = results)
## 
## Models: lda, cart, knn, svm, rf, xgb 
## Number of resamples: 10 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## lda  0.6836735 0.7397959 0.7845571 0.7706186 0.8020724 0.8247423    0
## cart 0.6428571 0.7500000 0.7704082 0.7582790 0.7815064 0.8041237    0
## knn  0.7142857 0.7216495 0.7487376 0.7612666 0.7959184 0.8453608    0
## svm  0.7755102 0.8067010 0.8265306 0.8310120 0.8509363 0.9175258    0
## rf   0.8163265 0.8380234 0.8564065 0.8545550 0.8673469 0.8969072    0
## xgb  0.8163265 0.8469388 0.8513044 0.8607090 0.8817852 0.9175258    0
## 
## Kappa 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## lda  0.3560831 0.4729190 0.5571063 0.5334978 0.5972336 0.6440751    0
## cart 0.2910294 0.4866880 0.5210937 0.5015601 0.5593714 0.5907173    0
## knn  0.4198732 0.4372251 0.4939052 0.5163414 0.5866723 0.6787370    0
## svm  0.5418615 0.6060157 0.6495024 0.6563757 0.6970886 0.8320346    0
## rf   0.6308079 0.6760997 0.7081475 0.7056573 0.7320100 0.7900433    0
## xgb  0.6344799 0.6868099 0.6979811 0.7177573 0.7619949 0.8301226    0

Conclusion and Discussion

  • users_rated, average_weight, total_owners have significant effects.
  • SVM, Random forest, XG bloost are good for predicting the rating score.