Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  ebbr,
  #devtools::install_github("dgrtwo/ebbr")
  tidymodels
)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom        1.0.8     ✔ rsample      1.3.0
## ✔ dials        1.4.0     ✔ tune         1.3.0
## ✔ infer        1.0.8     ✔ workflows    1.2.0
## ✔ modeldata    1.4.0     ✔ workflowsets 1.1.0
## ✔ parsnip      1.3.1     ✔ yardstick    1.3.2
## ✔ recipes      1.3.0     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ psych::%+%()             masks ggplot2::%+%()
## ✖ scales::alpha()          masks psych::alpha(), ggplot2::alpha()
## ✖ recipes::averages()      masks kirkegaard::averages()
## ✖ scales::discard()        masks purrr::discard()
## ✖ recipes::discretize()    masks kirkegaard::discretize()
## ✖ magrittr::extract()      masks tidyr::extract()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ recipes::fixed()         masks stringr::fixed()
## ✖ assertthat::has_name()   masks tibble::has_name()
## ✖ kirkegaard::is_logical() masks purrr::is_logical()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ magrittr::set_names()    masks purrr::set_names()
## ✖ yardstick::spec()        masks readr::spec()
## ✖ Hmisc::src()             masks dplyr::src()
## ✖ recipes::step()          masks stats::step()
## ✖ Hmisc::summarize()       masks dplyr::summarize()
## ✖ parsnip::translate()     masks Hmisc::translate()
#session defaults: print numbers with 3 significant digits and
#use the black-and-white theme for all ggplot2 figures
options(digits = 3)
theme_set(theme_bw())

#multithreading
library(future)
library(furrr)
#reset to sequential evaluation first, then resolve futures in 3 background R sessions
#(documented form: strategy function plus its arguments, not a call to the strategy)
plan(sequential)
plan(multisession, workers = 3)

Functions

Data

#read steam data files
#games: only id, title and release date are needed; "\N" marks missing values
games = read_csv(
  "data/games.zip",
  na = "\\N",
  col_select = c(app_id, name, release_date)
) %>%
  mutate(app_id = as.integer(app_id))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 140082 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): name
## dbl  (1): app_id
## date (1): release_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#reviews: app_id is parsed as text in this file, so ids that are not numbers become NA here
reviews = read_csv("data/reviews.zip") %>%
  mutate(app_id = as.integer(app_id))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 140116 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): app_id, review_score_description, metacritic_score, reviews, recomm...
## dbl (7): review_score, positive, negative, total, steamspy_user_score, steam...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `app_id = as.integer(app_id)`.
## Caused by warning:
## ! NAs introduced by coercion
#genres: long format, one row per (app_id, genre) pair
genres = read_csv("data/genres.zip") %>%
  mutate(app_id = as.integer(app_id))
## Rows: 353339 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): genre
## dbl (1): app_id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#tags: long format, one row per (app_id, tag) pair
tags = read_csv("data/tags.zip") %>%
  mutate(app_id = as.integer(app_id))
## Rows: 1744632 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): tag
## dbl (1): app_id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Empirical bayes rating

#games with at least 10 reviews, keeping only the id and the review counts
#(miss_filter presumably drops rows with missing values - see kirkegaard docs)
reviews_sub = reviews %>%
  filter(total >= 10) %>%
  select(app_id, positive, total) %>%
  miss_filter()

#empirical bayes rating: fit the prior from positive / total counts
eb_fit = ebb_fit_prior(reviews_sub, positive, total)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## ℹ The deprecated feature was likely used in the ebbr package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#save results: put the game ids next to the augmented fit output
#(bind_cols matches rows by position, so this relies on augment() keeping the input row order)
reviews_sub = bind_cols(
  select(reviews_sub, app_id),
  augment(eb_fit)
)

#actual distribution of ratings, full sample (no minimum review count)
reviews_all = reviews %>%
  transmute(
    app_id,
    positive,
    total,
    mean = positive / total
  ) %>%
  miss_filter()

#fit empirical bayes rating on the full sample
eb_fit_all = ebb_fit_prior(reviews_all, positive, total)

#save results: ids next to the augmented fit output (rows matched by position)
reviews_all = bind_cols(
  select(reviews_all, app_id),
  augment(eb_fit_all)
)

#mean rating as function of rating count
#raw share of positive reviews against the (log scaled) number of reviews
ggplot(reviews_all, aes(x = total, y = .raw)) +
  geom_point(alpha = 0.1) +
  scale_x_log10() +
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    labels = scales::percent_format(accuracy = 1)
  ) +
  #gam smoother with k = 20 to allow a more flexible curve than the default
  geom_smooth(
    method = "gam",
    formula = y ~ s(x, k = 20),
    method.args = list(family = gaussian)
  ) +
  labs(
    title = "Steam game ratings",
    x = "Number of reviews",
    y = "Positive rating %"
  )

GG_save("figs/mean_rating~ratings.png")

#raw and adjusted ratings
#the dashed red identity line marks where the adjusted rating equals the raw one
GG_scatter(reviews_all, ".raw", ".fitted", alpha = 0.1) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  scale_x_continuous(
    breaks = seq(0, 1, 0.1),
    labels = scales::percent_format(accuracy = 1)
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    labels = scales::percent_format(accuracy = 1)
  ) +
  labs(
    title = "Steam game ratings",
    x = "Raw rating",
    y = "Adjusted rating"
  )
## `geom_smooth()` using formula = 'y ~ x'

#write the raw vs. adjusted figure to disk
#NOTE(review): GG_save presumably saves the most recently drawn plot - confirm
GG_save("figs/raw mean vs. bayes mean.png")
## `geom_smooth()` using formula = 'y ~ x'
#adjusted ratings of games whose raw rating is 100% positive
filter(reviews_all, .raw == 1) %>%
  select(total, .fitted) %>%
  describe2()

#adjusted ratings of games with a single review
filter(reviews_all, total == 1) %>%
  select(total, .fitted) %>%
  describe2()

Restructure genres

#spread from long format to wide format with dummy coding
#one row per app_id, one 0/1 column per genre
genres2 = genres %>%
  #duplicated (app_id, genre) pairs would turn the wide cells into list-columns
  distinct(app_id, genre) %>%
  #1 marks that the game has the genre; a missing genre label counts as 0
  mutate(has_genre = as.numeric(!is.na(genre))) %>%
  pivot_wider(
    names_from = genre,
    values_from = has_genre,
    #genres a game does not have
    values_fill = 0
  )

#number of games per genre (column sums of the dummies, id column left out), sorted ascending
genre_counts = sort(colSums(select(genres2, -app_id)))

#keep those with 100+ games (inclusive, same cut-off as the tag terms in the regression formula)
genres_final = genres2 %>% 
  select(app_id, all_of(names(genre_counts[genre_counts >= 100]))) %>% 
  #prefix every column with "genre_", then restore the id column name
  df_add_affix(prefix = "genre_") %>%
  rename(app_id = genre_app_id)

Restructure tags

#spread from long format to wide format with dummy coding
#one row per app_id, one 0/1 column per tag
tags2 = tags %>%
  #duplicated (app_id, tag) pairs would turn the wide cells into list-columns
  distinct(app_id, tag) %>%
  #1 marks that the game has the tag; a missing tag label counts as 0
  mutate(has_tag = as.numeric(!is.na(tag))) %>%
  pivot_wider(
    names_from = tag,
    values_from = has_tag,
    #tags a game does not have
    values_fill = 0
  )

#number of games per tag (column sums of the dummies, id column left out), sorted ascending
tag_counts = sort(colSums(select(tags2, -app_id)))

#version with legal names: same counts, names prefixed with "tag_" and legalized
tag_counts_legal = tag_counts
names(tag_counts_legal) = str_legalize(paste0("tag_", names(tag_counts)))

#keep those with 100+ games
#the cut-off is inclusive so it matches the `>= 100` used when the regression formula is built;
#with `> 100` a tag with exactly 100 games would be in the formula but not in the data
tags_final = tags2 %>% 
  select(app_id, all_of(names(tag_counts[tag_counts >= 100]))) %>% 
  #prefix every column with "tag_", then restore the id column name
  df_add_affix(prefix = "tag_") %>%
  rename(app_id = tag_app_id)

Join

#join games, reviews, genres, tags
#keep only inner join: a game must be present in all four tables
d = list(games, reviews_sub, genres_final, tags_final) %>%
  purrr::reduce(inner_join, by = "app_id") %>%
  df_legalize_names()

Analysis

Plots

#rating by release date
#point size shows the number of reviews, the curve is the default smoother
ggplot(d, aes(x = release_date, y = fitted)) +
  geom_point(aes(size = total), alpha = 0.1) +
  geom_smooth() +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y-%m-%d") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  #vertical date labels so the yearly breaks do not overlap
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Steam game ratings by release date",
    x = "Release date",
    y = "Positive rating %"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 695 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 695 rows containing missing values or values outside the scale range
## (`geom_point()`).

#write the release date figure to disk
#NOTE(review): GG_save presumably saves the most recently drawn plot - confirm
GG_save("figs/rating~release_date.png")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 695 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 695 rows containing missing values or values outside the scale range
## (`geom_point()`).
#rating by number of reviews
#adjusted rating against the review count on a log10 x axis
ggplot(d, aes(x = total, y = fitted)) +
  geom_point(alpha = 0.1) +
  geom_smooth() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_log10() +
  labs(
    title = "Steam game ratings by number of reviews",
    x = "Number of reviews",
    y = "Positive rating %"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#write the review count figure to disk
#NOTE(review): GG_save presumably saves the most recently drawn plot - confirm
GG_save("figs/rating~reviews.png")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#rating by tag
#mean adjusted rating for each of the 50 most common tags
d %>% 
  #keep the rating and only the top 50 tags
  #(tag_counts_legal is sorted ascending, so the tail holds the most common tags)
  #the rating is carried along directly instead of being joined back in afterwards
  select(app_id, fitted, all_of(tail(names(tag_counts_legal), 50))) %>% 
  #long format: one row per (game, tag) pair, kept only where the game has the tag
  pivot_longer(cols = starts_with("tag_")) %>% 
  filter(value == 1) %>%
  mutate(
    #drop the column prefix for display
    name = str_remove(name, "^tag_"),
    #sort by mean rating
    name = fct_reorder(name, fitted, .fun = mean)
  ) %>%
  GG_group_means(
    "fitted",
    "name",
    type = "point",
    split_group_labels = FALSE
  ) +
  coord_flip() +
  theme(
    #after coord_flip() the tag labels are drawn on the vertical axis,
    #so the smaller font has to be set on axis.text.y to reach them
    axis.text.y = element_text(hjust = 1, size = 8)
  ) +
  #labs
  labs(
    x = "Tag",
    y = "Positive rating %",
    title = "Steam game ratings by tag"
  )

#multiplayer and singleplayer
#classify each game by its player tags, regress the adjusted rating on that
#class plus release date, and plot the predicted rating for each class
d %>% 
  mutate(
    #conditions are checked in order, so the combined case has to come first
    #the tag dummies are 0/1 here (built with 0 fill before the inner join)
    player_tags = case_when(
      tag_Multiplayer == 1 & tag_Singleplayer == 1 ~ "multiplayer and singleplayer",
      tag_Multiplayer == 1 ~ "multiplayer",
      tag_Singleplayer == 1 ~ "singleplayer",
      TRUE ~ "none"
    ) %>% 
      #"none" is the first level and hence the reference category in the model
      factor(levels = c("none", "singleplayer", "multiplayer", "multiplayer and singleplayer"))
  ) %>% 
  {
    #braces so that the piped data goes to `data` and not to the formula argument
    #(the stray trailing comma after `data = .` passed an empty extra argument and is removed)
    lm(fitted ~ player_tags + as.numeric(release_date), data = .)
  } %>% 
  ggeffects::ggpredict(terms = "player_tags") %>% 
  plot() +
  labs(
    x = "Player tags",
    y = "Positive rating %",
    title = "Steam game ratings by player tags",
    subtitle = "Adjusted for release date, and using empirical bayes rating"
  )

GG_save("figs/multiplayer_singleplayer.png")

Regressions

#build a model formula
#predictors: release date plus every tag dummy with 100+ games
#only tags that are actually columns of d are used: the data was filtered with its own
#count cut-off, and a tag named here but absent from d would make lm() fail
tag_terms = intersect(
  names(tag_counts_legal[tag_counts_legal >= 100]),
  names(d)
)
form_1 = str_glue("fitted ~ release_date + {str_c(tag_terms, collapse = ' + ')}")

#form_1 is a string; lm() converts it to a formula
fit_1 = lm(form_1, data = d)
summary(fit_1)
## 
## Call:
## lm(formula = form_1, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7573 -0.0754  0.0227  0.0962  0.3846 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            3.36e-01   1.16e-02   28.89  < 2e-16 ***
## release_date                           2.09e-05   6.55e-07   31.88  < 2e-16 ***
## tag_Mod                                5.38e-02   1.62e-02    3.33  0.00087 ***
## tag_Tile_Matching                      8.75e-02   1.82e-02    4.79  1.6e-06 ***
## tag_Baseball                           3.65e-02   1.85e-02    1.97  0.04885 *  
## tag_Intentionally_Awkward_Controls     2.23e-02   1.71e-02    1.31  0.19067    
## tag_Crowdfunded                        1.75e-02   1.48e-02    1.18  0.23604    
## tag_Cycling                            3.02e-02   1.83e-02    1.65  0.09916 .  
## tag_Dungeons_and_Dragons               3.39e-02   1.57e-02    2.16  0.03042 *  
## tag_Vikings                            1.59e-02   1.57e-02    1.02  0.30877    
## tag_Gaming                            -4.44e-02   1.82e-02   -2.44  0.01472 *  
## tag_Pool                              -2.69e-02   1.71e-02   -1.57  0.11544    
## tag_Skating                            1.20e-02   1.94e-02    0.62  0.53781    
## tag_Boss_Rush                          3.96e-02   1.81e-02    2.19  0.02856 *  
## tag_Tennis                            -3.90e-03   1.73e-02   -0.23  0.82173    
## tag_Movie                             -2.91e-02   1.44e-02   -2.01  0.04406 *  
## tag_Epic                               3.87e-02   1.31e-02    2.95  0.00316 ** 
## tag_Jump_Scare                         2.41e-02   1.62e-02    1.48  0.13808    
## tag_Ambient                            2.15e-02   1.67e-02    1.29  0.19840    
## tag_Web_Publishing                     4.87e-03   1.56e-02    0.31  0.75519    
## tag_Skateboarding                      3.40e-02   1.80e-02    1.89  0.05867 .  
## tag_Wrestling                          2.71e-02   1.57e-02    1.72  0.08457 .  
## tag_Silent_Protagonist                -1.03e-02   1.33e-02   -0.77  0.43969    
## tag_Football_American                 -2.33e-02   1.39e-02   -1.68  0.09350 .  
## tag_Mini_Golf                          3.15e-02   1.92e-02    1.64  0.10166    
## tag_360_Video                         -4.25e-02   1.70e-02   -2.50  0.01243 *  
## tag_Job_Simulator                      1.94e-02   1.64e-02    1.19  0.23511    
## tag_Dwarf                              4.05e-02   1.26e-02    3.21  0.00133 ** 
## tag_Electronic_Music                   3.04e-02   1.70e-02    1.79  0.07300 .  
## tag_Social_Deduction                   2.37e-02   1.41e-02    1.68  0.09299 .  
## tag_Pinball                           -1.74e-02   1.30e-02   -1.34  0.18050    
## tag_LEGO                               2.08e-02   1.41e-02    1.48  0.13895    
## tag_Basketball                        -2.14e-02   1.44e-02   -1.49  0.13584    
## tag_Asymmetric_VR                      1.87e-02   1.43e-02    1.31  0.19120    
## tag_GameMaker                         -1.17e-04   1.16e-02   -0.01  0.99196    
## tag_Motorbike                          2.53e-02   1.44e-02    1.76  0.07787 .  
## tag_Kickstarter                        1.28e-03   1.12e-02    0.11  0.90895    
## tag_Medical_Sim                       -3.79e-02   1.25e-02   -3.02  0.00254 ** 
## tag_Bikes                             -1.22e-02   1.51e-02   -0.81  0.41731    
## tag_World_War_I                        2.08e-02   1.24e-02    1.68  0.09359 .  
## tag_Spaceships                         4.15e-02   1.42e-02    2.92  0.00354 ** 
## tag_Rome                               1.20e-02   1.27e-02    0.95  0.34334    
## tag_Unforgiving                        1.77e-02   1.24e-02    1.43  0.15241    
## tag_Submarine                          4.34e-02   1.46e-02    2.98  0.00289 ** 
## tag_Photo_Editing                      1.86e-02   1.33e-02    1.39  0.16338    
## tag_Experience                         2.86e-03   1.07e-02    0.27  0.78984    
## tag_Golf                              -8.08e-03   1.51e-02   -0.53  0.59274    
## tag_Software_Training                 -4.82e-02   1.28e-02   -3.75  0.00018 ***
## tag_Sequel                            -7.75e-03   9.95e-03   -0.78  0.43640    
## tag_Roguevania                        -2.00e-02   1.39e-02   -1.43  0.15188    
## tag_Episodic                           3.16e-02   1.05e-02    3.02  0.00253 ** 
## tag_Outbreak_Sim                      -1.30e-02   1.25e-02   -1.04  0.30030    
## tag_Jet                               -2.49e-02   1.32e-02   -1.88  0.05944 .  
## tag_Chess                             -7.99e-03   1.15e-02   -0.69  0.48728    
## tag_Farming                           -2.12e-04   1.24e-02   -0.02  0.98642    
## tag_Horses                             5.63e-03   1.08e-02    0.52  0.60365    
## tag_Touch_Friendly                     4.74e-02   9.11e-03    5.20  2.0e-07 ***
## tag_Boxing                            -5.89e-03   1.24e-02   -0.48  0.63471    
## tag_Werewolves                         9.50e-03   1.18e-02    0.81  0.42060    
## tag_Mars                              -2.17e-02   1.13e-02   -1.93  0.05401 .  
## tag_Spelling                           8.49e-03   1.32e-02    0.64  0.52024    
## tag_Sniper                             6.67e-03   1.07e-02    0.62  0.53407    
## tag_Cult_Classic                       1.54e-02   8.67e-03    1.77  0.07612 .  
## tag_Nostalgia                          4.61e-02   9.89e-03    4.66  3.1e-06 ***
## tag_Transhumanism                      7.44e-03   1.21e-02    0.61  0.54004    
## tag_Audio_Production                  -3.06e-03   1.11e-02   -0.27  0.78350    
## tag_Villain_Protagonist                2.74e-03   9.43e-03    0.29  0.77099    
## tag_Offroad                           -1.19e-02   1.08e-02   -1.10  0.27284    
## tag_Illuminati                        -2.15e-02   9.07e-03   -2.37  0.01775 *  
## tag_Foreign                            1.13e-03   1.13e-02    0.10  0.91994    
## tag_Music_Based_Procedural_Generation  1.93e-02   1.11e-02    1.74  0.08214 .  
## tag_Real_Time_with_Pause               3.29e-02   8.76e-03    3.76  0.00017 ***
## tag_Football_Soccer                    4.11e-03   1.03e-02    0.40  0.68904    
## tag_Naval_Combat                       1.38e-02   1.23e-02    1.12  0.26349    
## tag_Time_Attack                        2.11e-02   9.15e-03    2.30  0.02120 *  
## tag_Sailing                           -1.27e-03   1.12e-02   -0.11  0.90998    
## tag_Video_Production                  -1.57e-02   1.05e-02   -1.50  0.13463    
## tag_Heist                              3.16e-03   1.01e-02    0.31  0.75492    
## tag_Trains                             2.39e-02   9.08e-03    2.63  0.00847 ** 
## tag_On_Rails_Shooter                  -1.33e-02   9.83e-03   -1.35  0.17565    
## tag_Addictive                          4.89e-02   7.55e-03    6.48  9.3e-11 ***
## tag_Cold_War                           2.80e-02   9.25e-03    3.03  0.00242 ** 
## tag_Gambling                          -4.94e-03   9.87e-03   -0.50  0.61642    
## tag_Trivia                            -5.52e-02   9.97e-03   -5.53  3.2e-08 ***
## tag_Archery                            6.10e-03   1.01e-02    0.60  0.54529    
## tag_Diplomacy                         -1.58e-03   9.87e-03   -0.16  0.87244    
## tag_Snow                               1.31e-02   9.51e-03    1.38  0.16721    
## tag_Remake                            -1.06e-02   7.23e-03   -1.47  0.14274    
## tag_Real_Time                         -1.20e-02   7.69e-03   -1.56  0.11893    
## tag_Naval                             -6.34e-03   1.06e-02   -0.60  0.55072    
## tag_Minigames                          6.86e-03   8.51e-03    0.81  0.42071    
## tag_Traditional_Roguelike              1.74e-02   1.00e-02    1.74  0.08158 .  
## tag_Party                              5.06e-03   9.93e-03    0.51  0.61040    
## tag_Superhero                          6.02e-03   8.79e-03    0.69  0.49308    
## tag_Asynchronous_Multiplayer           6.93e-03   8.58e-03    0.81  0.41918    
## tag_Boomer_Shooter                     5.29e-02   9.35e-03    5.66  1.6e-08 ***
## tag_Politics                           7.04e-03   9.20e-03    0.77  0.44409    
## tag_Transportation                     1.21e-02   8.98e-03    1.35  0.17730    
## tag_MOBA                              -1.92e-02   8.90e-03   -2.16  0.03083 *  
## tag_Typing                             1.60e-02   9.49e-03    1.69  0.09136 .  
## tag_Escape_Room                        7.22e-03   7.85e-03    0.92  0.35784    
## tag_Western                            1.06e-02   8.46e-03    1.25  0.21155    
## tag_Assassin                          -3.23e-03   8.87e-03   -0.36  0.71595    
## tag_Dinosaurs                          3.19e-03   8.06e-03    0.40  0.69212    
## tag_Political_Sim                     -1.68e-02   8.97e-03   -1.88  0.06057 .  
## tag_Programming                        3.31e-02   8.33e-03    3.98  7.0e-05 ***
## tag_Immersive                          2.08e-02   8.08e-03    2.58  0.01002 *  
## tag_Party_Game                         2.78e-02   9.32e-03    2.98  0.00289 ** 
## tag_Faith                              3.82e-02   7.83e-03    4.88  1.1e-06 ***
## tag_4X                                -1.94e-02   7.71e-03   -2.52  0.01169 *  
## tag_Hacking                            1.87e-02   8.40e-03    2.23  0.02576 *  
## tag_Tanks                              6.09e-03   8.07e-03    0.75  0.45071    
## tag_Fishing                            1.94e-02   8.25e-03    2.35  0.01860 *  
## tag_Narrative                          2.11e-02   7.68e-03    2.75  0.00600 ** 
## tag_Hunting                           -2.45e-02   8.36e-03   -2.93  0.00334 ** 
## tag_Animation_and_Modeling            -6.14e-03   8.08e-03   -0.76  0.44717    
## tag_Blood                              7.81e-03   6.69e-03    1.17  0.24331    
## tag_Vampire                           -6.49e-03   7.93e-03   -0.82  0.41331    
## tag_Trading_Card_Game                 -2.82e-02   7.75e-03   -3.64  0.00027 ***
## tag_FMV                                1.81e-03   6.55e-03    0.28  0.78230    
## tag_Mouse_only                         1.23e-02   6.38e-03    1.94  0.05299 .  
## tag_Game_Development                  -4.23e-03   8.16e-03   -0.52  0.60401    
## tag_Otome                              1.70e-02   7.37e-03    2.31  0.02114 *  
## tag_Roguelike_Deckbuilder              2.16e-02   9.50e-03    2.27  0.02331 *  
## tag_Political                          1.27e-03   7.86e-03    0.16  0.87166    
## tag_Ninja                              9.07e-03   7.38e-03    1.23  0.21931    
## tag_Underwater                         1.12e-02   8.25e-03    1.36  0.17469    
## tag_Beautiful                          9.25e-03   5.87e-03    1.57  0.11538    
## tag_Mining                             1.19e-02   7.92e-03    1.50  0.13310    
## tag_Dynamic_Narration                 -6.70e-03   7.93e-03   -0.84  0.39839    
## tag_Hex_Grid                           1.52e-02   7.33e-03    2.07  0.03816 *  
## tag_Pirates                            1.12e-02   7.61e-03    1.47  0.14029    
## tag_Time_Travel                        1.01e-02   7.49e-03    1.35  0.17593    
## tag_Wholesome                          9.64e-05   8.32e-03    0.01  0.99075    
## tag_Co_op_Campaign                    -3.48e-04   7.31e-03   -0.05  0.96204    
## tag_Dog                                1.22e-02   7.06e-03    1.73  0.08446 .  
## tag_Steampunk                         -1.09e-03   6.70e-03   -0.16  0.87089    
## tag_Character_Action_Game             -1.92e-02   6.34e-03   -3.03  0.00243 ** 
## tag_Sokoban                            4.07e-02   7.44e-03    5.46  4.7e-08 ***
## tag_Satire                             6.22e-03   7.04e-03    0.88  0.37719    
## tag_Quick_Time_Events                 -1.14e-03   7.48e-03   -0.15  0.87880    
## tag_Underground                       -8.30e-03   7.53e-03   -1.10  0.27030    
## tag_God_Game                          -3.04e-03   6.83e-03   -0.44  0.65690    
## tag_Action_RTS                         1.26e-02   8.42e-03    1.49  0.13555    
## tag_Open_World_Survival_Craft          7.47e-03   7.26e-03    1.03  0.30394    
## tag_Mechs                             -2.55e-03   6.74e-03   -0.38  0.70508    
## tag_Martial_Arts                      -8.95e-03   7.22e-03   -1.24  0.21479    
## tag_World_War_II                       1.85e-02   6.09e-03    3.03  0.00243 ** 
## tag_Software                           2.23e-02   7.77e-03    2.87  0.00417 ** 
## tag_Voxel                              8.70e-03   6.54e-03    1.33  0.18317    
## tag_Time_Manipulation                  2.03e-02   7.12e-03    2.85  0.00439 ** 
## tag_Solitaire                          1.30e-02   6.95e-03    1.87  0.06104 .  
## tag_Gothic                             2.18e-02   7.03e-03    3.11  0.00190 ** 
## tag_MMORPG                            -2.72e-02   6.62e-03   -4.11  4.0e-05 ***
## tag_Agriculture                        1.37e-02   7.65e-03    1.80  0.07258 .  
## tag_Looter_Shooter                    -1.29e-02   7.70e-03   -1.67  0.09433 .  
## tag_Conspiracy                        -6.72e-04   7.02e-03   -0.10  0.92378    
## tag_Combat_Racing                      4.75e-03   7.75e-03    0.61  0.54022    
## tag_Trading                           -1.08e-02   6.80e-03   -1.59  0.11221    
## tag_Spectacle_fighter                  6.84e-03   6.61e-03    1.03  0.30090    
## tag_Bullet_Time                        1.02e-02   6.71e-03    1.52  0.12736    
## tag_Split_Screen                       1.97e-02   6.34e-03    3.10  0.00192 ** 
## tag_Parody                             4.43e-04   6.12e-03    0.07  0.94224    
## tag_Hero_Shooter                       3.27e-03   6.98e-03    0.47  0.63936    
## tag_America                           -7.84e-03   6.56e-03   -1.20  0.23185    
## tag_Cozy                               5.32e-02   7.74e-03    6.87  6.4e-12 ***
## tag_3D_Vision                         -1.51e-02   6.68e-03   -2.27  0.02339 *  
## tag_6DOF                              -8.88e-03   6.61e-03   -1.34  0.17899    
## tag_Battle_Royale                     -1.53e-02   6.54e-03   -2.34  0.01910 *  
## tag_Dragons                            4.93e-03   6.38e-03    0.77  0.43937    
## tag_Auto_Battler                      -1.40e-02   6.55e-03   -2.14  0.03261 *  
## tag_Gun_Customization                 -4.26e-03   6.94e-03   -0.61  0.53927    
## tag_eSports                            4.47e-03   6.12e-03    0.73  0.46513    
## tag_Cooking                            1.88e-02   6.70e-03    2.80  0.00512 ** 
## tag_Soundtrack                        -2.39e-03   5.22e-03   -0.46  0.64708    
## tag_Mystery_Dungeon                   -1.29e-02   6.27e-03   -2.07  0.03892 *  
## tag_Class_Based                        1.69e-02   6.64e-03    2.54  0.01103 *  
## tag_Vehicular_Combat                   8.12e-03   7.17e-03    1.13  0.25732    
## tag_Grand_Strategy                    -1.37e-02   6.39e-03   -2.15  0.03175 *  
## tag_Word_Game                         -4.15e-03   6.35e-03   -0.65  0.51329    
## tag_Science                            1.57e-02   6.03e-03    2.61  0.00898 ** 
## tag_Colony_Sim                         1.75e-02   6.52e-03    2.68  0.00742 ** 
## tag_Design_and_Illustration            1.46e-02   6.22e-03    2.35  0.01901 *  
## tag_Swordplay                          2.25e-03   6.27e-03    0.36  0.71949    
## tag_Moddable                           5.76e-02   5.25e-03   10.97  < 2e-16 ***
## tag_Utilities                          1.10e-02   6.89e-03    1.59  0.11170    
## tag_Noir                              -7.92e-04   5.78e-03   -0.14  0.89104    
## tag_Lovecraftian                       1.36e-02   5.62e-03    2.42  0.01566 *  
## tag_Automation                         1.49e-02   6.02e-03    2.48  0.01304 *  
## tag_Space_Sim                         -5.69e-03   6.20e-03   -0.92  0.35894    
## tag_Farming_Sim                       -9.39e-03   6.93e-03   -1.35  0.17543    
## tag_Dark_Comedy                        1.04e-02   5.63e-03    1.85  0.06466 .  
## tag_Capitalism                         1.39e-02   6.18e-03    2.25  0.02463 *  
## tag_Competitive                        1.30e-02   4.99e-03    2.61  0.00906 ** 
## tag_Match_3                           -2.29e-02   5.24e-03   -4.36  1.3e-05 ***
## tag_3D_Fighter                        -1.70e-02   6.29e-03   -2.71  0.00682 ** 
## tag_Fighting                           3.52e-03   5.37e-03    0.66  0.51164    
## tag_Philosophical                      1.25e-02   5.55e-03    2.26  0.02378 *  
## tag_Experimental                      -8.64e-03   4.67e-03   -1.85  0.06431 .  
## tag_Classic                            5.76e-02   4.34e-03   13.29  < 2e-16 ***
## tag_Automobile_Sim                    -5.19e-03   6.13e-03   -0.85  0.39726    
## tag_Music                             -1.22e-02   5.02e-03   -2.42  0.01553 *  
## tag_Rhythm                             2.40e-02   5.66e-03    4.23  2.3e-05 ***
## tag_Crime                             -2.61e-03   5.34e-03   -0.49  0.62546    
## tag_Creature_Collector                -1.19e-03   5.54e-03   -0.21  0.83019    
## tag_Twin_Stick_Shooter                 1.08e-02   5.43e-03    1.99  0.04611 *  
## tag_Destruction                        1.39e-02   5.13e-03    2.71  0.00677 ** 
## tag_CRPG                               3.44e-03   5.16e-03    0.67  0.50528    
## tag_Level_Editor                       2.10e-02   4.84e-03    4.33  1.5e-05 ***
## tag_Mythology                          6.82e-03   5.16e-03    1.32  0.18556    
## tag_2D_Fighter                        -9.81e-03   5.76e-03   -1.70  0.08848 .  
## tag_Cats                               2.87e-02   4.76e-03    6.03  1.7e-09 ***
## tag_Alternate_History                  6.42e-04   4.97e-03    0.13  0.89713    
## tag_Grid_Based_Movement                1.91e-02   5.31e-03    3.60  0.00032 ***
## tag_Parkour                            1.97e-02   5.20e-03    3.79  0.00015 ***
## tag_Card_Battler                      -1.46e-02   6.75e-03   -2.17  0.03019 *  
## tag_RPGMaker                           3.15e-02   4.21e-03    7.50  6.7e-14 ***
## tag_Loot                               5.06e-03   4.84e-03    1.05  0.29580    
## tag_Party_Based_RPG                   -1.68e-03   5.10e-03   -0.33  0.74177    
## tag_4_Player_Local                     9.55e-03   5.25e-03    1.82  0.06864 .  
## tag_Team_Based                        -6.85e-03   4.83e-03   -1.42  0.15644    
## tag_Inventory_Management               2.44e-03   5.12e-03    0.48  0.63434    
## tag_Flight                             9.97e-03   4.72e-03    2.11  0.03450 *  
## tag_Wargame                           -6.18e-03   5.27e-03   -1.17  0.24107    
## tag_Modern                            -6.36e-03   4.45e-03   -1.43  0.15342    
## tag_Souls_like                         1.21e-02   4.93e-03    2.46  0.01383 *  
## tag_Artificial_Intelligence           -6.05e-03   4.80e-03   -1.26  0.20823    
## tag_Runner                            -2.91e-03   4.86e-03   -0.60  0.54974    
## tag_Beat_em_up                         2.46e-03   4.72e-03    0.52  0.60260    
## tag_Memes                              9.89e-03   3.73e-03    2.65  0.00802 ** 
## tag_Idler                             -2.46e-02   4.43e-03   -5.55  2.9e-08 ***
## tag_Dystopian                          7.26e-03   4.80e-03    1.51  0.13040    
## tag_Comic_Book                         6.28e-03   4.53e-03    1.39  0.16595    
## tag_Short                              6.50e-03   3.59e-03    1.81  0.07006 .  
## tag_Tutorial                           2.49e-03   4.61e-03    0.54  0.58969    
## tag_Driving                           -1.52e-03   5.34e-03   -0.28  0.77628    
## tag_Fast_Paced                         1.13e-02   3.91e-03    2.89  0.00389 ** 
## tag_Metroidvania                       6.02e-03   4.78e-03    1.26  0.20767    
## tag_Psychedelic                        1.84e-02   4.42e-03    4.16  3.1e-05 ***
## tag_Deckbuilding                       2.04e-02   6.50e-03    3.14  0.00167 ** 
## tag_Tactical_RPG                       1.83e-03   5.03e-03    0.36  0.71664    
## tag_City_Builder                      -2.17e-05   5.00e-03    0.00  0.99654    
## tag_Turn_Based                         1.20e-02   3.87e-03    3.10  0.00192 ** 
## tag_Tabletop                           5.86e-03   4.60e-03    1.27  0.20275    
## tag_Supernatural                       6.49e-03   4.39e-03    1.48  0.13918    
## tag_Perma_Death                        2.33e-05   4.28e-03    0.01  0.99567    
## tag_Board_Game                         1.59e-03   4.60e-03    0.35  0.72956    
## tag_Collectathon                       1.68e-02   4.25e-03    3.96  7.6e-05 ***
## tag_Massively_Multiplayer             -5.07e-02   4.06e-03  -12.51  < 2e-16 ***
## tag_Real_Time_Tactics                 -3.80e-03   4.66e-03   -0.82  0.41491    
## tag_Thriller                          -4.21e-03   4.24e-03   -0.99  0.32086    
## tag_Arena_Shooter                      1.08e-02   4.39e-03    2.45  0.01421 *  
## tag_Psychological                      1.24e-03   4.09e-03    0.30  0.76226    
## tag_Nonlinear                          9.64e-03   4.33e-03    2.23  0.02596 *  
## tag_Economy                           -8.70e-03   4.61e-03   -1.88  0.05947 .  
## tag_Tower_Defense                      1.16e-02   4.19e-03    2.77  0.00567 ** 
## tag_Demons                            -7.53e-03   4.12e-03   -1.83  0.06750 .  
## tag_Lore_Rich                          9.95e-03   4.31e-03    2.31  0.02102 *  
## tag_RTS                                7.58e-03   4.37e-03    1.73  0.08296 .  
## tag_Dark_Humor                         1.65e-02   3.98e-03    4.15  3.3e-05 ***
## tag_Aliens                             1.02e-03   4.32e-03    0.24  0.81288    
## tag_Military                           5.56e-03   4.56e-03    1.22  0.22332    
## tag_Time_Management                    1.89e-02   4.02e-03    4.71  2.5e-06 ***
## tag_Strategy_RPG                      -8.19e-03   4.44e-03   -1.84  0.06538 .  
## tag_Robots                             1.73e-02   4.15e-03    4.17  3.0e-05 ***
## tag_Detective                         -3.67e-03   4.27e-03   -0.86  0.39038    
## tag_Abstract                           8.81e-03   3.95e-03    2.23  0.02571 *  
## tag_Cyberpunk                         -1.69e-03   4.13e-03   -0.41  0.68259    
## tag_Conversation                       4.77e-03   3.98e-03    1.20  0.23163    
## tag_1980s                              2.04e-03   4.11e-03    0.50  0.61929    
## tag_Third_Person_Shooter              -8.36e-03   4.37e-03   -1.91  0.05607 .  
## tag_Replay_Value                      -1.34e-02   3.35e-03   -4.00  6.4e-05 ***
## tag_Precision_Platformer               1.81e-02   4.37e-03    4.15  3.4e-05 ***
## tag_LGBTQplus                          1.52e-02   3.50e-03    4.36  1.3e-05 ***
## tag_Local_Co_Op                        4.99e-03   4.16e-03    1.20  0.22989    
## tag_Investigation                      1.72e-03   4.17e-03    0.41  0.67946    
## tag_Narration                         -5.72e-04   3.64e-03   -0.16  0.87517    
## tag_Card_Game                         -2.81e-03   4.98e-03   -0.56  0.57300    
## tag_Clicker                           -4.55e-02   3.60e-03  -12.63  < 2e-16 ***
## tag_Life_Sim                           2.47e-03   3.89e-03    0.64  0.52527    
## tag_Stealth                            4.22e-03   3.68e-03    1.15  0.25134    
## tag_Isometric                          5.88e-03   3.48e-03    1.69  0.09051 .  
## tag_Cinematic                         -7.46e-03   3.77e-03   -1.98  0.04767 *  
## tag_2_5D                               5.95e-03   3.71e-03    1.60  0.10881    
## tag_Education                          2.28e-02   3.69e-03    6.19  6.1e-10 ***
## tag_War                                2.14e-03   4.20e-03    0.51  0.60932    
## tag_Historical                         1.26e-02   3.54e-03    3.56  0.00037 ***
## tag_Text_Based                         4.88e-03   3.58e-03    1.36  0.17258    
## tag_Top_Down_Shooter                   4.68e-04   4.32e-03    0.11  0.91370    
## tag_NSFW                              -1.09e-02   4.31e-03   -2.54  0.01124 *  
## tag_Zombies                           -1.27e-02   3.52e-03   -3.62  0.00030 ***
## tag_Hentai                             1.77e-02   3.98e-03    4.45  8.7e-06 ***
## tag_Score_Attack                       8.03e-03   3.57e-03    2.25  0.02445 *  
## tag_JRPG                              -1.60e-03   3.56e-03   -0.45  0.65221    
## tag_Post_apocalyptic                   7.47e-03   3.57e-03    2.09  0.03663 *  
## tag_Base_Building                      1.22e-03   4.07e-03    0.30  0.76434    
## tag_Surreal                            1.87e-02   3.29e-03    5.67  1.4e-08 ***
## tag_Racing                            -3.02e-03   3.79e-03   -0.80  0.42547    
## tag_Dating_Sim                        -8.43e-03   3.50e-03   -2.41  0.01603 *  
## tag_1990_s                             1.31e-02   3.47e-03    3.78  0.00016 ***
## tag_Great_Soundtrack                   3.74e-02   2.55e-03   14.63  < 2e-16 ***
## tag_Dungeon_Crawler                   -5.26e-04   3.54e-03   -0.15  0.88193    
## tag_Walking_Simulator                 -1.05e-02   3.29e-03   -3.18  0.00148 ** 
## tag_Local_Multiplayer                  7.92e-03   3.89e-03    2.03  0.04200 *  
## tag_Mature                            -1.11e-02   3.87e-03   -2.88  0.00395 ** 
## tag_Nature                             1.62e-02   3.51e-03    4.62  3.8e-06 ***
## tag_Shoot_Em_Up                        1.06e-02   3.60e-03    2.95  0.00319 ** 
## tag_Resource_Management                7.14e-03   3.59e-03    1.99  0.04663 *  
## tag_Hack_and_Slash                    -2.27e-03   3.50e-03   -0.65  0.51696    
## tag_Turn_Based_Tactics                 1.06e-04   3.94e-03    0.03  0.97863    
## tag_Emotional                          1.87e-02   3.19e-03    5.84  5.3e-09 ***
## tag_Online_Co_Op                      -4.94e-03   3.54e-03   -1.39  0.16366    
## tag_Sports                            -3.07e-03   3.25e-03   -0.95  0.34417    
## tag_Bullet_Hell                        1.75e-02   3.68e-03    4.75  2.0e-06 ***
## tag_Survival_Horror                   -8.91e-03   3.46e-03   -2.58  0.00995 ** 
## tag_Turn_Based_Combat                  6.56e-03   3.70e-03    1.77  0.07609 .  
## tag_Romance                            2.22e-03   3.34e-03    0.67  0.50591    
## tag_Dark_Fantasy                      -8.20e-04   3.31e-03   -0.25  0.80445    
## tag_Turn_Based_Strategy                8.65e-03   3.91e-03    2.21  0.02691 *  
## tag_Interactive_Fiction                6.67e-03   3.18e-03    2.10  0.03587 *  
## tag_Crafting                          -1.15e-02   3.42e-03   -3.36  0.00079 ***
## tag_Immersive_Sim                      6.49e-03   3.18e-03    2.04  0.04115 *  
## tag_Gore                               9.81e-03   3.21e-03    3.06  0.00223 ** 
## tag_Hidden_Object                     -4.40e-03   2.96e-03   -1.49  0.13685    
## tag_Choose_Your_Own_Adventure          3.38e-03   3.15e-03    1.07  0.28335    
## tag_Cartoon                           -8.73e-04   3.09e-03   -0.28  0.77762    
## tag_Drama                              3.52e-03   3.03e-03    1.16  0.24561    
## tag_Procedural_Generation              2.05e-02   3.13e-03    6.56  5.4e-11 ***
## tag_Action_Roguelike                   1.01e-02   3.95e-03    2.56  0.01038 *  
## tag_Puzzle_Platformer                  2.03e-03   3.09e-03    0.66  0.51164    
## tag_Medieval                           2.98e-03   3.14e-03    0.95  0.34315    
## tag_Logic                             -4.15e-03   3.09e-03   -1.34  0.17925    
## tag_Futuristic                         7.94e-06   3.29e-03    0.00  0.99808    
## tag_Old_School                         6.41e-03   3.05e-03    2.10  0.03567 *  
## tag_Tactical                          -6.14e-05   3.09e-03   -0.02  0.98416    
## tag_3D_Platformer                     -3.32e-03   3.31e-03   -1.00  0.31587    
## tag_Management                        -1.14e-02   3.29e-03   -3.48  0.00050 ***
## tag_Building                          -3.40e-03   3.41e-03   -1.00  0.31784    
## tag_Side_Scroller                     -9.31e-03   2.88e-03   -3.23  0.00124 ** 
## tag_Space                              2.81e-04   3.20e-03    0.09  0.93004    
## tag_Violent                           -1.90e-02   2.95e-03   -6.45  1.2e-10 ***
## tag_Magic                              3.22e-03   2.93e-03    1.10  0.27196    
## tag_Roguelite                          8.14e-03   3.67e-03    2.22  0.02652 *  
## tag_Sandbox                            1.39e-02   2.84e-03    4.89  1.0e-06 ***
## tag_Action_RPG                        -1.02e-02   3.14e-03   -3.24  0.00118 ** 
## tag_Hand_drawn                         9.16e-03   2.64e-03    3.47  0.00052 ***
## tag_VR                                 2.54e-02   2.49e-03   10.19  < 2e-16 ***
## tag_Co_op                              4.73e-03   3.14e-03    1.51  0.13133    
## tag_Character_Customization            4.45e-03   2.64e-03    1.68  0.09213 .  
## tag_Point_and_Click                    5.18e-03   2.56e-03    2.02  0.04338 *  
## tag_Minimalist                         1.61e-02   2.65e-03    6.08  1.2e-09 ***
## tag_Difficult                          4.89e-03   2.33e-03    2.10  0.03553 *  
## tag_Roguelike                          7.83e-03   3.62e-03    2.16  0.03044 *  
## tag_FPS                               -1.29e-02   2.92e-03   -4.43  9.4e-06 ***
## tag_Nudity                            -2.54e-02   3.66e-03   -6.93  4.2e-12 ***
## tag_Physics                            1.03e-02   2.61e-03    3.95  7.7e-05 ***
## tag_Psychological_Horror               1.17e-02   2.67e-03    4.38  1.2e-05 ***
## tag_Sexual_Content                    -1.10e-02   3.61e-03   -3.06  0.00219 ** 
## tag_PvP                               -1.44e-02   3.00e-03   -4.78  1.7e-06 ***
## tag_PvE                               -4.37e-03   2.78e-03   -1.57  0.11602    
## tag_Multiple_Endings                   1.69e-02   2.63e-03    6.42  1.4e-10 ***
## tag_Comedy                             2.09e-02   2.38e-03    8.81  < 2e-16 ***
## tag_Dark                               9.89e-04   2.62e-03    0.38  0.70529    
## tag_Mystery                            8.09e-03   2.65e-03    3.06  0.00223 ** 
## tag_Open_World                        -6.46e-03   2.52e-03   -2.56  0.01053 *  
## tag_Free_to_Play                       1.30e-02   1.85e-03    7.04  1.9e-12 ***
## tag_Cartoony                           7.39e-03   2.50e-03    2.96  0.00311 ** 
## tag_Survival                          -1.56e-02   2.49e-03   -6.27  3.5e-10 ***
## tag_Linear                             5.90e-03   2.40e-03    2.45  0.01412 *  
## tag_Realistic                         -3.14e-02   2.52e-03  -12.49  < 2e-16 ***
## tag_2D_Platformer                      1.36e-04   2.87e-03    0.05  0.96212    
## tag_Choices_Matter                     6.41e-03   2.55e-03    2.51  0.01198 *  
## tag_Visual_Novel                       3.44e-02   2.65e-03   12.99  < 2e-16 ***
## tag_Top_Down                           1.07e-02   2.48e-03    4.33  1.5e-05 ***
## tag_Controller                         1.76e-02   2.24e-03    7.84  4.4e-15 ***
## tag_Female_Protagonist                 2.00e-03   1.98e-03    1.01  0.31156    
## tag_Third_Person                       1.03e-03   2.45e-03    0.42  0.67357    
## tag_Family_Friendly                   -1.89e-03   2.19e-03   -0.86  0.38730    
## tag_Stylized                           1.01e-02   2.21e-03    4.56  5.1e-06 ***
## tag_Retro                              8.85e-03   2.23e-03    3.97  7.1e-05 ***
## tag_Platformer                         1.23e-02   2.64e-03    4.68  2.9e-06 ***
## tag_Sci_fi                             7.00e-03   2.39e-03    2.93  0.00339 ** 
## tag_Horror                            -3.99e-03   2.44e-03   -1.64  0.10165    
## tag_Anime                              2.15e-02   2.14e-03   10.05  < 2e-16 ***
## tag_Shooter                            4.19e-03   2.68e-03    1.56  0.11845    
## tag_Combat                             5.37e-03   2.41e-03    2.23  0.02566 *  
## tag_Relaxing                           5.44e-03   2.16e-03    2.51  0.01202 *  
## tag_Funny                              1.72e-02   2.00e-03    8.60  < 2e-16 ***
## tag_Early_Access                      -1.16e-02   1.86e-03   -6.25  4.2e-10 ***
## tag_Multiplayer                        1.49e-02   2.30e-03    6.45  1.1e-10 ***
## tag_Arcade                             1.25e-03   2.06e-03    0.61  0.54209    
## tag_Action_Adventure                  -4.02e-03   2.17e-03   -1.85  0.06410 .  
## tag_First_Person                      -5.39e-03   2.19e-03   -2.46  0.01395 *  
## tag_Fantasy                            2.80e-03   1.95e-03    1.44  0.15122    
## tag_Cute                               1.51e-02   1.85e-03    8.14  4.1e-16 ***
## tag_Pixel_Graphics                     2.95e-02   1.92e-03   15.39  < 2e-16 ***
## tag_Story_Rich                         1.23e-02   1.85e-03    6.65  3.0e-11 ***
## tag_Exploration                       -9.59e-04   1.87e-03   -0.51  0.60829    
## tag_Colorful                           1.32e-03   1.79e-03    0.74  0.45999    
## tag_Puzzle                             2.43e-02   1.80e-03   13.53  < 2e-16 ***
## tag_Atmospheric                        3.37e-03   1.65e-03    2.04  0.04165 *  
## tag_RPG                               -5.97e-03   1.82e-03   -3.29  0.00102 ** 
## tag_Strategy                          -7.74e-03   1.80e-03   -4.30  1.7e-05 ***
## tag_Simulation                        -2.45e-02   1.62e-03  -15.11  < 2e-16 ***
## tag_3D                                -7.87e-04   1.84e-03   -0.43  0.66927    
## tag_2D                                 1.12e-02   1.67e-03    6.69  2.3e-11 ***
## tag_Casual                            -2.00e-03   1.33e-03   -1.50  0.13306    
## tag_Adventure                         -7.78e-03   1.35e-03   -5.75  9.2e-09 ***
## tag_Action                            -6.73e-03   1.54e-03   -4.38  1.2e-05 ***
## tag_Indie                              4.80e-03   1.30e-03    3.68  0.00023 ***
## tag_Singleplayer                       1.27e-02   1.38e-03    9.23  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.136 on 59095 degrees of freedom
##   (695 observations deleted due to missingness)
## Multiple R-squared:  0.217,  Adjusted R-squared:  0.212 
## F-statistic: 39.9 on 410 and 59095 DF,  p-value: <2e-16
#tidy coefficient table with confidence intervals; the outer parentheses print it
(fit_1_tidy = tidy(fit_1, conf.int = TRUE))
#FDR-adjust the p values across every term in the table
fit_1_tidy$p.value.fdr = p.adjust(fit_1_tidy$p.value, method = "fdr")

#plot tag coefficients with fdr < 0.001
fit_1_tidy %>% 
  filter(
    p.value.fdr < 0.001,
    str_detect(term, "tag_")
  ) %>% 
  mutate(
    term = str_remove(term, "tag_"),
    term = str_clean(term),
    #order the tags by their coefficient
    term = fct_reorder(term, estimate, .fun = mean)
  ) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.3) +
  #flip so the tags run down the vertical axis
  coord_flip() +
  labs(
    x = "Tag",
    y = "Coefficient",
    title = "Steam game ratings by tag"
  ) +
  theme(
    #size 8, right-justified (hjust = 1) axis text
    #NOTE(review): after coord_flip() the tag names are on the vertical axis, so if
    #the goal was smaller tag labels this probably should be axis.text.y -- confirm
    axis.text.x = element_text(hjust = 1, size = 8)
  )

#compare models
#compare_predictors() is a kirkegaard helper; its result has one row per term and
#model, with a `model` column that takes values such as "singular" and "full"
#(presumably: each tag on its own vs. all tags jointly -- confirm)
fit_1_compare = compare_predictors(
  d,
  outcome = "fitted",
  #only the last 100 entries of tag_counts_legal
  predictors = names(tail(tag_counts_legal, 100)),
  controls = "release_date",
  keep_controls = FALSE
)

#plot
#the tag order is taken from the full model only, so every model shares one ordering
fit_1_compare_levels = fit_1_compare %>% 
  filter(model == "full") %>%
  mutate(
    #strip the prefix, clean the label, then order by the estimate
    term = fct_reorder(
      str_clean(str_remove(term, "tag_")),
      estimate,
      .fun = mean
    )
  ) %>% 
  pull(term)

#plot all models, applying the level order found above
fit_1_compare %>% 
  mutate(
    term = fct_relevel(
      str_clean(str_remove(term, "tag_")),
      levels(fit_1_compare_levels)
    )
  ) %>%
  GG_plot_models() +
  #smaller font for the y axis text
  theme(
    axis.text.y = element_text(size = 5.5)
  ) +
  labs(
    title = "Steam game ratings by tag",
    x = "Tag",
    y = "Coefficient"
  )

  # scale_y_discrete(guide = guide_axis(n.dodge = 2))

GG_save("figs/compare_tags_models.png")

#scatterplot of coefs from singular vs. full regression
#both sides drop the intercept and are matched on term
inner_join(
  x = fit_1_compare %>% 
    filter(model == "singular", term != "(Intercept)") %>% 
    select(term, estimate),
  y = fit_1_compare %>% 
    filter(model == "full", term != "(Intercept)") %>% 
    select(term, estimate),
  by = "term",
  suffix = c("_singular", "_full")
) %>%
  GG_scatter("estimate_singular", "estimate_full", case_names = "term") +
  geom_point() +
  #dashed identity line: points on it have the same coefficient in both models
  geom_abline(intercept = 0, slope = 1, alpha = 0.3, linetype = "dashed") +
  labs(
    title = "Steam game ratings by tag",
    x = "Singular model coefficient",
    y = "Full model coefficient"
  )
## `geom_smooth()` using formula = 'y ~ x'

#write the scatterplot to disk (GG_save is a kirkegaard helper; presumably it saves the most recent plot -- confirm)
GG_save("figs/compare_tags_scatter.png")
## `geom_smooth()` using formula = 'y ~ x'

Look for 2-way interactions

#look for 2-way interactions
#every unordered pair of distinct tags, using only tags whose count is at least 100
interactions = tag_counts_legal[tag_counts_legal >= 100] %>% 
  names() %>% 
  {expand_grid(tag_1 = ., tag_2 = .)} %>% 
  #keep each pair once and drop self-pairs
  filter(tag_1 < tag_2)

#compute counts, since many tag combinations dont exist
#for each pair: number of rows in d where both tag columns equal 1
interactions$n_tag_combo = future_map2_int(
  interactions$tag_1,
  interactions$tag_2,
  function(tag_a, tag_b) {
    #rows where the comparison is NA are not counted
    sum(d[[tag_a]] == 1 & d[[tag_b]] == 1, na.rm = TRUE)
  }
)

#subset tags
#keep only pairs that co-occur in more than 20 rows
interactions_sub = filter(interactions, n_tag_combo > 20)

#fit models
#one regression per tag pair: release_date plus both tags and their product term
interactions_sub$lm_fit <- future_map2(
  interactions_sub$tag_1, 
  interactions_sub$tag_2,
  function(t1, t2) {
    pair_formula <- as.formula(paste0("fitted ~ release_date + ", t1, " * ", t2))
    pair_fit <- lm(pair_formula, data = d)
    #each element is a length-1 list wrapping the tidy coefficient table
    list(broom::tidy(pair_fit))
  },
  .progress = TRUE
)

#extract interactions
#unwrap each tidy table, keep only the product terms (those with ":"), and stack them
interactions_coefs = interactions_sub$lm_fit %>% 
  map(function(wrapped) filter(wrapped[[1]], str_detect(term, ":"))) %>% 
  bind_rows()

#adjust the p values for multiple testing (FDR and Bonferroni)
interactions_coefs$p.value.fdr = p.adjust(interactions_coefs$p.value, method = "fdr")
interactions_coefs$p.value.bon = p.adjust(interactions_coefs$p.value, method = "bonferroni")

#plot distribution of the unadjusted p values
#the plotted variable is p.value, so the axis label says so
interactions_coefs %>% 
  ggplot(aes(x = p.value)) +
  geom_histogram(bins = 100) +
  labs(
    x = "Unadjusted p value",
    y = "Count",
    title = "Distribution of p values for interactions",
    subtitle = str_glue("Based on {nrow(interactions_sub)} interaction tests")
  )
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

#write the p value histogram to disk (GG_save is a kirkegaard helper; presumably it saves the most recent plot -- confirm)
GG_save("figs/interactions_pvalue_dist.png")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

Machine learning - xgboost

The model was fit in Python on another machine to use GPU acceleration; here we only read the saved predictions and plot them.

#read predictions
#zipped TSV with the xgboost predictions produced outside this notebook
d_ml_results = read_tsv(file = "data/steam_xgboost_predictions.tsv.zip")
## Rows: 59506 Columns: 414
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (414): tag_Mod, tag_Tile_Matching, tag_Baseball, tag_Intentionally_Awkwa...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#plot
#fitted against predicted, with a red dashed identity line and percent axes
d_ml_results %>% 
  GG_scatter("predicted", "fitted", alpha = 0.1) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  labs(
    title = "Steam game ratings by predicted rating",
    subtitle = str_glue("Based on {nrow(d_ml_results)} games"),
    x = "Predicted rating",
    y = "Adjusted rating"
  ) +
  scale_x_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1))
## `geom_smooth()` using formula = 'y ~ x'

#numerical
#regress fitted on the xgboost prediction and print the model summary
summary(lm(fitted ~ predicted, data = d_ml_results))
## 
## Call:
## lm(formula = fitted ~ predicted, data = d_ml_results)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7889 -0.0654  0.0182  0.0819  0.4578 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.02181    0.00444   -4.92  8.9e-07 ***
## predicted    1.02865    0.00572  179.73  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.123 on 59504 degrees of freedom
## Multiple R-squared:  0.352,  Adjusted R-squared:  0.352 
## F-statistic: 3.23e+04 on 1 and 59504 DF,  p-value: <2e-16
#write the predicted-vs-fitted scatterplot to disk (GG_save is a kirkegaard helper; presumably it saves the most recent plot -- confirm)
GG_save("figs/xgboost_predictions.png")
## `geom_smooth()` using formula = 'y ~ x'

Not used

#internal multi-threading
# set_engine("xgboost", nthread = 4)

#--- Select predictors and outcome ----
#all tag columns in d
tag_vars = d %>% 
  select(starts_with("tag_")) %>% 
  names()
#for a fast test fit, uncomment to use only the first 6 names of tag_counts_legal
#(previously this ran unconditionally and was overwritten by the assignment above)
# tag_vars <- tag_counts_legal %>% names() %>% head()

#predictor set: the tags plus total and release_date
predictors <- c(tag_vars, "total", "release_date")
#modelling data: predictors and outcome as numerics, complete cases only
d_ml <- d %>%
  select(all_of(predictors), fitted) %>%
  mutate(
    release_date = as.numeric(release_date), # numeric encoding
    total = as.numeric(total),
    fitted = as.numeric(fitted)
  ) %>%
  drop_na()

#--- Train/Test Split ----
#seeded split with 98% of the rows assigned to training
set.seed(1)
data_split <- d_ml %>% initial_split(prop = 0.98)
train_data <- data_split %>% training()
test_data  <- data_split %>% testing()

#--- Model Specification ----
#boosted trees with all six listed hyperparameters left open for tuning
xgb_spec <- boost_tree(
  mtry = tune(),
  trees = tune(),
  tree_depth = tune(),
  learn_rate = tune(),
  loss_reduction = tune(),
  sample_size = tune()
)
xgb_spec <- set_engine(xgb_spec, "xgboost")
xgb_spec <- set_mode(xgb_spec, "regression")

#--- Recipe ----
#fitted is the outcome; every other column of train_data is a predictor
xgb_rec <- recipe(fitted ~ ., data = train_data)

#--- Workflow ----
#bundle the recipe and the model specification
xgb_wflow <- workflow() %>%
  add_recipe(xgb_rec) %>%
  add_model(xgb_spec)

#--- Grid Specification ----
#space-filling grid over the tuned hyperparameters; size = 20 requests 20 candidates
xgb_grid <- grid_space_filling(
  trees(),
  tree_depth(),
  learn_rate(range = c(-3, -1)),  # log10 scale
  loss_reduction(),
  sample_size = sample_prop(),
  #mtry may range from 5 up to the number of predictors
  mtry(range = c(5, length(predictors))),
  size = 20
)

#--- Cross-validation for tuning on training set ----
#seeded 10-fold split of the training data
set.seed(2)
cv_folds <- train_data %>% vfold_cv(v = 10)

#--- Tune the model ----
#evaluate every grid candidate on the folds, recording RMSE and R-squared
#and keeping the out-of-fold predictions
tuned_results <- xgb_wflow %>%
  tune_grid(
    resamples = cv_folds,
    grid = xgb_grid,
    metrics = metric_set(rmse, rsq),
    control = control_grid(save_pred = TRUE)
  )

#--- Evaluate on train set ----
#best candidates from the cross-validation on the training data, ranked by R-squared
tuned_results %>% show_best(metric = "rsq")

#--- Select best hyperparameters ----
#note: the selection uses RMSE, while the table above is ranked by R-squared
best_params <- tuned_results %>% select_best(metric = "rmse")

#--- Finalize and fit on full training set ----
final_wflow <- xgb_wflow %>% finalize_workflow(best_params)

#last_fit() takes the original split object, so the metrics come from its test part
final_fit <- last_fit(
  final_wflow,
  split = data_split,
  metrics = metric_set(rsq, rmse)
)

#--- Evaluate on test set ----
final_fit %>% show_best(metric = "rsq")

#compare OLS with same predictors
#linear model on the training data using every column other than fitted
ols_fit = lm(fitted ~ ., data = train_data)
summary(ols_fit)

Meta

#versions
#write_sessioninfo() is a kirkegaard helper; it prints the session info shown below
#(presumably it also writes it to a file -- confirm)
write_sessioninfo()
## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] furrr_0.3.1           future_1.40.0         yardstick_1.3.2      
##  [4] workflowsets_1.1.0    workflows_1.2.0       tune_1.3.0           
##  [7] rsample_1.3.0         recipes_1.3.0         parsnip_1.3.1        
## [10] modeldata_1.4.0       infer_1.0.8           dials_1.4.0          
## [13] scales_1.3.0          broom_1.0.8           tidymodels_1.3.0     
## [16] ebbr_0.1              kirkegaard_2025-05-02 psych_2.5.3          
## [19] assertthat_0.2.1      weights_1.0.4         Hmisc_5.2-3          
## [22] magrittr_2.0.3        lubridate_1.9.4       forcats_1.0.0        
## [25] stringr_1.5.1         dplyr_1.1.4           purrr_1.0.4          
## [28] readr_2.1.5           tidyr_1.3.1           tibble_3.2.1         
## [31] ggplot2_3.5.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1   jsonlite_2.0.0      shape_1.4.6.1      
##   [4] datawizard_1.0.2    jomo_2.7-6          farver_2.1.2       
##   [7] nloptr_2.2.1        rmarkdown_2.29      ragg_1.4.0         
##  [10] vctrs_0.6.5         minqa_1.2.8         base64enc_0.1-3    
##  [13] htmltools_0.5.8.1   haven_2.5.4         Formula_1.2-5      
##  [16] mitml_0.4-5         sass_0.4.10         parallelly_1.43.0  
##  [19] bslib_0.9.0         htmlwidgets_1.6.4   plyr_1.8.9         
##  [22] cachem_1.1.0        lifecycle_1.0.4     iterators_1.0.14   
##  [25] pkgconfig_2.0.3     Matrix_1.7-3        R6_2.6.1           
##  [28] fastmap_1.2.0       rbibutils_2.3       digest_0.6.37      
##  [31] colorspace_2.1-1    textshaping_1.0.0   labeling_0.4.3     
##  [34] timechange_0.3.0    gdata_3.0.1         mgcv_1.9-1         
##  [37] compiler_4.5.0      bit64_4.6.0-1       withr_3.0.2        
##  [40] htmlTable_2.4.3     backports_1.5.0     pan_1.9            
##  [43] MASS_7.3-65         lava_1.8.1          gtools_3.9.5       
##  [46] tools_4.5.0         foreign_0.8-90      future.apply_1.11.3
##  [49] nnet_7.3-20         glue_1.8.0          nlme_3.1-168       
##  [52] grid_4.5.0          checkmate_2.3.2     cluster_2.1.8.1    
##  [55] generics_0.1.3      gtable_0.3.6        tzdb_0.5.0         
##  [58] class_7.3-23        data.table_1.17.0   hms_1.1.3          
##  [61] foreach_1.5.2       pillar_1.10.2       vroom_1.6.5        
##  [64] splines_4.5.0       lhs_1.2.0           lattice_0.22-5     
##  [67] survival_3.8-3      bit_4.6.0           tidyselect_1.2.1   
##  [70] knitr_1.50          reformulas_0.4.0    gridExtra_2.3      
##  [73] stats4_4.5.0        xfun_0.52           hardhat_1.4.1      
##  [76] timeDate_4041.110   stringi_1.8.7       VGAM_1.1-13        
##  [79] DiceDesign_1.10     lazyeval_0.2.2      yaml_2.3.10        
##  [82] boot_1.3-31         evaluate_1.0.3      codetools_0.2-19   
##  [85] cli_3.6.4           rpart_4.1.24        systemfonts_1.2.2  
##  [88] Rdpack_2.6.4        munsell_0.5.1       jquerylib_0.1.4    
##  [91] Rcpp_1.0.14         ggeffects_2.2.1     globals_0.17.0     
##  [94] parallel_4.5.0      gower_1.0.2         GPfit_1.0-9        
##  [97] lme4_1.1-37         listenv_0.9.1       glmnet_4.1-8       
## [100] ipred_0.9-15        prodlim_2025.04.28  insight_1.2.0      
## [103] crayon_1.5.3        rlang_1.1.6         mnormt_2.1.1       
## [106] mice_3.17.0
#write data to file for reuse
d %>% write_rds("data/data_for_reuse_joined.rds", compress = "xz")
# d_ml %>% write_rds("data/data_for_reuse_ml.rds", compress = "xz")

#OSF
#disabled by default; change the condition to TRUE to run the upload manually
if (FALSE) {
  library(osfr)
  
  #login with the token stored in the user's config directory
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  #NOTE: "XXX" is a placeholder and must be replaced with the real project id
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  #figures are saved under "figs/" in this notebook, so that directory is uploaded
  osf_upload(
    osf_proj,
    path = c("data", "figs", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
  )
}