Final Project

Daniel DeBonis

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(recosystem)

The Dataset

Through Kaggle, I was able to find a collection of ratings for, including informative descriptions of board games from a prominent board game resources, boardgamesgeek.com. The database is comprised of approximately 19 million ratings from around 411,000 users of around 22,000 different board games. In total, the data is contained across 9 tables. One table contains the ratings. A second contains based game information such as year released, number of players, and a text description. A third table breaks down ratings by game instead of user, providing the number of times each game was given each rating (0-10 by tenths). Three tables provide themes, mechanics, and subcategories associated with each Game ID. The final three tables are centered around the game makers, looking at associated artists, designers, and publishers, but only given data if the creator is associated with three or more games.

games <- read.csv("C:/Users/ddebo/Downloads/archive(2)/games.csv")
mechanics <- read.csv("C:/Users/ddebo/Downloads/archive(2)/mechanics.csv")
themes <- read.csv("C:/Users/ddebo/Downloads/archive(2)/themes.csv")
subcategories <- read.csv("C:/Users/ddebo/Downloads/archive(2)/subcategories.csv")
designers <- read.csv("C:/Users/ddebo/Downloads/archive(2)/designers_reduced.csv")
ratings <- read.csv('C:/Users/ddebo/Downloads/archive(2)/user_ratings.csv')
head(mechanics)
##   BGGId Alliances Area.Majority...Influence Auction.Bidding Dice.Rolling
## 1     1         1                         1               1            1
## 2     2         0                         0               0            0
## 3     3         0                         1               0            0
## 4     4         0                         1               1            0
## 5     5         0                         0               0            0
## 6     6         0                         0               0            1
##   Hand.Management Simultaneous.Action.Selection Trick.taking Hexagon.Grid
## 1               1                             1            0            0
## 2               0                             0            1            0
## 3               1                             0            0            1
## 4               0                             0            0            0
## 5               1                             0            0            0
## 6               0                             0            0            0
##   Once.Per.Game.Abilities Set.Collection Tile.Placement Action.Points
## 1                       0              0              0             0
## 2                       0              0              0             0
## 3                       1              1              1             0
## 4                       0              1              0             1
## 5                       0              0              1             0
## 6                       0              0              0             0
##   Investment Market Square.Grid Stock.Holding Victory.Points.as.a.Resource
## 1          0      0           0             0                            0
## 2          0      0           0             0                            0
## 3          0      0           0             0                            0
## 4          0      0           0             0                            0
## 5          1      1           1             1                            1
## 6          0      0           0             0                            0
##   Enclosure Pattern.Building Pattern.Recognition Modular.Board
## 1         0                0                   0             0
## 2         0                0                   0             0
## 3         0                0                   0             0
## 4         0                0                   0             0
## 5         0                0                   0             0
## 6         0                0                   0             0
##   Network.and.Route.Building Point.to.Point.Movement Melding.and.Splaying
## 1                          0                       0                    0
## 2                          0                       0                    0
## 3                          0                       0                    0
## 4                          0                       0                    0
## 5                          0                       0                    0
## 6                          0                       0                    0
##   Negotiation Trading Push.Your.Luck Income Race Random.Production
## 1           1       0              0      0    0                 0
## 2           0       0              0      0    0                 0
## 3           0       0              0      0    0                 0
## 4           0       0              0      0    0                 0
## 5           0       0              0      0    0                 0
## 6           0       0              0      0    0                 0
##   Variable.Set.up Roll...Spin.and.Move Variable.Player.Powers Action.Queue Bias
## 1               0                    0                      0            0    0
## 2               0                    0                      0            0    0
## 3               0                    0                      0            0    0
## 4               0                    0                      0            0    0
## 5               0                    0                      0            0    0
## 6               0                    0                      0            0    0
##   Grid.Movement Lose.a.Turn Programmed.Movement
## 1             0           0                   0
## 2             0           0                   0
## 3             0           0                   0
## 4             0           0                   0
## 5             0           0                   0
## 6             0           0                   0
##   Scenario...Mission...Campaign.Game Voting Events Paper.and.Pencil
## 1                                  0      0      0                0
## 2                                  0      0      0                0
## 3                                  0      0      0                0
## 4                                  0      0      0                0
## 5                                  0      0      0                0
## 6                                  0      0      0                0
##   Player.Elimination Role.Playing Movement.Points Simulation
## 1                  0            0               0          0
## 2                  0            0               0          0
## 3                  0            0               0          0
## 4                  0            0               0          0
## 5                  0            0               0          0
## 6                  0            0               0          0
##   Variable.Phase.Order Area.Movement Commodity.Speculation Cooperative.Game
## 1                    0             0                     0                0
## 2                    0             0                     0                0
## 3                    0             0                     0                0
## 4                    0             0                     0                0
## 5                    0             0                     0                0
## 6                    0             0                     0                0
##   Deduction Sudden.Death.Ending Connections Highest.Lowest.Scoring
## 1         0                   0           0                      0
## 2         0                   0           0                      0
## 3         0                   0           0                      0
## 4         0                   0           0                      0
## 5         0                   0           0                      0
## 6         0                   0           0                      0
##   Betting.and.Bluffing Memory Score.and.Reset.Game Layering Map.Addition
## 1                    0      0                    0        0            0
## 2                    0      0                    0        0            0
## 3                    0      0                    0        0            0
## 4                    0      0                    0        0            0
## 5                    0      0                    0        0            0
## 6                    0      0                    0        0            0
##   Secret.Unit.Deployment Increase.Value.of.Unchosen.Resources
## 1                      0                                    0
## 2                      0                                    0
## 3                      0                                    0
## 4                      0                                    0
## 5                      0                                    0
## 6                      0                                    0
##   Ratio...Combat.Results.Table Take.That Team.Based.Game
## 1                            0         0               0
## 2                            0         0               0
## 3                            0         0               0
## 4                            0         0               0
## 5                            0         0               0
## 6                            0         0               0
##   Campaign...Battle.Card.Driven Tech.Trees...Tech.Tracks Player.Judge
## 1                             0                        0            0
## 2                             0                        0            0
## 3                             0                        0            0
## 4                             0                        0            0
## 5                             0                        0            0
## 6                             0                        0            0
##   Chit.Pull.System Three.Dimensional.Movement Action.Drafting
## 1                0                          0               0
## 2                0                          0               0
## 3                0                          0               0
## 4                0                          0               0
## 5                0                          0               0
## 6                0                          0               0
##   Minimap.Resolution Stat.Check.Resolution Action.Timer Pick.up.and.Deliver
## 1                  0                     0            0                   0
## 2                  0                     0            0                   0
## 3                  0                     0            0                   0
## 4                  0                     0            0                   0
## 5                  0                     0            0                   0
## 6                  0                     0            0                   0
##   Map.Deformation Bingo Crayon.Rail.System Multiple.Maps Hidden.Roles
## 1               0     0                  0             0            0
## 2               0     0                  0             0            0
## 3               0     0                  0             0            0
## 4               0     0                  0             0            0
## 5               0     0                  0             0            0
## 6               0     0                  0             0            0
##   Line.Drawing Tug.of.War Pattern.Movement Static.Capture
## 1            0          0                0              0
## 2            0          0                0              0
## 3            0          0                0              0
## 4            0          0                0              0
## 5            0          0                0              0
## 6            0          0                0              0
##   Different.Dice.Movement Chaining Ladder.Climbing Predictive.Bid
## 1                       0        0               0              0
## 2                       0        0               0              0
## 3                       0        0               0              0
## 4                       0        0               0              0
## 5                       0        0               0              0
## 6                       0        0               0              0
##   Solo...Solitaire.Game Line.of.Sight Critical.Hits.and.Failures Interrupts
## 1                     0             0                          0          0
## 2                     0             0                          0          0
## 3                     0             0                          0          0
## 4                     0             0                          0          0
## 5                     0             0                          0          0
## 6                     0             0                          0          0
##   Zone.of.Control Bribery End.Game.Bonuses Area.Impulse Worker.Placement
## 1               0       0                0            0                0
## 2               0       0                0            0                0
## 3               0       0                0            0                0
## 4               0       0                0            0                0
## 5               0       0                0            0                0
## 6               0       0                0            0                0
##   Measurement.Movement Map.Reduction Real.Time Resource.to.Move Mancala
## 1                    0             0         0                0       0
## 2                    0             0         0                0       0
## 3                    0             0         0                0       0
## 4                    0             0         0                0       0
## 5                    0             0         0                0       0
## 6                    0             0         0                0       0
##   Ownership Kill.Steal Hidden.Movement Track.Movement Deck.Construction
## 1         0          0               0              0                 0
## 2         0          0               0              0                 0
## 3         0          0               0              0                 0
## 4         0          0               0              0                 0
## 5         0          0               0              0                 0
## 6         0          0               0              0                 0
##   Drafting TableauBuilding Prisoner.s.Dilemma Hidden.Victory.Points
## 1        0               0                  0                     0
## 2        0               0                  0                     0
## 3        0               0                  0                     0
## 4        0               0                  0                     0
## 5        0               0                  0                     0
## 6        0               0                  0                     0
##   Movement.Template Slide.Push Targeted.Clues Command.Cards Grid.Coverage
## 1                 0          0              0             0             0
## 2                 0          0              0             0             0
## 3                 0          0              0             0             0
## 4                 0          0              0             0             0
## 5                 0          0              0             0             0
## 6                 0          0              0             0             0
##   Relative.Movement Action.Event Card.Play.Conflict.Resolution
## 1                 0            0                             0
## 2                 0            0                             0
## 3                 0            0                             0
## 4                 0            0                             0
## 5                 0            0                             0
## 6                 0            0                             0
##   I.Cut..You.Choose Die.Icon.Resolution Elapsed.Real.Time.Ending
## 1                 0                   0                        0
## 2                 0                   0                        0
## 3                 0                   0                        0
## 4                 0                   0                        0
## 5                 0                   0                        0
## 6                 0                   0                        0
##   Advantage.Token Storytelling Catch.the.Leader
## 1               0            0                0
## 2               0            0                0
## 3               0            0                0
## 4               0            0                0
## 5               0            0                0
## 6               0            0                0
##   Roles.with.Asymmetric.Information Traitor.Game Moving.Multiple.Units
## 1                                 0            0                     0
## 2                                 0            0                     0
## 3                                 0            0                     0
## 4                                 0            0                     0
## 5                                 0            0                     0
## 6                                 0            0                     0
##   Semi.Cooperative.Game Communication.Limits Time.Track Speed.Matching
## 1                     0                    0          0              0
## 2                     0                    0          0              0
## 3                     0                    0          0              0
## 4                     0                    0          0              0
## 5                     0                    0          0              0
## 6                     0                    0          0              0
##   Cube.Tower Re.rolling.and.Locking Impulse.Movement Loans Delayed.Purchase
## 1          0                      0                0     0                0
## 2          0                      0                0     0                0
## 3          0                      0                0     0                0
## 4          0                      0                0     0                0
## 5          0                      0                0     0                0
## 6          0                      0                0     0                0
##   Deck..Bag..and.Pool.Building Move.Through.Deck Single.Loser.Game Matching
## 1                            0                 0                 0        0
## 2                            0                 0                 0        0
## 3                            0                 0                 0        0
## 4                            0                 0                 0        0
## 5                            0                 0                 0        0
## 6                            0                 0                 0        0
##   Induction Physical.Removal Narrative.Choice...Paragraph Pieces.as.Map Follow
## 1         0                0                            0             0      0
## 2         0                0                            0             0      0
## 3         0                0                            0             0      0
## 4         0                0                            0             0      0
## 5         0                0                            0             0      0
## 6         0                0                            0             0      0
##   Finale.Ending Order.Counters Contracts Passed.Action.Token King.of.the.Hill
## 1             0              0         0                   0                0
## 2             0              0         0                   0                0
## 3             0              0         0                   0                0
## 4             0              0         0                   0                0
## 5             0              0         0                   0                0
## 6             0              0         0                   0                0
##   Action.Retrieval Force.Commitment Rondel Automatic.Resource.Growth
## 1                0                0      0                         0
## 2                0                0      0                         0
## 3                0                0      0                         0
## 4                0                0      0                         0
## 5                0                0      0                         0
## 6                0                0      0                         0
##   Legacy.Game Dexterity Physical
## 1           0         0        0
## 2           0         0        0
## 3           0         0        0
## 4           0         0        0
## 5           0         0        0
## 6           0         0        0
head(ratings)
##    BGGId Rating    Username
## 1 213788      8   Tonydorrf
## 2 213788      8  tachyon14k
## 3 213788      8    Ungotter
## 4 213788      8 brainlocki3
## 5 213788      8        PPMP
## 6 213788      8  Dychotimer
games_features <- games |>
  left_join(mechanics, by = "BGGId") |>
  left_join(themes, by = "BGGId") |>
  left_join(subcategories, by = "BGGId") 
  #left_join(designers, by = "BGGId") #added too many columns with too little impact on recs/RMSE
glimpse(games_features)
## Rows: 21,925
## Columns: 432
## $ BGGId                                                        <int> 1, 2, 3, …
## $ Name                                                         <chr> "Die Mach…
## $ Description                                                  <chr> "die mach…
## $ YearPublished                                                <int> 1986, 198…
## $ GameWeight                                                   <dbl> 4.3206, 1…
## $ AvgRating                                                    <dbl> 7.61428, …
## $ BayesAvgRating                                               <dbl> 7.10363, …
## $ StdDev                                                       <dbl> 1.57979, …
## $ MinPlayers                                                   <int> 3, 3, 2, …
## $ MaxPlayers                                                   <int> 5, 4, 4, …
## $ ComAgeRec                                                    <dbl> 14.366667…
## $ LanguageEase                                                 <dbl> 1.395833,…
## $ BestPlayers                                                  <int> 5, 0, 3, …
## $ GoodPlayers                                                  <chr> "['4', '5…
## $ NumOwned                                                     <int> 7498, 128…
## $ NumWant                                                      <int> 501, 72, …
## $ NumWish                                                      <int> 2039, 191…
## $ NumWeightVotes                                               <int> 761, 54, …
## $ MfgPlaytime                                                  <int> 240, 30, …
## $ ComMinPlaytime                                               <int> 240, 30, …
## $ ComMaxPlaytime                                               <int> 240, 30, …
## $ MfgAgeRec                                                    <int> 14, 12, 1…
## $ NumUserRatings                                               <int> 5354, 562…
## $ NumComments                                                  <int> 0, 0, 0, …
## $ NumAlternates                                                <int> 2, 0, 6, …
## $ NumExpansions                                                <int> 0, 0, 0, …
## $ NumImplementations                                           <int> 0, 2, 1, …
## $ IsReimplementation                                           <int> 0, 1, 0, …
## $ Family                                                       <chr> "Classic …
## $ Kickstarted                                                  <int> 0, 0, 0, …
## $ ImagePath                                                    <chr> "https://…
## $ Rank.boardgame                                               <int> 316, 3993…
## $ Rank.strategygames                                           <int> 180, 1577…
## $ Rank.abstracts                                               <int> 21926, 21…
## $ Rank.familygames                                             <int> 21926, 21…
## $ Rank.thematic                                                <int> 21926, 21…
## $ Rank.cgs                                                     <int> 21926, 21…
## $ Rank.wargames                                                <int> 21926, 21…
## $ Rank.partygames                                              <int> 21926, 21…
## $ Rank.childrensgames                                          <int> 21926, 21…
## $ Cat.Thematic                                                 <int> 0, 0, 0, …
## $ Cat.Strategy                                                 <int> 1, 1, 1, …
## $ Cat.War                                                      <int> 0, 0, 0, …
## $ Cat.Family                                                   <int> 0, 0, 0, …
## $ Cat.CGS                                                      <int> 0, 0, 0, …
## $ Cat.Abstract                                                 <int> 0, 0, 0, …
## $ Cat.Party                                                    <int> 0, 0, 0, …
## $ Cat.Childrens                                                <int> 0, 0, 0, …
## $ Alliances                                                    <int> 1, 0, 0, …
## $ Area.Majority...Influence                                    <int> 1, 0, 1, …
## $ Auction.Bidding                                              <int> 1, 0, 0, …
## $ Dice.Rolling                                                 <int> 1, 0, 0, …
## $ Hand.Management                                              <int> 1, 0, 1, …
## $ Simultaneous.Action.Selection                                <int> 1, 0, 0, …
## $ Trick.taking                                                 <int> 0, 1, 0, …
## $ Hexagon.Grid                                                 <int> 0, 0, 1, …
## $ Once.Per.Game.Abilities                                      <int> 0, 0, 1, …
## $ Set.Collection                                               <int> 0, 0, 1, …
## $ Tile.Placement                                               <int> 0, 0, 1, …
## $ Action.Points                                                <int> 0, 0, 0, …
## $ Investment                                                   <int> 0, 0, 0, …
## $ Market                                                       <int> 0, 0, 0, …
## $ Square.Grid                                                  <int> 0, 0, 0, …
## $ Stock.Holding                                                <int> 0, 0, 0, …
## $ Victory.Points.as.a.Resource                                 <int> 0, 0, 0, …
## $ Enclosure                                                    <int> 0, 0, 0, …
## $ Pattern.Building                                             <int> 0, 0, 0, …
## $ Pattern.Recognition                                          <int> 0, 0, 0, …
## $ Modular.Board                                                <int> 0, 0, 0, …
## $ Network.and.Route.Building                                   <int> 0, 0, 0, …
## $ Point.to.Point.Movement                                      <int> 0, 0, 0, …
## $ Melding.and.Splaying                                         <int> 0, 0, 0, …
## $ Negotiation                                                  <int> 1, 0, 0, …
## $ Trading                                                      <int> 0, 0, 0, …
## $ Push.Your.Luck                                               <int> 0, 0, 0, …
## $ Income                                                       <int> 0, 0, 0, …
## $ Race                                                         <int> 0, 0, 0, …
## $ Random.Production                                            <int> 0, 0, 0, …
## $ Variable.Set.up                                              <int> 0, 0, 0, …
## $ Roll...Spin.and.Move                                         <int> 0, 0, 0, …
## $ Variable.Player.Powers                                       <int> 0, 0, 0, …
## $ Action.Queue                                                 <int> 0, 0, 0, …
## $ Bias                                                         <int> 0, 0, 0, …
## $ Grid.Movement                                                <int> 0, 0, 0, …
## $ Lose.a.Turn                                                  <int> 0, 0, 0, …
## $ Programmed.Movement                                          <int> 0, 0, 0, …
## $ Scenario...Mission...Campaign.Game                           <int> 0, 0, 0, …
## $ Voting                                                       <int> 0, 0, 0, …
## $ Events                                                       <int> 0, 0, 0, …
## $ Paper.and.Pencil                                             <int> 0, 0, 0, …
## $ Player.Elimination                                           <int> 0, 0, 0, …
## $ Role.Playing                                                 <int> 0, 0, 0, …
## $ Movement.Points                                              <int> 0, 0, 0, …
## $ Simulation                                                   <int> 0, 0, 0, …
## $ Variable.Phase.Order                                         <int> 0, 0, 0, …
## $ Area.Movement                                                <int> 0, 0, 0, …
## $ Commodity.Speculation                                        <int> 0, 0, 0, …
## $ Cooperative.Game                                             <int> 0, 0, 0, …
## $ Deduction                                                    <int> 0, 0, 0, …
## $ Sudden.Death.Ending                                          <int> 0, 0, 0, …
## $ Connections                                                  <int> 0, 0, 0, …
## $ Highest.Lowest.Scoring                                       <int> 0, 0, 0, …
## $ Betting.and.Bluffing                                         <int> 0, 0, 0, …
## $ Memory                                                       <int> 0, 0, 0, …
## $ Score.and.Reset.Game                                         <int> 0, 0, 0, …
## $ Layering                                                     <int> 0, 0, 0, …
## $ Map.Addition                                                 <int> 0, 0, 0, …
## $ Secret.Unit.Deployment                                       <int> 0, 0, 0, …
## $ Increase.Value.of.Unchosen.Resources                         <int> 0, 0, 0, …
## $ Ratio...Combat.Results.Table                                 <int> 0, 0, 0, …
## $ Take.That                                                    <int> 0, 0, 0, …
## $ Team.Based.Game                                              <int> 0, 0, 0, …
## $ Campaign...Battle.Card.Driven                                <int> 0, 0, 0, …
## $ Tech.Trees...Tech.Tracks                                     <int> 0, 0, 0, …
## $ Player.Judge                                                 <int> 0, 0, 0, …
## $ Chit.Pull.System                                             <int> 0, 0, 0, …
## $ Three.Dimensional.Movement                                   <int> 0, 0, 0, …
## $ Action.Drafting                                              <int> 0, 0, 0, …
## $ Minimap.Resolution                                           <int> 0, 0, 0, …
## $ Stat.Check.Resolution                                        <int> 0, 0, 0, …
## $ Action.Timer                                                 <int> 0, 0, 0, …
## $ Pick.up.and.Deliver                                          <int> 0, 0, 0, …
## $ Map.Deformation                                              <int> 0, 0, 0, …
## $ Bingo                                                        <int> 0, 0, 0, …
## $ Crayon.Rail.System                                           <int> 0, 0, 0, …
## $ Multiple.Maps                                                <int> 0, 0, 0, …
## $ Hidden.Roles                                                 <int> 0, 0, 0, …
## $ Line.Drawing                                                 <int> 0, 0, 0, …
## $ Tug.of.War                                                   <int> 0, 0, 0, …
## $ Pattern.Movement                                             <int> 0, 0, 0, …
## $ Static.Capture                                               <int> 0, 0, 0, …
## $ Different.Dice.Movement                                      <int> 0, 0, 0, …
## $ Chaining                                                     <int> 0, 0, 0, …
## $ Ladder.Climbing                                              <int> 0, 0, 0, …
## $ Predictive.Bid                                               <int> 0, 0, 0, …
## $ Solo...Solitaire.Game                                        <int> 0, 0, 0, …
## $ Line.of.Sight                                                <int> 0, 0, 0, …
## $ Critical.Hits.and.Failures                                   <int> 0, 0, 0, …
## $ Interrupts                                                   <int> 0, 0, 0, …
## $ Zone.of.Control                                              <int> 0, 0, 0, …
## $ Bribery                                                      <int> 0, 0, 0, …
## $ End.Game.Bonuses                                             <int> 0, 0, 0, …
## $ Area.Impulse                                                 <int> 0, 0, 0, …
## $ Worker.Placement                                             <int> 0, 0, 0, …
## $ Measurement.Movement                                         <int> 0, 0, 0, …
## $ Map.Reduction                                                <int> 0, 0, 0, …
## $ Real.Time                                                    <int> 0, 0, 0, …
## $ Resource.to.Move                                             <int> 0, 0, 0, …
## $ Mancala                                                      <int> 0, 0, 0, …
## $ Ownership                                                    <int> 0, 0, 0, …
## $ Kill.Steal                                                   <int> 0, 0, 0, …
## $ Hidden.Movement                                              <int> 0, 0, 0, …
## $ Track.Movement                                               <int> 0, 0, 0, …
## $ Deck.Construction                                            <int> 0, 0, 0, …
## $ Drafting                                                     <int> 0, 0, 0, …
## $ TableauBuilding                                              <int> 0, 0, 0, …
## $ Prisoner.s.Dilemma                                           <int> 0, 0, 0, …
## $ Hidden.Victory.Points                                        <int> 0, 0, 0, …
## $ Movement.Template                                            <int> 0, 0, 0, …
## $ Slide.Push                                                   <int> 0, 0, 0, …
## $ Targeted.Clues                                               <int> 0, 0, 0, …
## $ Command.Cards                                                <int> 0, 0, 0, …
## $ Grid.Coverage                                                <int> 0, 0, 0, …
## $ Relative.Movement                                            <int> 0, 0, 0, …
## $ Action.Event                                                 <int> 0, 0, 0, …
## $ Card.Play.Conflict.Resolution                                <int> 0, 0, 0, …
## $ I.Cut..You.Choose                                            <int> 0, 0, 0, …
## $ Die.Icon.Resolution                                          <int> 0, 0, 0, …
## $ Elapsed.Real.Time.Ending                                     <int> 0, 0, 0, …
## $ Advantage.Token                                              <int> 0, 0, 0, …
## $ Storytelling                                                 <int> 0, 0, 0, …
## $ Catch.the.Leader                                             <int> 0, 0, 0, …
## $ Roles.with.Asymmetric.Information                            <int> 0, 0, 0, …
## $ Traitor.Game                                                 <int> 0, 0, 0, …
## $ Moving.Multiple.Units                                        <int> 0, 0, 0, …
## $ Semi.Cooperative.Game                                        <int> 0, 0, 0, …
## $ Communication.Limits                                         <int> 0, 0, 0, …
## $ Time.Track                                                   <int> 0, 0, 0, …
## $ Speed.Matching                                               <int> 0, 0, 0, …
## $ Cube.Tower                                                   <int> 0, 0, 0, …
## $ Re.rolling.and.Locking                                       <int> 0, 0, 0, …
## $ Impulse.Movement                                             <int> 0, 0, 0, …
## $ Loans                                                        <int> 0, 0, 0, …
## $ Delayed.Purchase                                             <int> 0, 0, 0, …
## $ Deck..Bag..and.Pool.Building                                 <int> 0, 0, 0, …
## $ Move.Through.Deck                                            <int> 0, 0, 0, …
## $ Single.Loser.Game                                            <int> 0, 0, 0, …
## $ Matching                                                     <int> 0, 0, 0, …
## $ Induction                                                    <int> 0, 0, 0, …
## $ Physical.Removal                                             <int> 0, 0, 0, …
## $ Narrative.Choice...Paragraph                                 <int> 0, 0, 0, …
## $ Pieces.as.Map                                                <int> 0, 0, 0, …
## $ Follow                                                       <int> 0, 0, 0, …
## $ Finale.Ending                                                <int> 0, 0, 0, …
## $ Order.Counters                                               <int> 0, 0, 0, …
## $ Contracts                                                    <int> 0, 0, 0, …
## $ Passed.Action.Token                                          <int> 0, 0, 0, …
## $ King.of.the.Hill                                             <int> 0, 0, 0, …
## $ Action.Retrieval                                             <int> 0, 0, 0, …
## $ Force.Commitment                                             <int> 0, 0, 0, …
## $ Rondel                                                       <int> 0, 0, 0, …
## $ Automatic.Resource.Growth                                    <int> 0, 0, 0, …
## $ Legacy.Game                                                  <int> 0, 0, 0, …
## $ Dexterity                                                    <int> 0, 0, 0, …
## $ Physical                                                     <int> 0, 0, 0, …
## $ Adventure                                                    <int> 0, 0, 0, …
## $ Fantasy                                                      <int> 0, 1, 0, …
## $ Fighting                                                     <int> 0, 0, 0, …
## $ Environmental                                                <int> 0, 0, 0, …
## $ Medical                                                      <int> 0, 0, 0, …
## $ Economic                                                     <int> 1, 0, 0, …
## $ Industry...Manufacturing                                     <int> 0, 0, 0, …
## $ Transportation                                               <int> 0, 0, 0, …
## $ Science.Fiction                                              <int> 0, 0, 0, …
## $ Space.Exploration                                            <int> 0, 0, 0, …
## $ Civilization                                                 <int> 0, 0, 0, …
## $ Civil.War                                                    <int> 0, 0, 0, …
## $ Movies...TV...Radio.theme                                    <int> 0, 0, 0, …
## $ Novel.based                                                  <int> 0, 0, 0, …
## $ Age.of.Reason                                                <int> 0, 0, 0, …
## $ Mythology                                                    <int> 0, 0, 0, …
## $ Renaissance                                                  <int> 0, 0, 0, …
## $ American.West                                                <int> 0, 0, 0, …
## $ Animals                                                      <int> 0, 0, 0, …
## $ Modern.Warfare                                               <int> 0, 0, 0, …
## $ Medieval                                                     <int> 0, 0, 1, …
## $ Ancient                                                      <int> 0, 0, 0, …
## $ Nautical                                                     <int> 0, 0, 0, …
## $ Post.Napoleonic                                              <int> 0, 0, 0, …
## $ Horror                                                       <int> 0, 0, 0, …
## $ Farming                                                      <int> 0, 0, 0, …
## $ Religious                                                    <int> 0, 0, 0, …
## $ Travel                                                       <int> 0, 0, 0, …
## $ Murder.Mystery                                               <int> 0, 0, 0, …
## $ Pirates                                                      <int> 0, 0, 0, …
## $ Comic.Book...Strip                                           <int> 0, 0, 0, …
## $ Mature...Adult                                               <int> 0, 0, 0, …
## $ Video.Game.Theme                                             <int> 0, 0, 0, …
## $ Spies.Secret.Agents                                          <int> 0, 0, 0, …
## $ Arabian                                                      <int> 0, 0, 0, …
## $ Prehistoric                                                  <int> 0, 0, 0, …
## $ Trains                                                       <int> 0, 0, 0, …
## $ Aviation...Flight                                            <int> 0, 0, 0, …
## $ Zombies                                                      <int> 0, 0, 0, …
## $ World.War.II                                                 <int> 0, 0, 0, …
## $ Racing                                                       <int> 0, 0, 0, …
## $ Pike.and.Shot                                                <int> 0, 0, 0, …
## $ World.War.I                                                  <int> 0, 0, 0, …
## $ Humor                                                        <int> 0, 0, 0, …
## $ Sports                                                       <int> 0, 0, 0, …
## $ Mafia                                                        <int> 0, 0, 0, …
## $ American.Indian.Wars                                         <int> 0, 0, 0, …
## $ Napoleonic                                                   <int> 0, 0, 0, …
## $ American.Revolutionary.War                                   <int> 0, 0, 0, …
## $ Vietnam.War                                                  <int> 0, 0, 0, …
## $ American.Civil.War                                           <int> 0, 0, 0, …
## $ Number                                                       <int> 0, 0, 0, …
## $ Trivia                                                       <int> 0, 0, 0, …
## $ Music                                                        <int> 0, 0, 0, …
## $ Korean.War                                                   <int> 0, 0, 0, …
## $ City.Building                                                <int> 0, 0, 0, …
## $ Political                                                    <int> 1, 0, 0, …
## $ Math                                                         <int> 0, 0, 0, …
## $ Maze                                                         <int> 0, 0, 0, …
## $ Theme_Food...Cooking                                         <int> 0, 0, 0, …
## $ Theme_Superheroes                                            <int> 0, 0, 0, …
## $ Theme_Anime...Manga                                          <int> 0, 0, 0, …
## $ Theme_Cthulhu.Mythos                                         <int> 0, 0, 0, …
## $ Theme_Alternate.History                                      <int> 0, 0, 0, …
## $ Theme_Anthropomorphic.Animals                                <int> 0, 0, 0, …
## $ Theme_Vikings                                                <int> 0, 0, 0, …
## $ Theme_Post.Apocalyptic                                       <int> 0, 0, 0, …
## $ Theme_Time.Travel                                            <int> 0, 0, 0, …
## $ Theme_Colonial                                               <int> 0, 0, 0, …
## $ Theme_Mystery...Cri                                          <int> 0, 0, 0, …
## $ Theme_Robots                                                 <int> 0, 0, 0, …
## $ Theme_Retro                                                  <int> 0, 0, 0, …
## $ Theme_Mad.Science...Mad.Scientist                            <int> 0, 0, 0, …
## $ Theme_Mining                                                 <int> 0, 0, 0, …
## $ Theme_Art                                                    <int> 0, 0, 0, …
## $ Theme_Archaeology...Paleontology                             <int> 0, 0, 0, …
## $ Theme_Witches                                                <int> 0, 0, 0, …
## $ Theme_Deserts                                                <int> 0, 0, 0, …
## $ Theme_Tropical                                               <int> 0, 0, 0, …
## $ Theme_Steampunk                                              <int> 0, 0, 0, …
## $ Theme_Gardening                                              <int> 0, 0, 0, …
## $ Theme_Sieg                                                   <int> 0, 0, 0, …
## $ Theme_Flowers                                                <int> 0, 0, 0, …
## $ Theme_Natur                                                  <int> 0, 0, 0, …
## $ Theme_Native.Americans...First.Peoples                       <int> 0, 0, 0, …
## $ Theme_Circus                                                 <int> 0, 0, 0, …
## $ Theme_Ninjas                                                 <int> 0, 0, 0, …
## $ Theme_King.Arthur...The.Knights.of.the.Round.Table...Camelot <int> 0, 0, 0, …
## $ Theme_Cyberpunk                                              <int> 0, 0, 0, …
## $ Theme_Submarines                                             <int> 0, 0, 0, …
## $ Theme_Construction                                           <int> 0, 0, 0, …
## $ Theme_Samurai                                                <int> 0, 0, 1, …
## $ Theme_Fantasy.Sports                                         <int> 0, 0, 0, …
## $ Theme_Love...Romanc                                          <int> 0, 0, 0, …
## $ Theme_Biology                                                <int> 0, 0, 0, …
## $ Theme_Kaiju                                                  <int> 0, 0, 0, …
## $ Theme_Gladiators                                             <int> 0, 0, 0, …
## $ Theme_City                                                   <int> 0, 0, 0, …
## $ Theme_Villainy                                               <int> 0, 0, 0, …
## $ Theme_Weather                                                <int> 0, 0, 0, …
## $ Theme_Amusement.Parks...Theme.Parks                          <int> 0, 0, 0, …
## $ Theme_Airships...Blimps...Dirigibles...Zeppelins             <int> 0, 0, 0, …
## $ Theme_Spooky.Old.Houses                                      <int> 0, 0, 0, …
## $ Theme_Mech.Warfar                                            <int> 0, 0, 0, …
## $ Theme_Polic                                                  <int> 0, 0, 0, …
## $ Theme_Books...Libraries                                      <int> 0, 0, 0, …
## $ Theme_Alchemy                                                <int> 0, 0, 0, …
## $ Theme_Tropical.Islands                                       <int> 0, 0, 0, …
## $ Theme_Boardgaming                                            <int> 0, 0, 0, …
## $ Theme_Oil...Gas...Petroleu                                   <int> 0, 0, 0, …
## $ Theme_Arcade.Video.Games                                     <int> 0, 0, 0, …
## $ Theme_Safaris                                                <int> 0, 0, 0, …
## $ Theme_Dreams...Nightmares                                    <int> 0, 0, 0, …
## $ Theme_Evolution                                              <int> 0, 0, 0, …
## $ Theme_Survival                                               <int> 0, 0, 0, …
## $ Theme_Fruit                                                  <int> 0, 0, 0, …
## $ Theme_Jail...Prison..Modern.                                 <int> 0, 0, 0, …
## $ Theme_Movie.Industry                                         <int> 0, 0, 0, …
## $ Theme_Animal.Battles                                         <int> 0, 0, 0, …
## $ Theme_Firefighting                                           <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Nintendo                             <int> 0, 0, 0, …
## $ Theme_Climate.Chang                                          <int> 0, 0, 0, …
## $ Theme_Helicopters                                            <int> 0, 0, 0, …
## $ Theme_Psychology                                             <int> 0, 0, 0, …
## $ Theme_Sci.Fi.Sports                                          <int> 0, 0, 0, …
## $ Theme_Sewing...Knitting...Cloth.Making                       <int> 0, 0, 0, …
## $ Theme_Beaches                                                <int> 0, 0, 0, …
## $ Theme_Under.the.Sea                                          <int> 0, 0, 0, …
## $ Theme_Aztecs                                                 <int> 0, 0, 0, …
## $ Theme_Trees.and.Forests                                      <int> 0, 0, 0, …
## $ Theme_Mayans                                                 <int> 0, 0, 0, …
## $ Theme_Chibis                                                 <int> 0, 0, 0, …
## $ Theme_US.National.Parks                                      <int> 0, 0, 0, …
## $ Theme_Scienc                                                 <int> 0, 0, 0, …
## $ Theme_Chemistry                                              <int> 0, 0, 0, …
## $ Theme_Volcanoes                                              <int> 0, 0, 0, …
## $ Theme_Fictional.Games                                        <int> 0, 0, 0, …
## $ Theme_Druids                                                 <int> 0, 0, 0, …
## $ Theme_Music.Making...Makers                                  <int> 0, 0, 0, …
## $ Theme_Psychic.Powers                                         <int> 0, 0, 0, …
## $ Theme_Trash...Garbag                                         <int> 0, 0, 0, …
## $ Theme_Silk.Road                                              <int> 0, 0, 0, …
## $ Theme_Cereal.Games                                           <int> 0, 0, 0, …
## $ Theme_Automotive.Industry                                    <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Tetris                               <int> 0, 0, 0, …
## $ Theme_TV.Detectives                                          <int> 0, 0, 0, …
## $ Theme_Romance.of.the.Three.Kingdoms                          <int> 0, 0, 0, …
## $ Theme_Trucks                                                 <int> 0, 0, 0, …
## $ Theme_Hanseatic.Leagu                                        <int> 0, 0, 0, …
## $ Theme_Chivalry...Jousting...Tournaments..Medieval.Europe.    <int> 0, 0, 0, …
## $ Theme_Cannibals...Cannibalis                                 <int> 0, 0, 0, …
## $ Theme_Canals                                                 <int> 0, 0, 0, …
## $ Theme_My.Best.Lif                                            <int> 0, 0, 0, …
## $ Theme_Nuclear.option                                         <int> 0, 0, 0, …
## $ Theme_Astronomy                                              <int> 0, 0, 0, …
## $ Theme_Cemeteries...Graveyards                                <int> 0, 0, 0, …
## $ Theme_Movies                                                 <int> 0, 0, 0, …
## $ Theme_Hackers                                                <int> 0, 0, 0, …
## $ Theme_Pub..Bars..Bistros...                                  <int> 0, 0, 0, …
## $ Theme_Jewelry                                                <int> 0, 0, 0, …
## $ Theme_School...College...University                          <int> 0, 0, 0, …
## $ Theme_UFOs                                                   <int> 0, 0, 0, …
## $ Theme_Templ                                                  <int> 0, 0, 0, …
## $ Theme_Mail...Stamps...The.Post.Offic                         <int> 0, 0, 0, …
## $ Theme_Memes                                                  <int> 0, 0, 0, …
## $ Theme_Computer...Information.Technology.Industry             <int> 0, 0, 0, …
## $ Theme_Attorneys...Courts                                     <int> 0, 0, 0, …
## $ Theme_Hot.Air.Balloons                                       <int> 0, 0, 0, …
## $ Theme_Journalis                                              <int> 0, 0, 0, …
## $ Theme_Motorcycles                                            <int> 0, 0, 0, …
## $ Theme_Rubik.s.Cub                                            <int> 0, 0, 0, …
## $ Theme_Dolls                                                  <int> 0, 0, 0, …
## $ Theme_Traffic...Driving                                      <int> 0, 0, 0, …
## $ Theme_FIFA.World.Cup                                         <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Pokémon                              <int> 0, 0, 0, …
## $ Theme_Floating.islands.in.the.sky                            <int> 0, 0, 0, …
## $ Theme_Dieselpunk                                             <int> 0, 0, 0, …
## $ Theme_Disney.Theme.Parks                                     <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Resident.Evil                        <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..SEGA                                 <int> 0, 0, 0, …
## $ Theme_Teaching.Programming                                   <int> 0, 0, 0, …
## $ Theme_Battle.Royal                                           <int> 0, 0, 0, …
## $ Theme_Earthquakes                                            <int> 0, 0, 0, …
## $ Theme_Bacteria                                               <int> 0, 0, 0, …
## $ Theme_Painting...Paintings                                   <int> 0, 0, 0, …
## $ Theme_Television..TV..Industry                               <int> 0, 0, 0, …
## $ Theme_Knights.Templar                                        <int> 0, 0, 0, …
## $ Theme_African.Americans                                      <int> 0, 0, 0, …
## $ Theme_Hell                                                   <int> 0, 0, 0, …
## $ Theme_Tiki.Cultur                                            <int> 0, 0, 0, …
## $ Theme_Astrology                                              <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..The.Oregon.Trail                     <int> 0, 0, 0, …
## $ Theme_Māori                                                  <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Super.Mario.Bros.                    <int> 0, 0, 0, …
## $ Theme_Mushrooms                                              <int> 0, 0, 0, …
## $ Theme_Pulp                                                   <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Minecraft                            <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Final.Fantasy                        <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Carmen.Sandiego                      <int> 0, 0, 0, …
## $ Theme_Care.Bears                                             <int> 0, 0, 0, …
## $ Theme_Hike.Hiking                                            <int> 0, 0, 0, …
## $ Theme_Inuit.Peoples                                          <int> 0, 0, 0, …
## $ Theme_Perfu                                                  <int> 0, 0, 0, …
## $ Theme_Camping                                                <int> 0, 0, 0, …
## $ Theme_Latin.American.Political.Games                         <int> 0, 0, 0, …
## $ Theme_Spanish.Political.Games                                <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Doo                                  <int> 0, 0, 0, …
## $ Theme_Fashion                                                <int> 0, 0, 0, …
## $ Theme_Geocaching                                             <int> 0, 0, 0, …
## $ Theme_Ecology                                                <int> 0, 0, 0, …
## $ Theme_Chernobyl                                              <int> 0, 0, 0, …
## $ Theme_Photography                                            <int> 0, 0, 0, …
## $ Theme_French.Foreign.Legion                                  <int> 0, 0, 0, …
## $ Theme_Cruise.ships                                           <int> 0, 0, 0, …
## $ Theme_Apache.Tribes                                          <int> 0, 0, 0, …
## $ Theme_Rivers                                                 <int> 0, 0, 0, …
## $ Theme_Flags.identification                                   <int> 0, 0, 0, …
## $ Exploration                                                  <int> 0, 0, 0, …
## $ Miniatures                                                   <int> 0, 0, 0, …
## $ Territory.Building                                           <int> 0, 0, 0, …
## $ Card.Game                                                    <int> 0, 1, 0, …
## $ Educational                                                  <int> 0, 0, 0, …
## $ Puzzle                                                       <int> 0, 0, 0, …
## $ Collectible.Components                                       <int> 0, 0, 0, …
## $ Word.Game                                                    <int> 0, 0, 0, …
## $ Print...Play                                                 <int> 0, 0, 0, …
## $ Electronic                                                   <int> 0, 0, 0, …
games_features$Name <- iconv(games_features$Name, from = "", to = "UTF-8")
#normalizing scaled values
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
games_features <- games_features |>
  mutate(across(c(GameWeight, AvgRating, BayesAvgRating),
                ~ rescale(., to = c(0,1), na.rm = TRUE)))
unique(games_features$BestPlayers)
##  [1]  5  0  3  4  6  2  7  8  1 13 12 15 14  9
# creating dummy variables to represent player size
games_features <- games_features|>
  mutate(players_1 = ifelse(MinPlayers <= 1 & MaxPlayers >= 1, 1, 0),
         players_2 = ifelse(MinPlayers <= 2 & MaxPlayers >= 2, 1, 0),
         players_3 = ifelse(MinPlayers <= 3 & MaxPlayers >= 3, 1, 0),
         players_4 = ifelse(MinPlayers <= 4 & MaxPlayers >= 4, 1, 0),
         players_5 = ifelse(MinPlayers <= 5 & MaxPlayers >= 5, 1, 0),
         players_6 = ifelse(MinPlayers <= 6 & MaxPlayers >= 6, 1, 0),
         players_7 = ifelse(MinPlayers <= 7 & MaxPlayers >= 7, 1, 0),
         players_8plus = ifelse(MaxPlayers >= 8, 1, 0))
# creating dummy variables to represent length
games_features <- games_features |>
  mutate(playtime_short = ifelse(ComMaxPlaytime <= 30, 1, 0),
         playtime_medium = ifelse(ComMaxPlaytime > 30 & ComMaxPlaytime <= 90, 1, 0),
         playtime_long = ifelse(ComMaxPlaytime > 90 & ComMaxPlaytime <=240, 1, 0),
         playtime_epic = ifelse(ComMaxPlaytime > 240,1,0))

Content filtering

#feature matrix
feature_matrix <- games_features |>
  select(-c(Description, YearPublished, StdDev, MinPlayers, MaxPlayers, BestPlayers, GoodPlayers, ComAgeRec, NumOwned, NumWant, NumWish, NumWeightVotes, MfgPlaytime, ComMinPlaytime, ComMaxPlaytime, MfgAgeRec, NumUserRatings, NumComments, NumAlternates, NumExpansions, NumImplementations, IsReimplementation, Family, ImagePath, Rank.boardgame, Rank.strategygames, Rank.abstracts, Rank.familygames, Rank.partygames, Rank.thematic, Rank.cgs, Rank.wargames, Rank.childrensgames, LanguageEase))
rownames(feature_matrix) <- games$BGGId
fmn <- feature_matrix |>
  select(-c(Name, BGGId))
fmn <- as.matrix(fmn)
fmn_scaled <- scale(fmn)
library(proxy)
## 
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix
#similarity_matrix <- proxy::simil(fmn, method = "cosine")
# switched to only need to compute similarity for one game at a time for processing ease

At this stage, rather than computing a full similarity matrix, to save memory I will only generate a similarity vector to a given game that is input in order to find games of a similar content type.

recommend_games <- function(game_name, fmn_scaled, games_features, top_n = 5) {
  if (!(game_name %in% games_features$Name)) {
    stop("Game not found.")
  }

  idx <- which(games_features$Name == game_name)
  target_vector <- fmn_scaled[idx, ]

  # Normalize target vector
  target_norm <- sqrt(sum(target_vector^2))
  target_vector_norm <- target_vector / target_norm

  # Normalize all rows in fmn_scaled
  fmn_norms <- sqrt(rowSums(fmn_scaled^2))
  fmn_scaled_norm <- fmn_scaled / fmn_norms

  # Compute cosine similarity as dot product of normalized vectors
  sim_scores <- fmn_scaled_norm %*% target_vector_norm

  sim_scores[idx] <- NA  # exclude itself

  top_idx <- order(sim_scores, decreasing = TRUE)[1:top_n]
  result <- games_features[top_idx, ]
  result$Similarity <- sim_scores[top_idx]

  return(result[, c("Name", "AvgRating", "Similarity")])
}

Testing the content model

recommend_games("Catan", fmn_scaled, games_features, top_n = 5)
##                                    Name AvgRating Similarity
## 21184   Catan: 25th Anniversary Edition 0.7777968  0.9842892
## 20922 Catan: 25 Jahre Jubiläums-Edition 0.7492494  0.9783564
## 21838                 CATAN: 3D Edition 0.7312430  0.9539107
## 16257          Rivals for Catan: Deluxe 0.6924172  0.7523690
## 14705                 Frontier Stations 0.6341018  0.7509682
recommend_games("Cranium", fmn_scaled, games_features, top_n = 5)
##                                 Name AvgRating Similarity
## 8197                     Cranium WOW 0.5738232  0.9354405
## 10355 Cranium: Disney Family Edition 0.5294693  0.9333669
## 10878           Cranium Brain Breaks 0.3810172  0.8855267
## 4623               Oodles of Doodles 0.5812344  0.7887374
## 7224                Pictionary Mania 0.5652837  0.7878504
recommend_games("Chrononauts", fmn_scaled, games_features, top_n = 5)
##                                                   Name AvgRating Similarity
## 4856                        Early American Chrononauts 0.6271233  0.6935418
## 18229                     1918/1919: Storm in the West 0.7694749  0.6501301
## 9949                 Back to the Future: The Card Game 0.5368276  0.6110098
## 15089 Sealion: The Proposed German Invasion of England 0.7106355  0.5551929
## 91                                    Fortress America 0.6371774  0.5441202

Results make sense, are clearly related/similar thematically

Collaborative Filtering

library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
user_item <- ratings |>
  select(Username, BGGId, Rating)

# Create integer IDs for users and games
user_item <- user_item |>
  mutate(
    user_id = as.integer(factor(Username)),
    game_id = match(as.character(BGGId), rownames(fmn))
  )
if (any(is.na(user_item$game_id))) {
  warning("Some BGGId in user ratings not found in feature matrix rownames!")
}
# Build sparse matrix
rating_matrix <- sparseMatrix(
  i = user_item$user_id,
  j = user_item$game_id,
  x = user_item$Rating
)

rownames(rating_matrix) <- levels(factor(user_item$Username))
colnames(rating_matrix) <- rownames(fmn)
user_map <- ratings |>
  distinct(Username) |>
  mutate(user_id = as.integer(factor(Username)))

game_map <- ratings |>
  distinct(BGGId) |>
  mutate(game_id = as.integer(factor(BGGId)))

# 3. Join IDs back to ratings
ratings <- ratings |>
  left_join(user_map, by = "Username") |>
  left_join(game_map, by = "BGGId")
game_map_with_names <- game_map |>
  left_join(games_features %>% select(BGGId, Name), by = "BGGId")

exporting for recosystem

set.seed(589623)

n <- nrow(user_item)
train_indices <- sample(n, size = 0.8 * n)

train_data <- user_item[train_indices, ]
test_data  <- user_item[-train_indices, ]
write.table(train_data[, c("user_id", "game_id", "Rating")],
            file = "train_data.txt", sep = " ", row.names = FALSE, col.names = FALSE)

write.table(test_data[, c("user_id", "game_id", "Rating")],
            file = "test_data.txt", sep = " ", row.names = FALSE, col.names = FALSE)
r <- Reco()
r$train(data_file("train_data.txt"), 
        opts = list(dim = 30, costp_l2 = 0.1, costq_l2 = 0.1, 
                    lrate = 0.1, niter = 20, nthread = 2, verbose = TRUE))
## iter      tr_rmse          obj
##    0       1.7481   6.9840e+07
##    1       1.2184   4.5324e+07
##    2       1.1966   4.4294e+07
##    3       1.1781   4.3639e+07
##    4       1.1584   4.3017e+07
##    5       1.1411   4.2530e+07
##    6       1.1258   4.2115e+07
##    7       1.1114   4.1767e+07
##    8       1.0986   4.1479e+07
##    9       1.0873   4.1220e+07
##   10       1.0775   4.1017e+07
##   11       1.0688   4.0834e+07
##   12       1.0613   4.0675e+07
##   13       1.0547   4.0566e+07
##   14       1.0485   4.0458e+07
##   15       1.0431   4.0359e+07
##   16       1.0381   4.0271e+07
##   17       1.0335   4.0199e+07
##   18       1.0294   4.0136e+07
##   19       1.0256   4.0075e+07
r$predict(data_file("test_data.txt"), out_file("test_preds.txt"))
## prediction output generated at test_preds.txt
# Read predictions and actual ratings
pred_ratings <- scan("test_preds.txt")
true_ratings <- test_data$Rating


rmse <- sqrt(mean((pred_ratings - true_ratings)^2))
cat("RMSE:", rmse)
## RMSE: 1.14356

This is lowest RMSE found thus far

Hybrid Model

hybrid score = alpha * cf predicted rating + (1-alpha) * content similarity

target_user_id <- 94972  
all_game_ids <- 1:nrow(fmn)
# Prepare a data.frame with all game_ids for this user
to_predict <- data.frame(
  user_id = rep(target_user_id, length(all_game_ids)),
  game_id = all_game_ids
)

# Save to file for recosystem predict
write.table(to_predict, "to_predict.txt", row.names = FALSE, col.names = FALSE, sep = " ")
r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))
## prediction output generated at predicted_ratings.txt
predicted_ratings <- scan("predicted_ratings.txt")
target_game_name <- "Cranium"  # Example
target_game_bggid <- games_features |>
  filter(Name == target_game_name) |>
  pull(BGGId)

target_idx <- match(as.character(target_game_bggid), rownames(fmn))

target_vector <- fmn[target_idx, , drop = FALSE]

similarity_vector <- apply(fmn, 1, function(x) coop::cosine(x, as.numeric(target_vector)))
normalize <- function(x) {
  rng <- range(x, na.rm = TRUE)
  if (diff(rng) == 0) return(rep(0, length(x)))
  (x - rng[1]) / (rng[2] - rng[1])
}

cf_norm <- normalize(predicted_ratings)
content_norm <- normalize(similarity_vector)

alpha <- 0.7
hybrid_scores <- alpha * cf_norm + (1 - alpha) * content_norm

top_indices <- order(hybrid_scores, decreasing = TRUE)[1:10]

recommended_games <- games_features[top_indices, ]
recommended_games$hybrid_score <- hybrid_scores[top_indices]

print(recommended_games[, c("Name", "AvgRating", "hybrid_score")])
##                                                           Name AvgRating
## 6978                                               Celebrities 0.6971394
## 8501                                  Time's Up! Title Recall! 0.7509918
## 19914                                Draw Your Own Conclusions 0.6566839
## 15888                            Monikers: Something Something 0.7955260
## 13105                      Telestrations: 12 Player Party Pack 0.7563508
## 20305                               Monikers: Serious Nonsense 0.8216525
## 6181  The von Reisswitz Kriegsspiel: The Prussian Army Wargame 0.8107734
## 8560                                         Time's Up! Deluxe 0.7266504
## 5079                                                SEEKRIEG 5 0.8258394
## 14843                                     Monikers: Shmonikers 0.7891910
##       hybrid_score
## 6978     0.8237585
## 8501     0.8076922
## 19914    0.8062398
## 15888    0.7914088
## 13105    0.7788029
## 20305    0.7764042
## 6181     0.7709252
## 8560     0.7696423
## 5079     0.7664679
## 14843    0.7647787
library(dplyr)
library(coop)

# Define user and game for diagnostics
target_user_name <- "doctorchewy"
target_game_name <- "Cranium"

# 1. Find user_id and game_id (numeric indices)
target_user_id <- user_item |>
  filter(Username == target_user_name) |>
  pull(user_id) %>% unique()

target_game_bggid <- games_features |>
  filter(Name == target_game_name) |>
  pull(BGGId) |>
  as.character()

target_game_id <- match(target_game_bggid, rownames(fmn))

# 2. Predicted ratings vector for this user across all games 
all_game_ids <- 1:nrow(fmn)
to_predict <- data.frame(
  user_id = rep(target_user_id, length(all_game_ids)),
  game_id = all_game_ids
)
write.table(to_predict, "to_predict.txt", row.names=FALSE, col.names=FALSE, sep=" ")
r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))
## prediction output generated at predicted_ratings.txt
predicted_ratings <- scan("predicted_ratings.txt")

# 3. Content similarity vector for target game
target_vector <- fmn[target_game_id, , drop=FALSE]
similarity_vector <- apply(fmn, 1, function(x) coop::cosine(x, as.numeric(target_vector)))

# 4. Normalize both vectors
normalize <- function(x) {
  rng <- range(x, na.rm=TRUE)
  if (diff(rng) == 0) return(rep(0, length(x)))
  (x - rng[1]) / (rng[2] - rng[1])
}

cf_norm <- normalize(predicted_ratings)
content_norm <- normalize(similarity_vector)

# 5. Hybrid scores
alpha <- 0.7
hybrid_scores <- alpha * cf_norm + (1 - alpha) * content_norm

# 6. Create a diagnostics table
diagnostics <- data.frame(
  game_id = all_game_ids,
  BGGId = rownames(fmn),
  Name = games_features$Name,
  predicted_rating = predicted_ratings,
  cf_norm = cf_norm,
  content_sim = similarity_vector,
  content_norm = content_norm,
  hybrid_score = hybrid_scores
)

# 7. Sort by hybrid score descending
diagnostics <- diagnostics |>
  arrange(desc(hybrid_score))

# 8. Print top 20 recommendations
print(diagnostics[1:20, c("Name", "predicted_rating", "content_sim", "hybrid_score")])
##                                                                 Name
## 22303                                                    Celebrities
## 36553                                       Time's Up! Title Recall!
## 273349                                     Draw Your Own Conclusions
## 195709                                 Monikers: Something Something
## 153016                           Telestrations: 12 Player Party Pack
## 283152                                    Monikers: Serious Nonsense
## 16957       The von Reisswitz Kriegsspiel: The Prussian Army Wargame
## 37141                                              Time's Up! Deluxe
## 10683                                                     SEEKRIEG 5
## 179448                                          Monikers: Shmonikers
## 317434 Exit: The Game – Advent Calendar: The Mystery of the Ice Cave
## 221248                 Monikers: The Shut Up & Sit Down Nonsense Box
## 283151                                            Monikers: Classics
## 225694                                                      Decrypto
## 156546                                                      Monikers
## 1353                                                      Time's Up!
## 300905                                                       Top Ten
## 230262                                      Time's Up! Party Edition
## 255249                                       Monikers: More Monikers
## 288798                                  Kings of War (Third Edition)
##        predicted_rating content_sim hybrid_score
## 22303           8.17639   0.7547290    0.8237585
## 36553           8.35746   0.6585509    0.8076922
## 273349          8.85681   0.5331518    0.8062398
## 195709          8.30380   0.6183834    0.7914088
## 153016          7.80184   0.6985402    0.7788029
## 283152          8.41862   0.5416780    0.7764042
## 16957           8.76465   0.4401870    0.7709252
## 37141           7.84216   0.6589059    0.7696423
## 10683           9.10264   0.3439728    0.7664679
## 179448          8.06421   0.5893809    0.7647787
## 317434          8.28106   0.5231809    0.7605491
## 221248          7.85606   0.6184773    0.7582819
## 283151          7.90600   0.6015733    0.7567996
## 225694          7.83545   0.6168093    0.7562445
## 156546          8.07902   0.5528170    0.7546701
## 1353            7.53506   0.6834772    0.7544313
## 300905          7.98350   0.5675750    0.7521194
## 230262          7.52977   0.6670239    0.7489973
## 255249          8.13832   0.5187112    0.7486092
## 288798          8.38108   0.4561433    0.7474113
# 9. Inspect how many ratings this user has given
user_ratings <- user_item |> filter(user_id == target_user_id)
cat("Number of ratings by user:", nrow(user_ratings), "\n")
## Number of ratings by user: 74
# 10. See top rated games by this user
user_top_ratings <- user_ratings |>
  left_join(games_features |> select(BGGId, Name), by = c("BGGId" = "BGGId")) |>
  arrange(desc(Rating))

print(head(user_top_ratings, 10))
##       Username  BGGId Rating user_id game_id
## 1  doctorchewy  12493     10   94972    5504
## 2  doctorchewy 115746     10   94972   11221
## 3  doctorchewy 233078     10   94972   17835
## 4  doctorchewy 178900     10   94972   14812
## 5  doctorchewy  43111     10   94972    9191
## 6  doctorchewy  54625     10   94972    9374
## 7  doctorchewy 181279     10   94972   14990
## 8  doctorchewy 128671     10   94972   11737
## 9  doctorchewy 162009     10   94972   13710
## 10 doctorchewy  96848      9   94972   10666
##                                        Name
## 1          Twilight Imperium: Third Edition
## 2           War of the Ring: Second Edition
## 3         Twilight Imperium: Fourth Edition
## 4                                 Codenames
## 5                    Chaos in the Old World
## 6                Space Hulk (Third Edition)
## 7    Fury of Dracula (Third/Fourth Edition)
## 8  Spartacus: A Game of Blood and Treachery
## 9                        The U.S. Civil War
## 10                   Mage Knight Board Game

Precision and Recall at K

get_predicted_ratings <- function(target_user_name, fmn_scaled, games_features, game_map, user_ratings, k = 20) {
  # Step 1: Get the target user_id from the user_ratings table
  target_user_id <- ratings |>
    filter(Username == target_user_name) |>
    distinct(user_id) |>
    pull(user_id)

  if (length(target_user_id) == 0) {
    stop("Username not found.")
  }

  # Step 2: Filter game_map to include only valid games in fmn
  valid_bgg_ids <- as.integer(rownames(fmn))  
  filtered_game_map <- game_map |>
    filter(BGGId %in% valid_bgg_ids)

  all_game_ids <- filtered_game_map$game_id

  # Step 3: Create the to_predict file
  print(target_user_id)
  length(target_user_id)
  to_predict <- data.frame(
    user_id = rep(target_user_id, length(all_game_ids)),
    game_id = all_game_ids
  )
  cat("Number of predictions to make:", nrow(to_predict), "\n")
  stopifnot(length(unique(to_predict$game_id)) == length(to_predict$game_id))


  write.table(to_predict, "to_predict.txt", row.names = FALSE, col.names = FALSE, sep = " ")

  # Step 4: Predict
  unlink("predicted_ratings.txt")  
  r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))

  predicted_ratings <- scan("predicted_ratings.txt")

  if (length(predicted_ratings) != length(all_game_ids)) {
    stop("Prediction count mismatch.")
  }

  # Step 5: Combine predictions by BGGId
  recommended_games <- data.frame(
    game_id = all_game_ids,
    predicted_rating = predicted_ratings
  ) |>
    left_join(filtered_game_map, by = "game_id")  # adds BGGId

  # Step 6: Merge with game using BGGId
  recommendations <- recommended_games |>
    left_join(games, by = "BGGId") |>
    arrange(desc(predicted_rating)) |>
    head(k)

  return(recommendations)
}
target_user_name <- "doctorchewy"
recommendations <- get_predicted_ratings(
  target_user_name,
  fmn_scaled,
  games_features,
  game_map,
  train_data,
  k = 20
)
## [1] 94972
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
print(recommendations[, c("Name", "AvgRating", "predicted_rating")])
##                                                                                    Name
## 1                                                          1985: Deadly Northern Lights
## 2                                                       Aeon's End: Legacy of Gravehold
## 3                                             Roads to Gettysburg II: Lee Strikes North
## 4                                                                         Go First Dice
## 5                                                           SAGA Rulebook (2nd Edition)
## 6                                                                      Anno Domini 1666
## 7                                                      Exceed: Street Fighter – Ryu Box
## 8                                                   Uprising: Curse of the Last Emperor
## 9                                 Hood Strikes North: The Tennessee Campaign, Fall 1864
## 10                                       Panzers Last Stand: Battles for Budapest, 1945
## 11                                System Gateway (fan expansion for Android: Netrunner)
## 12                                      Stonewall Jackson's Way II: Battles of Bull Run
## 13                                                                           SEEKRIEG 5
## 14                                        The Battle of Five Armies Collector's Edition
## 15                                                                          Tenkatoitsu
## 16            The Lord of the Rings: The Card Game – Two-Player Limited Edition Starter
## 17 Middle-earth Strategy Battle Game: The Lord of the Rings – Battle of Pelennor Fields
## 18                        Primer: The Gamer's Source for Battles from the Age of Reason
## 19                                    Last Chance for Victory: The Battle of Gettysburg
## 20                                                         Pax Renaissance: 2nd Edition
##    AvgRating predicted_rating
## 1    9.00426          9.54133
## 2    8.98333          9.39428
## 3    8.81429          9.25537
## 4    8.38298          9.25001
## 5    8.26404          9.24569
## 6    8.83424          9.23730
## 7    7.98966          9.23215
## 8    8.52968          9.22590
## 9    8.75643          9.18448
## 10   8.84286          9.16529
## 11   9.48333          9.12131
## 12   8.60965          9.11160
## 13   8.36897          9.10264
## 14   8.94429          9.09501
## 15   8.43846          9.09098
## 16   8.70610          9.08256
## 17   8.68023          9.07252
## 18   9.13610          9.05598
## 19   8.78548          9.04711
## 20   8.64195          9.04528
# uses hybrid score that is only based on one user, not used in the end
# 1. Get the user's predicted CF ratings (a numeric vector with names = game IDs)
cf_predicted_ratings_user <- predicted_ratings

# 2. Create user profile vector by averaging features of games user rated highly
#liked_games <- test_data %>% filter(user_id == target_user_id & Rating >= 5) %>% pull(BGGId)
#user_profile <- colMeans(fmn_scaled[rownames(fmn_scaled) %in% liked_games, ])
compute_user_profile <- function(target_user_name, test_data, fmn) {
    liked_games <- test_data |> 
        filter(Username == target_user_name & Rating >= 5) |> 
        pull(BGGId)

    # Check if the user has any liked games
    if (length(liked_games) == 0) {
        stop("No relevant games found for the user.")
    }

    # Ensure the liked games exist in the feature matrix (fmn)
    missing_games <- setdiff(liked_games, rownames(fmn))
    if (length(missing_games) > 0) {
        stop("Some liked games are missing from the feature matrix.")
    }

    # Calculate the user profile
    user_profile <- colMeans(fmn[rownames(fmn) %in% liked_games, ])
    return(user_profile)
}

# Example usage:
target_user_name <- "doctorchewy"  # Replace with another user ID
user_profile <- compute_user_profile(target_user_name, test_data, fmn)


# 3. Compute content similarity of all games to user profile
content_similarities <- apply(fmn_scaled, 1, function(x) coop::cosine(x, user_profile))

# 4. Normalize and combine
cf_norm <- normalize(cf_predicted_ratings_user)
content_norm <- normalize(content_similarities)

alpha <- 0.7
hybrid_scores_user <- alpha * cf_norm + (1 - alpha) * content_norm
recommend_top_k_table <- function(hybrid_scores_user, games_features, k = 20) {
  top_indices <- order(hybrid_scores_user, decreasing = TRUE)[1:k]
  
  top_k <- games_features[top_indices, ] |>
    mutate(rank = 1:k,
           hybrid_score = hybrid_scores_user[top_indices])
  
  return(top_k)
}

# 5. Get top k recommendations
top_k_games <- recommend_top_k_table(hybrid_scores_user, games_features, k = 20)
print(top_k_games[, c("Name", "AvgRating", "hybrid_score")])
##                                                           Name AvgRating
## 20790                             1985: Deadly Northern Lights 0.8974378
## 21759                          Aeon's End: Legacy of Gravehold 0.8950790
## 20367                      Uprising: Curse of the Last Emperor 0.8439517
## 19402                                      Dungeon Universalis 0.8832036
## 9538                       War of the Ring Collector's Edition 0.8596015
## 14811 Last Blitzkrieg: Wacht am Rhein, The Battle of the Bulge 0.8657089
## 18297                  Here I Stand: 500th Anniversary Edition 0.8512052
## 21632         Middara: Unintentional Malum – Act 1 Version 1.1 0.8998835
## 18641                            Mage Knight: Ultimate Edition 0.8912516
## 14398                            High Frontier (Third Edition) 0.8112152
## 14144                     Middara: Unintentional Malum – Act 1 0.8797414
## 21494                               Frostgrave: Second Edition 0.8828114
## 18635                              SAGA Rulebook (2nd Edition) 0.8140136
## 5079                                                SEEKRIEG 5 0.8258394
## 11221                          War of the Ring: Second Edition 0.8417202
## 16870                Roads to Gettysburg II: Lee Strikes North 0.8760278
## 21445                                               Last Light 0.8405966
## 19334          Middle-Earth Strategy Battle Game: Rules Manual 0.8864967
## 20977                                     Aeon's End: Outcasts 0.8698585
## 18108                                         Anno Domini 1666 0.8782762
##       hybrid_score
## 20790    0.9322644
## 21759    0.9299475
## 20367    0.9284137
## 19402    0.9240713
## 9538     0.9149444
## 14811    0.9129439
## 18297    0.9109864
## 21632    0.9105744
## 18641    0.9102848
## 14398    0.9042349
## 14144    0.9018775
## 21494    0.8996732
## 18635    0.8979558
## 5079     0.8970580
## 11221    0.8945543
## 16870    0.8926004
## 21445    0.8924940
## 19334    0.8919679
## 20977    0.8912792
## 18108    0.8897442
#actual top_k_games code
top_k_games <- recommend_top_k_table(hybrid_scores_user, games_features, k = 20)
print(top_k_games[, c("Name", "AvgRating", "hybrid_score")])
##                                                           Name AvgRating
## 20790                             1985: Deadly Northern Lights 0.8974378
## 21759                          Aeon's End: Legacy of Gravehold 0.8950790
## 20367                      Uprising: Curse of the Last Emperor 0.8439517
## 19402                                      Dungeon Universalis 0.8832036
## 9538                       War of the Ring Collector's Edition 0.8596015
## 14811 Last Blitzkrieg: Wacht am Rhein, The Battle of the Bulge 0.8657089
## 18297                  Here I Stand: 500th Anniversary Edition 0.8512052
## 21632         Middara: Unintentional Malum – Act 1 Version 1.1 0.8998835
## 18641                            Mage Knight: Ultimate Edition 0.8912516
## 14398                            High Frontier (Third Edition) 0.8112152
## 14144                     Middara: Unintentional Malum – Act 1 0.8797414
## 21494                               Frostgrave: Second Edition 0.8828114
## 18635                              SAGA Rulebook (2nd Edition) 0.8140136
## 5079                                                SEEKRIEG 5 0.8258394
## 11221                          War of the Ring: Second Edition 0.8417202
## 16870                Roads to Gettysburg II: Lee Strikes North 0.8760278
## 21445                                               Last Light 0.8405966
## 19334          Middle-Earth Strategy Battle Game: Rules Manual 0.8864967
## 20977                                     Aeon's End: Outcasts 0.8698585
## 18108                                         Anno Domini 1666 0.8782762
##       hybrid_score
## 20790    0.9322644
## 21759    0.9299475
## 20367    0.9284137
## 19402    0.9240713
## 9538     0.9149444
## 14811    0.9129439
## 18297    0.9109864
## 21632    0.9105744
## 18641    0.9102848
## 14398    0.9042349
## 14144    0.9018775
## 21494    0.8996732
## 18635    0.8979558
## 5079     0.8970580
## 11221    0.8945543
## 16870    0.8926004
## 21445    0.8924940
## 19334    0.8919679
## 20977    0.8912792
## 18108    0.8897442
get_top_k_games_for_user <- function(hybrid_scores_user, games_features, k = 20) {
  top_indices <- order(hybrid_scores_user, decreasing = TRUE)[1:k]
  top_games <- games_features[top_indices, ]
  return(top_games$BGGId)
}
compute_precision_recall_user <- function(target_user_name, top_k_games, test_data, k = 20) {
  # Convert both to character to ensure valid comparison
  user_relevant <- test_data |>
    filter(Username == target_user_name & Rating >= 5) |>
    pull(BGGId) |>
    as.character()

  recommended <- as.character(top_k_games)

  if(length(user_relevant) == 0) {
    warning(glue::glue("User {target_user_name} has no relevant items in test set."))
    return(list(precision = NA, recall = NA))
  }

  hits <- length(intersect(recommended, user_relevant))
  precision <- hits / k
  recall <- hits / length(user_relevant)

  list(precision = precision, recall = recall)
}
test_games <- test_data |>
  filter(user_id == target_user_id) |>
  pull(game_id)

# Check overlap
recommended_game_ids <- recommendations$game_id 

precision_at_k <- sum(recommended_game_ids %in% test_games) / length(recommended_game_ids)
recall_at_k <- sum(recommended_game_ids %in% test_games) / length(test_games)

cat("Precision@K:", precision_at_k, "\n")
## Precision@K: 0
cat("Recall@K:", recall_at_k, "\n")
## Recall@K: 0
# checking using another user
target_user_name <- "jedipanda"
recommendations <- get_predicted_ratings(
  target_user_name,
  fmn_scaled,
  games_features,
  game_map,
  train_data,
  k = 20
)
## [1] 172917
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
print(recommendations[, c("Name", "AvgRating", "predicted_rating")])
##                                                                         Name
## 1                                        Uprising: Curse of the Last Emperor
## 2                                                                   Ark Nova
## 3                                            Aeon's End: Legacy of Gravehold
## 4  The Lord of the Rings: The Card Game – Two-Player Limited Edition Starter
## 5                                                                     Aeolis
## 6                                       Great Western Trail (Second Edition)
## 7                                               Small World Designer Edition
## 8                                                  Pandemic Legacy: Season 0
## 9                                                 The Crew: Mission Deep Sea
## 10                          Hungarian Rhapsody: The Eastern Front in Hungary
## 11                                                                   Sinners
## 12                                Clank!: Legacy – Acquisitions Incorporated
## 13                                                        Ultimate Railroads
## 14                                           Euthia: Torment of Resurrection
## 15                                                Frostgrave: Second Edition
## 16                     System Gateway (fan expansion for Android: Netrunner)
## 17             Primer: The Gamer's Source for Battles from the Age of Reason
## 18                                                          El Dorado Canyon
## 19      From Salerno to Rome: World War II – The Italian Campaign, 1943-1944
## 20                                                       Dungeon Universalis
##    AvgRating predicted_rating
## 1    8.52968          8.62236
## 2    8.47839          8.51781
## 3    8.98333          8.33894
## 4    8.70610          8.30722
## 5    9.25000          8.29257
## 6    8.45802          8.23670
## 7    8.99640          8.13869
## 8    8.64327          8.13128
## 9    8.44932          8.09217
## 10   8.50000          8.08981
## 11   8.62308          8.03312
## 12   8.69604          8.03202
## 13   8.39017          8.02636
## 14   8.87023          8.02532
## 15   8.87448          8.02442
## 16   9.48333          8.01960
## 17   9.13610          8.01839
## 18   8.73600          8.00758
## 19   8.46875          7.98916
## 20   8.87796          7.98526
# Evaluating Precision@K and Recall@K for a sample of users
evaluate_precision_recall <- function(usernames, k = 20) {
  results <- data.frame(
    Username = character(),
    Precision = numeric(),
    Recall = numeric(),
    stringsAsFactors = FALSE
  )

  for (name in usernames) {
    cat("Evaluating:", name, "\n")

    # Try-catch in case of errors like missing user
    tryCatch({
      # Get top-K recommended games
      recommendations <- get_predicted_ratings(
        target_user_name = name,
        fmn_scaled = fmn_scaled,
        games_features = games_features,
        game_map = game_map,
        user_ratings = ratings,
        k = k
      )

      recommended_bggids <- recommendations$BGGId

      # Get actual relevant games from the test set (rated > threshold)
      relevant_items <- ratings |>
        filter(Username == name, Rating >= 7) |>
        pull(BGGId)

      hits <- sum(recommended_bggids %in% relevant_items)

      precision <- hits / k
      recall <- ifelse(length(relevant_items) == 0, NA, hits / length(relevant_items))

      results <- rbind(results, data.frame(
        Username = name,
        Precision = precision,
        Recall = recall
      ))

    }, error = function(e) {
      message("Error for user ", name, ": ", e$message)
    })
  }

  return(results)
}
# Get top 10 users by # of ratings
top_users <- ratings |>
  group_by(Username) |>
  summarise(num_rated = n()) |>
  arrange(desc(num_rated)) |>
  slice(1:10) |>
  pull(Username)

# Run evaluation
eval_results <- evaluate_precision_recall(top_users, k = 20)
## Evaluating: oldgoat3769967 
## [1] 272394
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: warta 
## [1] 392170
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: leffe dubbel 
## [1] 210557
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: TomVasel 
## [1] 373548
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: Doel 
## [1] 95188
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: Hessu68 
## [1] 152540
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: Walt Mulder 
## [1] 391362
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: fake 
## [1] 116827
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: Tolkana 
## [1] 372562
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## Evaluating: Nap16 
## [1] 259041
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
print(eval_results)
##          Username Precision       Recall
## 1  oldgoat3769967      0.10 0.0006406150
## 2           warta      0.10 0.0003658314
## 3    leffe dubbel      0.15 0.0015576324
## 4        TomVasel      0.15 0.0010952903
## 5            Doel      0.10 0.0004479283
## 6         Hessu68      0.15 0.0033519553
## 7     Walt Mulder      0.25 0.0012651822
## 8            fake      0.00 0.0000000000
## 9         Tolkana      0.10 0.0005083884
## 10          Nap16      0.05 0.0004955401

Novelty, Diversity, & Serendipity

#checking novelty of generated recommendations
calculate_novelty <- function(recommendations) {
  ranks <- as.numeric(as.character(recommendations$NumUserRatings))
  # Convert ranks to popularity score between 0 and 1 (higher rank means less popular)
  popularity <- ranks / max(ranks)  

  # Novelty = average of -log2(popularity)
  novelty_scores <- -log2(popularity)
  mean(novelty_scores)
}
calculate_novelty(recommendations)
## [1] 4.78752
sim_matrix <- coop::cosine(t(fmn_scaled))
#forming sparse matrix for all combinations
library(Matrix)
sms <- as(sim_matrix, 'dgCMatrix')
calculate_diversity <- function(recommendations, sms, fmn_scaled) {
  recommended_bggids <- as.character(recommendations$BGGId)
  all_bggids <- rownames(fmn_scaled)
  top_game_indices <- match(recommended_bggids, all_bggids)
  
  if (any(is.na(top_game_indices))) {
    warning("Some recommended games not found in fmn_scaled rownames.")
    recommended_bggids <- recommended_bggids[!is.na(top_game_indices)]
    top_game_indices <- top_game_indices[!is.na(top_game_indices)]
  }
  
  # Extract submatrix of similarities among recommended games
  sub_sim <- sim_matrix[top_game_indices, top_game_indices]
  
  # Diversity = average dissimilarity = 1 - average similarity 
  n <- nrow(sub_sim)
  if (n <= 1) return(0)  
  
  # Remove diagonal by setting it to NA (self-similarity = 1)
  diag(sub_sim) <- NA
  
  avg_similarity <- mean(sub_sim, na.rm = TRUE)
  diversity <- 1 - avg_similarity
  
  return(diversity)
}
#checking diversity of sample generated recommendations
calculate_diversity(recommendations, sms, fmn_scaled)
## [1] 0.9378894
users_vector <- unique(user_ratings$Username)
# users_vector: character vector of usernames to evaluate
compute_avg_diversity <- function(users_vector, k = 20, ...) {
  diversity_scores <- numeric(length(users_vector))
  
  for (i in seq_along(users_vector)) {
    user <- users_vector[i]
    
    # Get top-k recommendations for this user
    recs <- get_predicted_ratings(user, fmn_scaled, games_features, game_map, ratings, k)
    
    # Calculate diversity for this user's recommendations
    diversity_scores[i] <- calculate_diversity(recs, sms, fmn_scaled)
  }
  
  avg_diversity <- mean(diversity_scores, na.rm = TRUE)
  return(list(
    per_user_diversity = diversity_scores,
    avg_diversity = avg_diversity
  ))
}

# Example from known usernames:
usernames_to_test <- c("doctorchewy", "jedipanda", "oldgoat3769967", "leffe dubbel", "TomVasel")
diversity_results <- compute_avg_diversity(usernames_to_test, k = 20)
## [1] 94972
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## [1] 172917
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## [1] 272394
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## [1] 210557
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
## [1] 373548
## Number of predictions to make: 21925 
## prediction output generated at predicted_ratings.txt
print(diversity_results$avg_diversity)
## [1] 0.9231404
print(diversity_results$per_user_diversity)
## [1] 0.9164356 0.9378894 0.9118758 0.9068803 0.9426207
#serendipitous discovery - something not in the 1000 most rated games
popularity_threshold <- sort(games$NumUserRatings, decreasing = TRUE)[1000]

print(popularity_threshold)
## [1] 3531
# this represents .86% of the userbase 
calculate_serendipity <- function(recommendations, ratings, target_user_name, k = 20, threshold = 5) {


  # Filter user’s liked games in test set (relevant items)
  user_liked_games <- ratings |>
    filter(Username == target_user_name & Rating >= threshold) |>
    pull(BGGId) |>
    as.character()

  if (length(user_liked_games) == 0) {
    warning(paste("User", target_user_name, "has no relevant items in test set."))
    return(NA)
  }
  
  recs_k <- recommendations$BGGId[1:k] %>% as.character()

  popular_games <- games_features |>
    filter(NumUserRatings >= popularity_threshold) |>
    pull(BGGId) |>
    as.character()

  # Serendipitous recommendations exclude liked and popular games
  serendipitous_recs <- setdiff(recs_k, union(user_liked_games, popular_games))

  serendipity_score <- length(serendipitous_recs) / k
  return(serendipity_score)
}

calculate_serendipity(recommendations, ratings, target_user_name, k = 20, threshold = 5)
## [1] 0.95

Initializing

get_predicted_ratings <- function(target_user_name, fmn_scaled, games_features, game_map, user_ratings, k = 20) {
  # Step 1: Get the target user_id from the user_ratings table
  target_user_id <- ratings |>
    filter(Username == target_user_name) |>
    distinct(user_id) |>
    pull(user_id)

  if (length(target_user_id) == 0) {
    stop("Username not found.")
  }

  # Step 2: Filter game_map to include only valid games in fmn
  valid_bgg_ids <- as.integer(rownames(fmn))  # BGGIds used in training
  filtered_game_map <- game_map %>%
    filter(BGGId %in% valid_bgg_ids)

  all_game_ids <- filtered_game_map$game_id

  # Step 3: Create the to_predict file
  print(target_user_id)
  length(target_user_id)
  to_predict <- data.frame(
    user_id = rep(target_user_id, length(all_game_ids)),
    game_id = all_game_ids
  )
  cat("Number of predictions to make:", nrow(to_predict), "\n")
  stopifnot(length(unique(to_predict$game_id)) == length(to_predict$game_id))


  write.table(to_predict, "to_predict.txt", row.names = FALSE, col.names = FALSE, sep = " ")

  # Step 4: Predict
  unlink("predicted_ratings.txt")  
  r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))

  predicted_ratings <- scan("predicted_ratings.txt")

  if (length(predicted_ratings) != length(all_game_ids)) {
    stop("Prediction count mismatch.")
  }

  # Step 5: Combine predictions with BGGId
  recommended_games <- data.frame(
    game_id = all_game_ids,
    predicted_rating = predicted_ratings
  ) %>%
    left_join(filtered_game_map, by = "game_id")  # adds BGGId

  # Step 6: Merge with game using BGGId
  recommendations <- recommended_games %>%
    left_join(games, by = "BGGId") %>%
    arrange(desc(predicted_rating)) %>%
    head(k)

  return(recommendations)
}

compute_all_metrics <- function(user_name, recommendations, test_data, sms, fmn_scaled, games_features, ratings, popularity_threshold = NULL) {
  
  # 1. Precision & Recall ------------------------------------------------
  # Get relevant items user liked in test_data (rating >= 5)
  user_relevant <- test_data %>%
    filter(Username == user_name & Rating >= 5) %>%
    pull(BGGId) %>%
    as.character()
  
  recommended_bggids <- as.character(recommendations$BGGId)
  
  hits <- length(intersect(recommended_bggids, user_relevant))
  
  precision <- if(length(recommended_bggids) > 0) hits / length(recommended_bggids) else NA
  recall <- if(length(user_relevant) > 0) hits / length(user_relevant) else NA
  
  # 2. Novelty -------------------------------------------------------------
  # Define popularity cutoff if not given
  if(is.null(popularity_threshold)) {
    popularity_threshold <- sort(games$NumUserRatings, decreasing = TRUE)[1000]
  }
  
  
  novelty_score <- mean(log2(1 + max(games$NumUserRatings) / (recommendations$NumUserRatings + 1)), na.rm = TRUE)

  
  # 3. Diversity -----------------------------------------------------------
  # Calculate pairwise similarity among recommended games
  rec_indices <- which(rownames(fmn_scaled) %in% recommended_bggids)
  
  if(length(rec_indices) < 2) {
    diversity <- NA  # can't compute diversity with fewer than 2 games
  } else {
    sim_submatrix <- sms[rec_indices, rec_indices]
    diversity <- 1 - mean(sim_submatrix[upper.tri(sim_submatrix)])
  }
  
  # 4. Serendipity ---------------------------------------------------------
  # Serendipity = recommendations that are relevant but NOT popular
  # Popular games above popularity_threshold
  popular_games <- games_features |>
    filter(NumUserRatings >= popularity_threshold) |>
    pull(BGGId) %>%
    as.character()
  
  # Serendipitous items = relevant recommended games NOT in popular_games
  serendipitous <- intersect(setdiff(recommended_bggids, popular_games), user_relevant)
  
  serendipity <- if(length(recommended_bggids) > 0) length(serendipitous) / length(recommended_bggids) else NA
  
  # Return all metrics in a named list
  list(
    precision = precision,
    recall = recall,
    novelty = novelty_score,
    diversity = diversity,
    serendipity = serendipity
  )
}
get_cold_start_recommendations <- function(games_features, n = 50) {
  games_features |>
    arrange(desc(NumUserRatings))|>
    head(n)
}

# 4. ➕ Define game-based recommendation function here
get_similar_games <- function(game_name, similarity_matrix, game_map, games_features, num_recs = 20) {
  game_id <- game_map_with_names |>
    filter(Name == game_name) |>
    pull(BGGId)
  
  if (is.na(game_id) || !(game_id %in% rownames(similarity_matrix))) {
    return(NULL)
  }
  
  sim_scores <- similarity_matrix[as.character(game_id), ]
  sim_scores <- sort(sim_scores, decreasing = TRUE)
  
  sim_scores <- sim_scores[names(sim_scores) != as.character(game_id)]
  
  top_ids <- names(head(sim_scores, num_recs))
  
  games_features %>%
    filter(BGGId %in% as.numeric(top_ids)) |>
    arrange(match(BGGId, as.numeric(top_ids)))
}
library(shiny)
library(dplyr)

ui <- fluidPage(
  titlePanel("Board Game Recommender"),
  
  sidebarLayout(
    sidebarPanel(
      radioButtons("rec_type", "Recommendation Type:",
               choices = c("By User" = "user", "By Game" = "game")),
  
      conditionalPanel(
        condition = "input.rec_type == 'user'",
        textInput("username", "Enter Username:", value = "")
  ),
  
      conditionalPanel(
        condition = "input.rec_type == 'game'",
        selectInput("selected_game", "Choose a Game:",
                choices = sort(unique(games_features$Name)))
  ),
  
    numericInput("num_recs", "Number of Recommendations:", value = 20, min = 5, max = 50),
    actionButton("go", "Get Recommendations")
),

    
    mainPanel(
      uiOutput("recommendations"),   
      verbatimTextOutput("metrics")
    )
  )
)
## Warning: The select input "selected_game" contains a large number of options;
## consider using server-side selectize for massively improved performance. See
## the Details section of the ?selectizeInput help topic.
server <- function(input, output, session) {

  observeEvent(input$go, {
    req(input$num_recs)

    if (input$rec_type == "user") {
      req(input$username)

      if (!(input$username %in% ratings$Username)) {
        cold_recs <- get_cold_start_recommendations(games_features, input$num_recs)

        output$recommendations <- renderUI({
          game_items <- lapply(1:nrow(cold_recs), function(i) {
            tags$div(
              tags$img(src = cold_recs$ImagePath[i], height = "100px"),
              tags$p(cold_recs$Name[i]),
              tags$p(paste("Avg Rating:", round(cold_recs$AvgRating[i], 2)))
            )
          })
          do.call(tagList, game_items)
        })

        output$metrics <- renderText("New user detected — showing popular games.")
        return()
      }

      # User-based recommendations for existing user
      recs <- get_predicted_ratings(
        target_user_name = input$username,
        fmn_scaled = fmn_scaled,
        games_features = games_features,
        game_map = game_map,
        user_ratings = ratings,
        k = input$num_recs
      )

    } else if (input$rec_type == "game") {
      req(input$selected_game)

      recs <- get_similar_games(
        game_name = input$selected_game,
        similarity_matrix = sms,
        game_map = game_map_with_names,
        games_features = games_features,
        num_recs = input$num_recs
      )

      if (is.null(recs)) {
        output$recommendations <- renderTable({
          data.frame(Message = paste("Game", input$selected_game, "not found or no similar games."))
        })
        output$metrics <- renderText("")
        return()
      }
    }

output$recommendations <- renderUI({
  game_items <- lapply(1:nrow(recs), function(i) {

    div_content <- list(
      tags$img(src = recs$ImagePath[i], height = "100px"),
      tags$strong(recs$Name[i])
    )

    # Only append the predicted rating if it exists
    if ("predicted_rating" %in% colnames(recs)) {
      div_content <- append(div_content, list(
        tags$div(paste("Predicted Rating:", round(recs$predicted_rating[i], 2)))
      ))
    }

    # Only append average rating if it exists
    if ("AvgRating" %in% colnames(recs)) {
      div_content <- append(div_content, list(
        tags$div(paste("Avg Rating:", round(recs$AvgRating[i], 2)))
      ))
    }

    tags$div(style = "margin-bottom: 20px;", div_content)
  })

  do.call(tagList, game_items)
})

    # Metrics only if user-based
    if (input$rec_type == "user") {
      metrics_list <- compute_all_metrics(
        user_name = input$username,
        recommendations = recs,
        test_data = test_data,
        sms = sms,
        fmn_scaled = fmn_scaled,
        games_features = games_features,
        ratings = ratings
      )
      output$metrics <- renderPrint({ metrics_list })
    } else {
      output$metrics <- renderText("")
    }

  })  # end observeEvent
}
shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents