library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(recosystem)
Through Kaggle, I was able to find a collection of ratings for, including informative descriptions of board games from a prominent board game resources, boardgamesgeek.com. The database is comprised of approximately 19 million ratings from around 411,000 users of around 22,000 different board games. In total, the data is contained across 9 tables. One table contains the ratings. A second contains based game information such as year released, number of players, and a text description. A third table breaks down ratings by game instead of user, providing the number of times each game was given each rating (0-10 by tenths). Three tables provide themes, mechanics, and subcategories associated with each Game ID. The final three tables are centered around the game makers, looking at associated artists, designers, and publishers, but only given data if the creator is associated with three or more games.
games <- read.csv("C:/Users/ddebo/Downloads/archive(2)/games.csv")
mechanics <- read.csv("C:/Users/ddebo/Downloads/archive(2)/mechanics.csv")
themes <- read.csv("C:/Users/ddebo/Downloads/archive(2)/themes.csv")
subcategories <- read.csv("C:/Users/ddebo/Downloads/archive(2)/subcategories.csv")
designers <- read.csv("C:/Users/ddebo/Downloads/archive(2)/designers_reduced.csv")
ratings <- read.csv('C:/Users/ddebo/Downloads/archive(2)/user_ratings.csv')
head(mechanics)
## BGGId Alliances Area.Majority...Influence Auction.Bidding Dice.Rolling
## 1 1 1 1 1 1
## 2 2 0 0 0 0
## 3 3 0 1 0 0
## 4 4 0 1 1 0
## 5 5 0 0 0 0
## 6 6 0 0 0 1
## Hand.Management Simultaneous.Action.Selection Trick.taking Hexagon.Grid
## 1 1 1 0 0
## 2 0 0 1 0
## 3 1 0 0 1
## 4 0 0 0 0
## 5 1 0 0 0
## 6 0 0 0 0
## Once.Per.Game.Abilities Set.Collection Tile.Placement Action.Points
## 1 0 0 0 0
## 2 0 0 0 0
## 3 1 1 1 0
## 4 0 1 0 1
## 5 0 0 1 0
## 6 0 0 0 0
## Investment Market Square.Grid Stock.Holding Victory.Points.as.a.Resource
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 1 1 1 1 1
## 6 0 0 0 0 0
## Enclosure Pattern.Building Pattern.Recognition Modular.Board
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Network.and.Route.Building Point.to.Point.Movement Melding.and.Splaying
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Negotiation Trading Push.Your.Luck Income Race Random.Production
## 1 1 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## Variable.Set.up Roll...Spin.and.Move Variable.Player.Powers Action.Queue Bias
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Grid.Movement Lose.a.Turn Programmed.Movement
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Scenario...Mission...Campaign.Game Voting Events Paper.and.Pencil
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Player.Elimination Role.Playing Movement.Points Simulation
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Variable.Phase.Order Area.Movement Commodity.Speculation Cooperative.Game
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Deduction Sudden.Death.Ending Connections Highest.Lowest.Scoring
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Betting.and.Bluffing Memory Score.and.Reset.Game Layering Map.Addition
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Secret.Unit.Deployment Increase.Value.of.Unchosen.Resources
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## Ratio...Combat.Results.Table Take.That Team.Based.Game
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Campaign...Battle.Card.Driven Tech.Trees...Tech.Tracks Player.Judge
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Chit.Pull.System Three.Dimensional.Movement Action.Drafting
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Minimap.Resolution Stat.Check.Resolution Action.Timer Pick.up.and.Deliver
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Map.Deformation Bingo Crayon.Rail.System Multiple.Maps Hidden.Roles
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Line.Drawing Tug.of.War Pattern.Movement Static.Capture
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Different.Dice.Movement Chaining Ladder.Climbing Predictive.Bid
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Solo...Solitaire.Game Line.of.Sight Critical.Hits.and.Failures Interrupts
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Zone.of.Control Bribery End.Game.Bonuses Area.Impulse Worker.Placement
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Measurement.Movement Map.Reduction Real.Time Resource.to.Move Mancala
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Ownership Kill.Steal Hidden.Movement Track.Movement Deck.Construction
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Drafting TableauBuilding Prisoner.s.Dilemma Hidden.Victory.Points
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Movement.Template Slide.Push Targeted.Clues Command.Cards Grid.Coverage
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Relative.Movement Action.Event Card.Play.Conflict.Resolution
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## I.Cut..You.Choose Die.Icon.Resolution Elapsed.Real.Time.Ending
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Advantage.Token Storytelling Catch.the.Leader
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Roles.with.Asymmetric.Information Traitor.Game Moving.Multiple.Units
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## Semi.Cooperative.Game Communication.Limits Time.Track Speed.Matching
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Cube.Tower Re.rolling.and.Locking Impulse.Movement Loans Delayed.Purchase
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Deck..Bag..and.Pool.Building Move.Through.Deck Single.Loser.Game Matching
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Induction Physical.Removal Narrative.Choice...Paragraph Pieces.as.Map Follow
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Finale.Ending Order.Counters Contracts Passed.Action.Token King.of.the.Hill
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## Action.Retrieval Force.Commitment Rondel Automatic.Resource.Growth
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## Legacy.Game Dexterity Physical
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
head(ratings)
## BGGId Rating Username
## 1 213788 8 Tonydorrf
## 2 213788 8 tachyon14k
## 3 213788 8 Ungotter
## 4 213788 8 brainlocki3
## 5 213788 8 PPMP
## 6 213788 8 Dychotimer
games_features <- games |>
left_join(mechanics, by = "BGGId") |>
left_join(themes, by = "BGGId") |>
left_join(subcategories, by = "BGGId")
#left_join(designers, by = "BGGId") #added too many columns with too little impact on recs/RMSE
glimpse(games_features)
## Rows: 21,925
## Columns: 432
## $ BGGId <int> 1, 2, 3, …
## $ Name <chr> "Die Mach…
## $ Description <chr> "die mach…
## $ YearPublished <int> 1986, 198…
## $ GameWeight <dbl> 4.3206, 1…
## $ AvgRating <dbl> 7.61428, …
## $ BayesAvgRating <dbl> 7.10363, …
## $ StdDev <dbl> 1.57979, …
## $ MinPlayers <int> 3, 3, 2, …
## $ MaxPlayers <int> 5, 4, 4, …
## $ ComAgeRec <dbl> 14.366667…
## $ LanguageEase <dbl> 1.395833,…
## $ BestPlayers <int> 5, 0, 3, …
## $ GoodPlayers <chr> "['4', '5…
## $ NumOwned <int> 7498, 128…
## $ NumWant <int> 501, 72, …
## $ NumWish <int> 2039, 191…
## $ NumWeightVotes <int> 761, 54, …
## $ MfgPlaytime <int> 240, 30, …
## $ ComMinPlaytime <int> 240, 30, …
## $ ComMaxPlaytime <int> 240, 30, …
## $ MfgAgeRec <int> 14, 12, 1…
## $ NumUserRatings <int> 5354, 562…
## $ NumComments <int> 0, 0, 0, …
## $ NumAlternates <int> 2, 0, 6, …
## $ NumExpansions <int> 0, 0, 0, …
## $ NumImplementations <int> 0, 2, 1, …
## $ IsReimplementation <int> 0, 1, 0, …
## $ Family <chr> "Classic …
## $ Kickstarted <int> 0, 0, 0, …
## $ ImagePath <chr> "https://…
## $ Rank.boardgame <int> 316, 3993…
## $ Rank.strategygames <int> 180, 1577…
## $ Rank.abstracts <int> 21926, 21…
## $ Rank.familygames <int> 21926, 21…
## $ Rank.thematic <int> 21926, 21…
## $ Rank.cgs <int> 21926, 21…
## $ Rank.wargames <int> 21926, 21…
## $ Rank.partygames <int> 21926, 21…
## $ Rank.childrensgames <int> 21926, 21…
## $ Cat.Thematic <int> 0, 0, 0, …
## $ Cat.Strategy <int> 1, 1, 1, …
## $ Cat.War <int> 0, 0, 0, …
## $ Cat.Family <int> 0, 0, 0, …
## $ Cat.CGS <int> 0, 0, 0, …
## $ Cat.Abstract <int> 0, 0, 0, …
## $ Cat.Party <int> 0, 0, 0, …
## $ Cat.Childrens <int> 0, 0, 0, …
## $ Alliances <int> 1, 0, 0, …
## $ Area.Majority...Influence <int> 1, 0, 1, …
## $ Auction.Bidding <int> 1, 0, 0, …
## $ Dice.Rolling <int> 1, 0, 0, …
## $ Hand.Management <int> 1, 0, 1, …
## $ Simultaneous.Action.Selection <int> 1, 0, 0, …
## $ Trick.taking <int> 0, 1, 0, …
## $ Hexagon.Grid <int> 0, 0, 1, …
## $ Once.Per.Game.Abilities <int> 0, 0, 1, …
## $ Set.Collection <int> 0, 0, 1, …
## $ Tile.Placement <int> 0, 0, 1, …
## $ Action.Points <int> 0, 0, 0, …
## $ Investment <int> 0, 0, 0, …
## $ Market <int> 0, 0, 0, …
## $ Square.Grid <int> 0, 0, 0, …
## $ Stock.Holding <int> 0, 0, 0, …
## $ Victory.Points.as.a.Resource <int> 0, 0, 0, …
## $ Enclosure <int> 0, 0, 0, …
## $ Pattern.Building <int> 0, 0, 0, …
## $ Pattern.Recognition <int> 0, 0, 0, …
## $ Modular.Board <int> 0, 0, 0, …
## $ Network.and.Route.Building <int> 0, 0, 0, …
## $ Point.to.Point.Movement <int> 0, 0, 0, …
## $ Melding.and.Splaying <int> 0, 0, 0, …
## $ Negotiation <int> 1, 0, 0, …
## $ Trading <int> 0, 0, 0, …
## $ Push.Your.Luck <int> 0, 0, 0, …
## $ Income <int> 0, 0, 0, …
## $ Race <int> 0, 0, 0, …
## $ Random.Production <int> 0, 0, 0, …
## $ Variable.Set.up <int> 0, 0, 0, …
## $ Roll...Spin.and.Move <int> 0, 0, 0, …
## $ Variable.Player.Powers <int> 0, 0, 0, …
## $ Action.Queue <int> 0, 0, 0, …
## $ Bias <int> 0, 0, 0, …
## $ Grid.Movement <int> 0, 0, 0, …
## $ Lose.a.Turn <int> 0, 0, 0, …
## $ Programmed.Movement <int> 0, 0, 0, …
## $ Scenario...Mission...Campaign.Game <int> 0, 0, 0, …
## $ Voting <int> 0, 0, 0, …
## $ Events <int> 0, 0, 0, …
## $ Paper.and.Pencil <int> 0, 0, 0, …
## $ Player.Elimination <int> 0, 0, 0, …
## $ Role.Playing <int> 0, 0, 0, …
## $ Movement.Points <int> 0, 0, 0, …
## $ Simulation <int> 0, 0, 0, …
## $ Variable.Phase.Order <int> 0, 0, 0, …
## $ Area.Movement <int> 0, 0, 0, …
## $ Commodity.Speculation <int> 0, 0, 0, …
## $ Cooperative.Game <int> 0, 0, 0, …
## $ Deduction <int> 0, 0, 0, …
## $ Sudden.Death.Ending <int> 0, 0, 0, …
## $ Connections <int> 0, 0, 0, …
## $ Highest.Lowest.Scoring <int> 0, 0, 0, …
## $ Betting.and.Bluffing <int> 0, 0, 0, …
## $ Memory <int> 0, 0, 0, …
## $ Score.and.Reset.Game <int> 0, 0, 0, …
## $ Layering <int> 0, 0, 0, …
## $ Map.Addition <int> 0, 0, 0, …
## $ Secret.Unit.Deployment <int> 0, 0, 0, …
## $ Increase.Value.of.Unchosen.Resources <int> 0, 0, 0, …
## $ Ratio...Combat.Results.Table <int> 0, 0, 0, …
## $ Take.That <int> 0, 0, 0, …
## $ Team.Based.Game <int> 0, 0, 0, …
## $ Campaign...Battle.Card.Driven <int> 0, 0, 0, …
## $ Tech.Trees...Tech.Tracks <int> 0, 0, 0, …
## $ Player.Judge <int> 0, 0, 0, …
## $ Chit.Pull.System <int> 0, 0, 0, …
## $ Three.Dimensional.Movement <int> 0, 0, 0, …
## $ Action.Drafting <int> 0, 0, 0, …
## $ Minimap.Resolution <int> 0, 0, 0, …
## $ Stat.Check.Resolution <int> 0, 0, 0, …
## $ Action.Timer <int> 0, 0, 0, …
## $ Pick.up.and.Deliver <int> 0, 0, 0, …
## $ Map.Deformation <int> 0, 0, 0, …
## $ Bingo <int> 0, 0, 0, …
## $ Crayon.Rail.System <int> 0, 0, 0, …
## $ Multiple.Maps <int> 0, 0, 0, …
## $ Hidden.Roles <int> 0, 0, 0, …
## $ Line.Drawing <int> 0, 0, 0, …
## $ Tug.of.War <int> 0, 0, 0, …
## $ Pattern.Movement <int> 0, 0, 0, …
## $ Static.Capture <int> 0, 0, 0, …
## $ Different.Dice.Movement <int> 0, 0, 0, …
## $ Chaining <int> 0, 0, 0, …
## $ Ladder.Climbing <int> 0, 0, 0, …
## $ Predictive.Bid <int> 0, 0, 0, …
## $ Solo...Solitaire.Game <int> 0, 0, 0, …
## $ Line.of.Sight <int> 0, 0, 0, …
## $ Critical.Hits.and.Failures <int> 0, 0, 0, …
## $ Interrupts <int> 0, 0, 0, …
## $ Zone.of.Control <int> 0, 0, 0, …
## $ Bribery <int> 0, 0, 0, …
## $ End.Game.Bonuses <int> 0, 0, 0, …
## $ Area.Impulse <int> 0, 0, 0, …
## $ Worker.Placement <int> 0, 0, 0, …
## $ Measurement.Movement <int> 0, 0, 0, …
## $ Map.Reduction <int> 0, 0, 0, …
## $ Real.Time <int> 0, 0, 0, …
## $ Resource.to.Move <int> 0, 0, 0, …
## $ Mancala <int> 0, 0, 0, …
## $ Ownership <int> 0, 0, 0, …
## $ Kill.Steal <int> 0, 0, 0, …
## $ Hidden.Movement <int> 0, 0, 0, …
## $ Track.Movement <int> 0, 0, 0, …
## $ Deck.Construction <int> 0, 0, 0, …
## $ Drafting <int> 0, 0, 0, …
## $ TableauBuilding <int> 0, 0, 0, …
## $ Prisoner.s.Dilemma <int> 0, 0, 0, …
## $ Hidden.Victory.Points <int> 0, 0, 0, …
## $ Movement.Template <int> 0, 0, 0, …
## $ Slide.Push <int> 0, 0, 0, …
## $ Targeted.Clues <int> 0, 0, 0, …
## $ Command.Cards <int> 0, 0, 0, …
## $ Grid.Coverage <int> 0, 0, 0, …
## $ Relative.Movement <int> 0, 0, 0, …
## $ Action.Event <int> 0, 0, 0, …
## $ Card.Play.Conflict.Resolution <int> 0, 0, 0, …
## $ I.Cut..You.Choose <int> 0, 0, 0, …
## $ Die.Icon.Resolution <int> 0, 0, 0, …
## $ Elapsed.Real.Time.Ending <int> 0, 0, 0, …
## $ Advantage.Token <int> 0, 0, 0, …
## $ Storytelling <int> 0, 0, 0, …
## $ Catch.the.Leader <int> 0, 0, 0, …
## $ Roles.with.Asymmetric.Information <int> 0, 0, 0, …
## $ Traitor.Game <int> 0, 0, 0, …
## $ Moving.Multiple.Units <int> 0, 0, 0, …
## $ Semi.Cooperative.Game <int> 0, 0, 0, …
## $ Communication.Limits <int> 0, 0, 0, …
## $ Time.Track <int> 0, 0, 0, …
## $ Speed.Matching <int> 0, 0, 0, …
## $ Cube.Tower <int> 0, 0, 0, …
## $ Re.rolling.and.Locking <int> 0, 0, 0, …
## $ Impulse.Movement <int> 0, 0, 0, …
## $ Loans <int> 0, 0, 0, …
## $ Delayed.Purchase <int> 0, 0, 0, …
## $ Deck..Bag..and.Pool.Building <int> 0, 0, 0, …
## $ Move.Through.Deck <int> 0, 0, 0, …
## $ Single.Loser.Game <int> 0, 0, 0, …
## $ Matching <int> 0, 0, 0, …
## $ Induction <int> 0, 0, 0, …
## $ Physical.Removal <int> 0, 0, 0, …
## $ Narrative.Choice...Paragraph <int> 0, 0, 0, …
## $ Pieces.as.Map <int> 0, 0, 0, …
## $ Follow <int> 0, 0, 0, …
## $ Finale.Ending <int> 0, 0, 0, …
## $ Order.Counters <int> 0, 0, 0, …
## $ Contracts <int> 0, 0, 0, …
## $ Passed.Action.Token <int> 0, 0, 0, …
## $ King.of.the.Hill <int> 0, 0, 0, …
## $ Action.Retrieval <int> 0, 0, 0, …
## $ Force.Commitment <int> 0, 0, 0, …
## $ Rondel <int> 0, 0, 0, …
## $ Automatic.Resource.Growth <int> 0, 0, 0, …
## $ Legacy.Game <int> 0, 0, 0, …
## $ Dexterity <int> 0, 0, 0, …
## $ Physical <int> 0, 0, 0, …
## $ Adventure <int> 0, 0, 0, …
## $ Fantasy <int> 0, 1, 0, …
## $ Fighting <int> 0, 0, 0, …
## $ Environmental <int> 0, 0, 0, …
## $ Medical <int> 0, 0, 0, …
## $ Economic <int> 1, 0, 0, …
## $ Industry...Manufacturing <int> 0, 0, 0, …
## $ Transportation <int> 0, 0, 0, …
## $ Science.Fiction <int> 0, 0, 0, …
## $ Space.Exploration <int> 0, 0, 0, …
## $ Civilization <int> 0, 0, 0, …
## $ Civil.War <int> 0, 0, 0, …
## $ Movies...TV...Radio.theme <int> 0, 0, 0, …
## $ Novel.based <int> 0, 0, 0, …
## $ Age.of.Reason <int> 0, 0, 0, …
## $ Mythology <int> 0, 0, 0, …
## $ Renaissance <int> 0, 0, 0, …
## $ American.West <int> 0, 0, 0, …
## $ Animals <int> 0, 0, 0, …
## $ Modern.Warfare <int> 0, 0, 0, …
## $ Medieval <int> 0, 0, 1, …
## $ Ancient <int> 0, 0, 0, …
## $ Nautical <int> 0, 0, 0, …
## $ Post.Napoleonic <int> 0, 0, 0, …
## $ Horror <int> 0, 0, 0, …
## $ Farming <int> 0, 0, 0, …
## $ Religious <int> 0, 0, 0, …
## $ Travel <int> 0, 0, 0, …
## $ Murder.Mystery <int> 0, 0, 0, …
## $ Pirates <int> 0, 0, 0, …
## $ Comic.Book...Strip <int> 0, 0, 0, …
## $ Mature...Adult <int> 0, 0, 0, …
## $ Video.Game.Theme <int> 0, 0, 0, …
## $ Spies.Secret.Agents <int> 0, 0, 0, …
## $ Arabian <int> 0, 0, 0, …
## $ Prehistoric <int> 0, 0, 0, …
## $ Trains <int> 0, 0, 0, …
## $ Aviation...Flight <int> 0, 0, 0, …
## $ Zombies <int> 0, 0, 0, …
## $ World.War.II <int> 0, 0, 0, …
## $ Racing <int> 0, 0, 0, …
## $ Pike.and.Shot <int> 0, 0, 0, …
## $ World.War.I <int> 0, 0, 0, …
## $ Humor <int> 0, 0, 0, …
## $ Sports <int> 0, 0, 0, …
## $ Mafia <int> 0, 0, 0, …
## $ American.Indian.Wars <int> 0, 0, 0, …
## $ Napoleonic <int> 0, 0, 0, …
## $ American.Revolutionary.War <int> 0, 0, 0, …
## $ Vietnam.War <int> 0, 0, 0, …
## $ American.Civil.War <int> 0, 0, 0, …
## $ Number <int> 0, 0, 0, …
## $ Trivia <int> 0, 0, 0, …
## $ Music <int> 0, 0, 0, …
## $ Korean.War <int> 0, 0, 0, …
## $ City.Building <int> 0, 0, 0, …
## $ Political <int> 1, 0, 0, …
## $ Math <int> 0, 0, 0, …
## $ Maze <int> 0, 0, 0, …
## $ Theme_Food...Cooking <int> 0, 0, 0, …
## $ Theme_Superheroes <int> 0, 0, 0, …
## $ Theme_Anime...Manga <int> 0, 0, 0, …
## $ Theme_Cthulhu.Mythos <int> 0, 0, 0, …
## $ Theme_Alternate.History <int> 0, 0, 0, …
## $ Theme_Anthropomorphic.Animals <int> 0, 0, 0, …
## $ Theme_Vikings <int> 0, 0, 0, …
## $ Theme_Post.Apocalyptic <int> 0, 0, 0, …
## $ Theme_Time.Travel <int> 0, 0, 0, …
## $ Theme_Colonial <int> 0, 0, 0, …
## $ Theme_Mystery...Cri <int> 0, 0, 0, …
## $ Theme_Robots <int> 0, 0, 0, …
## $ Theme_Retro <int> 0, 0, 0, …
## $ Theme_Mad.Science...Mad.Scientist <int> 0, 0, 0, …
## $ Theme_Mining <int> 0, 0, 0, …
## $ Theme_Art <int> 0, 0, 0, …
## $ Theme_Archaeology...Paleontology <int> 0, 0, 0, …
## $ Theme_Witches <int> 0, 0, 0, …
## $ Theme_Deserts <int> 0, 0, 0, …
## $ Theme_Tropical <int> 0, 0, 0, …
## $ Theme_Steampunk <int> 0, 0, 0, …
## $ Theme_Gardening <int> 0, 0, 0, …
## $ Theme_Sieg <int> 0, 0, 0, …
## $ Theme_Flowers <int> 0, 0, 0, …
## $ Theme_Natur <int> 0, 0, 0, …
## $ Theme_Native.Americans...First.Peoples <int> 0, 0, 0, …
## $ Theme_Circus <int> 0, 0, 0, …
## $ Theme_Ninjas <int> 0, 0, 0, …
## $ Theme_King.Arthur...The.Knights.of.the.Round.Table...Camelot <int> 0, 0, 0, …
## $ Theme_Cyberpunk <int> 0, 0, 0, …
## $ Theme_Submarines <int> 0, 0, 0, …
## $ Theme_Construction <int> 0, 0, 0, …
## $ Theme_Samurai <int> 0, 0, 1, …
## $ Theme_Fantasy.Sports <int> 0, 0, 0, …
## $ Theme_Love...Romanc <int> 0, 0, 0, …
## $ Theme_Biology <int> 0, 0, 0, …
## $ Theme_Kaiju <int> 0, 0, 0, …
## $ Theme_Gladiators <int> 0, 0, 0, …
## $ Theme_City <int> 0, 0, 0, …
## $ Theme_Villainy <int> 0, 0, 0, …
## $ Theme_Weather <int> 0, 0, 0, …
## $ Theme_Amusement.Parks...Theme.Parks <int> 0, 0, 0, …
## $ Theme_Airships...Blimps...Dirigibles...Zeppelins <int> 0, 0, 0, …
## $ Theme_Spooky.Old.Houses <int> 0, 0, 0, …
## $ Theme_Mech.Warfar <int> 0, 0, 0, …
## $ Theme_Polic <int> 0, 0, 0, …
## $ Theme_Books...Libraries <int> 0, 0, 0, …
## $ Theme_Alchemy <int> 0, 0, 0, …
## $ Theme_Tropical.Islands <int> 0, 0, 0, …
## $ Theme_Boardgaming <int> 0, 0, 0, …
## $ Theme_Oil...Gas...Petroleu <int> 0, 0, 0, …
## $ Theme_Arcade.Video.Games <int> 0, 0, 0, …
## $ Theme_Safaris <int> 0, 0, 0, …
## $ Theme_Dreams...Nightmares <int> 0, 0, 0, …
## $ Theme_Evolution <int> 0, 0, 0, …
## $ Theme_Survival <int> 0, 0, 0, …
## $ Theme_Fruit <int> 0, 0, 0, …
## $ Theme_Jail...Prison..Modern. <int> 0, 0, 0, …
## $ Theme_Movie.Industry <int> 0, 0, 0, …
## $ Theme_Animal.Battles <int> 0, 0, 0, …
## $ Theme_Firefighting <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Nintendo <int> 0, 0, 0, …
## $ Theme_Climate.Chang <int> 0, 0, 0, …
## $ Theme_Helicopters <int> 0, 0, 0, …
## $ Theme_Psychology <int> 0, 0, 0, …
## $ Theme_Sci.Fi.Sports <int> 0, 0, 0, …
## $ Theme_Sewing...Knitting...Cloth.Making <int> 0, 0, 0, …
## $ Theme_Beaches <int> 0, 0, 0, …
## $ Theme_Under.the.Sea <int> 0, 0, 0, …
## $ Theme_Aztecs <int> 0, 0, 0, …
## $ Theme_Trees.and.Forests <int> 0, 0, 0, …
## $ Theme_Mayans <int> 0, 0, 0, …
## $ Theme_Chibis <int> 0, 0, 0, …
## $ Theme_US.National.Parks <int> 0, 0, 0, …
## $ Theme_Scienc <int> 0, 0, 0, …
## $ Theme_Chemistry <int> 0, 0, 0, …
## $ Theme_Volcanoes <int> 0, 0, 0, …
## $ Theme_Fictional.Games <int> 0, 0, 0, …
## $ Theme_Druids <int> 0, 0, 0, …
## $ Theme_Music.Making...Makers <int> 0, 0, 0, …
## $ Theme_Psychic.Powers <int> 0, 0, 0, …
## $ Theme_Trash...Garbag <int> 0, 0, 0, …
## $ Theme_Silk.Road <int> 0, 0, 0, …
## $ Theme_Cereal.Games <int> 0, 0, 0, …
## $ Theme_Automotive.Industry <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Tetris <int> 0, 0, 0, …
## $ Theme_TV.Detectives <int> 0, 0, 0, …
## $ Theme_Romance.of.the.Three.Kingdoms <int> 0, 0, 0, …
## $ Theme_Trucks <int> 0, 0, 0, …
## $ Theme_Hanseatic.Leagu <int> 0, 0, 0, …
## $ Theme_Chivalry...Jousting...Tournaments..Medieval.Europe. <int> 0, 0, 0, …
## $ Theme_Cannibals...Cannibalis <int> 0, 0, 0, …
## $ Theme_Canals <int> 0, 0, 0, …
## $ Theme_My.Best.Lif <int> 0, 0, 0, …
## $ Theme_Nuclear.option <int> 0, 0, 0, …
## $ Theme_Astronomy <int> 0, 0, 0, …
## $ Theme_Cemeteries...Graveyards <int> 0, 0, 0, …
## $ Theme_Movies <int> 0, 0, 0, …
## $ Theme_Hackers <int> 0, 0, 0, …
## $ Theme_Pub..Bars..Bistros... <int> 0, 0, 0, …
## $ Theme_Jewelry <int> 0, 0, 0, …
## $ Theme_School...College...University <int> 0, 0, 0, …
## $ Theme_UFOs <int> 0, 0, 0, …
## $ Theme_Templ <int> 0, 0, 0, …
## $ Theme_Mail...Stamps...The.Post.Offic <int> 0, 0, 0, …
## $ Theme_Memes <int> 0, 0, 0, …
## $ Theme_Computer...Information.Technology.Industry <int> 0, 0, 0, …
## $ Theme_Attorneys...Courts <int> 0, 0, 0, …
## $ Theme_Hot.Air.Balloons <int> 0, 0, 0, …
## $ Theme_Journalis <int> 0, 0, 0, …
## $ Theme_Motorcycles <int> 0, 0, 0, …
## $ Theme_Rubik.s.Cub <int> 0, 0, 0, …
## $ Theme_Dolls <int> 0, 0, 0, …
## $ Theme_Traffic...Driving <int> 0, 0, 0, …
## $ Theme_FIFA.World.Cup <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Pokémon <int> 0, 0, 0, …
## $ Theme_Floating.islands.in.the.sky <int> 0, 0, 0, …
## $ Theme_Dieselpunk <int> 0, 0, 0, …
## $ Theme_Disney.Theme.Parks <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Resident.Evil <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..SEGA <int> 0, 0, 0, …
## $ Theme_Teaching.Programming <int> 0, 0, 0, …
## $ Theme_Battle.Royal <int> 0, 0, 0, …
## $ Theme_Earthquakes <int> 0, 0, 0, …
## $ Theme_Bacteria <int> 0, 0, 0, …
## $ Theme_Painting...Paintings <int> 0, 0, 0, …
## $ Theme_Television..TV..Industry <int> 0, 0, 0, …
## $ Theme_Knights.Templar <int> 0, 0, 0, …
## $ Theme_African.Americans <int> 0, 0, 0, …
## $ Theme_Hell <int> 0, 0, 0, …
## $ Theme_Tiki.Cultur <int> 0, 0, 0, …
## $ Theme_Astrology <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..The.Oregon.Trail <int> 0, 0, 0, …
## $ Theme_Māori <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Super.Mario.Bros. <int> 0, 0, 0, …
## $ Theme_Mushrooms <int> 0, 0, 0, …
## $ Theme_Pulp <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Minecraft <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Final.Fantasy <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Carmen.Sandiego <int> 0, 0, 0, …
## $ Theme_Care.Bears <int> 0, 0, 0, …
## $ Theme_Hike.Hiking <int> 0, 0, 0, …
## $ Theme_Inuit.Peoples <int> 0, 0, 0, …
## $ Theme_Perfu <int> 0, 0, 0, …
## $ Theme_Camping <int> 0, 0, 0, …
## $ Theme_Latin.American.Political.Games <int> 0, 0, 0, …
## $ Theme_Spanish.Political.Games <int> 0, 0, 0, …
## $ Theme_Video.Game.Theme..Doo <int> 0, 0, 0, …
## $ Theme_Fashion <int> 0, 0, 0, …
## $ Theme_Geocaching <int> 0, 0, 0, …
## $ Theme_Ecology <int> 0, 0, 0, …
## $ Theme_Chernobyl <int> 0, 0, 0, …
## $ Theme_Photography <int> 0, 0, 0, …
## $ Theme_French.Foreign.Legion <int> 0, 0, 0, …
## $ Theme_Cruise.ships <int> 0, 0, 0, …
## $ Theme_Apache.Tribes <int> 0, 0, 0, …
## $ Theme_Rivers <int> 0, 0, 0, …
## $ Theme_Flags.identification <int> 0, 0, 0, …
## $ Exploration <int> 0, 0, 0, …
## $ Miniatures <int> 0, 0, 0, …
## $ Territory.Building <int> 0, 0, 0, …
## $ Card.Game <int> 0, 1, 0, …
## $ Educational <int> 0, 0, 0, …
## $ Puzzle <int> 0, 0, 0, …
## $ Collectible.Components <int> 0, 0, 0, …
## $ Word.Game <int> 0, 0, 0, …
## $ Print...Play <int> 0, 0, 0, …
## $ Electronic <int> 0, 0, 0, …
games_features$Name <- iconv(games_features$Name, from = "", to = "UTF-8")
#normalizing scaled values
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
games_features <- games_features |>
mutate(across(c(GameWeight, AvgRating, BayesAvgRating),
~ rescale(., to = c(0,1), na.rm = TRUE)))
unique(games_features$BestPlayers)
## [1] 5 0 3 4 6 2 7 8 1 13 12 15 14 9
# creating dummy variables to represent player size
games_features <- games_features|>
mutate(players_1 = ifelse(MinPlayers <= 1 & MaxPlayers >= 1, 1, 0),
players_2 = ifelse(MinPlayers <= 2 & MaxPlayers >= 2, 1, 0),
players_3 = ifelse(MinPlayers <= 3 & MaxPlayers >= 3, 1, 0),
players_4 = ifelse(MinPlayers <= 4 & MaxPlayers >= 4, 1, 0),
players_5 = ifelse(MinPlayers <= 5 & MaxPlayers >= 5, 1, 0),
players_6 = ifelse(MinPlayers <= 6 & MaxPlayers >= 6, 1, 0),
players_7 = ifelse(MinPlayers <= 7 & MaxPlayers >= 7, 1, 0),
players_8plus = ifelse(MaxPlayers >= 8, 1, 0))
# creating dummy variables to represent length
games_features <- games_features |>
mutate(playtime_short = ifelse(ComMaxPlaytime <= 30, 1, 0),
playtime_medium = ifelse(ComMaxPlaytime > 30 & ComMaxPlaytime <= 90, 1, 0),
playtime_long = ifelse(ComMaxPlaytime > 90 & ComMaxPlaytime <=240, 1, 0),
playtime_epic = ifelse(ComMaxPlaytime > 240,1,0))
#feature matrix
feature_matrix <- games_features |>
select(-c(Description, YearPublished, StdDev, MinPlayers, MaxPlayers, BestPlayers, GoodPlayers, ComAgeRec, NumOwned, NumWant, NumWish, NumWeightVotes, MfgPlaytime, ComMinPlaytime, ComMaxPlaytime, MfgAgeRec, NumUserRatings, NumComments, NumAlternates, NumExpansions, NumImplementations, IsReimplementation, Family, ImagePath, Rank.boardgame, Rank.strategygames, Rank.abstracts, Rank.familygames, Rank.partygames, Rank.thematic, Rank.cgs, Rank.wargames, Rank.childrensgames, LanguageEase))
rownames(feature_matrix) <- games$BGGId
fmn <- feature_matrix |>
select(-c(Name, BGGId))
fmn <- as.matrix(fmn)
fmn_scaled <- scale(fmn)
library(proxy)
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
#similarity_matrix <- proxy::simil(fmn, method = "cosine")
# switched to only need to compute similarity for one game at a time for processing ease
At this stage, rather than computing a full similarity matrix, to save memory I will only generate a similarity vector to a given game that is input in order to find games of a similar content type.
recommend_games <- function(game_name, fmn_scaled, games_features, top_n = 5) {
if (!(game_name %in% games_features$Name)) {
stop("Game not found.")
}
idx <- which(games_features$Name == game_name)
target_vector <- fmn_scaled[idx, ]
# Normalize target vector
target_norm <- sqrt(sum(target_vector^2))
target_vector_norm <- target_vector / target_norm
# Normalize all rows in fmn_scaled
fmn_norms <- sqrt(rowSums(fmn_scaled^2))
fmn_scaled_norm <- fmn_scaled / fmn_norms
# Compute cosine similarity as dot product of normalized vectors
sim_scores <- fmn_scaled_norm %*% target_vector_norm
sim_scores[idx] <- NA # exclude itself
top_idx <- order(sim_scores, decreasing = TRUE)[1:top_n]
result <- games_features[top_idx, ]
result$Similarity <- sim_scores[top_idx]
return(result[, c("Name", "AvgRating", "Similarity")])
}
Testing the content model
recommend_games("Catan", fmn_scaled, games_features, top_n = 5)
## Name AvgRating Similarity
## 21184 Catan: 25th Anniversary Edition 0.7777968 0.9842892
## 20922 Catan: 25 Jahre Jubiläums-Edition 0.7492494 0.9783564
## 21838 CATAN: 3D Edition 0.7312430 0.9539107
## 16257 Rivals for Catan: Deluxe 0.6924172 0.7523690
## 14705 Frontier Stations 0.6341018 0.7509682
recommend_games("Cranium", fmn_scaled, games_features, top_n = 5)
## Name AvgRating Similarity
## 8197 Cranium WOW 0.5738232 0.9354405
## 10355 Cranium: Disney Family Edition 0.5294693 0.9333669
## 10878 Cranium Brain Breaks 0.3810172 0.8855267
## 4623 Oodles of Doodles 0.5812344 0.7887374
## 7224 Pictionary Mania 0.5652837 0.7878504
recommend_games("Chrononauts", fmn_scaled, games_features, top_n = 5)
## Name AvgRating Similarity
## 4856 Early American Chrononauts 0.6271233 0.6935418
## 18229 1918/1919: Storm in the West 0.7694749 0.6501301
## 9949 Back to the Future: The Card Game 0.5368276 0.6110098
## 15089 Sealion: The Proposed German Invasion of England 0.7106355 0.5551929
## 91 Fortress America 0.6371774 0.5441202
Results make sense, are clearly related/similar thematically
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
user_item <- ratings |>
select(Username, BGGId, Rating)
# Create integer IDs for users and games
user_item <- user_item |>
mutate(
user_id = as.integer(factor(Username)),
game_id = match(as.character(BGGId), rownames(fmn))
)
if (any(is.na(user_item$game_id))) {
warning("Some BGGId in user ratings not found in feature matrix rownames!")
}
# Build sparse matrix
rating_matrix <- sparseMatrix(
i = user_item$user_id,
j = user_item$game_id,
x = user_item$Rating
)
rownames(rating_matrix) <- levels(factor(user_item$Username))
colnames(rating_matrix) <- rownames(fmn)
user_map <- ratings |>
distinct(Username) |>
mutate(user_id = as.integer(factor(Username)))
game_map <- ratings |>
distinct(BGGId) |>
mutate(game_id = as.integer(factor(BGGId)))
# 3. Join IDs back to ratings
ratings <- ratings |>
left_join(user_map, by = "Username") |>
left_join(game_map, by = "BGGId")
game_map_with_names <- game_map |>
left_join(games_features %>% select(BGGId, Name), by = "BGGId")
exporting for recosystem
set.seed(589623)
n <- nrow(user_item)
train_indices <- sample(n, size = 0.8 * n)
train_data <- user_item[train_indices, ]
test_data <- user_item[-train_indices, ]
write.table(train_data[, c("user_id", "game_id", "Rating")],
file = "train_data.txt", sep = " ", row.names = FALSE, col.names = FALSE)
write.table(test_data[, c("user_id", "game_id", "Rating")],
file = "test_data.txt", sep = " ", row.names = FALSE, col.names = FALSE)
r <- Reco()
r$train(data_file("train_data.txt"),
opts = list(dim = 30, costp_l2 = 0.1, costq_l2 = 0.1,
lrate = 0.1, niter = 20, nthread = 2, verbose = TRUE))
## iter tr_rmse obj
## 0 1.7481 6.9840e+07
## 1 1.2184 4.5324e+07
## 2 1.1966 4.4294e+07
## 3 1.1781 4.3639e+07
## 4 1.1584 4.3017e+07
## 5 1.1411 4.2530e+07
## 6 1.1258 4.2115e+07
## 7 1.1114 4.1767e+07
## 8 1.0986 4.1479e+07
## 9 1.0873 4.1220e+07
## 10 1.0775 4.1017e+07
## 11 1.0688 4.0834e+07
## 12 1.0613 4.0675e+07
## 13 1.0547 4.0566e+07
## 14 1.0485 4.0458e+07
## 15 1.0431 4.0359e+07
## 16 1.0381 4.0271e+07
## 17 1.0335 4.0199e+07
## 18 1.0294 4.0136e+07
## 19 1.0256 4.0075e+07
r$predict(data_file("test_data.txt"), out_file("test_preds.txt"))
## prediction output generated at test_preds.txt
# Read predictions and actual ratings
pred_ratings <- scan("test_preds.txt")
true_ratings <- test_data$Rating
rmse <- sqrt(mean((pred_ratings - true_ratings)^2))
cat("RMSE:", rmse)
## RMSE: 1.14356
This is lowest RMSE found thus far
hybrid score = alpha * cf predicted rating + (1-alpha) * content similarity
target_user_id <- 94972
all_game_ids <- 1:nrow(fmn)
# Prepare a data.frame with all game_ids for this user
to_predict <- data.frame(
user_id = rep(target_user_id, length(all_game_ids)),
game_id = all_game_ids
)
# Save to file for recosystem predict
write.table(to_predict, "to_predict.txt", row.names = FALSE, col.names = FALSE, sep = " ")
r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))
## prediction output generated at predicted_ratings.txt
predicted_ratings <- scan("predicted_ratings.txt")
target_game_name <- "Cranium" # Example
target_game_bggid <- games_features |>
filter(Name == target_game_name) |>
pull(BGGId)
target_idx <- match(as.character(target_game_bggid), rownames(fmn))
target_vector <- fmn[target_idx, , drop = FALSE]
similarity_vector <- apply(fmn, 1, function(x) coop::cosine(x, as.numeric(target_vector)))
normalize <- function(x) {
rng <- range(x, na.rm = TRUE)
if (diff(rng) == 0) return(rep(0, length(x)))
(x - rng[1]) / (rng[2] - rng[1])
}
cf_norm <- normalize(predicted_ratings)
content_norm <- normalize(similarity_vector)
alpha <- 0.7
hybrid_scores <- alpha * cf_norm + (1 - alpha) * content_norm
top_indices <- order(hybrid_scores, decreasing = TRUE)[1:10]
recommended_games <- games_features[top_indices, ]
recommended_games$hybrid_score <- hybrid_scores[top_indices]
print(recommended_games[, c("Name", "AvgRating", "hybrid_score")])
## Name AvgRating
## 6978 Celebrities 0.6971394
## 8501 Time's Up! Title Recall! 0.7509918
## 19914 Draw Your Own Conclusions 0.6566839
## 15888 Monikers: Something Something 0.7955260
## 13105 Telestrations: 12 Player Party Pack 0.7563508
## 20305 Monikers: Serious Nonsense 0.8216525
## 6181 The von Reisswitz Kriegsspiel: The Prussian Army Wargame 0.8107734
## 8560 Time's Up! Deluxe 0.7266504
## 5079 SEEKRIEG 5 0.8258394
## 14843 Monikers: Shmonikers 0.7891910
## hybrid_score
## 6978 0.8237585
## 8501 0.8076922
## 19914 0.8062398
## 15888 0.7914088
## 13105 0.7788029
## 20305 0.7764042
## 6181 0.7709252
## 8560 0.7696423
## 5079 0.7664679
## 14843 0.7647787
library(dplyr)
library(coop)
# Define user and game for diagnostics
target_user_name <- "doctorchewy"
target_game_name <- "Cranium"
# 1. Find user_id and game_id (numeric indices)
target_user_id <- user_item |>
filter(Username == target_user_name) |>
pull(user_id) %>% unique()
target_game_bggid <- games_features |>
filter(Name == target_game_name) |>
pull(BGGId) |>
as.character()
target_game_id <- match(target_game_bggid, rownames(fmn))
# 2. Predicted ratings vector for this user across all games
all_game_ids <- 1:nrow(fmn)
to_predict <- data.frame(
user_id = rep(target_user_id, length(all_game_ids)),
game_id = all_game_ids
)
write.table(to_predict, "to_predict.txt", row.names=FALSE, col.names=FALSE, sep=" ")
r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))
## prediction output generated at predicted_ratings.txt
predicted_ratings <- scan("predicted_ratings.txt")
# 3. Content similarity vector for target game
target_vector <- fmn[target_game_id, , drop=FALSE]
similarity_vector <- apply(fmn, 1, function(x) coop::cosine(x, as.numeric(target_vector)))
# 4. Normalize both vectors
normalize <- function(x) {
rng <- range(x, na.rm=TRUE)
if (diff(rng) == 0) return(rep(0, length(x)))
(x - rng[1]) / (rng[2] - rng[1])
}
cf_norm <- normalize(predicted_ratings)
content_norm <- normalize(similarity_vector)
# 5. Hybrid scores
alpha <- 0.7
hybrid_scores <- alpha * cf_norm + (1 - alpha) * content_norm
# 6. Create a diagnostics table
diagnostics <- data.frame(
game_id = all_game_ids,
BGGId = rownames(fmn),
Name = games_features$Name,
predicted_rating = predicted_ratings,
cf_norm = cf_norm,
content_sim = similarity_vector,
content_norm = content_norm,
hybrid_score = hybrid_scores
)
# 7. Sort by hybrid score descending
diagnostics <- diagnostics |>
arrange(desc(hybrid_score))
# 8. Print top 20 recommendations
print(diagnostics[1:20, c("Name", "predicted_rating", "content_sim", "hybrid_score")])
## Name
## 22303 Celebrities
## 36553 Time's Up! Title Recall!
## 273349 Draw Your Own Conclusions
## 195709 Monikers: Something Something
## 153016 Telestrations: 12 Player Party Pack
## 283152 Monikers: Serious Nonsense
## 16957 The von Reisswitz Kriegsspiel: The Prussian Army Wargame
## 37141 Time's Up! Deluxe
## 10683 SEEKRIEG 5
## 179448 Monikers: Shmonikers
## 317434 Exit: The Game – Advent Calendar: The Mystery of the Ice Cave
## 221248 Monikers: The Shut Up & Sit Down Nonsense Box
## 283151 Monikers: Classics
## 225694 Decrypto
## 156546 Monikers
## 1353 Time's Up!
## 300905 Top Ten
## 230262 Time's Up! Party Edition
## 255249 Monikers: More Monikers
## 288798 Kings of War (Third Edition)
## predicted_rating content_sim hybrid_score
## 22303 8.17639 0.7547290 0.8237585
## 36553 8.35746 0.6585509 0.8076922
## 273349 8.85681 0.5331518 0.8062398
## 195709 8.30380 0.6183834 0.7914088
## 153016 7.80184 0.6985402 0.7788029
## 283152 8.41862 0.5416780 0.7764042
## 16957 8.76465 0.4401870 0.7709252
## 37141 7.84216 0.6589059 0.7696423
## 10683 9.10264 0.3439728 0.7664679
## 179448 8.06421 0.5893809 0.7647787
## 317434 8.28106 0.5231809 0.7605491
## 221248 7.85606 0.6184773 0.7582819
## 283151 7.90600 0.6015733 0.7567996
## 225694 7.83545 0.6168093 0.7562445
## 156546 8.07902 0.5528170 0.7546701
## 1353 7.53506 0.6834772 0.7544313
## 300905 7.98350 0.5675750 0.7521194
## 230262 7.52977 0.6670239 0.7489973
## 255249 8.13832 0.5187112 0.7486092
## 288798 8.38108 0.4561433 0.7474113
# 9. Inspect how many ratings this user has given
user_ratings <- user_item |> filter(user_id == target_user_id)
cat("Number of ratings by user:", nrow(user_ratings), "\n")
## Number of ratings by user: 74
# 10. See top rated games by this user
user_top_ratings <- user_ratings |>
left_join(games_features |> select(BGGId, Name), by = c("BGGId" = "BGGId")) |>
arrange(desc(Rating))
print(head(user_top_ratings, 10))
## Username BGGId Rating user_id game_id
## 1 doctorchewy 12493 10 94972 5504
## 2 doctorchewy 115746 10 94972 11221
## 3 doctorchewy 233078 10 94972 17835
## 4 doctorchewy 178900 10 94972 14812
## 5 doctorchewy 43111 10 94972 9191
## 6 doctorchewy 54625 10 94972 9374
## 7 doctorchewy 181279 10 94972 14990
## 8 doctorchewy 128671 10 94972 11737
## 9 doctorchewy 162009 10 94972 13710
## 10 doctorchewy 96848 9 94972 10666
## Name
## 1 Twilight Imperium: Third Edition
## 2 War of the Ring: Second Edition
## 3 Twilight Imperium: Fourth Edition
## 4 Codenames
## 5 Chaos in the Old World
## 6 Space Hulk (Third Edition)
## 7 Fury of Dracula (Third/Fourth Edition)
## 8 Spartacus: A Game of Blood and Treachery
## 9 The U.S. Civil War
## 10 Mage Knight Board Game
get_predicted_ratings <- function(target_user_name, fmn_scaled, games_features, game_map, user_ratings, k = 20) {
# Step 1: Get the target user_id from the user_ratings table
target_user_id <- ratings |>
filter(Username == target_user_name) |>
distinct(user_id) |>
pull(user_id)
if (length(target_user_id) == 0) {
stop("Username not found.")
}
# Step 2: Filter game_map to include only valid games in fmn
valid_bgg_ids <- as.integer(rownames(fmn))
filtered_game_map <- game_map |>
filter(BGGId %in% valid_bgg_ids)
all_game_ids <- filtered_game_map$game_id
# Step 3: Create the to_predict file
print(target_user_id)
length(target_user_id)
to_predict <- data.frame(
user_id = rep(target_user_id, length(all_game_ids)),
game_id = all_game_ids
)
cat("Number of predictions to make:", nrow(to_predict), "\n")
stopifnot(length(unique(to_predict$game_id)) == length(to_predict$game_id))
write.table(to_predict, "to_predict.txt", row.names = FALSE, col.names = FALSE, sep = " ")
# Step 4: Predict
unlink("predicted_ratings.txt")
r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))
predicted_ratings <- scan("predicted_ratings.txt")
if (length(predicted_ratings) != length(all_game_ids)) {
stop("Prediction count mismatch.")
}
# Step 5: Combine predictions by BGGId
recommended_games <- data.frame(
game_id = all_game_ids,
predicted_rating = predicted_ratings
) |>
left_join(filtered_game_map, by = "game_id") # adds BGGId
# Step 6: Merge with game using BGGId
recommendations <- recommended_games |>
left_join(games, by = "BGGId") |>
arrange(desc(predicted_rating)) |>
head(k)
return(recommendations)
}
target_user_name <- "doctorchewy"
recommendations <- get_predicted_ratings(
target_user_name,
fmn_scaled,
games_features,
game_map,
train_data,
k = 20
)
## [1] 94972
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
print(recommendations[, c("Name", "AvgRating", "predicted_rating")])
## Name
## 1 1985: Deadly Northern Lights
## 2 Aeon's End: Legacy of Gravehold
## 3 Roads to Gettysburg II: Lee Strikes North
## 4 Go First Dice
## 5 SAGA Rulebook (2nd Edition)
## 6 Anno Domini 1666
## 7 Exceed: Street Fighter – Ryu Box
## 8 Uprising: Curse of the Last Emperor
## 9 Hood Strikes North: The Tennessee Campaign, Fall 1864
## 10 Panzers Last Stand: Battles for Budapest, 1945
## 11 System Gateway (fan expansion for Android: Netrunner)
## 12 Stonewall Jackson's Way II: Battles of Bull Run
## 13 SEEKRIEG 5
## 14 The Battle of Five Armies Collector's Edition
## 15 Tenkatoitsu
## 16 The Lord of the Rings: The Card Game – Two-Player Limited Edition Starter
## 17 Middle-earth Strategy Battle Game: The Lord of the Rings – Battle of Pelennor Fields
## 18 Primer: The Gamer's Source for Battles from the Age of Reason
## 19 Last Chance for Victory: The Battle of Gettysburg
## 20 Pax Renaissance: 2nd Edition
## AvgRating predicted_rating
## 1 9.00426 9.54133
## 2 8.98333 9.39428
## 3 8.81429 9.25537
## 4 8.38298 9.25001
## 5 8.26404 9.24569
## 6 8.83424 9.23730
## 7 7.98966 9.23215
## 8 8.52968 9.22590
## 9 8.75643 9.18448
## 10 8.84286 9.16529
## 11 9.48333 9.12131
## 12 8.60965 9.11160
## 13 8.36897 9.10264
## 14 8.94429 9.09501
## 15 8.43846 9.09098
## 16 8.70610 9.08256
## 17 8.68023 9.07252
## 18 9.13610 9.05598
## 19 8.78548 9.04711
## 20 8.64195 9.04528
# uses hybrid score that is only based on one user, not used in the end
# 1. Get the user's predicted CF ratings (a numeric vector with names = game IDs)
cf_predicted_ratings_user <- predicted_ratings
# 2. Create user profile vector by averaging features of games user rated highly
#liked_games <- test_data %>% filter(user_id == target_user_id & Rating >= 5) %>% pull(BGGId)
#user_profile <- colMeans(fmn_scaled[rownames(fmn_scaled) %in% liked_games, ])
compute_user_profile <- function(target_user_name, test_data, fmn) {
liked_games <- test_data |>
filter(Username == target_user_name & Rating >= 5) |>
pull(BGGId)
# Check if the user has any liked games
if (length(liked_games) == 0) {
stop("No relevant games found for the user.")
}
# Ensure the liked games exist in the feature matrix (fmn)
missing_games <- setdiff(liked_games, rownames(fmn))
if (length(missing_games) > 0) {
stop("Some liked games are missing from the feature matrix.")
}
# Calculate the user profile
user_profile <- colMeans(fmn[rownames(fmn) %in% liked_games, ])
return(user_profile)
}
# Example usage:
target_user_name <- "doctorchewy" # Replace with another user ID
user_profile <- compute_user_profile(target_user_name, test_data, fmn)
# 3. Compute content similarity of all games to user profile
content_similarities <- apply(fmn_scaled, 1, function(x) coop::cosine(x, user_profile))
# 4. Normalize and combine
cf_norm <- normalize(cf_predicted_ratings_user)
content_norm <- normalize(content_similarities)
alpha <- 0.7
hybrid_scores_user <- alpha * cf_norm + (1 - alpha) * content_norm
recommend_top_k_table <- function(hybrid_scores_user, games_features, k = 20) {
top_indices <- order(hybrid_scores_user, decreasing = TRUE)[1:k]
top_k <- games_features[top_indices, ] |>
mutate(rank = 1:k,
hybrid_score = hybrid_scores_user[top_indices])
return(top_k)
}
# 5. Get top k recommendations
top_k_games <- recommend_top_k_table(hybrid_scores_user, games_features, k = 20)
print(top_k_games[, c("Name", "AvgRating", "hybrid_score")])
## Name AvgRating
## 20790 1985: Deadly Northern Lights 0.8974378
## 21759 Aeon's End: Legacy of Gravehold 0.8950790
## 20367 Uprising: Curse of the Last Emperor 0.8439517
## 19402 Dungeon Universalis 0.8832036
## 9538 War of the Ring Collector's Edition 0.8596015
## 14811 Last Blitzkrieg: Wacht am Rhein, The Battle of the Bulge 0.8657089
## 18297 Here I Stand: 500th Anniversary Edition 0.8512052
## 21632 Middara: Unintentional Malum – Act 1 Version 1.1 0.8998835
## 18641 Mage Knight: Ultimate Edition 0.8912516
## 14398 High Frontier (Third Edition) 0.8112152
## 14144 Middara: Unintentional Malum – Act 1 0.8797414
## 21494 Frostgrave: Second Edition 0.8828114
## 18635 SAGA Rulebook (2nd Edition) 0.8140136
## 5079 SEEKRIEG 5 0.8258394
## 11221 War of the Ring: Second Edition 0.8417202
## 16870 Roads to Gettysburg II: Lee Strikes North 0.8760278
## 21445 Last Light 0.8405966
## 19334 Middle-Earth Strategy Battle Game: Rules Manual 0.8864967
## 20977 Aeon's End: Outcasts 0.8698585
## 18108 Anno Domini 1666 0.8782762
## hybrid_score
## 20790 0.9322644
## 21759 0.9299475
## 20367 0.9284137
## 19402 0.9240713
## 9538 0.9149444
## 14811 0.9129439
## 18297 0.9109864
## 21632 0.9105744
## 18641 0.9102848
## 14398 0.9042349
## 14144 0.9018775
## 21494 0.8996732
## 18635 0.8979558
## 5079 0.8970580
## 11221 0.8945543
## 16870 0.8926004
## 21445 0.8924940
## 19334 0.8919679
## 20977 0.8912792
## 18108 0.8897442
#actual top_k_games code
top_k_games <- recommend_top_k_table(hybrid_scores_user, games_features, k = 20)
print(top_k_games[, c("Name", "AvgRating", "hybrid_score")])
## Name AvgRating
## 20790 1985: Deadly Northern Lights 0.8974378
## 21759 Aeon's End: Legacy of Gravehold 0.8950790
## 20367 Uprising: Curse of the Last Emperor 0.8439517
## 19402 Dungeon Universalis 0.8832036
## 9538 War of the Ring Collector's Edition 0.8596015
## 14811 Last Blitzkrieg: Wacht am Rhein, The Battle of the Bulge 0.8657089
## 18297 Here I Stand: 500th Anniversary Edition 0.8512052
## 21632 Middara: Unintentional Malum – Act 1 Version 1.1 0.8998835
## 18641 Mage Knight: Ultimate Edition 0.8912516
## 14398 High Frontier (Third Edition) 0.8112152
## 14144 Middara: Unintentional Malum – Act 1 0.8797414
## 21494 Frostgrave: Second Edition 0.8828114
## 18635 SAGA Rulebook (2nd Edition) 0.8140136
## 5079 SEEKRIEG 5 0.8258394
## 11221 War of the Ring: Second Edition 0.8417202
## 16870 Roads to Gettysburg II: Lee Strikes North 0.8760278
## 21445 Last Light 0.8405966
## 19334 Middle-Earth Strategy Battle Game: Rules Manual 0.8864967
## 20977 Aeon's End: Outcasts 0.8698585
## 18108 Anno Domini 1666 0.8782762
## hybrid_score
## 20790 0.9322644
## 21759 0.9299475
## 20367 0.9284137
## 19402 0.9240713
## 9538 0.9149444
## 14811 0.9129439
## 18297 0.9109864
## 21632 0.9105744
## 18641 0.9102848
## 14398 0.9042349
## 14144 0.9018775
## 21494 0.8996732
## 18635 0.8979558
## 5079 0.8970580
## 11221 0.8945543
## 16870 0.8926004
## 21445 0.8924940
## 19334 0.8919679
## 20977 0.8912792
## 18108 0.8897442
get_top_k_games_for_user <- function(hybrid_scores_user, games_features, k = 20) {
top_indices <- order(hybrid_scores_user, decreasing = TRUE)[1:k]
top_games <- games_features[top_indices, ]
return(top_games$BGGId)
}
compute_precision_recall_user <- function(target_user_name, top_k_games, test_data, k = 20) {
# Convert both to character to ensure valid comparison
user_relevant <- test_data |>
filter(Username == target_user_name & Rating >= 5) |>
pull(BGGId) |>
as.character()
recommended <- as.character(top_k_games)
if(length(user_relevant) == 0) {
warning(glue::glue("User {target_user_name} has no relevant items in test set."))
return(list(precision = NA, recall = NA))
}
hits <- length(intersect(recommended, user_relevant))
precision <- hits / k
recall <- hits / length(user_relevant)
list(precision = precision, recall = recall)
}
test_games <- test_data |>
filter(user_id == target_user_id) |>
pull(game_id)
# Check overlap
recommended_game_ids <- recommendations$game_id
precision_at_k <- sum(recommended_game_ids %in% test_games) / length(recommended_game_ids)
recall_at_k <- sum(recommended_game_ids %in% test_games) / length(test_games)
cat("Precision@K:", precision_at_k, "\n")
## Precision@K: 0
cat("Recall@K:", recall_at_k, "\n")
## Recall@K: 0
# checking using another user
target_user_name <- "jedipanda"
recommendations <- get_predicted_ratings(
target_user_name,
fmn_scaled,
games_features,
game_map,
train_data,
k = 20
)
## [1] 172917
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
print(recommendations[, c("Name", "AvgRating", "predicted_rating")])
## Name
## 1 Uprising: Curse of the Last Emperor
## 2 Ark Nova
## 3 Aeon's End: Legacy of Gravehold
## 4 The Lord of the Rings: The Card Game – Two-Player Limited Edition Starter
## 5 Aeolis
## 6 Great Western Trail (Second Edition)
## 7 Small World Designer Edition
## 8 Pandemic Legacy: Season 0
## 9 The Crew: Mission Deep Sea
## 10 Hungarian Rhapsody: The Eastern Front in Hungary
## 11 Sinners
## 12 Clank!: Legacy – Acquisitions Incorporated
## 13 Ultimate Railroads
## 14 Euthia: Torment of Resurrection
## 15 Frostgrave: Second Edition
## 16 System Gateway (fan expansion for Android: Netrunner)
## 17 Primer: The Gamer's Source for Battles from the Age of Reason
## 18 El Dorado Canyon
## 19 From Salerno to Rome: World War II – The Italian Campaign, 1943-1944
## 20 Dungeon Universalis
## AvgRating predicted_rating
## 1 8.52968 8.62236
## 2 8.47839 8.51781
## 3 8.98333 8.33894
## 4 8.70610 8.30722
## 5 9.25000 8.29257
## 6 8.45802 8.23670
## 7 8.99640 8.13869
## 8 8.64327 8.13128
## 9 8.44932 8.09217
## 10 8.50000 8.08981
## 11 8.62308 8.03312
## 12 8.69604 8.03202
## 13 8.39017 8.02636
## 14 8.87023 8.02532
## 15 8.87448 8.02442
## 16 9.48333 8.01960
## 17 9.13610 8.01839
## 18 8.73600 8.00758
## 19 8.46875 7.98916
## 20 8.87796 7.98526
# Evaluating Precision@K and Recall@K for a sample of users
evaluate_precision_recall <- function(usernames, k = 20) {
results <- data.frame(
Username = character(),
Precision = numeric(),
Recall = numeric(),
stringsAsFactors = FALSE
)
for (name in usernames) {
cat("Evaluating:", name, "\n")
# Try-catch in case of errors like missing user
tryCatch({
# Get top-K recommended games
recommendations <- get_predicted_ratings(
target_user_name = name,
fmn_scaled = fmn_scaled,
games_features = games_features,
game_map = game_map,
user_ratings = ratings,
k = k
)
recommended_bggids <- recommendations$BGGId
# Get actual relevant games from the test set (rated > threshold)
relevant_items <- ratings |>
filter(Username == name, Rating >= 7) |>
pull(BGGId)
hits <- sum(recommended_bggids %in% relevant_items)
precision <- hits / k
recall <- ifelse(length(relevant_items) == 0, NA, hits / length(relevant_items))
results <- rbind(results, data.frame(
Username = name,
Precision = precision,
Recall = recall
))
}, error = function(e) {
message("Error for user ", name, ": ", e$message)
})
}
return(results)
}
# Get top 10 users by # of ratings
top_users <- ratings |>
group_by(Username) |>
summarise(num_rated = n()) |>
arrange(desc(num_rated)) |>
slice(1:10) |>
pull(Username)
# Run evaluation
eval_results <- evaluate_precision_recall(top_users, k = 20)
## Evaluating: oldgoat3769967
## [1] 272394
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: warta
## [1] 392170
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: leffe dubbel
## [1] 210557
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: TomVasel
## [1] 373548
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: Doel
## [1] 95188
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: Hessu68
## [1] 152540
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: Walt Mulder
## [1] 391362
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: fake
## [1] 116827
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: Tolkana
## [1] 372562
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## Evaluating: Nap16
## [1] 259041
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
print(eval_results)
## Username Precision Recall
## 1 oldgoat3769967 0.10 0.0006406150
## 2 warta 0.10 0.0003658314
## 3 leffe dubbel 0.15 0.0015576324
## 4 TomVasel 0.15 0.0010952903
## 5 Doel 0.10 0.0004479283
## 6 Hessu68 0.15 0.0033519553
## 7 Walt Mulder 0.25 0.0012651822
## 8 fake 0.00 0.0000000000
## 9 Tolkana 0.10 0.0005083884
## 10 Nap16 0.05 0.0004955401
#checking novelty of generated recommendations
calculate_novelty <- function(recommendations) {
ranks <- as.numeric(as.character(recommendations$NumUserRatings))
# Convert ranks to popularity score between 0 and 1 (higher rank means less popular)
popularity <- ranks / max(ranks)
# Novelty = average of -log2(popularity)
novelty_scores <- -log2(popularity)
mean(novelty_scores)
}
calculate_novelty(recommendations)
## [1] 4.78752
sim_matrix <- coop::cosine(t(fmn_scaled))
#forming sparse matrix for all combinations
library(Matrix)
sms <- as(sim_matrix, 'dgCMatrix')
calculate_diversity <- function(recommendations, sms, fmn_scaled) {
recommended_bggids <- as.character(recommendations$BGGId)
all_bggids <- rownames(fmn_scaled)
top_game_indices <- match(recommended_bggids, all_bggids)
if (any(is.na(top_game_indices))) {
warning("Some recommended games not found in fmn_scaled rownames.")
recommended_bggids <- recommended_bggids[!is.na(top_game_indices)]
top_game_indices <- top_game_indices[!is.na(top_game_indices)]
}
# Extract submatrix of similarities among recommended games
sub_sim <- sim_matrix[top_game_indices, top_game_indices]
# Diversity = average dissimilarity = 1 - average similarity
n <- nrow(sub_sim)
if (n <= 1) return(0)
# Remove diagonal by setting it to NA (self-similarity = 1)
diag(sub_sim) <- NA
avg_similarity <- mean(sub_sim, na.rm = TRUE)
diversity <- 1 - avg_similarity
return(diversity)
}
#checking diversity of sample generated recommendations
calculate_diversity(recommendations, sms, fmn_scaled)
## [1] 0.9378894
users_vector <- unique(user_ratings$Username)
# users_vector: character vector of usernames to evaluate
compute_avg_diversity <- function(users_vector, k = 20, ...) {
diversity_scores <- numeric(length(users_vector))
for (i in seq_along(users_vector)) {
user <- users_vector[i]
# Get top-k recommendations for this user
recs <- get_predicted_ratings(user, fmn_scaled, games_features, game_map, ratings, k)
# Calculate diversity for this user's recommendations
diversity_scores[i] <- calculate_diversity(recs, sms, fmn_scaled)
}
avg_diversity <- mean(diversity_scores, na.rm = TRUE)
return(list(
per_user_diversity = diversity_scores,
avg_diversity = avg_diversity
))
}
# Example from known usernames:
usernames_to_test <- c("doctorchewy", "jedipanda", "oldgoat3769967", "leffe dubbel", "TomVasel")
diversity_results <- compute_avg_diversity(usernames_to_test, k = 20)
## [1] 94972
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## [1] 172917
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## [1] 272394
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## [1] 210557
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
## [1] 373548
## Number of predictions to make: 21925
## prediction output generated at predicted_ratings.txt
print(diversity_results$avg_diversity)
## [1] 0.9231404
print(diversity_results$per_user_diversity)
## [1] 0.9164356 0.9378894 0.9118758 0.9068803 0.9426207
#serendipitous discovery - something not in the 1000 most rated games
popularity_threshold <- sort(games$NumUserRatings, decreasing = TRUE)[1000]
print(popularity_threshold)
## [1] 3531
# this represents .86% of the userbase
calculate_serendipity <- function(recommendations, ratings, target_user_name, k = 20, threshold = 5) {
# Filter user’s liked games in test set (relevant items)
user_liked_games <- ratings |>
filter(Username == target_user_name & Rating >= threshold) |>
pull(BGGId) |>
as.character()
if (length(user_liked_games) == 0) {
warning(paste("User", target_user_name, "has no relevant items in test set."))
return(NA)
}
recs_k <- recommendations$BGGId[1:k] %>% as.character()
popular_games <- games_features |>
filter(NumUserRatings >= popularity_threshold) |>
pull(BGGId) |>
as.character()
# Serendipitous recommendations exclude liked and popular games
serendipitous_recs <- setdiff(recs_k, union(user_liked_games, popular_games))
serendipity_score <- length(serendipitous_recs) / k
return(serendipity_score)
}
calculate_serendipity(recommendations, ratings, target_user_name, k = 20, threshold = 5)
## [1] 0.95
get_predicted_ratings <- function(target_user_name, fmn_scaled, games_features, game_map, user_ratings, k = 20) {
# Step 1: Get the target user_id from the user_ratings table
target_user_id <- ratings |>
filter(Username == target_user_name) |>
distinct(user_id) |>
pull(user_id)
if (length(target_user_id) == 0) {
stop("Username not found.")
}
# Step 2: Filter game_map to include only valid games in fmn
valid_bgg_ids <- as.integer(rownames(fmn)) # BGGIds used in training
filtered_game_map <- game_map %>%
filter(BGGId %in% valid_bgg_ids)
all_game_ids <- filtered_game_map$game_id
# Step 3: Create the to_predict file
print(target_user_id)
length(target_user_id)
to_predict <- data.frame(
user_id = rep(target_user_id, length(all_game_ids)),
game_id = all_game_ids
)
cat("Number of predictions to make:", nrow(to_predict), "\n")
stopifnot(length(unique(to_predict$game_id)) == length(to_predict$game_id))
write.table(to_predict, "to_predict.txt", row.names = FALSE, col.names = FALSE, sep = " ")
# Step 4: Predict
unlink("predicted_ratings.txt")
r$predict(data_file("to_predict.txt"), out_file("predicted_ratings.txt"))
predicted_ratings <- scan("predicted_ratings.txt")
if (length(predicted_ratings) != length(all_game_ids)) {
stop("Prediction count mismatch.")
}
# Step 5: Combine predictions with BGGId
recommended_games <- data.frame(
game_id = all_game_ids,
predicted_rating = predicted_ratings
) %>%
left_join(filtered_game_map, by = "game_id") # adds BGGId
# Step 6: Merge with game using BGGId
recommendations <- recommended_games %>%
left_join(games, by = "BGGId") %>%
arrange(desc(predicted_rating)) %>%
head(k)
return(recommendations)
}
compute_all_metrics <- function(user_name, recommendations, test_data, sms, fmn_scaled, games_features, ratings, popularity_threshold = NULL) {
# 1. Precision & Recall ------------------------------------------------
# Get relevant items user liked in test_data (rating >= 5)
user_relevant <- test_data %>%
filter(Username == user_name & Rating >= 5) %>%
pull(BGGId) %>%
as.character()
recommended_bggids <- as.character(recommendations$BGGId)
hits <- length(intersect(recommended_bggids, user_relevant))
precision <- if(length(recommended_bggids) > 0) hits / length(recommended_bggids) else NA
recall <- if(length(user_relevant) > 0) hits / length(user_relevant) else NA
# 2. Novelty -------------------------------------------------------------
# Define popularity cutoff if not given
if(is.null(popularity_threshold)) {
popularity_threshold <- sort(games$NumUserRatings, decreasing = TRUE)[1000]
}
novelty_score <- mean(log2(1 + max(games$NumUserRatings) / (recommendations$NumUserRatings + 1)), na.rm = TRUE)
# 3. Diversity -----------------------------------------------------------
# Calculate pairwise similarity among recommended games
rec_indices <- which(rownames(fmn_scaled) %in% recommended_bggids)
if(length(rec_indices) < 2) {
diversity <- NA # can't compute diversity with fewer than 2 games
} else {
sim_submatrix <- sms[rec_indices, rec_indices]
diversity <- 1 - mean(sim_submatrix[upper.tri(sim_submatrix)])
}
# 4. Serendipity ---------------------------------------------------------
# Serendipity = recommendations that are relevant but NOT popular
# Popular games above popularity_threshold
popular_games <- games_features |>
filter(NumUserRatings >= popularity_threshold) |>
pull(BGGId) %>%
as.character()
# Serendipitous items = relevant recommended games NOT in popular_games
serendipitous <- intersect(setdiff(recommended_bggids, popular_games), user_relevant)
serendipity <- if(length(recommended_bggids) > 0) length(serendipitous) / length(recommended_bggids) else NA
# Return all metrics in a named list
list(
precision = precision,
recall = recall,
novelty = novelty_score,
diversity = diversity,
serendipity = serendipity
)
}
get_cold_start_recommendations <- function(games_features, n = 50) {
games_features |>
arrange(desc(NumUserRatings))|>
head(n)
}
# 4. ➕ Define game-based recommendation function here
get_similar_games <- function(game_name, similarity_matrix, game_map, games_features, num_recs = 20) {
game_id <- game_map_with_names |>
filter(Name == game_name) |>
pull(BGGId)
if (is.na(game_id) || !(game_id %in% rownames(similarity_matrix))) {
return(NULL)
}
sim_scores <- similarity_matrix[as.character(game_id), ]
sim_scores <- sort(sim_scores, decreasing = TRUE)
sim_scores <- sim_scores[names(sim_scores) != as.character(game_id)]
top_ids <- names(head(sim_scores, num_recs))
games_features %>%
filter(BGGId %in% as.numeric(top_ids)) |>
arrange(match(BGGId, as.numeric(top_ids)))
}
library(shiny)
library(dplyr)
ui <- fluidPage(
titlePanel("Board Game Recommender"),
sidebarLayout(
sidebarPanel(
radioButtons("rec_type", "Recommendation Type:",
choices = c("By User" = "user", "By Game" = "game")),
conditionalPanel(
condition = "input.rec_type == 'user'",
textInput("username", "Enter Username:", value = "")
),
conditionalPanel(
condition = "input.rec_type == 'game'",
selectInput("selected_game", "Choose a Game:",
choices = sort(unique(games_features$Name)))
),
numericInput("num_recs", "Number of Recommendations:", value = 20, min = 5, max = 50),
actionButton("go", "Get Recommendations")
),
mainPanel(
uiOutput("recommendations"),
verbatimTextOutput("metrics")
)
)
)
## Warning: The select input "selected_game" contains a large number of options;
## consider using server-side selectize for massively improved performance. See
## the Details section of the ?selectizeInput help topic.
server <- function(input, output, session) {
observeEvent(input$go, {
req(input$num_recs)
if (input$rec_type == "user") {
req(input$username)
if (!(input$username %in% ratings$Username)) {
cold_recs <- get_cold_start_recommendations(games_features, input$num_recs)
output$recommendations <- renderUI({
game_items <- lapply(1:nrow(cold_recs), function(i) {
tags$div(
tags$img(src = cold_recs$ImagePath[i], height = "100px"),
tags$p(cold_recs$Name[i]),
tags$p(paste("Avg Rating:", round(cold_recs$AvgRating[i], 2)))
)
})
do.call(tagList, game_items)
})
output$metrics <- renderText("New user detected — showing popular games.")
return()
}
# User-based recommendations for existing user
recs <- get_predicted_ratings(
target_user_name = input$username,
fmn_scaled = fmn_scaled,
games_features = games_features,
game_map = game_map,
user_ratings = ratings,
k = input$num_recs
)
} else if (input$rec_type == "game") {
req(input$selected_game)
recs <- get_similar_games(
game_name = input$selected_game,
similarity_matrix = sms,
game_map = game_map_with_names,
games_features = games_features,
num_recs = input$num_recs
)
if (is.null(recs)) {
output$recommendations <- renderTable({
data.frame(Message = paste("Game", input$selected_game, "not found or no similar games."))
})
output$metrics <- renderText("")
return()
}
}
output$recommendations <- renderUI({
game_items <- lapply(1:nrow(recs), function(i) {
div_content <- list(
tags$img(src = recs$ImagePath[i], height = "100px"),
tags$strong(recs$Name[i])
)
# Only append the predicted rating if it exists
if ("predicted_rating" %in% colnames(recs)) {
div_content <- append(div_content, list(
tags$div(paste("Predicted Rating:", round(recs$predicted_rating[i], 2)))
))
}
# Only append average rating if it exists
if ("AvgRating" %in% colnames(recs)) {
div_content <- append(div_content, list(
tags$div(paste("Avg Rating:", round(recs$AvgRating[i], 2)))
))
}
tags$div(style = "margin-bottom: 20px;", div_content)
})
do.call(tagList, game_items)
})
# Metrics only if user-based
if (input$rec_type == "user") {
metrics_list <- compute_all_metrics(
user_name = input$username,
recommendations = recs,
test_data = test_data,
sms = sms,
fmn_scaled = fmn_scaled,
games_features = games_features,
ratings = ratings
)
output$metrics <- renderPrint({ metrics_list })
} else {
output$metrics <- renderText("")
}
}) # end observeEvent
}
shinyApp(ui, server)