library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Load the saved tournament table and discard its first column in one step
# (presumably a row index stored alongside the data -- confirm against the CSV).
chess_data <- read.csv("Chess Tournament Data.csv")[, -1]
We need to extract the opponents each player faced in every round, so this section reuses some code from Project 1.
# Add Elo-based expected scores to chess_data.
#
# For every player record in tournamentinfo.txt this looks up the pre-ratings
# of the opponents that player faced, sums the per-game expected results
# Ea = Qa / (Qa + Qb) with Q = 10^(R / 400), and stores the rounded total in
# Expected.Score and (Total.Points - Expected.Score) in Score.Diff.
# Assumes row i of chess_data is the player numbered i in the tournament
# file -- TODO confirm against the CSV export.

# First prep chess_data to allow for new data (numeric placeholders)
chess_data$Expected.Score <- NA_real_
chess_data$Score.Diff <- NA_real_

# Import Raw Tournament Info as a single string
tourn_path <- "tournamentinfo.txt"
raw_tourn_info <- readChar(tourn_path, file.info(tourn_path)$size)

# Split player rows on the dashed separator lines. "\r?\n" accepts both CRLF
# and LF line endings, so the split does not depend on how the file was saved.
tourn_info <- str_split(raw_tourn_info, "-+\r?\n")[[1]]
tourn_info <- tourn_info[c(-1, -2)] # Drop irrelevant first indices

# Parse each row in tourn_info and extract necessary information.
# seq_along() yields an empty sequence (not c(1, 0)) when there are no rows.
for (i in seq_along(tourn_info)) {
  # Find all faced opponents: a result letter, whitespace, then a number
  round_results <- str_extract_all(tourn_info[i], "[WDLH]\\s+\\d+")[[1]]

  # Use integer row positions instead of relying on character row names
  round_opps <- as.integer(str_extract(round_results, "\\d+"))

  # Current Pre-Rating
  Ra <- chess_data[i, "Pre.Rating"]

  # Get pre-ratings for all faced opponents
  Rb <- chess_data[round_opps, "Pre.Rating"]

  # Other formula pieces
  Qa <- 10^(Ra / 400)
  Qb <- 10^(Rb / 400)

  # Calculate expected score: sum of per-game expectations, 2 decimals
  Ea <- round(sum(Qa / (Qa + Qb)), 2)

  # Populate appropriate columns in chess_data
  chess_data$Expected.Score[i] <- Ea
  chess_data$Score.Diff[i] <- chess_data$Total.Points[i] - Ea
}

head(chess_data)
## Name State Total.Points Pre.Rating Avg.Opponent.Pre.Rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## Expected.Score Score.Diff
## 1 5.16 0.84
## 2 3.78 2.22
## 3 1.95 4.05
## 4 4.74 0.76
## 5 4.38 1.12
## 6 4.94 0.06
# Top 5 performing players relative to their expected score.
# Score.Diff is referenced as a bare column so arrange() resolves it through
# data masking on the piped data frame rather than a separate global lookup.
chess_data %>%
  arrange(desc(Score.Diff)) %>%
  head(5)
## Name State Total.Points Pre.Rating
## 1 ADITYA BAJAJ MI 6.0 1384
## 2 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 3 ANVIT RAO MI 5.0 1365
## 4 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 5 AMIYATOSH PWNANANDAM MI 3.5 980
## Avg.Opponent.Pre.Rating Expected.Score Score.Diff
## 1 1564 1.95 4.05
## 2 1484 1.37 3.13
## 3 1554 1.94 3.06
## 4 1358 0.04 2.96
## 5 1385 0.77 2.73
# Top 5 under-performing players relative to their expected score.
# Score.Diff is referenced as a bare column so arrange() resolves it through
# data masking on the piped data frame rather than a separate global lookup.
chess_data %>%
  arrange(Score.Diff) %>%
  head(5)
## Name State Total.Points Pre.Rating Avg.Opponent.Pre.Rating
## 1 LOREN SCHWIEBERT MI 3.5 1745 1363
## 2 GEORGE AVERY JONES ON 3.5 1522 1144
## 3 JARED GE MI 3.0 1332 1150
## 4 RISHI SHETTY MI 3.5 1494 1260
## 5 JOSHUA DAVID LEE MI 3.5 1438 1150
## Expected.Score Score.Diff
## 1 6.28 -2.78
## 2 6.02 -2.52
## 3 5.01 -2.01
## 4 5.09 -1.59
## 5 4.96 -1.46
.
.
.
.
Scratch Notes
using https://stanislav-stankovic.medium.com/elo-rating-system-6196cc59941e for formula
Imagine that two players, denoted player A and player B, are playing a PvP match against each other. These two players have corresponding Elo ratings, Ra and Rb respectively. Here, Ra is the main player's rating and Rb is the rating of the opponent faced in each round.
The expected outcome of a match Ea = Qa /(Qa + Qb), Qa = 10^(Ra/c), Qb = 10^(Rb/c), 0 ≤ Ea ≤ 1.
The actual outcome of the match is Sa = 1 in the case of a win, Sa = 0 in the case of a loss, and Sa = 0.5 in the case of a draw.