library(tidyverse)
library(stringr)
library(knitr)
# Make ggplot2's minimal theme (12 pt base font size) the default theme.
theme_set(theme_minimal(base_size = 12))
Put
tournamentinfo.txt
in the same folder as this Rmd. If it’s elsewhere, change the `paths` vector
below.
# Candidate locations for the tournament file, checked in order.
paths <- c("tournamentinfo.txt", "data/tournamentinfo.txt")

# First candidate that exists on disk; NA when none of them does.
path <- paths[file.exists(paths)][1]

# Fail fast, and say where we looked so the fix is obvious to the reader.
if (is.na(path)) {
  stop(
    "tournamentinfo.txt not found; looked in: ",
    paste(paths, collapse = ", "),
    call. = FALSE
  )
}

# Read the file as raw text lines (warn = FALSE silences the
# "incomplete final line" warning).
raw <- readLines(path, warn = FALSE)

# Sanity check: number of lines and a peek at the first 20.
length(raw)
head(raw, 20)
## [1] 196
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [16] "-----------------------------------------------------------------------------------------"
## [17] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [18] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [19] "-----------------------------------------------------------------------------------------"
## [20] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
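Each player record spans two lines: a header line with the pair number, name, total points, and each round’s result and opponent; and a detail line with the state, USCF ID, pre/post ratings, and the color played in each round.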
# Strip carriage returns and trailing whitespace from every raw line.
clean <- str_trim(str_replace_all(raw, "\\r", ""), side = "right")

# Only lines containing a pipe belong to the table; separator rows
# (all dashes) are dropped here.
tab <- clean[str_detect(clean, "\\|")]

# Table lines come in header/detail pairs, so there must be an even count.
stopifnot(length(tab) %% 2 == 0)

# One row per pair of lines: `i` is the index of the header line in `tab`,
# and the detail line is the one right after it. Rows whose header does
# not start with a number (the column-title rows) are discarded.
blocks <- tibble(i = seq(1, length(tab), by = 2)) |>
  mutate(
    header = tab[i],
    detail = tab[i + 1]
  ) |>
  filter(str_detect(header, "^\\s*\\d+\\s*\\|"))

nrow(blocks)
head(blocks, 2)
## [1] 64
## # A tibble: 2 × 3
## i header detail
## <dbl> <chr> <chr>
## 1 3 " 1 | GARY HUA |6.0 |W 39|W 21|W 1… " O…
## 2 5 " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L … " M…
# Split one pipe-delimited table line into its columns and strip the
# surrounding whitespace from each column.
#   x: a single line of text (only the first element is used).
# Returns a character vector with one entry per column.
split_cols <- function(x) {
  pieces <- stringr::str_split(x, "\\|")[[1]]
  stringr::str_trim(pieces)
}
# Header: pair #, name, total points (first numeric after name), AND
# opponents (W/D/L/B/H/U + number).
#
# Parses one pipe-delimited player header line into a one-row tibble.
#   x: a single header line, e.g.
#      "    1 | GARY HUA      |6.0  |W  39|W  21|W  18|..."
# Returns a tibble with columns:
#   pair      - integer pair number taken from the first column
#   name      - player name, whitespace-squished and title-cased
#   total_pts - numeric value of the first column after the name that
#               contains a number (NA if there is none)
#   opponents - list column holding an integer vector of opponent pair
#               numbers (> 0); rounds that carry no number are skipped
parse_header <- function(x) {
  cols <- split_cols(x)
  pair_val <- as.integer(stringr::str_extract(cols[1], "\\d+"))
  name_val <- stringr::str_to_title(stringr::str_squish(cols[2]))

  # Everything after the pair and name columns: total points, then rounds.
  after_name <- cols[-(1:2)]

  # total points = first numeric-looking column after name
  tp_first <- stringr::str_extract(after_name, "\\d+(?:\\.\\d+)?")
  total_pts_val <- as.numeric(tp_first[which(!is.na(tp_first))[1]])

  # Opponents are on the header line in this file. Search only the columns
  # after the name: scanning the whole line would turn name text such as
  # "W 3RD" into a spurious opponent number.
  tokens <- as.character(unlist(
    stringr::str_extract_all(after_name, "(?i)\\b[WDLHBU][\\s-]*\\d+")
  ))
  # Each token holds exactly one run of digits: the opponent's pair number.
  opp_nums <- as.integer(stringr::str_extract(tokens, "\\d+"))
  opp_nums <- opp_nums[!is.na(opp_nums) & opp_nums > 0] # drop 0/byes

  tibble::tibble(
    pair = pair_val,
    name = name_val,
    total_pts = total_pts_val,
    opponents = list(opp_nums)
  )
}
# Detail: pre-rating from the detail line (e.g., "R: 1794").
#   x: a single detail line of the table.
# Returns a one-row tibble with an integer column `pre_rating`
# (NA when the line contains no "R: <digits>" fragment).
parse_detail <- function(x) {
  # Collapse tabs and repeated whitespace so the pattern sees single spaces.
  squished <- stringr::str_squish(stringr::str_replace_all(x, "\\t", " "))
  # Column 2 of the match matrix is the captured run of digits after "R:".
  rating_match <- stringr::str_match(squished, "R:\\s*(\\d+)")
  tibble::tibble(pre_rating = as.integer(rating_match[, 2]))
}
# Parse every header and detail line into a one-row tibble, then stack them.
hdr <- dplyr::bind_rows(lapply(blocks$header, parse_header))
dets <- dplyr::bind_rows(lapply(blocks$detail, parse_detail))

# Both tables are built from the same rows of `blocks`, so they line up
# positionally and can be glued side by side. Any row whose name is the
# column title "Player Name" is dropped.
players <- dplyr::bind_cols(hdr, dets)
players <- dplyr::filter(
  players,
  !grepl("^Player\\s*Name$", name, ignore.case = TRUE)
)

# Quick check: opponents should not be all empty
head(
  dplyr::transmute(players, name, pre_rating, opp_len = lengths(opponents)),
  10
)
## # A tibble: 10 × 3
## name pre_rating opp_len
## <chr> <int> <int>
## 1 Gary Hua 1794 7
## 2 Dakshesh Daruri 1553 7
## 3 Aditya Bajaj 1384 7
## 4 Patrick H Schilling 1716 7
## 5 Hanshi Zuo 1655 7
## 6 Hansen Song 1686 7
## 7 Gary Dee Swathell 1649 7
## 8 Ezekiel Houghton 1641 7
## 9 Stefano Lee 1411 7
## 10 Anvit Rao 1365 7
Formula for expected score of Player A vs Opponent B:
\( E_A = \frac{1}{1 + 10^{(R_B - R_A)/400}} \)
# Lookup table mapping each pair number to that player's pre-rating.
lookup <- select(players, pair, pre_rating)

# Long format: one row per (player, opponent) game. Rows without a usable
# opponent number, or whose opponent is not a known pair, are removed.
opp_tbl <- players |>
  transmute(pair, name, pre_rating, total_pts, opp_pair = opponents) |>
  tidyr::unnest_longer(opp_pair, values_to = "opp_pair", keep_empty = TRUE) |>
  mutate(opp_pair = as.integer(opp_pair)) |>
  filter(!is.na(opp_pair) & opp_pair > 0 & opp_pair %in% players$pair)

# Bring in the opponent's pre-rating; it lands in `pre_rating_opp`.
opp_joined <- left_join(
  opp_tbl,
  lookup,
  by = c("opp_pair" = "pair"),
  suffix = c("", "_opp")
)

# Elo expected score for each individual game.
elo_expected <- mutate(
  opp_joined,
  Expected = 1 / (1 + 10 ^ ((pre_rating_opp - pre_rating) / 400))
)

# Total expected score per player across all of their games.
expected_total <- summarise(
  group_by(elo_expected, pair),
  Expected_Total = sum(Expected, na.rm = TRUE),
  .groups = "drop"
)

# Back to one row per player. A player with no games gets an expected
# total of 0; the performance difference is actual minus expected.
results <- players |>
  select(pair, name, pre_rating, total_pts) |>
  left_join(expected_total, by = "pair") |>
  mutate(Expected_Total = replace_na(Expected_Total, 0)) |>
  mutate(Performance_Diff = total_pts - Expected_Total)

# Ten largest over-performers, formatted as a table.
knitr::kable(
  head(arrange(results, desc(Performance_Diff)), 10),
  caption = "Top 10 by Performance Difference (Actual - Expected)",
  digits = 2
)
pair | name | pre_rating | total_pts | Expected_Total | Performance_Diff |
---|---|---|---|---|---|
3 | Aditya Bajaj | 1384 | 6.0 | 1.95 | 4.05 |
15 | Zachary James Houghton | 1220 | 4.5 | 1.37 | 3.13 |
10 | Anvit Rao | 1365 | 5.0 | 1.94 | 3.06 |
46 | Jacob Alexander Lavalley | 377 | 3.0 | 0.04 | 2.96 |
37 | Amiyatosh Pwnanandam | 980 | 3.5 | 0.77 | 2.73 |
9 | Stefano Lee | 1411 | 5.0 | 2.29 | 2.71 |
2 | Dakshesh Daruri | 1553 | 6.0 | 3.78 | 2.22 |
52 | Ethan Guo | 935 | 2.5 | 0.30 | 2.20 |
59 | Sean M Mc Cormick | 853 | 2.0 | 0.41 | 1.59 |
58 | Viraj Mohile | 917 | 2.0 | 0.43 | 1.57 |
name | Pre-Rating | Actual | Expected | Diff = Actual - Expected |
---|---|---|---|---|
Aditya Bajaj | 1384 | 6.0 | 1.95 | 4.05 |
Zachary James Houghton | 1220 | 4.5 | 1.37 | 3.13 |
Anvit Rao | 1365 | 5.0 | 1.94 | 3.06 |
Jacob Alexander Lavalley | 377 | 3.0 | 0.04 | 2.96 |
Amiyatosh Pwnanandam | 980 | 3.5 | 0.77 | 2.73 |
name | Pre-Rating | Actual | Expected | Diff = Actual - Expected |
---|---|---|---|---|
Loren Schwiebert | 1745 | 3.5 | 6.28 | -2.78 |
George Avery Jones | 1522 | 3.5 | 6.02 | -2.52 |
Jared Ge | 1332 | 3.0 | 5.01 | -2.01 |
Rishi Shetty | 1494 | 3.5 | 5.09 | -1.59 |
Joshua David Lee | 1438 | 3.5 | 4.96 | -1.46 |
# Export table: round the two computed columns to 2 decimals, then keep
# and rename the columns in their presentation order.
out <- results |>
  mutate(
    Expected_Total = round(Expected_Total, 2),
    Performance_Diff = round(Performance_Diff, 2)
  ) |>
  select(
    Name = name,
    `Pre-Rating` = pre_rating,
    `Actual Score` = total_pts,
    `Expected Score` = Expected_Total,
    `Performance Diff (Actual-Expected)` = Performance_Diff
  )

# Write the CSV to the working directory and echo a confirmation string.
readr::write_csv(out, "assignment_5B_elo_results.csv")
"Saved: assignment_5B_elo_results.csv"
## [1] "Saved: assignment_5B_elo_results.csv"
Method. I used the standard Elo expected score
formula
\(E = \frac{1}{1 + 10^{(R_{\text{opp}} - R_{\text{self}})/400}}\)
to compute each player’s expected score in every round from pre-ratings,
then summed the expectations per player. I compared
Actual tournament points to Expected
to get a performance difference (Actual − Expected).
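Overall picture. Average expected points across all
players was 3.19.
The average performance difference was 0.25. It is positive rather than
zero because points from byes and forfeits count toward the actual score
but, having no opponent, add nothing to the expected score; for games
actually played, the two players’ expected scores sum to 1, just as their
results do. Apart from that offset, players performed on average about as
Elo predicted, with notable individual outliers.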
Top performers. The largest positive differences
(overperformance) were:
- Aditya Bajaj (4.05)
- Zachary James Houghton (3.13)
- Anvit Rao (3.06)
- Jacob Alexander Lavalley (2.96)
- Amiyatosh Pwnanandam (2.73)
Underperformers. The largest negative differences
were:
- Loren Schwiebert (-2.78)
- George Avery Jones (-2.52)
- Jared Ge (-2.01)
- Rishi Shetty (-1.59)
- Joshua David Lee (-1.46)
Interpretation. Positive differences suggest players outperformed what their rating implied (could be improvement, preparation, or favorable pairings). Negative differences suggest the opposite (tough opposition, form, or variance). With short events, variance is meaningful—so these signals are directional, not definitive.
Limitations & next steps. This uses pre-ratings and assumes independence between rounds. A fuller model could weight by color (W/B), incorporate byes/forfeits, or estimate a provisional rating change \(\Delta R = K \times (\text{Actual} - \text{Expected})\). Repeating this across multiple tournaments would separate noise from trend.