Setup

# Packages attached for this report; knitr supplies kable() for the tables.
library(tidyverse)
library(stringr)
library(knitr)
# Default plot theme for the session: minimal, with a base font size of 12.
theme_set(theme_minimal(base_size = 12))

1) Load raw tournament text

Put tournamentinfo.txt in the same folder as this Rmd, or in a data/ subfolder next to it. If it’s elsewhere, change the paths below.

# Candidate locations for the raw crosstable, checked in order; the first one
# that exists is used.
paths <- c("tournamentinfo.txt", "data/tournamentinfo.txt")
path <- paths[file.exists(paths)][1]

# Fail fast if not found, naming every location that was searched so the
# reader can tell whether the file or the working directory is wrong.
if (is.na(path)) {
  stop(
    "tournamentinfo.txt not found; looked in: ",
    paste(paths, collapse = ", "),
    " (working directory: ", getwd(), ")",
    call. = FALSE
  )
}

# One element per line of the file; a missing final newline is not warned on.
raw <- readLines(path, warn = FALSE)
length(raw); head(raw, 20)
## [1] 196
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------" 
## [11] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
## [12] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [13] "-----------------------------------------------------------------------------------------" 
## [14] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
## [15] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |" 
## [16] "-----------------------------------------------------------------------------------------" 
## [17] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|" 
## [18] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [19] "-----------------------------------------------------------------------------------------" 
## [20] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"

2) Segment into 2-line player blocks

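Each player record spans two lines: a header line with the pair number, name, total points, and one result per round (result code plus opponent pair number); and a detail line with the state/province code, USCF ID, pre- and post-tournament ratings, and the color played in each round.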

# Normalise each line: drop carriage returns, then trailing whitespace.
clean <- str_trim(str_replace_all(raw, "\\r", ""), side = "right")

# Only table rows contain a pipe; the dashed separator lines do not.
tab <- clean[str_detect(clean, "\\|")]

# Table rows alternate header/detail, so there must be an even number of them.
stopifnot(length(tab) %% 2 == 0)

# One row per header/detail pair, where `i` is the header's position in `tab`.
# Only pairs whose header opens with a pair number are kept, which removes the
# column-title pair at the top of the table.
blocks <- tibble(
  i = seq(1, length(tab), by = 2),
  header = tab[i],
  detail = tab[i + 1]
) |>
  filter(str_detect(header, "^\\s*\\d+\\s*\\|"))

nrow(blocks); head(blocks, 2)
## [1] 64
## # A tibble: 2 × 3
##       i header                                                            detail
##   <dbl> <chr>                                                             <chr> 
## 1     3 "    1 | GARY HUA                        |6.0  |W  39|W  21|W  1… "   O…
## 2     5 "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   … "   M…

3) Parse fields we need (name, pre-rating, total points, opponents)

# Split one table line on "|" and trim the padding from every cell.
# Only the first element of `x` is used; a trailing "|" yields a final "" cell.
split_cols <- function(x) {
  cells <- stringr::str_split(x, "\\|")
  stringr::str_trim(cells[[1]])
}

# Header line: pair number, player name, total points, and one cell per round
# holding a result code (W/D/L/H/B/U) followed by the opponent's pair number.
#
# x: a single header line of the crosstable.
# Returns a one-row tibble with `pair` (integer), `name` (squished and
# title-cased), `total_pts` (numeric) and `opponents` (a list-column holding
# the integer pair numbers of the opponents, in round order).
parse_header <- function(x) {
  cols <- split_cols(x)
  pair_val <- as.integer(stringr::str_extract(cols[1], "\\d+"))
  name_val <- stringr::str_to_title(stringr::str_squish(cols[2]))

  # total points = first numeric-looking column after name
  after_name <- cols[-(1:2)]
  tp_first <- stringr::str_extract(after_name, "\\d+(?:\\.\\d+)?")
  tp_idx <- which(!is.na(tp_first))[1]
  total_pts_val <- as.numeric(tp_first[tp_idx])

  # Opponents are read only from the round cells that follow the total-points
  # cell. Scanning the whole line would also match text in the name cell
  # (e.g. a name containing "B 2") and invent an opponent.
  round_cols <- if (is.na(tp_idx)) {
    character(0)
  } else {
    after_name[-seq_len(tp_idx)]
  }
  tokens <- stringr::str_extract(round_cols, "(?i)\\b[WDLHBU][\\s-]*\\d+")
  opp_nums <- as.integer(stringr::str_extract(tokens, "\\d+"))
  opp_nums <- opp_nums[!is.na(opp_nums) & opp_nums > 0]  # drop 0/byes

  tibble::tibble(
    pair = pair_val,
    name = name_val,
    total_pts = total_pts_val,
    opponents = list(opp_nums)
  )
}

# Detail line: pull out the pre-tournament rating, i.e. the digits that follow
# "R:" (e.g. "R: 1794"). Returns a tibble with one integer column,
# `pre_rating`, which is NA when no "R:" rating is present.
parse_detail <- function(x) {
  untabbed <- stringr::str_replace_all(x, "\\t", " ")
  squished <- stringr::str_squish(untabbed)
  hit <- stringr::str_match(squished, "R:\\s*(\\d+)")
  tibble::tibble(pre_rating = as.integer(hit[, 2]))
}

# Parse every header and detail line into one row each, in matching order.
hdr <- blocks$header |> purrr::map_dfr(parse_header)
dets <- blocks$detail |> purrr::map_dfr(parse_detail)

# Put the two halves side by side and discard any row whose name is the
# literal column title.
players <- dplyr::filter(
  dplyr::bind_cols(hdr, dets),
  !grepl("^Player\\s*Name$", name, ignore.case = TRUE)
)

# Quick check: opponents should not be all empty
head(
  dplyr::transmute(players, name, pre_rating, opp_len = lengths(opponents)),
  10
)
## # A tibble: 10 × 3
##    name                pre_rating opp_len
##    <chr>                    <int>   <int>
##  1 Gary Hua                  1794       7
##  2 Dakshesh Daruri           1553       7
##  3 Aditya Bajaj              1384       7
##  4 Patrick H Schilling       1716       7
##  5 Hanshi Zuo                1655       7
##  6 Hansen Song               1686       7
##  7 Gary Dee Swathell         1649       7
##  8 Ezekiel Houghton          1641       7
##  9 Stefano Lee               1411       7
## 10 Anvit Rao                 1365       7

4) Compute ELO Expected Score per player

Formula for expected score of Player A vs Opponent B:
\( E_A = \frac{1}{1 + 10^{(R_B - R_A)/400}} \)

# Pair number -> pre-rating, used to find each opponent's rating.
lookup <- select(players, pair, pre_rating)

# Long format: one row per (player, opponent) pairing whose opponent pair
# number is present, positive, and belongs to a known player.
opp_tbl <- players |>
  select(pair, name, pre_rating, total_pts, opp_pair = opponents) |>
  tidyr::unnest_longer(opp_pair, values_to = "opp_pair", keep_empty = TRUE) |>
  mutate(opp_pair = as.integer(opp_pair)) |>
  filter(!is.na(opp_pair)) |>
  filter(opp_pair > 0) |>
  filter(opp_pair %in% players$pair)

# Bring in the opponent's pre-rating as `pre_rating_opp`.
opp_joined <- left_join(
  opp_tbl,
  lookup,
  by = c("opp_pair" = "pair"),
  suffix = c("", "_opp")
)

# Per-game expected score: 1 / (1 + 10^((R_opp - R_self) / 400)).
elo_expected <- mutate(
  opp_joined,
  Expected = 1 / (1 + 10^((pre_rating_opp - pre_rating) / 400))
)

# Total expected score per player; games with a missing rating are skipped.
expected_total <- summarise(
  group_by(elo_expected, pair),
  Expected_Total = sum(Expected, na.rm = TRUE),
  .groups = "drop"
)

# Back to one row per player. A player with no counted games gets an expected
# total of 0, and the difference is actual points minus expected points.
results <- players |>
  select(pair, name, pre_rating, total_pts) |>
  left_join(expected_total, by = "pair") |>
  mutate(Expected_Total = replace_na(Expected_Total, 0)) |>
  mutate(Performance_Diff = total_pts - Expected_Total)

knitr::kable(
  head(arrange(results, desc(Performance_Diff)), 10),
  caption = "Top 10 by Performance Difference (Actual - Expected)",
  digits = 2
)
Top 10 by Performance Difference (Actual - Expected)
pair name pre_rating total_pts Expected_Total Performance_Diff
3 Aditya Bajaj 1384 6.0 1.95 4.05
15 Zachary James Houghton 1220 4.5 1.37 3.13
10 Anvit Rao 1365 5.0 1.94 3.06
46 Jacob Alexander Lavalley 377 3.0 0.04 2.96
37 Amiyatosh Pwnanandam 980 3.5 0.77 2.73
9 Stefano Lee 1411 5.0 2.29 2.71
2 Dakshesh Daruri 1553 6.0 3.78 2.22
52 Ethan Guo 935 2.5 0.30 2.20
59 Sean M Mc Cormick 853 2.0 0.41 1.59
58 Viraj Mohile 917 2.0 0.43 1.57

5) Top 5 Overperformers & Underperformers

Top 5 Overperformers
name Pre-Rating Actual Expected Diff = Actual - Expected
Aditya Bajaj 1384 6.0 1.95 4.05
Zachary James Houghton 1220 4.5 1.37 3.13
Anvit Rao 1365 5.0 1.94 3.06
Jacob Alexander Lavalley 377 3.0 0.04 2.96
Amiyatosh Pwnanandam 980 3.5 0.77 2.73
Top 5 Underperformers
name Pre-Rating Actual Expected Diff = Actual - Expected
Loren Schwiebert 1745 3.5 6.28 -2.78
George Avery Jones 1522 3.5 6.02 -2.52
Jared Ge 1332 3.0 5.01 -2.01
Rishi Shetty 1494 3.5 5.09 -1.59
Joshua David Lee 1438 3.5 4.96 -1.46

6) (Optional) Plot Performance Difference

7) Export CSV

# Report-ready columns: readable headings, with the two computed scores
# rounded to two decimals.
out <- transmute(
  results,
  Name = name,
  `Pre-Rating` = pre_rating,
  `Actual Score` = total_pts,
  `Expected Score` = round(Expected_Total, 2),
  `Performance Diff (Actual-Expected)` = round(Performance_Diff, 2)
)

# Write the table to the working directory, then echo a confirmation string.
readr::write_csv(out, "assignment_5B_elo_results.csv")
"Saved: assignment_5B_elo_results.csv"
## [1] "Saved: assignment_5B_elo_results.csv"

8) Brief write-up

Method. I used the standard ELO expected score formula
\(E = \frac{1}{1 + 10^{(R_{opp}-R_{self})/400}}\)
to compute each player’s expected score in every round from pre-ratings, then summed the expectations per player. I compared Actual tournament points to Expected to get a performance difference (Actual − Expected).

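Overall picture. Average expected points across all players was 3.19.
The average performance difference was 0.25. Every played game adds exactly one point to both the Actual and the Expected totals, so over played games alone this average would be 0; the positive value most likely reflects points from byes and other unplayed rounds, which count toward Actual but have no Expected counterpart. On average, then, players performed about as ELO predicted, with notable individual outliers.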

Top performers. The largest positive differences (overperformance) were:
- Aditya Bajaj (4.05)
- Zachary James Houghton (3.13)
- Anvit Rao (3.06)
- Jacob Alexander Lavalley (2.96)
- Amiyatosh Pwnanandam (2.73)

Underperformers. The largest negative differences were:
- Loren Schwiebert (-2.78)
- George Avery Jones (-2.52)
- Jared Ge (-2.01)
- Rishi Shetty (-1.59)
- Joshua David Lee (-1.46)

Interpretation. Positive differences suggest players outperformed what their rating implied (could be improvement, preparation, or favorable pairings). Negative differences suggest the opposite (tough opposition, form, or variance). With short events, variance is meaningful—so these signals are directional, not definitive.

Limitations & next steps. This uses pre-ratings and assumes independence between rounds. A fuller model could weight by color (W/B), incorporate byes/forfeits, or estimate a provisional rating change \(\Delta R = K \times (\text{Actual} - \text{Expected})\). Repeating this across multiple tournaments would separate noise from trend.