Chess Tournament ELO Rating

Author

XiaoFei Mei

Published

March 1, 2026

Approach

This assignment processes tournament data from a text file containing player information including names, states, points, pre-ratings, and opponent information. The goal is to calculate each player’s expected score using the ELO formula and identify which players outperformed and underperformed relative to their expectations.

The ELO formula given in the assignment is used: E = 1 / (1 + 10^((R_opp - R_player) / 400)), where R_player is the player's pre-rating and R_opp is the opponent's pre-rating.

Load and Read Data

This part is largely carried over from Project 1. After reading the tournament text file, we identify and remove the separator lines, the empty lines, and the header lines.

# Read the raw tournament report, one vector element per line of the file
file_path <- "tournamentinfo.txt"
data <- readLines(file_path, warn = FALSE)

# Remove separator lines (lines that start with dashes).
# A logical mask is used instead of data[-grep(...)]: negative indexing with
# an empty match vector would silently drop every line.
is_separator <- grepl("^-+", data)
data_lines <- data[!is_separator]

# Trim first, then remove empty lines, so whitespace-only lines are dropped
# too and cannot shift the two-lines-per-player pairing used later.
data_lines <- trimws(data_lines)
data_lines <- data_lines[data_lines != ""]

# Remove header lines (same mask approach as for the separators)
is_header <- grepl("Pair|Player Name|USCF ID|Num", data_lines,
                   ignore.case = TRUE)
data_lines <- data_lines[!is_header]

# Display first few lines to verify
head(data_lines, 10)
 [1] "1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
 [2] "ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
 [3] "2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
 [4] "MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
 [5] "3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
 [6] "MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
 [7] "4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
 [8] "MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
 [9] "5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|" 
[10] "MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"

Parse Player Data

As in Project 1, I loop over the lines two at a time to extract each player's name, state, total points, and opponent numbers. When handling provisional ratings, we take only the run of digits that follows “R:” and stop where the digits end, so any provisional suffix is dropped.

# Each player occupies two consecutive entries of data_lines:
#   line 1: pair number | player name | total points | one cell per round
#   line 2: state | USCF ID / R: pre-rating -> post-rating | ...
# A trailing unpaired line (odd line count) is ignored.
n_players <- length(data_lines) %/% 2L

# Preallocate one slot per player instead of growing a data frame with
# rbind() inside the loop.
player_names <- character(n_players)
player_states <- character(n_players)
player_points <- numeric(n_players)
player_ratings <- numeric(n_players)
player_opponents <- vector("list", n_players)

# Digits directly after "R:"; a provisional suffix such as "P12" is not
# captured because matching stops at the first non-digit.
rating_pattern <- "R:\\s*(\\d+)"

for (k in seq_len(n_players)) {
  # Split both lines of the record by the pipe delimiter
  parts1 <- strsplit(data_lines[2L * k - 1L], "\\|")[[1]]
  parts2 <- strsplit(data_lines[2L * k], "\\|")[[1]]

  # Player name and total points
  player_names[k] <- trimws(parts1[2])
  player_points[k] <- as.numeric(trimws(parts1[3]))

  # Round cells are everything after the first three fields. Negative
  # indexing yields an empty vector for short rows, whereas 4:length(parts1)
  # would count downwards and read the wrong fields.
  round_cells <- trimws(parts1[-(1:3)])

  # Keep the first run of digits from each cell that has one; cells without
  # an opponent number (no digits) are dropped by regmatches().
  opponent_ids <- regmatches(round_cells, regexpr("[0-9]+", round_cells))
  player_opponents[[k]] <- as.numeric(opponent_ids)

  # State is the first two characters of the first field on line 2
  player_states[k] <- substr(trimws(parts2[1]), 1, 2)

  # Pre-rating: element 2 of the regexec() result is the captured digit run
  rating_text <- parts2[2]
  rating_match <- if (is.na(rating_text)) {
    character(0)
  } else {
    regmatches(rating_text, regexec(rating_pattern, rating_text))[[1]]
  }
  player_ratings[k] <- if (length(rating_match) >= 2) {
    as.numeric(rating_match[2])
  } else {
    NA_real_
  }
}

# Assemble the data frame once; Opponents is a list column holding one
# numeric vector of opponent pair numbers per player.
players <- data.frame(
  Name = player_names,
  State = player_states,
  Points = player_points,
  PreRating = player_ratings,
  stringsAsFactors = FALSE
)
players$Opponents <- player_opponents

# Display structure
str(players)
'data.frame':   64 obs. of  5 variables:
 $ Name     : chr  "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" "PATRICK H SCHILLING" ...
 $ State    : chr  "ON" "MI" "MI" "MI" ...
 $ Points   : num  6 6 6 5.5 5.5 5 5 5 5 5 ...
 $ PreRating: num  1794 1553 1384 1716 1655 ...
 $ Opponents:List of 64
  ..$ : num  39 21 18 14 7 12 4
  ..$ : num  63 58 4 17 16 20 7
  ..$ : num  8 61 25 21 11 13 12
  ..$ : num  23 28 2 26 5 19 1
  ..$ : num  45 37 12 13 4 14 17
  ..$ : num  34 29 11 35 10 27 21
  ..$ : num  57 46 13 11 1 9 2
  ..$ : num  3 32 14 9 47 28 19
  ..$ : num  25 18 59 8 26 7 20
  ..$ : num  16 19 55 31 6 25 18
  ..$ : num  38 56 6 7 3 34 26
  ..$ : num  42 33 5 38 1 3
  ..$ : num  36 27 7 5 33 3 32
  ..$ : num  54 44 8 1 27 5 31
  ..$ : num  19 16 30 22 54 33 38
  ..$ : num  10 15 39 2 36
  ..$ : num  48 41 26 2 23 22 5
  ..$ : num  47 9 1 32 19 38 10
  ..$ : num  15 10 52 28 18 4 8
  ..$ : num  40 49 23 41 28 2 9
  ..$ : num  43 1 47 3 40 39 6
  ..$ : num  64 52 28 15 17 40
  ..$ : num  4 43 20 58 17 37 46
  ..$ : num  28 47 43 25 60 44 39
  ..$ : num  9 53 3 24 34 10 47
  ..$ : num  49 40 17 4 9 32 11
  ..$ : num  51 13 46 37 14 6
  ..$ : num  24 4 22 19 20 8 36
  ..$ : num  50 6 38 34 52 48
  ..$ : num  52 64 15 55 31 61 50
  ..$ : num  58 55 64 10 30 50 14
  ..$ : num  61 8 44 18 51 26 13
  ..$ : num  60 12 50 36 13 15 51
  ..$ : num  6 60 37 29 25 11 52
  ..$ : num  46 38 56 6 57 52 48
  ..$ : num  13 57 51 33 16 28
  ..$ : num  5 34 27 23 61
  ..$ : num  11 35 29 12 18 15
  ..$ : num  1 54 40 16 44 21 24
  ..$ : num  20 26 39 59 21 56 22
  ..$ : num  59 17 58 20
  ..$ : num  12 50 57 60 61 64 56
  ..$ : num  21 23 24 63 59 46 55
  ..$ : num  14 32 53 39 24 59
  ..$ : num  5 51 60 56 63 55 58
  ..$ : num  35 7 27 50 64 43 23
  ..$ : num  18 24 21 61 8 51 25
  ..$ : num  17 63 52 29 35
  ..$ : num  26 20 63 64 58
  ..$ : num  29 42 33 46 31 30
  ..$ : num  27 45 36 57 32 47 33
  ..$ : num  30 22 19 48 29 35 34
  ..$ : num  25 44 57
  ..$ : num  14 39 61 15 59 64
  ..$ : num  62 31 10 30 45 43
  ..$ : num  11 35 45 40 42
  ..$ : num  7 36 42 51 35 53
  ..$ : num  31 2 41 23 49 45
  ..$ : num  41 9 40 43 54 44
  ..$ : num  33 34 45 42 24
  ..$ : num  32 3 54 47 42 30 37
  ..$ : num 55
  ..$ : num  2 48 49 43 45
  ..$ : num  22 30 31 49 46 42 54

Calculate Average Opponent Rating

A lookup table and a helper function are created to look up each opponent's pre-rating and take the mean of all of a player's opponents’ pre-ratings. This part is also largely carried over from Project 1.

# Named vector mapping pair number (as a character name) to pre-rating, so a
# rating can be fetched with rating_lookup[as.character(pair_number)].
# seq_len() avoids the c(1, 0) sequence that 1:nrow() yields for zero rows.
rating_lookup <- setNames(players$PreRating, seq_len(nrow(players)))

# Average pre-rating of a player's opponents.
#
# Arguments:
#   opponent_numbers  vector of opponent pair numbers (may be empty or NULL)
#   rating_lookup     vector of ratings named by pair number; numeric, or
#                     character where a value may carry a provisional suffix
#                     starting at "P" (e.g. "1600P12")
# Returns:
#   The mean of the opponents' ratings that could be resolved, or NA when
#   there are no opponents or none of them has a usable rating.
calc_avg_opponent_rating <- function(opponent_numbers, rating_lookup) {
  if (length(opponent_numbers) == 0) {
    return(NA_real_)
  }

  # Look up all opponents at once; unknown pair numbers come back as NA
  opponent_ratings <- rating_lookup[as.character(opponent_numbers)]

  # Character ratings: strip everything from "P" onwards and convert the
  # whole vector, so plain and provisional ratings end up numeric together
  # rather than being mixed as strings and numbers.
  if (is.character(opponent_ratings)) {
    opponent_ratings <- as.numeric(sub("P.*$", "", opponent_ratings))
  }

  # Ignore opponents without a rating
  opponent_ratings <- opponent_ratings[!is.na(opponent_ratings)]

  if (length(opponent_ratings) == 0) {
    return(NA_real_)
  }
  mean(opponent_ratings)
}

# Average opponent pre-rating for every player. vapply() guarantees a numeric
# vector with one value per player, whereas sapply() changes its return type
# depending on the input (for example a list for an empty input).
players$AvgOppRating <- vapply(
  players$Opponents,
  calc_avg_opponent_rating,
  FUN.VALUE = numeric(1),
  rating_lookup = rating_lookup
)

# Round to nearest integer
players$AvgOppRating <- round(players$AvgOppRating, 0)

# Create final data frame with required columns
final_data <- players[, c("Name", "State", "Points", "PreRating", "AvgOppRating")]

# Format points to show one decimal place (Points becomes a character column)
final_data$Points <- sprintf("%.1f", final_data$Points)

# Display first few rows
head(final_data, 10)
                  Name State Points PreRating AvgOppRating
1             GARY HUA    ON    6.0      1794         1605
2      DAKSHESH DARURI    MI    6.0      1553         1469
3         ADITYA BAJAJ    MI    6.0      1384         1564
4  PATRICK H SCHILLING    MI    5.5      1716         1574
5           HANSHI ZUO    MI    5.5      1655         1501
6          HANSEN SONG    OH    5.0      1686         1519
7    GARY DEE SWATHELL    MI    5.0      1649         1372
8     EZEKIEL HOUGHTON    MI    5.0      1641         1468
9          STEFANO LEE    ON    5.0      1411         1523
10           ANVIT RAO    MI    5.0      1365         1554

ELO Score Calculation

From the ELO formula, we create a function that takes a player's rating and an opponent's rating and returns the player's expected score for that game; summing over all opponents gives the expected tournament score. The performance difference is the actual score minus the expected score. This difference provides the key insight required by this assignment: identifying the players who most outperformed and most underperformed during the tournament.

# Expected score of a player against an opponent under the ELO formula
# E = 1 / (1 + 10^((R_opp - R_player) / 400)).
#
# Arguments:
#   player_rating    rating of the player
#   opponent_rating  rating of the opponent
# Returns:
#   Expected score between 0 and 1. The arithmetic is vectorized, so
#   vector arguments give one expected score per element.
elo_expected <- function(player_rating, opponent_rating) {
  rating_gap <- opponent_rating - player_rating
  odds_against <- 10^(rating_gap / 400)
  1 / (1 + odds_against)
}

# Actual tournament score. Taken from the numeric Points column of players
# rather than re-parsing the formatted strings in final_data.
players$ActualScore <- players$Points

# Create result columns, sized to the table so an empty table is handled too
players$ExpectedScore <- rep(NA_real_, nrow(players))
players$GamesWithRatings <- numeric(nrow(players))
players$PerformanceDiff <- rep(NA_real_, nrow(players))

# Calculate expected score for each player
for (i in seq_len(nrow(players))) {
  player_rating <- players$PreRating[i]
  opponent_numbers <- players$Opponents[[i]]

  # Players without a rating or without games keep NA results
  if (is.na(player_rating) || length(opponent_numbers) == 0) {
    next
  }

  # Pre-ratings of all opponents; unknown pair numbers come back as NA
  opp_ratings <- rating_lookup[as.character(opponent_numbers)]

  # Character ratings: strip everything from "P" onwards and convert
  if (is.character(opp_ratings)) {
    opp_ratings <- as.numeric(sub("P.*$", "", opp_ratings))
  }

  # Only count games where the opponent has a rating
  opp_ratings <- opp_ratings[!is.na(opp_ratings)]
  valid_games <- length(opp_ratings)
  if (valid_games == 0) {
    next
  }

  # Expected tournament score = sum of per-game expected scores
  expected_sum <- sum(elo_expected(player_rating, opp_ratings))

  # Save results
  players$ExpectedScore[i] <- round(expected_sum, 2)
  players$GamesWithRatings[i] <- valid_games

  # Performance difference (Actual - Expected), computed from the unrounded
  # expected score and rounded once at the end
  actual_score <- players$ActualScore[i]
  if (!is.na(actual_score)) {
    players$PerformanceDiff[i] <- round(actual_score - expected_sum, 2)
  }
}

Create Results DataFrame

# Create final results df: one row per player with actual vs. expected score
elo_results <- data.frame(
  Name = players$Name,
  State = players$State,
  PreRating = players$PreRating,
  ActualScore = players$ActualScore,
  ExpectedScore = players$ExpectedScore,
  PerformanceDiff = players$PerformanceDiff,
  # lengths() always returns an integer vector, unlike sapply(x, length)
  GamesPlayed = lengths(players$Opponents),
  GamesWithRatings = players$GamesWithRatings
)

# Keep only players for whom a performance difference could be computed
has_diff <- !is.na(elo_results$PerformanceDiff)
elo_results <- elo_results[has_diff, ]

# Sort by performance difference: largest first for overperformers,
# smallest (most negative) first for underperformers
overperformers <- elo_results[order(-elo_results$PerformanceDiff), ]
underperformers <- elo_results[order(elo_results$PerformanceDiff), ]

Top 5 Overperformers and Underperformers

# Display Top 5 Overperformers
# head() returns at most five rows; indexing with [1:5, ] would pad the table
# with all-NA rows when fewer than five players have a performance difference.
over_cols <- c("Name", "State", "PreRating", "ActualScore", "ExpectedScore",
               "PerformanceDiff")
over_table <- head(overperformers[, over_cols], 5)
names(over_table) <- c("Player", "State", "Pre-Rating", "Actual", "Expected", "Diff")

# Column 6 is "Diff"; row 0 is the header row
kable(over_table,
      caption = "Top 5 Overperformers",
      digits = 2,
      format = "html") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  column_spec(6, color = "green", bold = TRUE) |>
  row_spec(0, background = "#f0f0f0", bold = TRUE)
Top 5 Overperformers
Player State Pre-Rating Actual Expected Diff
3 ADITYA BAJAJ MI 1384 6.0 1.95 4.05
15 ZACHARY JAMES HOUGHTON MI 1220 4.5 1.37 3.13
10 ANVIT RAO MI 1365 5.0 1.94 3.06
46 JACOB ALEXANDER LAVALLEY MI 377 3.0 0.04 2.96
37 AMIYATOSH PWNANANDAM MI 980 3.5 0.77 2.73
# Top 5 Underperformers
# head() returns at most five rows; indexing with [1:5, ] would pad the table
# with all-NA rows when fewer than five players have a performance difference.
under_cols <- c("Name", "State", "PreRating", "ActualScore", "ExpectedScore",
                "PerformanceDiff")
under_table <- head(underperformers[, under_cols], 5)
names(under_table) <- c("Player", "State", "Pre-Rating", "Actual", "Expected", "Diff")

# Column 6 is "Diff"; row 0 is the header row
kable(under_table,
      caption = "Top 5 Underperformers",
      digits = 2,
      format = "html") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  column_spec(6, color = "red", bold = TRUE) |>
  row_spec(0, background = "#f0f0f0", bold = TRUE)
Top 5 Underperformers
Player State Pre-Rating Actual Expected Diff
25 LOREN SCHWIEBERT MI 1745 3.5 6.28 -2.78
30 GEORGE AVERY JONES ON 1522 3.5 6.02 -2.52
42 JARED GE MI 1332 3.0 5.01 -2.01
31 RISHI SHETTY MI 1494 3.5 5.09 -1.59
35 JOSHUA DAVID LEE MI 1438 3.5 4.96 -1.46

Save Results to CSV

# Create enhanced final data with ELO results
final_with_elo <- final_data

# Find the players row that belongs to each row of final_with_elo.
# When both tables list the same names in the same order, rows are paired by
# position, which stays correct even if two players share a name. Otherwise
# fall back to the first player with a matching name (NA when none matches).
if (identical(final_with_elo$Name, players$Name)) {
  player_idx <- seq_len(nrow(players))
} else {
  player_idx <- match(final_with_elo$Name, players$Name)
}

final_with_elo$ExpectedScore <- players$ExpectedScore[player_idx]
final_with_elo$PerformanceDiff <- players$PerformanceDiff[player_idx]

# Reorder columns
final_with_elo <- final_with_elo[, c("Name", "State", "Points", "PreRating",
                                      "AvgOppRating", "ExpectedScore", "PerformanceDiff")]

# Save CSV files
write.csv(final_with_elo, "chess_results_with_elo.csv", row.names = FALSE, quote = TRUE)

Summary

The ELO formula uses each player's pre-tournament rating to predict the winning chances and the expected result of each match. Comparing the expected score with the actual result shows who outperformed or underperformed. From this result, Aditya Bajaj, Zachary James Houghton, Anvit Rao, Jacob Alexander Lavalley and Amiyatosh Pwnanandam were the top players who exceeded expectations. On the other hand, Loren Schwiebert, George Avery Jones, Jared Ge, Rishi Shetty and Joshua David Lee performed below expectations.