library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Chess Elo Expected Score Calculator
# Formula source: Solon, Nate. “How Elo Ratings Actually Work.” Zwischenzug,
# https://zwischenzug.substack.com/p/how-elo-ratings-actually-work
# Load the tournament results; no column types are given, so readr guesses them
chess_data <- readr::read_csv(file = "chess_tournament_results.csv")
## Rows: 65 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Player_Name, State, Opponent_Ratings
## dbl (3): Total_Points, Pre_Rating, Avg_Opponent_Rating
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the type and leading values of every column
utils::str(object = chess_data)
## spc_tbl_ [65 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Player_Name : chr [1:65] "Player Name" "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" ...
## $ State : chr [1:65] "Num" "ON" "MI" "MI" ...
## $ Total_Points : num [1:65] NA 6 6 6 5.5 5.5 5 5 5 5 ...
## $ Pre_Rating : num [1:65] NA 1794 1553 1384 1716 ...
## $ Avg_Opponent_Rating: num [1:65] NA 1585 1440 1454 1660 ...
## $ Opponent_Ratings : chr [1:65] NA "1423;1595;1629;1666;1686;1712;1384" "1530;1092;1384;1604;1220;1564;1686" "1649;967;1229;1595;1365;1663;1712" ...
## - attr(*, "spec")=
## .. cols(
## .. Player_Name = col_character(),
## .. State = col_character(),
## .. Total_Points = col_double(),
## .. Pre_Rating = col_double(),
## .. Avg_Opponent_Rating = col_double(),
## .. Opponent_Ratings = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first six rows
utils::head(x = chess_data, n = 6L)
## # A tibble: 6 × 6
## Player_Name State Total_Points Pre_Rating Avg_Opponent_Rating Opponent_Ratings
## <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 Player Name Num NA NA NA <NA>
## 2 GARY HUA ON 6 1794 1585 1423;1595;1629;…
## 3 DAKSHESH D… MI 6 1553 1440 1530;1092;1384;…
## 4 ADITYA BAJ… MI 6 1384 1454 1649;967;1229;1…
## 5 PATRICK H … MI 5.5 1716 1660 1555;1552;1794;…
## 6 HANSHI ZUO MI 5.5 1655 1512 1199;1355;1712;…
# Keep only usable player rows: every field needed later must be present,
# the header row embedded in the data is dropped, and the opponent list
# must not be empty
clean_data <- chess_data %>%
  filter(
    !(is.na(Player_Name) | is.na(Total_Points) |
      is.na(Pre_Rating) | is.na(Opponent_Ratings)),
    !(Player_Name == "Player Name"),
    nzchar(Opponent_Ratings)
  )
cat("Clean data count:", nrow(clean_data), "\n")
## Clean data count: 64
# Expected total score for one player against a set of opponents, using the
# Elo logistic formula. Each game contributes
#   1 / (1 + 10^((opponent - player) / 400))
# and the per-game expectations are summed.
#
# player_rating:    the player's rating (a single number).
# opponent_ratings: numeric vector, or list of numbers, of opponent ratings;
#                   may be empty.
# Returns a single number: the sum of the per-game expected scores, which is
# 0 when there are no opponents.
calculate_expected_score <- function(player_rating, opponent_ratings) {
  # Flatten so list input keeps working, then use vectorised arithmetic.
  # The previous per-element sapply() returned list() for empty input,
  # which made sum() fail.
  ratings <- unlist(opponent_ratings, use.names = FALSE)
  sum(1 / (1 + 10^((ratings - player_rating) / 400)))
}
# Turn a ";"-separated string of opponent ratings into a numeric vector.
# Pieces that do not convert to a number (including a missing input) are
# left out of the result.
parse_opponent_ratings <- function(opponent_string) {
  pieces <- strsplit(opponent_string, ";")
  numbers <- as.numeric(unlist(pieces))
  numbers[!is.na(numbers)]
}
# Per-player expected score and over/under-performance, best performers first.
# Each opponent string is parsed once into a list-column; the game count and
# the expected score are then derived from that column element by element.
results <- clean_data %>%
  mutate(
    opponent_ratings_list = lapply(Opponent_Ratings, parse_opponent_ratings),
    games_played = lengths(opponent_ratings_list),
    expected_score = vapply(
      seq_along(opponent_ratings_list),
      function(idx) {
        calculate_expected_score(
          Pre_Rating[[idx]],
          opponent_ratings_list[[idx]]
        )
      },
      numeric(1)
    ),
    # Positive means the player scored more than their rating predicted
    performance = Total_Points - expected_score
  ) %>%
  arrange(desc(performance))
# Headline statistics over all analysed players
cat("\n=== ANALYSIS SUMMARY ===\n")
##
## === ANALYSIS SUMMARY ===
cat("Total players analyzed:", nrow(results), "\n")
## Total players analyzed: 64
cat(
  "Mean performance difference:",
  round(mean(results[["performance"]]), digits = 3),
  "\n"
)
## Mean performance difference: 0.382
cat(
  "Standard deviation of performance:",
  round(stats::sd(results[["performance"]]), digits = 3),
  "\n\n"
)
## Standard deviation of performance: 1.172
# Top 5 Overperformers
# Takes the first five rows of results and prints one report per player.
cat("=== TOP 5 OVERPERFORMERS ===\n")
## === TOP 5 OVERPERFORMERS ===
top_overperformers <- head(results, 5)
# Loop over the rows that actually exist: with fewer than five players a
# fixed 1:5 would index past the end and print rows of NA.
for (i in seq_len(nrow(top_overperformers))) {
  player <- top_overperformers[i, ]
  cat(sprintf("%d. %s (%s)\n", i, player$Player_Name, player$State))
  # %.0f rather than %d: Pre_Rating is a double column, and %d raises an
  # error for a double that is not whole-valued.
  cat(sprintf(" Pre-Tournament Rating: %.0f\n", player$Pre_Rating))
  cat(sprintf(" Actual Score: %.1f\n", player$Total_Points))
  cat(sprintf(" Expected Score: %.2f\n", player$expected_score))
  # %+ prints the value's real sign; a hard-coded "+" would give "+-0.50"
  # if a listed player had a negative difference.
  cat(sprintf(" Overperformance: %+.2f points\n", player$performance))
  cat(sprintf(" Games Played: %d\n\n", player$games_played))
}
## 1. ADITYA BAJAJ (MI)
## Pre-Tournament Rating: 1384
## Actual Score: 6.0
## Expected Score: 2.86
## Overperformance: +3.14 points
## Games Played: 7
##
## 2. JACOB ALEXANDER LAVALLEY (MI)
## Pre-Tournament Rating: 377
## Actual Score: 3.0
## Expected Score: 0.02
## Overperformance: +2.98 points
## Games Played: 7
##
## 3. AMIYATOSH PWNANANDAM (MI)
## Pre-Tournament Rating: 980
## Actual Score: 3.5
## Expected Score: 0.66
## Overperformance: +2.84 points
## Games Played: 5
##
## 4. ZACHARY JAMES HOUGHTON (MI)
## Pre-Tournament Rating: 1220
## Actual Score: 4.5
## Expected Score: 2.11
## Overperformance: +2.39 points
## Games Played: 7
##
## 5. STEFANO LEE (ON)
## Pre-Tournament Rating: 1411
## Actual Score: 5.0
## Expected Score: 2.70
## Overperformance: +2.30 points
## Games Played: 7
# Top 5 Underperformers
# Takes the last five rows of results, re-sorts them so the largest shortfall
# comes first, and prints one report per player.
cat("=== TOP 5 UNDERPERFORMERS ===\n")
## === TOP 5 UNDERPERFORMERS ===
bottom_performers <- tail(results, 5)
bottom_performers <- bottom_performers[order(bottom_performers$performance), ]
# Loop over the rows that actually exist: with fewer than five players a
# fixed 1:5 would index past the end and print rows of NA.
for (i in seq_len(nrow(bottom_performers))) {
  player <- bottom_performers[i, ]
  cat(sprintf("%d. %s (%s)\n", i, player$Player_Name, player$State))
  # %.0f rather than %d: Pre_Rating is a double column, and %d raises an
  # error for a double that is not whole-valued.
  cat(sprintf(" Pre-Tournament Rating: %.0f\n", player$Pre_Rating))
  cat(sprintf(" Actual Score: %.1f\n", player$Total_Points))
  cat(sprintf(" Expected Score: %.2f\n", player$expected_score))
  cat(sprintf(" Underperformance: %.2f points\n", player$performance))
  cat(sprintf(" Games Played: %d\n\n", player$games_played))
}
## 1. LOREN SCHWIEBERT (MI)
## Pre-Tournament Rating: 1745
## Actual Score: 3.5
## Expected Score: 6.01
## Underperformance: -2.51 points
## Games Played: 7
##
## 2. GEORGE AVERY JONES (ON)
## Pre-Tournament Rating: 1522
## Actual Score: 3.5
## Expected Score: 5.27
## Underperformance: -1.77 points
## Games Played: 7
##
## 3. JARED GE (MI)
## Pre-Tournament Rating: 1332
## Actual Score: 3.0
## Expected Score: 4.64
## Underperformance: -1.64 points
## Games Played: 7
##
## 4. JOSHUA DAVID LEE (MI)
## Pre-Tournament Rating: 1438
## Actual Score: 3.5
## Expected Score: 5.09
## Underperformance: -1.59 points
## Games Played: 7
##
## 5. CHIEDOZIE OKORIE (MI)
## Pre-Tournament Rating: 1602
## Actual Score: 3.5
## Expected Score: 4.73
## Underperformance: -1.23 points
## Games Played: 6
# Compact reporting table: round the two computed columns to two decimals,
# then keep only the reporting columns in their output order
performance_summary <- results %>%
  mutate(
    expected_score = round(expected_score, digits = 2),
    performance = round(performance, digits = 2)
  ) %>%
  select(
    Player_Name, State, Pre_Rating, Total_Points,
    expected_score, performance, games_played
  )
# Write the table to disk and report the file name
write_csv(performance_summary, "chess_performance_analysis.csv")
cat("Results saved to 'chess_performance_analysis.csv'\n")
## Results saved to 'chess_performance_analysis.csv'
# Data visualizations, drawn only when ggplot2 can be loaded
if (!require(ggplot2, quietly = TRUE)) {
  cat("Install ggplot2 package for visualizations: install.packages('ggplot2')\n")
} else {
  # Histogram of actual-minus-expected score; the dashed red line sits at
  # zero, i.e. scoring exactly as expected
  p1 <- ggplot(data = performance_summary, mapping = aes(x = performance)) +
    geom_histogram(bins = 20, fill = "steelblue", alpha = 0.7) +
    geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
    labs(
      title = "Distribution of Performance (Actual - Expected Score)",
      x = "Performance Difference",
      y = "Count"
    ) +
    theme_minimal()
  print(p1)
  # Pre-tournament rating against performance with an "lm" smoother
  # (se = TRUE); the dashed blue line sits at zero difference
  p2 <- ggplot(
    data = performance_summary,
    mapping = aes(x = Pre_Rating, y = performance)
  ) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    geom_hline(yintercept = 0, color = "blue", linetype = "dashed") +
    labs(
      title = "Pre-Tournament Rating vs Performance",
      x = "Pre-Tournament Rating",
      y = "Performance (Actual - Expected)"
    ) +
    theme_minimal()
  print(p2)
}
## `geom_smooth()` using formula = 'y ~ x'
# Print the whole summary table; n = Inf asks for every row
cat("\n=== COMPLETE RESULTS (sorted by performance) ===\n")
##
## === COMPLETE RESULTS (sorted by performance) ===
print(x = performance_summary, n = Inf)
## # A tibble: 64 × 7
## Player_Name State Pre_Rating Total_Points expected_score performance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ADITYA BAJAJ MI 1384 6 2.86 3.14
## 2 JACOB ALEXANDER LAV… MI 377 3 0.02 2.98
## 3 AMIYATOSH PWNANANDAM MI 980 3.5 0.66 2.84
## 4 ZACHARY JAMES HOUGH… MI 1220 4.5 2.11 2.39
## 5 STEFANO LEE ON 1411 5 2.7 2.3
## 6 ETHAN GUO MI 935 2.5 0.3 2.2
## 7 ANVIT RAO MI 1365 5 2.85 2.15
## 8 PATRICK H SCHILLING MI 1716 5.5 3.45 2.05
## 9 SHIVAM JHA MI 1056 2.5 0.65 1.85
## 10 SEAN M MC CORMICK MI 853 2 0.28 1.72
## 11 DAKSHESH DARURI MI 1553 6 4.29 1.71
## 12 VIRAJ MOHILE MI 917 2 0.37 1.63
## 13 MIKE NIKITIN MI 1604 4 2.96 1.04
## 14 GARY DEE SWATHELL MI 1649 5 3.99 1.01
## 15 JULIA SHEN MI 967 1.5 0.5 1
## 16 HANSHI ZUO MI 1655 5.5 4.57 0.93
## 17 BRADLEY SHAW MI 1610 4.5 3.61 0.89
## 18 BRIAN LIU MI 1423 3 2.15 0.85
## 19 KENNETH J TACK MI 1663 4.5 3.66 0.84
## 20 MARISA RICCI MI 1153 2 1.21 0.79
## 21 GARY HUA ON 1794 6 5.23 0.77
## 22 TEJAS AYYAGARI MI 1011 2.5 1.79 0.71
## 23 KYLE WILLIAM MURPHY MI 1403 3 2.32 0.68
## 24 MICHAEL J MARTIN MI 1291 2.5 1.86 0.64
## 25 JUSTIN D SCHILLING MI 1199 3 2.37 0.63
## 26 ALAN BUI ON 1363 4 3.46 0.54
## 27 EZEKIEL HOUGHTON MI 1641 5 4.47 0.53
## 28 MICHAEL R ALDRICH MI 1229 4 3.55 0.45
## 29 SIDDHARTH JHA MI 1355 3.5 3.06 0.44
## 30 MICHAEL LU MI 1092 2 1.7 0.3
## 31 DANIEL KHAIN MI 1382 2.5 2.21 0.29
## 32 FOREST ZHANG MI 1348 3 2.77 0.23
## 33 JEZZEL FARKAS ON 955 1.5 1.3 0.2
## 34 ASHWIN BALAJI MI 1530 1 0.82 0.18
## 35 RONALD GRZEGORCZYK MI 1629 4 3.85 0.15
## 36 ROBERT GLEN VASEY MI 1283 3 2.87 0.13
## 37 EUGENE L MCCLURE MI 1555 4 3.88 0.12
## 38 JOSHUA PHILIP MATHE… ON 1441 3.5 3.4 0.1
## 39 SOFIA ADINA STANESC… MI 1507 3.5 3.48 0.02
## 40 TORRANCE HENRY JR MI 1666 4.5 4.48 0.02
## 41 ALEX KONG MI 1186 2 2 0
## 42 DINH DANG BUI ON 1563 4 4.04 -0.04
## 43 JASON ZHENG MI 1595 4 4.09 -0.09
## 44 GAURAV GIDWANI MI 1552 3.5 3.6 -0.1
## 45 DIPANKAR ROY MI 1564 4 4.14 -0.14
## 46 JOSE C YBARRA MI 1393 2 2.17 -0.17
## 47 THOMAS JOSEPH HOSMER MI 1175 1 1.27 -0.27
## 48 JADE GE MI 1449 3.5 3.79 -0.29
## 49 BEN LI MI 1163 1 1.3 -0.3
## 50 DAVID SUNDEEN MI 1600 4 4.31 -0.31
## 51 HANSEN SONG OH 1686 5 5.34 -0.34
## 52 JOEL R HENDON MI 1436 3 3.43 -0.43
## 53 CAMERON WILLIAM MC … MI 1712 4.5 5.04 -0.54
## 54 DEREK YAN MI 1242 3 3.61 -0.61
## 55 MICHAEL JEFFERY THO… MI 1399 3.5 4.24 -0.74
## 56 LARRY HODGE MI 1270 2 2.88 -0.88
## 57 MAX ZHU ON 1579 3.5 4.48 -0.98
## 58 ERIC WRIGHT MI 1362 2.5 3.49 -0.99
## 59 RISHI SHETTY MI 1494 3.5 4.56 -1.06
## 60 CHIEDOZIE OKORIE MI 1602 3.5 4.73 -1.23
## 61 JOSHUA DAVID LEE MI 1438 3.5 5.09 -1.59
## 62 JARED GE MI 1332 3 4.64 -1.64
## 63 GEORGE AVERY JONES ON 1522 3.5 5.27 -1.77
## 64 LOREN SCHWIEBERT MI 1745 3.5 6.01 -2.51
## # ℹ 1 more variable: games_played <int>