Load required libraries

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)

Chess Data Exploration and Elo Analysis

Chess Elo Expected Score Calculator. Formula source: Solon, Nate. “How Elo Ratings Actually Work.” Zwischenzug, https://zwischenzug.substack.com/p/how-elo-ratings-actually-work

# Read the tournament results CSV (path is relative to the working directory).
# No column types are supplied, so readr reports the specification it chose.
# The structure printed afterwards shows the first data row is a stray repeat
# of the header ("Player Name", "Num", NA ...); the cleaning step filters it.
chess_data <- read_csv("chess_tournament_results.csv")
## Rows: 65 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Player_Name, State, Opponent_Ratings
## dbl (3): Total_Points, Pre_Rating, Avg_Opponent_Rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display data structure: each column's name, type, and leading values, to
# check what read_csv() produced before any cleaning is applied.
str(chess_data)
## spc_tbl_ [65 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Player_Name        : chr [1:65] "Player Name" "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" ...
##  $ State              : chr [1:65] "Num" "ON" "MI" "MI" ...
##  $ Total_Points       : num [1:65] NA 6 6 6 5.5 5.5 5 5 5 5 ...
##  $ Pre_Rating         : num [1:65] NA 1794 1553 1384 1716 ...
##  $ Avg_Opponent_Rating: num [1:65] NA 1585 1440 1454 1660 ...
##  $ Opponent_Ratings   : chr [1:65] NA "1423;1595;1629;1666;1686;1712;1384" "1530;1092;1384;1604;1220;1564;1686" "1649;967;1229;1595;1365;1663;1712" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Player_Name = col_character(),
##   ..   State = col_character(),
##   ..   Total_Points = col_double(),
##   ..   Pre_Rating = col_double(),
##   ..   Avg_Opponent_Rating = col_double(),
##   ..   Opponent_Ratings = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Preview the first rows of the raw, uncleaned data.
head(chess_data)
## # A tibble: 6 × 6
##   Player_Name State Total_Points Pre_Rating Avg_Opponent_Rating Opponent_Ratings
##   <chr>       <chr>        <dbl>      <dbl>               <dbl> <chr>           
## 1 Player Name Num           NA           NA                  NA <NA>            
## 2 GARY HUA    ON             6         1794                1585 1423;1595;1629;…
## 3 DAKSHESH D… MI             6         1553                1440 1530;1092;1384;…
## 4 ADITYA BAJ… MI             6         1384                1454 1649;967;1229;1…
## 5 PATRICK H … MI             5.5       1716                1660 1555;1552;1794;…
## 6 HANSHI ZUO  MI             5.5       1655                1512 1199;1355;1712;…
# Keep only genuine player rows: drop the repeated header line and any record
# that is missing a name, a score, a pre-rating, or its opponent rating list.
clean_data <- chess_data %>%
  filter(
    !is.na(Player_Name) & Player_Name != "Player Name",
    !(is.na(Total_Points) | is.na(Pre_Rating)),
    !is.na(Opponent_Ratings) & Opponent_Ratings != ""
  )

# Report how many rows survived the filter.
cat(sprintf("Clean data count: %d \n", nrow(clean_data)))
## Clean data count: 64
# Expected total score for one player under the Elo expected-score formula.
#
# Each game contributes 1 / (1 + 10^((opponent - player) / 400)); the per-game
# expectations are summed over all opponents.
#
# Arguments:
#   player_rating    - numeric scalar, the player's rating.
#   opponent_ratings - numeric vector (or list of numeric scalars) of opponent
#                      ratings; may be empty.
# Returns:
#   Numeric scalar: the summed expected score. 0 when there are no opponents;
#   NA if any rating involved is NA.
calculate_expected_score <- function(player_rating, opponent_ratings) {
  # Flatten first so list input keeps working, then rely on vectorised
  # arithmetic. Unlike sapply(), this stays numeric for empty input, so sum()
  # yields 0 instead of failing on an empty list.
  opponent_ratings <- as.numeric(unlist(opponent_ratings, use.names = FALSE))
  rating_gaps <- opponent_ratings - player_rating
  sum(1 / (1 + 10^(rating_gaps / 400)))
}

Analysis Functions

# Turn a semicolon-delimited rating string (e.g. "1423;1595;1629") into a
# numeric vector. Pieces that do not convert to a number become NA during
# coercion and are dropped from the result.
parse_opponent_ratings <- function(opponent_string) {
  pieces <- unlist(strsplit(opponent_string, ";"))
  values <- as.numeric(pieces)
  values[!is.na(values)]
}

# Score every player against expectation. Per player: parse the opponent
# rating string, count the games, sum the Elo expected scores, and take
# actual minus expected as `performance`. Rows end up ordered from the
# largest overperformance to the largest underperformance.
results <- clean_data %>%
  mutate(
    opponent_ratings_list = lapply(Opponent_Ratings, parse_opponent_ratings),
    games_played = lengths(opponent_ratings_list),
    expected_score = vapply(
      seq_along(opponent_ratings_list),
      function(row) {
        calculate_expected_score(
          Pre_Rating[[row]],
          opponent_ratings_list[[row]]
        )
      },
      numeric(1)
    ),
    performance = Total_Points - expected_score
  ) %>%
  arrange(desc(performance))

Summary Statistics and Top Performers

# Headline numbers: how many players were scored, plus the mean and standard
# deviation of the actual-minus-expected differences (3 decimal places).
cat("\n=== ANALYSIS SUMMARY ===\n", sep = "")
## 
## === ANALYSIS SUMMARY ===
cat(sprintf("Total players analyzed: %d \n", nrow(results)))
## Total players analyzed: 64
cat(
  "Mean performance difference:",
  round(mean(results[["performance"]]), digits = 3),
  "\n"
)
## Mean performance difference: 0.382
cat(
  "Standard deviation of performance:",
  round(sd(results[["performance"]]), digits = 3),
  "\n\n"
)
## Standard deviation of performance: 1.172

Top 5 Overperformers and Underperformers

# Top 5 Overperformers
# `results` is already sorted by descending performance, so its first rows are
# the largest overperformers. The loop walks the rows actually returned rather
# than a fixed 1:5, so the report stays correct with fewer than five players.
cat("=== TOP 5 OVERPERFORMERS ===\n")
## === TOP 5 OVERPERFORMERS ===
top_overperformers <- head(results, 5)
for (i in seq_len(nrow(top_overperformers))) {
  player <- top_overperformers[i, ]
  cat(sprintf("%d. %s (%s)\n", i, player$Player_Name, player$State))
  # Ratings are read as doubles; "%.0f" prints them as whole numbers and,
  # unlike "%d", does not error on a non-integer-valued double.
  cat(sprintf("   Pre-Tournament Rating: %.0f\n", player$Pre_Rating))
  cat(sprintf("   Actual Score: %.1f\n", player$Total_Points))
  cat(sprintf("   Expected Score: %.2f\n", player$expected_score))
  # "%+.2f" supplies the sign itself, so a negative value is never shown as
  # "+-x.xx"; positive values print exactly as before.
  cat(sprintf("   Overperformance: %+.2f points\n", player$performance))
  cat(sprintf("   Games Played: %d\n\n", player$games_played))
}
## 1. ADITYA BAJAJ (MI)
##    Pre-Tournament Rating: 1384
##    Actual Score: 6.0
##    Expected Score: 2.86
##    Overperformance: +3.14 points
##    Games Played: 7
## 
## 2. JACOB ALEXANDER LAVALLEY (MI)
##    Pre-Tournament Rating: 377
##    Actual Score: 3.0
##    Expected Score: 0.02
##    Overperformance: +2.98 points
##    Games Played: 7
## 
## 3. AMIYATOSH PWNANANDAM (MI)
##    Pre-Tournament Rating: 980
##    Actual Score: 3.5
##    Expected Score: 0.66
##    Overperformance: +2.84 points
##    Games Played: 5
## 
## 4. ZACHARY JAMES HOUGHTON (MI)
##    Pre-Tournament Rating: 1220
##    Actual Score: 4.5
##    Expected Score: 2.11
##    Overperformance: +2.39 points
##    Games Played: 7
## 
## 5. STEFANO LEE (ON)
##    Pre-Tournament Rating: 1411
##    Actual Score: 5.0
##    Expected Score: 2.70
##    Overperformance: +2.30 points
##    Games Played: 7
# Top 5 Underperformers
# The last rows of `results` (sorted by descending performance) hold the
# largest underperformers; they are re-sorted ascending so the worst result is
# listed first. The loop walks the rows actually returned rather than a fixed
# 1:5, so the report stays correct with fewer than five players.
cat("=== TOP 5 UNDERPERFORMERS ===\n")
## === TOP 5 UNDERPERFORMERS ===
bottom_performers <- tail(results, 5)
bottom_performers <- bottom_performers[order(bottom_performers$performance), ]
for (i in seq_len(nrow(bottom_performers))) {
  player <- bottom_performers[i, ]
  cat(sprintf("%d. %s (%s)\n", i, player$Player_Name, player$State))
  # Ratings are read as doubles; "%.0f" prints them as whole numbers and,
  # unlike "%d", does not error on a non-integer-valued double.
  cat(sprintf("   Pre-Tournament Rating: %.0f\n", player$Pre_Rating))
  cat(sprintf("   Actual Score: %.1f\n", player$Total_Points))
  cat(sprintf("   Expected Score: %.2f\n", player$expected_score))
  cat(sprintf("   Underperformance: %.2f points\n", player$performance))
  cat(sprintf("   Games Played: %d\n\n", player$games_played))
}
## 1. LOREN SCHWIEBERT (MI)
##    Pre-Tournament Rating: 1745
##    Actual Score: 3.5
##    Expected Score: 6.01
##    Underperformance: -2.51 points
##    Games Played: 7
## 
## 2. GEORGE AVERY JONES (ON)
##    Pre-Tournament Rating: 1522
##    Actual Score: 3.5
##    Expected Score: 5.27
##    Underperformance: -1.77 points
##    Games Played: 7
## 
## 3. JARED GE (MI)
##    Pre-Tournament Rating: 1332
##    Actual Score: 3.0
##    Expected Score: 4.64
##    Underperformance: -1.64 points
##    Games Played: 7
## 
## 4. JOSHUA DAVID LEE (MI)
##    Pre-Tournament Rating: 1438
##    Actual Score: 3.5
##    Expected Score: 5.09
##    Underperformance: -1.59 points
##    Games Played: 7
## 
## 5. CHIEDOZIE OKORIE (MI)
##    Pre-Tournament Rating: 1602
##    Actual Score: 3.5
##    Expected Score: 4.73
##    Underperformance: -1.23 points
##    Games Played: 6

Save and Visualize Results

# Build the export table: reporting columns only, with the two derived score
# columns rounded to two decimal places.
performance_summary <- results %>%
  select(
    Player_Name, State, Pre_Rating, Total_Points,
    expected_score, performance, games_played
  ) %>%
  mutate(
    across(
      c(expected_score, performance),
      function(score) round(score, digits = 2)
    )
  )

# Write the table to the working directory and confirm on the console.
write_csv(performance_summary, "chess_performance_analysis.csv")
cat("Results saved to 'chess_performance_analysis.csv'\n", sep = "")
## Results saved to 'chess_performance_analysis.csv'

Data Visualization

# Data visualizations
# ggplot2 is treated as optional: require() both checks for and attaches it,
# and when it is unavailable an install hint is printed instead of plotting.
if (require(ggplot2, quietly = TRUE)) {

  # Performance distribution histogram; the dashed red line marks zero, i.e.
  # a score exactly equal to the Elo expectation.
  p1 <- ggplot(performance_summary, aes(x = performance)) +
    geom_histogram(bins = 20, fill = "steelblue", alpha = 0.7) +
    geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
    labs(
      title = "Distribution of Performance (Actual - Expected Score)",
      x = "Performance Difference",
      y = "Count"
    ) +
    theme_minimal()

  print(p1)

  # Scatter plot: Rating vs Performance, with a linear trend line.
  # The formula is spelled out (it is the one geom_smooth() would pick for
  # method = "lm") so the fit is explicit and no "using formula" message is
  # emitted on every render.
  p2 <- ggplot(performance_summary, aes(x = Pre_Rating, y = performance)) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", formula = y ~ x, se = TRUE, color = "red") +
    geom_hline(yintercept = 0, color = "blue", linetype = "dashed") +
    labs(
      title = "Pre-Tournament Rating vs Performance",
      x = "Pre-Tournament Rating",
      y = "Performance (Actual - Expected)"
    ) +
    theme_minimal()

  print(p2)

} else {
  cat("Install ggplot2 package for visualizations: install.packages('ggplot2')\n")
}

## `geom_smooth()` using formula = 'y ~ x'

# Display final results table: print every row of performance_summary
# (n = Inf lifts the tibble's default row limit). Rows keep the order set when
# `results` was sorted by descending performance.
cat("\n=== COMPLETE RESULTS (sorted by performance) ===\n")
## 
## === COMPLETE RESULTS (sorted by performance) ===
print(performance_summary, n = Inf)
## # A tibble: 64 × 7
##    Player_Name          State Pre_Rating Total_Points expected_score performance
##    <chr>                <chr>      <dbl>        <dbl>          <dbl>       <dbl>
##  1 ADITYA BAJAJ         MI          1384          6             2.86        3.14
##  2 JACOB ALEXANDER LAV… MI           377          3             0.02        2.98
##  3 AMIYATOSH PWNANANDAM MI           980          3.5           0.66        2.84
##  4 ZACHARY JAMES HOUGH… MI          1220          4.5           2.11        2.39
##  5 STEFANO LEE          ON          1411          5             2.7         2.3 
##  6 ETHAN GUO            MI           935          2.5           0.3         2.2 
##  7 ANVIT RAO            MI          1365          5             2.85        2.15
##  8 PATRICK H SCHILLING  MI          1716          5.5           3.45        2.05
##  9 SHIVAM JHA           MI          1056          2.5           0.65        1.85
## 10 SEAN M MC CORMICK    MI           853          2             0.28        1.72
## 11 DAKSHESH DARURI      MI          1553          6             4.29        1.71
## 12 VIRAJ MOHILE         MI           917          2             0.37        1.63
## 13 MIKE NIKITIN         MI          1604          4             2.96        1.04
## 14 GARY DEE SWATHELL    MI          1649          5             3.99        1.01
## 15 JULIA SHEN           MI           967          1.5           0.5         1   
## 16 HANSHI ZUO           MI          1655          5.5           4.57        0.93
## 17 BRADLEY SHAW         MI          1610          4.5           3.61        0.89
## 18 BRIAN LIU            MI          1423          3             2.15        0.85
## 19 KENNETH J TACK       MI          1663          4.5           3.66        0.84
## 20 MARISA RICCI         MI          1153          2             1.21        0.79
## 21 GARY HUA             ON          1794          6             5.23        0.77
## 22 TEJAS AYYAGARI       MI          1011          2.5           1.79        0.71
## 23 KYLE WILLIAM MURPHY  MI          1403          3             2.32        0.68
## 24 MICHAEL J MARTIN     MI          1291          2.5           1.86        0.64
## 25 JUSTIN D SCHILLING   MI          1199          3             2.37        0.63
## 26 ALAN BUI             ON          1363          4             3.46        0.54
## 27 EZEKIEL HOUGHTON     MI          1641          5             4.47        0.53
## 28 MICHAEL R ALDRICH    MI          1229          4             3.55        0.45
## 29 SIDDHARTH JHA        MI          1355          3.5           3.06        0.44
## 30 MICHAEL LU           MI          1092          2             1.7         0.3 
## 31 DANIEL KHAIN         MI          1382          2.5           2.21        0.29
## 32 FOREST ZHANG         MI          1348          3             2.77        0.23
## 33 JEZZEL FARKAS        ON           955          1.5           1.3         0.2 
## 34 ASHWIN BALAJI        MI          1530          1             0.82        0.18
## 35 RONALD GRZEGORCZYK   MI          1629          4             3.85        0.15
## 36 ROBERT GLEN VASEY    MI          1283          3             2.87        0.13
## 37 EUGENE L MCCLURE     MI          1555          4             3.88        0.12
## 38 JOSHUA PHILIP MATHE… ON          1441          3.5           3.4         0.1 
## 39 SOFIA ADINA STANESC… MI          1507          3.5           3.48        0.02
## 40 TORRANCE HENRY JR    MI          1666          4.5           4.48        0.02
## 41 ALEX KONG            MI          1186          2             2           0   
## 42 DINH DANG BUI        ON          1563          4             4.04       -0.04
## 43 JASON ZHENG          MI          1595          4             4.09       -0.09
## 44 GAURAV GIDWANI       MI          1552          3.5           3.6        -0.1 
## 45 DIPANKAR ROY         MI          1564          4             4.14       -0.14
## 46 JOSE C YBARRA        MI          1393          2             2.17       -0.17
## 47 THOMAS JOSEPH HOSMER MI          1175          1             1.27       -0.27
## 48 JADE GE              MI          1449          3.5           3.79       -0.29
## 49 BEN LI               MI          1163          1             1.3        -0.3 
## 50 DAVID SUNDEEN        MI          1600          4             4.31       -0.31
## 51 HANSEN SONG          OH          1686          5             5.34       -0.34
## 52 JOEL R HENDON        MI          1436          3             3.43       -0.43
## 53 CAMERON WILLIAM MC … MI          1712          4.5           5.04       -0.54
## 54 DEREK YAN            MI          1242          3             3.61       -0.61
## 55 MICHAEL JEFFERY THO… MI          1399          3.5           4.24       -0.74
## 56 LARRY HODGE          MI          1270          2             2.88       -0.88
## 57 MAX ZHU              ON          1579          3.5           4.48       -0.98
## 58 ERIC WRIGHT          MI          1362          2.5           3.49       -0.99
## 59 RISHI SHETTY         MI          1494          3.5           4.56       -1.06
## 60 CHIEDOZIE OKORIE     MI          1602          3.5           4.73       -1.23
## 61 JOSHUA DAVID LEE     MI          1438          3.5           5.09       -1.59
## 62 JARED GE             MI          1332          3             4.64       -1.64
## 63 GEORGE AVERY JONES   ON          1522          3.5           5.27       -1.77
## 64 LOREN SCHWIEBERT     MI          1745          3.5           6.01       -2.51
## # ℹ 1 more variable: games_played <int>