Introduction

This project analyzes chess tournament results from a text file. The goal is to extract relevant player information, including names, states, total points, and pre-tournament ratings, and to calculate the average pre-tournament rating of each player’s opponents. The data is then compiled into a clean CSV file for future analysis or database import.

Extract player names

# Fetch the raw tournament report as a character vector, one element per line
lines <- readLines(
  'https://raw.githubusercontent.com/vincent-usny/project1/refs/heads/main/project1.txt',
  warn = FALSE
)

# Discard the first four lines; the remainder is read in groups of three below
data <- lines[-seq_len(4)]

#get player names
# Lines 1, 4, 7, ... of `data` are split on "|"; the second field, with
# surrounding whitespace removed, is taken as the player name.
players_lines <- data[seq(from = 1, to = length(data), by = 3)]
parts_line1 <- strsplit(players_lines, split = '\\|')
names <- vapply(
  parts_line1,
  function(fields) trimws(fields[2]),
  character(1)
)

Extract player states

#get states
# Lines 2, 5, 8, ... of `data` are split on "|"; the first field, trimmed of
# whitespace, is taken as the player's state.
state_lines <- data[seq(from = 2, to = length(data), by = 3)]
parts_line2 <- strsplit(state_lines, split = '\\|')
states <- trimws(vapply(parts_line2, `[`, character(1), 1))

Extract each player’s total points and pre-tournament rating

#get total points
# Third "|" field of each player line, trimmed and kept as text (e.g. "6.0")
total_points <- trimws(vapply(parts_line1, `[`, character(1), 3))

#get pre-rating
# Second "|" field of each state line holds the rating text
ratings <- vapply(parts_line2, `[`, character(1), 2)

# Keep only the run of digits that follows "R:" and convert it to a number.
# sub() is vectorised, so the whole vector is handled in a single call.
pre_rating <- as.numeric(sub(".*R: *(\\d+).*", "\\1", ratings))

Calculate the average pre-tournament rating of each player’s opponents

# Pair number of every player: the first "|"-separated field of each player
# line, converted to numeric.
pair_nums <- vapply(
  players_lines,
  function(line) as.numeric(strsplit(line, "\\|")[[1]][1]),
  numeric(1)
)

# Pre-tournament ratings keyed by pair number (the names are the pair numbers
# as text, e.g. "39").
pre_rating_lookup <- setNames(pre_rating, pair_nums)

# Average pre-tournament rating of the opponents each player actually faced.
# Fields 4-10 of a player line hold one entry per round, e.g. "W  39"; only
# entries whose result code is W, L or D carry an opponent pair number.
avg_opp_rating <- vapply(parts_line1, function(fields) {
  rounds <- trimws(fields[4:10])
  played <- substr(rounds, 1, 1) %in% c("W", "L", "D")
  if (!any(played)) {
    # no games played: report NA explicitly instead of relying on mean(NULL)
    return(NA_real_)
  }
  opp_nums <- as.numeric(
    vapply(strsplit(rounds[played], "\\s+"), function(p) p[2], character(1))
  )
  # Look the opponents up BY NAME (pair number), not by position: positional
  # indexing is only correct when pair numbers are exactly 1..n in file order.
  mean(pre_rating_lookup[as.character(opp_nums)])
}, numeric(1))

# Round to the nearest whole rating point
avg_opp_rating <- round(avg_opp_rating)

Export csv file

# Assemble the per-player summary table
chess_df <- data.frame(
  Player = names,
  State = states,
  Total_points = total_points,
  Pre_rating = pre_rating,
  Avg_opp_rating = avg_opp_rating
)

# Write the table to a CSV file in the working directory. row.names = FALSE
# keeps R's automatic row numbers out of the file so the header matches the
# five data columns.
write.csv(chess_df, file = "chess_results.csv", row.names = FALSE)

head(chess_df)
##                Player State Total_points Pre_rating Avg_opp_rating
## 1            GARY HUA    ON          6.0       1794           1605
## 2     DAKSHESH DARURI    MI          6.0       1553           1469
## 3        ADITYA BAJAJ    MI          6.0       1384           1564
## 4 PATRICK H SCHILLING    MI          5.5       1716           1574
## 5          HANSHI ZUO    MI          5.5       1655           1501
## 6         HANSEN SONG    OH          5.0       1686           1519

Week 5B Assignment

# Calculate expected scores
# For every player, sum the expected score E over the rounds actually played
# (result code W, L or D), where for each opponent
#   E = 1 / (1 + 10^((opponent_rating - player_rating) / 400))
# formula source: https://handbook.fide.com/chapter/B022014
exp_score <- vapply(seq_along(parts_line1), function(i) {
  rounds <- trimws(parts_line1[[i]][4:10])
  played <- substr(rounds, 1, 1) %in% c("W", "L", "D")
  opp_nums <- as.numeric(
    vapply(strsplit(rounds[played], "\\s+"), function(p) p[2], character(1))
  )
  # Look opponents up BY NAME (pair number) rather than by position, so the
  # result does not depend on pair numbers being 1..n in file order.
  opp_ratings <- unname(pre_rating_lookup[as.character(opp_nums)])
  # sum() over zero played rounds is 0
  sum(1 / (1 + 10^((opp_ratings - pre_rating[i]) / 400)))
}, numeric(1))

exp_score <- round(exp_score, 2)
diff_score <- as.numeric(total_points) - exp_score

# create table for players and the diff btw actual
# and expected score
result <- data.frame(
  Player = names,
  Pre_rating = pre_rating,
  Actual_score = as.numeric(total_points),
  Expected_score = exp_score,
  Difference = diff_score
)
head(result)
##                Player Pre_rating Actual_score Expected_score Difference
## 1            GARY HUA       1794          6.0           5.16       0.84
## 2     DAKSHESH DARURI       1553          6.0           3.78       2.22
## 3        ADITYA BAJAJ       1384          6.0           1.95       4.05
## 4 PATRICK H SCHILLING       1716          5.5           4.74       0.76
## 5          HANSHI ZUO       1655          5.5           4.38       1.12
## 6         HANSEN SONG       1686          5.0           4.94       0.06

Week 5B Assignment

# sort the overperformers and underperformers
# Keep the five ROWS with the largest / smallest Difference. Indexing a data
# frame with a single `[1:5]` selects columns, not rows, so head() is used to
# take rows; head() also copes with tables that have fewer than five rows.
overperformers <- head(result[order(-result$Difference), ], 5)
underperformers <- head(result[order(result$Difference), ], 5)
head(overperformers, 5)
##                      Player Pre_rating Actual_score Expected_score Difference
## 3              ADITYA BAJAJ       1384          6.0           1.95       4.05
## 15   ZACHARY JAMES HOUGHTON       1220          4.5           1.37       3.13
## 10                ANVIT RAO       1365          5.0           1.94       3.06
## 46 JACOB ALEXANDER LAVALLEY        377          3.0           0.04       2.96
## 37     AMIYATOSH PWNANANDAM        980          3.5           0.77       2.73
# show the first five rows of the underperformer table
head(underperformers, n = 5)
##                Player Pre_rating Actual_score Expected_score Difference
## 25   LOREN SCHWIEBERT       1745          3.5           6.28      -2.78
## 30 GEORGE AVERY JONES       1522          3.5           6.02      -2.52
## 42           JARED GE       1332          3.0           5.01      -2.01
## 31       RISHI SHETTY       1494          3.5           5.09      -1.59
## 35   JOSHUA DAVID LEE       1438          3.5           4.96      -1.46

Conclusion

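The tournament data was successfully extracted and cleaned, creating a new dataset containing players’ names, states, total points, pre-tournament ratings, and average ratings of each player’s opponents. The CSV file is now available to be imported into MySQL or other data tools. In addition, comparing each player’s actual score with the expected score derived from pre-tournament ratings identified the largest overperformer (Aditya Bajaj, +4.05) and the largest underperformer (Loren Schwiebert, -2.78).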