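Ingest the chess tournament results .txt file and extract the following player info: name, state, total points, pre-tournament rating, and the average pre-tournament rating of the player's opponents.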
# Read the raw tournament report into a character vector, one element per
# line of the file.
tournament_data <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
# The warning above is about the file's last line; the preview below shows
# that the lines were still read.
# Preview the first 10 lines to inspect the layout: a 4-line header, then
# two lines per player followed by a dashed separator line.
head(tournament_data, 10)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
# Parse the tournament table into parallel per-player vectors:
#   pair_numbers, names, states, points, ratings, and opponents_list.
# After the 4-line header, each player occupies two consecutive lines (the
# player line, then the state/rating line), and dashed lines separate players.
data_lines <- tournament_data[-(1:4)]
is_separator <- grepl("^-+$", data_lines)

# Every non-separator line starts at most one player record, so this count is
# an upper bound. Preallocating avoids re-copying the vectors on every append.
max_players <- sum(!is_separator)
pair_numbers <- rep(NA_real_, max_players)
names <- rep(NA_character_, max_players)
states <- rep(NA_character_, max_players)
points <- rep(NA_character_, max_players)
ratings <- rep(NA_real_, max_players)
opponents_list <- vector("list", max_players)

player_count <- 0
line_number <- 1
while (line_number <= length(data_lines)) {
  if (is_separator[line_number]) {
    line_number <- line_number + 1
    next
  }
  player_count <- player_count + 1

  # Player line: pair number | name | total points | one field per round.
  player_line <- data_lines[line_number]
  player_parts <- trimws(strsplit(player_line, "\\|")[[1]])
  pair_numbers[player_count] <- as.numeric(
    gsub("[^0-9]", "", player_parts[1])
  )
  names[player_count] <- player_parts[2]
  points[player_count] <- player_parts[3]

  # Fields 4-10 are the round results. A field longer than one character
  # carries an opponent's pair number (e.g. "W 39"); a lone letter does not.
  round_fields <- player_parts[intersect(4:10, seq_along(player_parts))]
  played <- round_fields[nchar(round_fields) > 1]
  # Always store a numeric vector, possibly of length zero. Assigning NULL
  # with [[<- would add nothing and misalign this list with the vectors above.
  opponents_list[[player_count]] <- as.numeric(gsub("[^0-9]", "", played))

  # State line: state | "<USCF ID> / R: <pre-rating> -><post-rating>" | ...
  # If it is missing, state and rating keep their preallocated NA.
  line_number <- line_number + 1
  if (line_number <= length(data_lines) && !is_separator[line_number]) {
    state_line <- data_lines[line_number]
    state_parts <- trimws(strsplit(state_line, "\\|")[[1]])
    states[player_count] <- state_parts[1]
    rating_text <- state_parts[2]
    # Only the digits right after "R:" are matched, so the pre-tournament
    # rating is taken and anything following it is ignored.
    rating_match <- regexpr("R:\\s*([0-9]+)", rating_text)
    # regexpr() returns NA when the rating field itself is missing; guard so
    # a malformed line yields an NA rating instead of an error in if ().
    if (!is.na(rating_match) && rating_match > 0) {
      rating_str <- regmatches(rating_text, rating_match)
      ratings[player_count] <- as.numeric(gsub("[^0-9]", "", rating_str))
    }
  }
  line_number <- line_number + 1
}

# Drop the unused tail of the preallocated storage.
parsed <- seq_len(player_count)
pair_numbers <- pair_numbers[parsed]
names <- names[parsed]
states <- states[parsed]
points <- points[parsed]
ratings <- ratings[parsed]
opponents_list <- opponents_list[parsed]
# Lookup table from a player's pair number to their pre-tournament rating.
rating_lookup <- data.frame(
  PairNumber = pair_numbers,
  PreRating = ratings
)

# For each player, average the pre-tournament ratings of the opponents they
# faced, rounded to the nearest whole number. The result is NA when the player
# has no opponents or none of the opponents has a known rating.
# vapply() over the list also handles an empty opponents_list, which the
# previous 1:length(...) index loop could not.
avg_opp_ratings <- vapply(
  opponents_list,
  function(player_opponents) {
    # which() drops NA comparisons, so an unknown pair number matches nothing.
    opponent_ratings <- unlist(lapply(player_opponents, function(opp) {
      rating_lookup$PreRating[which(rating_lookup$PairNumber == opp)]
    }))
    known_ratings <- opponent_ratings[!is.na(opponent_ratings)]
    if (length(known_ratings) == 0) {
      # Averaging an empty set would give NaN; report a missing value instead.
      NA_real_
    } else {
      round(mean(known_ratings))
    }
  },
  numeric(1)
)
# Assemble the final per-player table from the parsed vectors, one row per
# player, then preview the first rows.
result_columns <- list(
  Name = names,
  State = states,
  Points = points,
  PreRating = ratings,
  AvgOppRating = avg_opp_ratings
)
chess_results <- do.call(data.frame, result_columns)
head(chess_results)
## Name State Points PreRating AvgOppRating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
# Export the results table as CSV without a row-name column, then check that
# the file now exists on disk.
write.csv(
  x = chess_results,
  file = "tournament_results.csv",
  row.names = FALSE
)
file.exists("tournament_results.csv")
## [1] TRUE
Rendered .csv contains players and the relevant data points:
For example, Gary Hua (the first player): - Name: GARY HUA - State: ON - Points: 6.0 - PreRating: 1794 - AvgOppRating: 1605
Note: AvgOppRating is calculated by averaging the pre-tournament ratings of all of Gary’s opponents and rounding to the nearest whole number.