Objective

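Ingest the chess tournament results .txt file and extract the following info for each player: name, state, total points, pre-tournament rating, and the average pre-tournament rating of their opponents.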

Parsing

# Read the raw tournament report into a character vector, one element per
# line of the file.
tournament_data <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
# Preview the first 10 lines to see the header rows and the record layout.
head(tournament_data, 10)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

Processing

# Parse the tournament report into one record per player and compute each
# player's average opponent pre-rating.
#
# Layout relied on (see the preview of `tournament_data`): four header lines,
# then records separated by all-dash lines. The first line of a record holds
#   pair number | name | total points | one column per round
# and the second line holds
#   state | "USCF ID / R: <pre> -> <post>" | ...

# Split "|"-delimited report lines into whitespace-trimmed fields.
split_fields <- function(lines) {
  lapply(strsplit(lines, "\\|"), trimws)
}

# Take the k-th field of every record; NA where a record has fewer fields.
nth_field <- function(fields, k) {
  vapply(fields, function(f) f[k], character(1))
}

# Drop the four header lines, plus any blank lines, which would otherwise be
# parsed as players with all-NA fields.
data_lines <- tournament_data[-(1:4)]
data_lines <- data_lines[nzchar(trimws(data_lines))]

# Position of each line inside its run of consecutive non-separator lines:
# 0 for a separator, 1 for the line right after a separator (or at the top),
# 2 for the one after that, and so on. Odd positions are player lines; the
# line following a player line is its state line unless it is a separator.
is_separator <- grepl("^-+$", data_lines)
line_index <- seq_along(data_lines)
offset_in_run <- line_index - cummax(line_index * is_separator)
player_index <- which(offset_in_run %% 2L == 1L)
state_index <- player_index + 1L
has_state <- state_index <= length(data_lines) & !is_separator[state_index]

player_fields <- split_fields(data_lines[player_index])
state_fields <- split_fields(data_lines[state_index[has_state]])

pair_numbers <- as.numeric(gsub("[^0-9]", "", nth_field(player_fields, 1L)))
names <- nth_field(player_fields, 2L)
# Points stay as text so values such as "6.0" are written out unchanged.
points <- nth_field(player_fields, 3L)

# Every column after the third is a round. Only cells containing a number
# name an opponent; cells holding just a result letter are skipped.
opponents_list <- lapply(player_fields, function(fields) {
  rounds <- fields[-(1:3)]
  as.numeric(gsub("[^0-9]", "", rounds[grepl("[0-9]", rounds)]))
})

# Players whose record has no state line keep NA for state and rating.
states <- rep(NA_character_, length(player_index))
states[has_state] <- nth_field(state_fields, 1L)

# The pre-rating is the first run of digits after "R:"; anything following
# it in the field is ignored. Unparseable or missing text gives NA.
rating_text <- nth_field(state_fields, 2L)
rating_match <- regexpr("R:\\s*([0-9]+)", rating_text)
state_ratings <- rep(NA_real_, length(rating_text))
state_ratings[!is.na(rating_match) & rating_match > 0] <-
  as.numeric(gsub("[^0-9]", "", regmatches(rating_text, rating_match)))
ratings <- rep(NA_real_, length(player_index))
ratings[has_state] <- state_ratings

rating_lookup <- data.frame(
  PairNumber = pair_numbers,
  PreRating = ratings
)

# Average the known pre-ratings of each player's opponents, rounded to a
# whole number. Opponents that are not in the lookup or have no rating are
# ignored; a player with no usable opponent rating gets NA.
avg_opp_ratings <- vapply(opponents_list, function(opponent_numbers) {
  opponent_ratings <- rating_lookup$PreRating[
    match(opponent_numbers, rating_lookup$PairNumber)
  ]
  opponent_ratings <- opponent_ratings[!is.na(opponent_ratings)]
  if (length(opponent_ratings) == 0L) {
    return(NA_real_)
  }
  round(mean(opponent_ratings))
}, numeric(1))

chess_results <- data.frame(
  Name = names,
  State = states,
  Points = points,
  PreRating = ratings,
  AvgOppRating = avg_opp_ratings
)

# Show the first rows of the assembled table as a sanity check.
head(chess_results)
##                  Name State Points PreRating AvgOppRating
## 1            GARY HUA    ON    6.0      1794         1605
## 2     DAKSHESH DARURI    MI    6.0      1553         1469
## 3        ADITYA BAJAJ    MI    6.0      1384         1564
## 4 PATRICK H SCHILLING    MI    5.5      1716         1574
## 5          HANSHI ZUO    MI    5.5      1655         1501
## 6         HANSEN SONG    OH    5.0      1686         1519
# Write the table to CSV, omitting the row-name column.
write.csv(chess_results, "tournament_results.csv", row.names = FALSE)

# Confirm that the output file now exists.
file.exists("tournament_results.csv")
## [1] TRUE

Findings

Rendered .csv contains players and the relevant data points:

For example, Gary Hua (first player): Name: GARY HUA; State: ON; Points: 6.0; PreRating: 1794; AvgOppRating: 1605.

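Note: AvgOppRating is calculated by averaging the pre-tournament ratings of all of a player's opponents (rounds with no opponent number listed are skipped) and rounding to the nearest whole number.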