1) Read the tournament text

# Try the candidate file names in order and use the first one that exists
# in the working directory.
candidates <- c("tournamentinfo (1).txt", "tournamentinfo.txt")
fname <- candidates[file.exists(candidates)][1]
if (is.na(fname)) {
  # Fail with an actionable message instead of a bare assertion error.
  stop(
    "Tournament file not found; expected one of: ",
    paste(candidates, collapse = ", "),
    call. = FALSE
  )
}
lines <- readr::read_lines(fname)
# head() shows at most six lines and never pads a short file with NA.
length(lines); head(lines, 6)
## [1] 196
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

2) Locate player blocks (each player = two lines)

# Candidate first lines start with a pair number followed by a pipe.
top_idx <- which(grepl("^\\s*\\d+\\s*\\|", lines))
# Keep a candidate only when the line after it carries the "R:" rating
# marker; the index is clamped so the last line never reads past the end.
has_rating_below <- stringr::str_detect(
  lines[pmin(top_idx + 1, length(lines))],
  "R:"
)
top_idx <- top_idx[has_rating_below]
length(top_idx)
## [1] 64

3) Helpers to parse fields from the two lines

#' Parse one player's two-line record into a one-row tibble.
#'
#' @param i Index of the player's first line (pair number, name, total
#'   points, per-round results). The second line (state, USCF ID, ratings)
#'   is read from `i + 1`.
#' @param src Character vector of tournament lines. Defaults to the
#'   script-level `lines`, so existing one-argument calls behave as before.
#' @return A one-row tibble with `pair_num`, `name`, `state`, `total_pts`,
#'   `pre_rating`, and a list-column `opp_ids` holding the opponents' pair
#'   numbers in round order (empty when no round lists an opponent).
parse_player <- function(i, src = lines) {
  top <- src[i]
  bot <- src[i + 1]

  pair_num <- as.integer(stringr::str_match(top, "^\\s*(\\d+)\\s*\\|")[, 2])
  name <- stringr::str_squish(
    stringr::str_match(top, "^\\s*\\d+\\s*\\|\\s*([^|]+?)\\s*\\|")[, 2]
  )
  total_pts <- as.numeric(
    stringr::str_match(
      top, "^\\s*\\d+\\s*\\|[^|]+\\|\\s*([0-9]+\\.?[0-9]?)\\s*\\|"
    )[, 2]
  )

  # Search for opponents only in the round columns (everything after the
  # third pipe), so a result letter followed by digits inside the name or
  # points column can never be mistaken for an opponent.
  rounds <- stringr::str_remove(top, "^[^|]*\\|[^|]*\\|[^|]*\\|")
  opp_nums <- stringr::str_match_all(rounds, "[WDLBXHUF]\\s*(\\d+)")[[1]]
  opp_ids <- if (nrow(opp_nums) > 0) as.integer(opp_nums[, 2]) else integer(0)

  state <- stringr::str_match(bot, "^\\s*([A-Z]{2})\\s*\\|")[, 2]
  # Only the digits directly after "R:" are captured; anything that follows
  # them on the line is ignored.
  pre_rating <- as.integer(stringr::str_match(bot, "R:\\s*([0-9]+)")[, 2])

  # Namespaced like every other package call in this script, so the helper
  # does not depend on tibble being attached.
  tibble::tibble(
    pair_num, name, state, total_pts, pre_rating,
    opp_ids = list(opp_ids)
  )
}
# Parse every located player record and stack the one-row results.
players_raw <- top_idx %>% purrr::map_dfr(parse_player)
# Preview the first five parsed players as a captioned table.
players_raw %>%
  head(5) %>%
  knitr::kable(caption = "Raw parsed players (first 5)")
Raw parsed players (first 5)
pair_num name state total_pts pre_rating opp_ids
1 GARY HUA ON 6.0 1794 39, 21, 18, 14, 7, 12, 4
2 DAKSHESH DARURI MI 6.0 1553 63, 58, 4, 17, 16, 20, 7
3 ADITYA BAJAJ MI 6.0 1384 8, 61, 25, 21, 11, 13, 12
4 PATRICK H SCHILLING MI 5.5 1716 23, 28, 2, 26, 5, 19, 1
5 HANSHI ZUO MI 5.5 1655 45, 37, 12, 13, 4, 14, 17

4) Compute average opponent pre-ratings

# Lookup table: pair number -> pre-tournament rating.
rating_map <- players_raw %>% dplyr::select(pair_num, pre_rating)

# For each player, translate the opponents' pair numbers into pre-ratings,
# count how many resolved to a rating, and average those. Typed purrr maps
# replace rowwise() + scalar ifelse(): each output column has a guaranteed
# type and no row grouping needs to be undone afterwards.
avg_opp <- players_raw %>%
  dplyr::mutate(
    opp_pre = purrr::map(
      opp_ids,
      function(ids) rating_map$pre_rating[match(ids, rating_map$pair_num)]
    ),
    games = purrr::map_int(opp_pre, function(r) sum(!is.na(r))),
    # NA (not NaN) when a player has no opponent with a known rating.
    avg_opp_pre = purrr::map_dbl(
      opp_pre,
      function(r) if (any(!is.na(r))) mean(r, na.rm = TRUE) else NA_real_
    )
  )
# Final table: round the opponent average to a whole number, then keep and
# rename the five output columns.
result <- avg_opp %>%
  dplyr::mutate(avg_opp_pre = round(avg_opp_pre)) %>%
  dplyr::select(
    player_name = name,
    player_state = state,
    total_points = total_pts,
    pre_rating,
    avg_opp_pre
  )
# Preview the first ten rows as a captioned table.
result %>%
  head(10) %>%
  knitr::kable(caption = "Output preview (first 10 players)")
Output preview (first 10 players)
player_name player_state total_points pre_rating avg_opp_pre
GARY HUA ON 6.0 1794 1605
DAKSHESH DARURI MI 6.0 1553 1469
ADITYA BAJAJ MI 6.0 1384 1564
PATRICK H SCHILLING MI 5.5 1716 1574
HANSHI ZUO MI 5.5 1655 1501
HANSEN SONG OH 5.0 1686 1519
GARY DEE SWATHELL MI 5.0 1649 1372
EZEKIEL HOUGHTON MI 5.0 1641 1468
STEFANO LEE ON 5.0 1411 1523
ANVIT RAO MI 5.0 1365 1554

5) Save CSV (submission file)

# Write the final table to a CSV in the working directory.
csv_path <- "chess_players.csv"
readr::write_csv(result, csv_path)
# List files whose names end in "ChessProject1", "html" or "csv".
list.files(pattern = "(ChessProject1|html|csv)$")
## [1] "chess_players.csv"  "ChessProject1.html"

6) Spot-check one expected player

# Show the row(s) whose player name begins with "GARY HUA".
dplyr::filter(result, startsWith(player_name, "GARY HUA"))
## # A tibble: 1 × 5
##   player_name player_state total_points pre_rating avg_opp_pre
##   <chr>       <chr>               <dbl>      <int>       <dbl>
## 1 GARY HUA    ON                      6       1794        1605