1) Read the tournament text
# Candidate file names, checked in order; the first one that exists is used.
candidates <- c("tournamentinfo (1).txt", "tournamentinfo.txt")
fname <- candidates[file.exists(candidates)][1]

# Fail with a message that names the files we looked for, instead of the
# opaque "!is.na(fname) is not TRUE" that stopifnot() would produce.
if (is.na(fname)) {
  stop(
    "Tournament file not found; looked for: ",
    paste(candidates, collapse = ", "),
    call. = FALSE
  )
}

# Read the file into a character vector, one element per text line.
lines <- readr::read_lines(fname)

# Quick look at what was read: the line count and the first few lines.
# head() returns fewer elements on a short file rather than padding with NA.
length(lines)
head(lines, 6)
## [1] 196
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
2) Locate player blocks (each player = two lines)
# Indices of the first line of every player record.
top_idx <- local({
  # Lines whose first "|"-delimited field is a number (the pair number).
  starts <- grep("^\\s*\\d+\\s*\\|", lines)
  # The line that follows each candidate; pmin() keeps the lookup inside the
  # vector when a candidate happens to be the very last line.
  follow <- lines[pmin(starts + 1, length(lines))]
  # Keep a candidate only when its following line carries the "R:" marker.
  starts[stringr::str_detect(follow, "R:")]
})
length(top_idx)
## [1] 64
3) Helpers to parse fields from the two lines
#' Parse one player's two-line record into a one-row tibble.
#'
#' @param i Index into `src` of the record's first line (pair number, name,
#'   total points, per-round results). The record's second line is
#'   `src[i + 1]` (state code and the "R:" rating field).
#' @param src Character vector of tournament text lines. Defaults to the
#'   global `lines`, so existing one-argument calls keep working.
#' @return A one-row tibble with columns `pair_num` (integer), `name`,
#'   `state`, `total_pts` (numeric), `pre_rating` (integer) and `opp_ids`
#'   (list column holding an integer vector of opponent pair numbers).
parse_player <- function(i, src = lines) {
  top <- src[i]
  bot <- src[i + 1]

  # First line: "<pair> | <name> | <total> | <round 1> | <round 2> | ..."
  pair_num <- as.integer(stringr::str_match(top, "^\\s*(\\d+)\\s*\\|")[, 2])
  name <- stringr::str_squish(
    stringr::str_match(top, "^\\s*\\d+\\s*\\|\\s*([^|]+?)\\s*\\|")[, 2]
  )
  total_pts <- as.numeric(
    stringr::str_match(
      top,
      "^\\s*\\d+\\s*\\|[^|]+\\|\\s*([0-9]+\\.?[0-9]?)\\s*\\|"
    )[, 2]
  )

  # Drop the first three fields (pair, name, total) before looking for
  # "<result letter> <opponent number>" pairs, so that a digit inside the
  # name column (e.g. "ANDREW 2ND") is never mistaken for an opponent.
  # If the line does not have three leading fields nothing is removed and
  # the whole line is scanned.
  rounds <- stringr::str_remove(top, "^[^|]*\\|[^|]*\\|[^|]*\\|")
  opp_hits <- stringr::str_match_all(rounds, "[WDLBXHUF]\\s*(\\d+)")[[1]]
  # A zero-row match matrix yields integer(0) here, i.e. "no opponents".
  opp_ids <- as.integer(opp_hits[, 2])

  # Second line: "<state> | <id> / R: <pre-rating>...". Only the leading run
  # of digits after "R:" is taken, so any suffix after the number is ignored.
  state <- stringr::str_match(bot, "^\\s*([A-Z]{2})\\s*\\|")[, 2]
  pre_rating <- as.integer(stringr::str_match(bot, "R:\\s*([0-9]+)")[, 2])

  tibble::tibble(
    pair_num = pair_num,
    name = name,
    state = state,
    total_pts = total_pts,
    pre_rating = pre_rating,
    opp_ids = list(opp_ids)
  )
}
# Parse every player record and stack the one-row tibbles into one table.
players_raw <- dplyr::bind_rows(lapply(top_idx, parse_player))
# Preview the first five parsed players.
knitr::kable(head(players_raw, 5), caption = "Raw parsed players (first 5)")
Raw parsed players (first 5)

| pair_num | name | state | total_pts | pre_rating | opp_ids |
|---:|:---|:---|---:|---:|:---|
| 1 | GARY HUA | ON | 6.0 | 1794 | 39, 21, 18, 14, 7, 12, 4 |
| 2 | DAKSHESH DARURI | MI | 6.0 | 1553 | 63, 58, 4, 17, 16, 20, 7 |
| 3 | ADITYA BAJAJ | MI | 6.0 | 1384 | 8, 61, 25, 21, 11, 13, 12 |
| 4 | PATRICK H SCHILLING | MI | 5.5 | 1716 | 23, 28, 2, 26, 5, 19, 1 |
| 5 | HANSHI ZUO | MI | 5.5 | 1655 | 45, 37, 12, 13, 4, 14, 17 |
4) Compute average opponent pre-ratings
# Lookup table: pair number -> pre-tournament rating.
rating_map <- dplyr::select(players_raw, pair_num, pre_rating)

# Per player:
#   opp_pre     - pre-ratings of the listed opponents (NA where the pair
#                 number has no entry in rating_map)
#   games       - how many of those opponents have a known rating
#   avg_opp_pre - mean of the known ratings, or NA when there are none
avg_opp <- dplyr::mutate(
  players_raw,
  opp_pre = lapply(
    opp_ids,
    function(ids) rating_map$pre_rating[match(ids, rating_map$pair_num)]
  ),
  games = vapply(opp_pre, function(r) sum(!is.na(r)), integer(1)),
  avg_opp_pre = vapply(
    opp_pre,
    function(r) if (any(!is.na(r))) mean(r, na.rm = TRUE) else NA_real_,
    numeric(1)
  )
)

# Final output table: renamed columns, average rounded to a whole number.
result <- dplyr::transmute(
  avg_opp,
  player_name = name,
  player_state = state,
  total_points = total_pts,
  pre_rating = pre_rating,
  avg_opp_pre = round(avg_opp_pre)
)
knitr::kable(head(result, 10), caption = "Output preview (first 10 players)")
Output preview (first 10 players)

| player_name | player_state | total_points | pre_rating | avg_opp_pre |
|:---|:---|---:|---:|---:|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
| GARY DEE SWATHELL | MI | 5.0 | 1649 | 1372 |
| EZEKIEL HOUGHTON | MI | 5.0 | 1641 | 1468 |
| STEFANO LEE | ON | 5.0 | 1411 | 1523 |
| ANVIT RAO | MI | 5.0 | 1365 | 1554 |
5) Save CSV (submission file)
# Write the final per-player table to the CSV submission file.
readr::write_csv(result, "chess_players.csv")
# List files in the working directory whose names end in "ChessProject1",
# "html" or "csv", to confirm the CSV is present.
list.files(pattern = "(ChessProject1|html|csv)$")
## [1] "chess_players.csv" "ChessProject1.html"
6) Spot-check one expected player
# Sanity check: show the row(s) whose player name begins with "GARY HUA".
dplyr::filter(result, grepl("^GARY HUA", player_name))
## # A tibble: 1 × 5
## player_name player_state total_points pre_rating avg_opp_pre
## <chr> <chr> <dbl> <int> <dbl>
## 1 GARY HUA ON 6 1794 1605