Place `tournamentinfo.txt` in the project root or in `data/`. Adjust the path below if needed.
# Try common locations: the project root first, then data/.
paths <- c("tournamentinfo.txt", "data/tournamentinfo.txt")
# Keep the first candidate that exists; this is NA when none do.
path <- paths[file.exists(paths)][1]
# Fail fast with an actionable message instead of an opaque assertion failure.
if (is.na(path)) {
  stop(
    "tournamentinfo.txt not found; looked in: ",
    paste(paths, collapse = ", "),
    call. = FALSE
  )
}
# Read the cross-table as plain text lines, one element per line.
# warn = FALSE silences readLines' warning about a missing final newline.
raw <- readLines(path, warn = FALSE)
length(raw)
## [1] 196
head(raw, 40)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [16] "-----------------------------------------------------------------------------------------"
## [17] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [18] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [19] "-----------------------------------------------------------------------------------------"
## [20] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [21] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [22] "-----------------------------------------------------------------------------------------"
## [23] " 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
## [24] " MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |"
## [25] "-----------------------------------------------------------------------------------------"
## [26] " 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"
## [27] " MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |"
## [28] "-----------------------------------------------------------------------------------------"
## [29] " 9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|"
## [30] " ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |"
## [31] "-----------------------------------------------------------------------------------------"
## [32] " 10 | ANVIT RAO |5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|"
## [33] " MI | 14150362 / R: 1365 ->1544 |N:3 |W |W |B |B |W |B |W |"
## [34] "-----------------------------------------------------------------------------------------"
## [35] " 11 | CAMERON WILLIAM MC LEMAN |4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|"
## [36] " MI | 12581589 / R: 1712 ->1696 |N:3 |B |W |B |W |B |W |B |"
## [37] "-----------------------------------------------------------------------------------------"
## [38] " 12 | KENNETH J TACK |4.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|"
## [39] " MI | 12681257 / R: 1663 ->1670 |N:3 |W |B |W |B | |W |B |"
## [40] "-----------------------------------------------------------------------------------------"
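Each player record spans two lines: a header line (pair number, name, total points, and the per-round results with opponent pair numbers) followed by a detail line (state, USCF ID, pre- and post-tournament ratings, and per-round colors). Records are separated by dashed rule lines.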
# 2) Segment into player blocks (robust)
# Normalise every line: remove carriage returns, then trailing whitespace.
clean <- stringr::str_trim(
  stringr::str_replace_all(raw, "\\r", ""),
  side = "right"
)
# Table rows are the lines containing a pipe; dashed rule lines have none.
tab <- stringr::str_subset(clean, "\\|")
# Positions of rows that open with a numeric pair id like " 1 | ..."
idx <- stringr::str_which(tab, "^\\s*\\d+\\s*\\|")
# Each such header row is paired with the row right after it (its detail row).
blocks <- tibble::tibble(header = tab[idx], detail = tab[idx + 1])
# sanity checks
nrow(blocks)
## [1] 64
blocks[["header"]][1]
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
blocks[["detail"]][1]
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
We want: Name, State, Total Points, Pre-Rating, and the list of Opponent Pair Numbers.
# ---- helper ----
#' Split one table row into its pipe-delimited fields.
#'
#' @param x A character vector; only its first element is used.
#' @return A character vector of fields with surrounding whitespace removed.
#'   A trailing "|" produces a final empty-string field.
split_cols <- function(x) {
  pieces <- stringr::str_split(x, "\\|")
  stringr::str_trim(pieces[[1]])
}
# ---- header parser: name, points, opponents (from HEADER line) ----
#' Parse the header (first) line of a player record.
#'
#' Fields are read by position after splitting on "|": pair number, player
#' name, total points, then one field per round. Reading by position keeps
#' digits in the name (e.g. "JOHN D 3RD") from being mistaken for an opponent
#' and keeps a blank points field from borrowing the round-1 opponent number.
#'
#' @param x A single header line, e.g. " 1 | GARY HUA |6.0 |W 39|...|D 4|".
#' @return A one-row tibble with `pair` (integer), `name` (squished,
#'   title-cased), `total_pts` (numeric, `NA` when the field has no number)
#'   and `opponents` (list-column holding an integer vector of opponent pair
#'   numbers, in round order).
parse_header <- function(x) {
  cols <- split_cols(x)

  # Pair & Name
  pair_val <- as.integer(stringr::str_extract(cols[1], "\\d+"))
  name_val <- stringr::str_to_title(stringr::str_squish(cols[2]))

  # Total points are the third field.
  total_pts_val <- as.numeric(
    stringr::str_extract(cols[3], "\\d+(?:\\.\\d+)?")
  )

  # Everything after the points field is a round result: a W/D/L/B/H/U code
  # optionally followed by the opponent's pair number,
  # e.g., "W 39", "D12", "L-7", "B 0". Codes without a number give NA.
  rounds <- cols[-(1:3)]
  opp_nums <- as.integer(
    stringr::str_match(rounds, "(?i)\\b[WDLHBU][\\s-]*(\\d+)")[, 2]
  )
  opp_nums <- opp_nums[!is.na(opp_nums) & opp_nums > 0L] # drop byes/zeros

  tibble::tibble(
    pair = pair_val,
    name = name_val,
    total_pts = total_pts_val,
    opponents = list(opp_nums)
  )
}
# ---- detail parser: state + pre-rating (from DETAIL line) ----
#' Parse the detail (second) line of a player record.
#'
#' The state/province code is read from the first pipe-delimited field and
#' accepted when it is exactly two uppercase letters. Reading it by position
#' (rather than matching a fixed list of regions) keeps codes such as "DC"
#' that are absent from `state.abb`.
#'
#' @param x A single detail line, e.g.
#'   " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |...".
#' @return A one-row tibble with `state` (character, `NA` when the first
#'   field is not a two-letter code) and `pre_rating` (integer, `NA` when no
#'   "R: <digits>" is present).
parse_detail <- function(x) {
  # Tabs become spaces and runs of whitespace collapse to single spaces.
  y <- x |>
    stringr::str_replace_all("\\t", " ") |>
    stringr::str_squish()

  # State token: everything before the first "|", trimmed.
  first_field <- stringr::str_trim(stringr::str_extract(y, "^[^|]*"))
  is_code <- !is.na(first_field) &&
    stringr::str_detect(first_field, "^[A-Z]{2}$")
  state_val <- if (is_code) first_field else NA_character_

  # Pre-rating like "R: 1794"; a provisional suffix ("1641P17") is ignored
  # because only the leading digits are captured.
  pre <- as.integer(stringr::str_match(y, "R:\\s*(\\d+)")[, 2])

  tibble::tibble(state = state_val, pre_rating = pre)
}
# ---- build players ----
# Parse every header line and every detail line into one-row tibbles, then
# stack each set into a single table.
hdr <- dplyr::bind_rows(purrr::map(blocks$header, parse_header))
det <- dplyr::bind_rows(purrr::map(blocks$detail, parse_detail))
# Row i of `hdr` and row i of `det` come from the same block, so the two
# tables are bound side by side.
players <- dplyr::filter(
  dplyr::bind_cols(hdr, det),
  # Drop any stray header row (safety)
  !grepl("^Player\\s*Name$", name, ignore.case = TRUE)
)
# Quick diagnostics
dplyr::slice(players, 1:5)
## # A tibble: 5 × 6
## pair name total_pts opponents state pre_rating
## <int> <chr> <dbl> <list> <chr> <int>
## 1 1 Gary Hua 6 <int [7]> ON 1794
## 2 2 Dakshesh Daruri 6 <int [7]> MI 1553
## 3 3 Aditya Bajaj 6 <int [7]> MI 1384
## 4 4 Patrick H Schilling 5.5 <int [7]> MI 1716
## 5 5 Hanshi Zuo 5.5 <int [7]> MI 1655
# Diagnostic: number of parsed opponents per player, first 12 players.
head(
  dplyr::transmute(
    players,
    name,
    pair,
    pre_rating,
    opp_len = lengths(opponents)
  ),
  12
)
## # A tibble: 12 × 4
## name pair pre_rating opp_len
## <chr> <int> <int> <int>
## 1 Gary Hua 1 1794 7
## 2 Dakshesh Daruri 2 1553 7
## 3 Aditya Bajaj 3 1384 7
## 4 Patrick H Schilling 4 1716 7
## 5 Hanshi Zuo 5 1655 7
## 6 Hansen Song 6 1686 7
## 7 Gary Dee Swathell 7 1649 7
## 8 Ezekiel Houghton 8 1641 7
## 9 Stefano Lee 9 1411 7
## 10 Anvit Rao 10 1365 7
## 11 Cameron William Mc Leman 11 1712 7
## 12 Kenneth J Tack 12 1663 6
# Build lookup: pair -> pre_rating
lookup <- players |> dplyr::select(pair, pre_rating)

# Unnest opponent list into rows (one row per player/opponent pairing) and
# keep only valid opponent IDs: present, positive, not the player themself,
# and belonging to a known player.
opp_tbl <- players |>
  dplyr::mutate(opp_num = opponents) |>
  dplyr::select(pair, name, state, total_pts, pre_rating, opp_num) |>
  tidyr::unnest_longer(opp_num, values_to = "opp_pair", keep_empty = TRUE) |>
  dplyr::mutate(opp_pair = as.integer(opp_pair)) |>
  dplyr::filter(
    !is.na(opp_pair),
    opp_pair > 0,
    opp_pair != pair,
    opp_pair %in% players$pair
  )

# Join opponents to their pre-ratings (added as `pre_rating_opp`)
opp_joined <- opp_tbl |>
  dplyr::left_join(lookup, by = c("opp_pair" = "pair"), suffix = c("", "_opp"))

# Compute mean opponent pre-rating per player, then join it back onto the
# full player list. Starting from `players` keeps anyone with no valid
# opponents in the result with avg_opp_pre = NA; aggregating `opp_joined`
# alone would drop them, because their rows were filtered out above.
final <- players |>
  dplyr::select(pair, name, state, total_pts, pre_rating) |>
  dplyr::left_join(
    opp_joined |>
      dplyr::group_by(pair) |>
      dplyr::summarize(
        avg_opp_pre = mean(pre_rating_opp, na.rm = TRUE),
        .groups = "drop"
      ),
    by = "pair"
  ) |>
  # mean() of only-NA ratings is NaN; report that as NA. Otherwise round to
  # a whole rating.
  dplyr::mutate(
    avg_opp_pre = ifelse(is.nan(avg_opp_pre), NA, round(avg_opp_pre))
  ) |>
  dplyr::arrange(pair)
The assignment’s example states that Gary Hua should have an average opponent pre-rating of 1605. Confirm below.
# Spot check: show the row whose name is exactly "Gary Hua" (case-insensitive).
# Calls are namespace-qualified like the rest of the file so that `filter`
# cannot resolve to stats::filter when dplyr is not attached.
final |>
  dplyr::filter(
    stringr::str_detect(name, stringr::regex("^Gary Hua$", ignore_case = TRUE))
  )
## # A tibble: 1 × 6
## pair name state total_pts pre_rating avg_opp_pre
## <int> <chr> <chr> <dbl> <int> <dbl>
## 1 1 Gary Hua ON 6 1794 1605
# Rename the result columns to their report headings and write them to CSV.
# Calls are namespace-qualified like the rest of the file, so this does not
# depend on dplyr/readr being attached.
out <- final |>
  dplyr::transmute(
    Name = name,
    State = state,
    `Total Points` = total_pts,
    `Pre-Rating` = pre_rating,
    `Avg Opponent Pre-Rating` = avg_opp_pre
  )
readr::write_csv(out, "project1_results.csv")
A missing opponent pre-rating is treated as `NA` and is excluded from the average.