# Load Required Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr) # Load this to use str_trim()
library(tidyr)
library(readr)
# Load the raw tournament report into a character vector, one element per line
txt_lines <- readLines(con = "tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
# Parse `txt_lines` into `players`, a list of one-row data frames.
#
# A player record spans two consecutive lines:
#   * a line that (once trimmed) starts with the pairing number followed by
#     " |", and carries the name, the total points and the per-round results;
#   * the line right after it, which starts with the two-letter state and
#     carries the pre-tournament rating after "R:".
#
# Each stored record has the columns PlayerID, Name, State, TotalPoints,
# PreRating and Opponents. Opponents is a list-column holding the pairing
# numbers of the opponents faced (W/L/D results only), or NA if none were
# found. PlayerID is kept so that opponent numbers can later be matched back
# to the corresponding player's PreRating even if some records are skipped.
players <- list()
n_lines <- length(txt_lines)
i <- 1 # Index of the line currently being examined

while (i <= n_lines) {
  line <- str_trim(txt_lines[i])

  # A record line starts with a number followed by " |"
  if (grepl("^\\d+ \\|", line)) {
    # Pairing number: the leading digits that made the line match above
    player_id <- as.integer(str_extract(line, "^\\d+"))

    # Name: the text between the first "| " and the next " |"
    player_name <- str_trim(str_extract(line, "(?<=\\| ).*?(?= \\|)"))

    # Total points: the first decimal number on the line (e.g. 6.0)
    total_points <- as.numeric(str_extract(line, "\\d+\\.\\d"))

    # State and rating live on the following line, if there is one.
    # Plain if/else is used because the condition is a scalar.
    next_line <- if (i + 1 <= n_lines) {
      str_trim(txt_lines[i + 1])
    } else {
      NA_character_
    }

    # str_extract()/str_match() propagate NA, so no extra NA guard is needed
    player_state <- str_extract(next_line, "^[A-Z]{2}")

    # Allow any amount of padding after "R:". Requiring exactly one space
    # turns a padded (e.g. shorter) rating into NA, and the completeness
    # check below would then silently drop that player.
    pre_rating <- as.numeric(str_match(next_line, "R:\\s*(\\d+)")[, 2])

    # Opponent pairing numbers: a W, L or D result followed by one or more
    # spaces and the number. The previous single-space lookbehind never
    # matched (the rendered output showed NA for every player), which
    # indicates the numbers are padded with more than one space.
    opponent_nums <- as.integer(
      str_match_all(line, "\\b[WLD]\\s+(\\d+)")[[1]][, 2]
    )
    if (length(opponent_nums) == 0) {
      opponent_nums <- NA_integer_
    }

    # Only store the record if name, rating and total points were all found
    if (!is.na(player_name) && !is.na(pre_rating) && !is.na(total_points)) {
      record <- data.frame(
        PlayerID = player_id,
        Name = player_name,
        State = player_state,
        TotalPoints = total_points,
        PreRating = pre_rating
      )
      # Attach the opponents as a list-column after construction:
      # data.frame() spreads a list argument across columns instead of
      # storing it in a single cell, which is consistent with the stray
      # `NA.` column seen in the earlier output.
      record$Opponents <- list(opponent_nums)
      players[[length(players) + 1]] <- record
    }

    i <- i + 2 # Skip the state/rating line that was just consumed
  } else {
    i <- i + 1 # Not a record line; move on
  }
}
# Stack the per-player records into a single data frame
players_df <- players %>%
  bind_rows()
# Preview the first rows of the combined table
players_df %>%
  head()
## Name State TotalPoints PreRating NA.
## 1 GARY HUA ON 6.0 1794 NA
## 2 DAKSHESH DARURI MI 6.0 1553 NA
## 3 ADITYA BAJAJ MI 6.0 1384 NA
## 4 PATRICK H SCHILLING MI 5.5 1716 NA
## 5 HANSHI ZUO MI 5.5 1655 NA
## 6 HANSEN SONG OH 5.0 1686 NA
# Professor, I really had trouble getting the ratings for the opponents; my attempt is shown (commented out) below.
#opponent_nums <- as.numeric(unlist(str_extract_all(line, "(?<=W |L |D )\\d+")))
# Create a lookup table for Pre-Ratings
#rating_lookup <- players_df %>%
# select(PlayerID, PreRating)
# Function to compute average opponent pre-rating
#calculate_avg_opponent_rating <- function(rounds) {
# Convert round data to numeric
# opponents <- as.numeric(rounds)
# opponents <- opponents[!is.na(opponents)] # Remove NA values
# Get the pre-ratings of the opponents using PlayerID
# valid_ratings <- rating_lookup$PreRating[rating_lookup$PlayerID %in% opponents]
# # Compute and return the mean of opponent pre-ratings
# if (length(valid_ratings) == 0) {
# return(NA)
# }
# return(mean(valid_ratings, na.rm = TRUE))
#}
# Apply function to calculate the average opponent pre-rating
#players_df <- players_df %>%
# rowwise() %>%
# mutate(AvgOpponentPreRating = calculate_avg_opponent_rating(c_across(Round1:Round7))) %>%
# ungroup()
# Show the complete table of parsed players
players_df %>%
  print()
## Name State TotalPoints PreRating NA.
## 1 GARY HUA ON 6.0 1794 NA
## 2 DAKSHESH DARURI MI 6.0 1553 NA
## 3 ADITYA BAJAJ MI 6.0 1384 NA
## 4 PATRICK H SCHILLING MI 5.5 1716 NA
## 5 HANSHI ZUO MI 5.5 1655 NA
## 6 HANSEN SONG OH 5.0 1686 NA
## 7 GARY DEE SWATHELL MI 5.0 1649 NA
## 8 EZEKIEL HOUGHTON MI 5.0 1641 NA
## 9 STEFANO LEE ON 5.0 1411 NA
## 10 ANVIT RAO MI 5.0 1365 NA
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 NA
## 12 KENNETH J TACK MI 4.5 1663 NA
## 13 TORRANCE HENRY JR MI 4.5 1666 NA
## 14 BRADLEY SHAW MI 4.5 1610 NA
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 NA
## 16 MIKE NIKITIN MI 4.0 1604 NA
## 17 RONALD GRZEGORCZYK MI 4.0 1629 NA
## 18 DAVID SUNDEEN MI 4.0 1600 NA
## 19 DIPANKAR ROY MI 4.0 1564 NA
## 20 JASON ZHENG MI 4.0 1595 NA
## 21 DINH DANG BUI ON 4.0 1563 NA
## 22 EUGENE L MCCLURE MI 4.0 1555 NA
## 23 ALAN BUI ON 4.0 1363 NA
## 24 MICHAEL R ALDRICH MI 4.0 1229 NA
## 25 LOREN SCHWIEBERT MI 3.5 1745 NA
## 26 MAX ZHU ON 3.5 1579 NA
## 27 GAURAV GIDWANI MI 3.5 1552 NA
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 NA
## 29 CHIEDOZIE OKORIE MI 3.5 1602 NA
## 30 GEORGE AVERY JONES ON 3.5 1522 NA
## 31 RISHI SHETTY MI 3.5 1494 NA
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 NA
## 33 JADE GE MI 3.5 1449 NA
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 NA
## 35 JOSHUA DAVID LEE MI 3.5 1438 NA
## 36 SIDDHARTH JHA MI 3.5 1355 NA
## 37 BRIAN LIU MI 3.0 1423 NA
## 38 JOEL R HENDON MI 3.0 1436 NA
## 39 FOREST ZHANG MI 3.0 1348 NA
## 40 KYLE WILLIAM MURPHY MI 3.0 1403 NA
## 41 JARED GE MI 3.0 1332 NA
## 42 ROBERT GLEN VASEY MI 3.0 1283 NA
## 43 JUSTIN D SCHILLING MI 3.0 1199 NA
## 44 DEREK YAN MI 3.0 1242 NA
## 45 ERIC WRIGHT MI 2.5 1362 NA
## 46 DANIEL KHAIN MI 2.5 1382 NA
## 47 MICHAEL J MARTIN MI 2.5 1291 NA
## 48 SHIVAM JHA MI 2.5 1056 NA
## 49 TEJAS AYYAGARI MI 2.5 1011 NA
## 50 JOSE C YBARRA MI 2.0 1393 NA
## 51 LARRY HODGE MI 2.0 1270 NA
## 52 ALEX KONG MI 2.0 1186 NA
## 53 MARISA RICCI MI 2.0 1153 NA
## 54 MICHAEL LU MI 2.0 1092 NA
## 55 ASHWIN BALAJI MI 1.0 1530 NA
## 56 THOMAS JOSEPH HOSMER MI 1.0 1175 NA
## 57 BEN LI MI 1.0 1163 NA