# Location of the raw tournament results text file.
file_url <- "https://raw.githubusercontent.com/Amish22/DS607/refs/heads/main/tournamentinfo.txt"
# Read the file into a character vector, one element per line of text.
lines <- readLines(file_url)
## Warning in readLines(file_url): incomplete final line found on
## 'https://raw.githubusercontent.com/Amish22/DS607/refs/heads/main/tournamentinfo.txt'
# Preview the first lines to check the layout before parsing.
head(lines)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
# Parse the tournament table into one row per player.
#
# A player record spans two consecutive lines:
#   line 1: pair number | name | total points | one field per round
#           (result letter followed by the opponent's pair number)
#   line 2: state | USCF ID / R: pre-rating -> post-rating | ...
# A record is recognised by its first line starting with a number.
#
# Rows are collected in a list and bound once at the end; calling rbind()
# inside the loop would copy the whole data frame on every iteration.
player_rows <- list()
i <- 1
while (i <= length(lines)) {
  if (grepl("^\\s*\\d+", lines[i])) {
    player_info <- unlist(str_split(lines[i], "\\|"))
    name <- str_trim(player_info[2])
    points <- as.numeric(str_trim(player_info[3]))

    # lines[i + 1] is NA when the record is cut off at the end of the input;
    # the fields derived from it then come out as NA instead of failing.
    state_info <- unlist(str_split(lines[i + 1], "\\|"))
    state <- str_trim(state_info[1])

    # Accept any amount of whitespace after "R:" so that ratings padded with
    # extra spaces are still captured.
    pre_rating <- as.numeric(str_match(state_info[2], "R:\\s*(\\d+)")[, 2])

    # Only the fields after the points column hold opponent numbers. Scanning
    # the whole line would also pick up the player's own pair number and the
    # digits of the points total.
    round_fields <- player_info[-(1:3)]
    opponents <- paste(unlist(str_extract_all(round_fields, "\\d+")),
                       collapse = " ")

    player_rows[[length(player_rows) + 1]] <- data.frame(
      Name = name,
      State = state,
      Points = points,
      PreRating = pre_rating,
      Opponents = opponents,
      stringsAsFactors = FALSE
    )
    # Skip the second line of the record that was just consumed.
    i <- i + 2
  } else {
    i <- i + 1
  }
}

# Start from an empty frame with the expected columns so the result has the
# same shape even when no player lines were found.
players_data <- data.frame(Name = character(),
                           State = character(),
                           Points = numeric(),
                           PreRating = numeric(),
                           Opponents = character(),
                           stringsAsFactors = FALSE)
if (length(player_rows) > 0) {
  players_data <- do.call(rbind, player_rows)
}
# Mean pre-tournament rating of the opponents listed in `opponents`.
#
# opponents:    a string; every run of digits in it is read as one
#               opponent number.
# players_data: data frame with a PreRating column, which is indexed by
#               position using those numbers.
# Returns the mean of the looked-up ratings with NA values ignored
# (NaN when no rating remains).
calculate_avg_rating <- function(opponents, players_data) {
  id_strings <- str_extract_all(opponents, "\\d+")[[1]]
  ratings <- players_data$PreRating[as.numeric(id_strings)]
  mean(ratings, na.rm = TRUE)
}
# Average opponent rating for every player.
# vapply() with FUN.VALUE = numeric(1) always yields a numeric vector with one
# value per player; sapply() would return an empty list when players_data has
# no rows, which cannot be stored as a numeric column.
players_data$Avg_Opp_Rating <- vapply(
  players_data$Opponents,
  calculate_avg_rating,
  FUN.VALUE = numeric(1),
  players_data = players_data
)

# Keep only the columns that belong in the final report.
final_players_data <- players_data %>%
  select(Name, State, Points, PreRating, Avg_Opp_Rating)
head(final_players_data)
## Name State Points PreRating Avg_Opp_Rating
## 1 GARY HUA ON 6.0 1794 1635.222
## 2 DAKSHESH DARURI MI 6.0 1553 1575.875
## 3 ADITYA BAJAJ MI 6.0 1384 1632.500
## 4 PATRICK H SCHILLING MI 5.5 1716 1604.100
## 5 HANSHI ZUO MI 5.5 1655 1610.111
## 6 HANSEN SONG OH 5.0 1686 1552.444
# CSV export
# Save the report table to disk; row names are left out of the file.
write.csv(
  x = final_players_data,
  file = "final_players_data.csv",
  row.names = FALSE
)