# Download the raw tournament cross-table as a character vector,
# one element per line of the text file.
file_url <- "https://raw.githubusercontent.com/Amish22/DS607/refs/heads/main/tournamentinfo.txt"
lines <- readLines(file_url)
## Warning in readLines(file_url): incomplete final line found on
## 'https://raw.githubusercontent.com/Amish22/DS607/refs/heads/main/tournamentinfo.txt'
# The warning above is emitted by readLines() when the last line of the file
# has no terminating newline.

# Preview the header and the first player's two-line record.
head(lines)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
# Parse the tournament cross-table into one row per player.
#
# Each player occupies two consecutive elements of `lines`:
#   line 1: pair number | name | total points | one result/opponent cell per round
#   line 2: state | USCF ID / R: <pre-rating> -> <post-rating> | ...
# A player's first line is recognised by its leading pair number.
#
# Resulting columns: Name, State, Points, PreRating, and Opponents (the
# opponents' pair numbers as a single space-separated string).
players_data <- data.frame(Name = character(),
                           State = character(),
                           Points = numeric(),
                           PreRating = numeric(),
                           Opponents = character(),
                           stringsAsFactors = FALSE)

# Collect one single-row data frame per player and bind them once at the end,
# rather than re-copying the whole data frame with rbind() on every iteration.
player_rows <- list()

i <- 1
while (i <= length(lines)) {
  if (grepl("^\\s*\\d+", lines[i])) {

    player_info <- unlist(str_split(lines[i], "\\|"))
    name <- str_trim(player_info[2])

    points <- as.numeric(str_trim(player_info[3]))

    state_info <- unlist(str_split(lines[i + 1], "\\|"))
    state <- str_trim(state_info[1])

    # Accept any amount of padding between "R:" and the rating digits, then
    # keep only the digits. Requiring exactly one space would turn a rating
    # preceded by extra padding into NA.
    pre_rating <- as.numeric(
      str_extract(str_extract(state_info[2], "R:\\s*\\d+"), "\\d+")
    )

    # Opponent pair numbers live only in the round cells (fields after the
    # pair number, name and points). Scanning the whole line would also pick
    # up the player's own pair number and the digits of the points total.
    # Cells without a number (no opponent that round) are skipped.
    round_cells <- player_info[-(1:3)]
    opponent_ids <- str_extract(round_cells, "\\d+")
    opponents <- paste(opponent_ids[!is.na(opponent_ids)], collapse = " ")

    player_rows[[length(player_rows) + 1]] <- data.frame(
      Name = name,
      State = state,
      Points = points,
      PreRating = pre_rating,
      Opponents = opponents,
      stringsAsFactors = FALSE
    )

    # Skip the player's second line, which has already been consumed.
    i <- i + 2
  } else {
    i <- i + 1
  }
}

# Prepending the empty template keeps the column layout even when no player
# lines were found.
players_data <- do.call(rbind, c(list(players_data), player_rows))

# Mean pre-tournament rating of one player's opponents.
#
# opponents:    a single string containing the opponents' pair numbers.
# players_data: data frame with a PreRating column; opponents are looked up
#               by row position, so row k must hold the player with pair
#               number k.
# Returns the mean of the matched ratings, ignoring NA ratings (NaN when no
# rating could be matched).
calculate_avg_rating <- function(opponents, players_data) {
  id_tokens <- str_extract_all(opponents, "\\d+")[[1]]
  row_index <- as.numeric(id_tokens)
  mean(players_data$PreRating[row_index], na.rm = TRUE)
}

# Average pre-tournament rating of each player's opponents.
# vapply() is used instead of sapply() so the result is always a plain numeric
# vector (one value per player, even for an empty table), and USE.NAMES = FALSE
# keeps the opponent strings from being attached to the column as names.
players_data$Avg_Opp_Rating <- vapply(
  players_data$Opponents,
  calculate_avg_rating,
  FUN.VALUE = numeric(1),
  players_data = players_data,
  USE.NAMES = FALSE
)

# Keep only the reporting columns; the raw Opponents string is dropped.
final_players_data <- players_data %>%
  select(Name, State, Points, PreRating, Avg_Opp_Rating)

head(final_players_data)
##                  Name State Points PreRating Avg_Opp_Rating
## 1            GARY HUA    ON    6.0      1794       1635.222
## 2     DAKSHESH DARURI    MI    6.0      1553       1575.875
## 3        ADITYA BAJAJ    MI    6.0      1384       1632.500
## 4 PATRICK H SCHILLING    MI    5.5      1716       1604.100
## 5          HANSHI ZUO    MI    5.5      1655       1610.111
## 6         HANSEN SONG    OH    5.0      1686       1552.444

# Export the final table to CSV

# Write the summary table to the working directory, without a row-name column.
write.csv(
  x = final_players_data,
  file = "final_players_data.csv",
  row.names = FALSE
)