library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(dplyr)
library(readr)

Load Data

# Read the raw tournament cross-table from GitHub as a character vector with
# one element per line of text; all later parsing works on these lines.
tournament_data <- readLines("https://raw.githubusercontent.com/JaydeeJan/Data-607-Project-1/refs/heads/main/tournamentinfo.txt")

head(tournament_data, 20)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------" 
## [11] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
## [12] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [13] "-----------------------------------------------------------------------------------------" 
## [14] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
## [15] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |" 
## [16] "-----------------------------------------------------------------------------------------" 
## [17] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|" 
## [18] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [19] "-----------------------------------------------------------------------------------------" 
## [20] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
tail(tournament_data, 20)
##  [1] "   MI | 14700365 / R:  917   -> 941     |     |W    |B    |W    |B    |W    |     |B    |"
##  [2] "-----------------------------------------------------------------------------------------"
##  [3] "   59 | SEAN M MC CORMICK               |2.0  |L  41|B    |L   9|L  40|L  43|W  54|L  44|"
##  [4] "   MI | 12841036 / R:  853   -> 878     |     |W    |     |B    |B    |W    |W    |B    |"
##  [5] "-----------------------------------------------------------------------------------------"
##  [6] "   60 | JULIA SHEN                      |1.5  |L  33|L  34|D  45|D  42|L  24|H    |U    |"
##  [7] "   MI | 14579262 / R:  967   -> 984     |     |W    |B    |B    |W    |B    |     |     |"
##  [8] "-----------------------------------------------------------------------------------------"
##  [9] "   61 | JEZZEL FARKAS                   |1.5  |L  32|L   3|W  54|L  47|D  42|L  30|L  37|"
## [10] "   ON | 15771592 / R:  955P11-> 979P18  |     |B    |W    |B    |W    |B    |W    |B    |"
## [11] "-----------------------------------------------------------------------------------------"
## [12] "   62 | ASHWIN BALAJI                   |1.0  |W  55|U    |U    |U    |U    |U    |U    |"
## [13] "   MI | 15219542 / R: 1530   ->1535     |     |B    |     |     |     |     |     |     |"
## [14] "-----------------------------------------------------------------------------------------"
## [15] "   63 | THOMAS JOSEPH HOSMER            |1.0  |L   2|L  48|D  49|L  43|L  45|H    |U    |"
## [16] "   MI | 15057092 / R: 1175   ->1125     |     |W    |B    |W    |B    |B    |     |     |"
## [17] "-----------------------------------------------------------------------------------------"
## [18] "   64 | BEN LI                          |1.0  |L  22|D  30|L  31|D  49|L  46|L  42|L  54|"
## [19] "   MI | 15006561 / R: 1163   ->1112     |     |B    |W    |W    |B    |W    |B    |B    |"
## [20] "-----------------------------------------------------------------------------------------"

Extract Player Information

#' Extract one player's summary row from a two-line tournament entry
#'
#' @param player_lines Character vector of length two: the player's header
#'   line (pair number, name, total points, round results) followed by the
#'   detail line (state, USCF ID, ratings).
#' @param tournament_data Character vector holding every line of the
#'   tournament file; used to look up the pre-ratings of the opponents.
#' @return A one-row data frame with columns `PlayerName`, `PlayerState`,
#'   `TotalPoints`, `PlayerPreRating` and `AvgOppRating` (mean opponent
#'   pre-rating rounded to a whole number; `NA` when no opponent rating
#'   could be found).
extract_player_info <- function(player_lines, tournament_data) {
  # Trimmed first capture group of the first match in each element of `text`;
  # NA where the element is NA or the pattern does not match.
  capture_first <- function(text, pattern) {
    captured <- rep(NA_character_, length(text))
    usable <- !is.na(text)
    hits <- regmatches(
      text[usable],
      regexec(pattern, text[usable], perl = TRUE)
    )
    captured[usable] <- vapply(
      hits,
      function(hit) if (length(hit) >= 2L) trimws(hit[2L]) else NA_character_,
      character(1)
    )
    captured
  }

  header_line <- player_lines[1]
  detail_line <- player_lines[2]

  # The name is whatever sits between the first two pipes. Taking the field
  # by position (rather than matching only A-Z and spaces) keeps names that
  # contain hyphens, apostrophes or periods.
  player_name <- capture_first(header_line, "^[^|]*\\|([^|]*)\\|")
  player_state <- capture_first(detail_line, "([A-Z]{2})\\s*\\|")
  total_points <- as.numeric(
    capture_first(header_line, "\\|\\s*([0-9.]+)\\s*\\|")
  )
  pre_rating <- as.numeric(capture_first(detail_line, "R:\\s*([0-9]+)"))

  # Round cells that carry an opponent number (win, loss, draw, ...). Cells
  # without a number (byes, unplayed rounds) do not match and are skipped.
  round_cells <- if (is.na(header_line)) {
    character(0)
  } else {
    regmatches(
      header_line,
      gregexpr("[WLDB]\\s*[0-9]+", header_line, perl = TRUE)
    )[[1]]
  }
  opponent_ids <- as.numeric(sub("^[WLDB]\\s*", "", round_cells, perl = TRUE))

  # Index every player header line once, then resolve all opponents with a
  # single match() instead of re-scanning the file per opponent.
  header_rows <- grep("^\\s*[0-9]+\\s*\\|", tournament_data, perl = TRUE)
  header_ids <- as.numeric(
    capture_first(tournament_data[header_rows], "^\\s*([0-9]+)")
  )
  opponent_rows <- header_rows[match(opponent_ids, header_ids)]

  # The rating sits on the line directly below the opponent's header line.
  # Unknown opponents index as NA and are dropped with the unparsable ones.
  opponent_ratings <- as.numeric(
    capture_first(tournament_data[opponent_rows + 1L], "R:\\s*([0-9]+)")
  )
  opponent_ratings <- opponent_ratings[!is.na(opponent_ratings)]

  # NA_real_ keeps the column numeric even when there is nothing to average.
  avg_opp_rating <- if (length(opponent_ratings) > 0) {
    mean(opponent_ratings)
  } else {
    NA_real_
  }

  data.frame(
    PlayerName = player_name,
    PlayerState = player_state,
    TotalPoints = total_points,
    PlayerPreRating = pre_rating,
    AvgOppRating = round(avg_opp_rating, 0)
  )
}

Initialize Data Frame To Store The Result

# Locate the header line of every player entry: leading whitespace, the pair
# number, then a pipe.
player_start_lines <- grep("^\\s+\\d+\\s*\\|", tournament_data)

# Parse each two-line entry (the header line plus the line below it) into a
# one-row data frame. Rows are collected in a list and bound once, because
# calling rbind() inside a loop re-copies the accumulated frame every time.
player_rows <- lapply(player_start_lines, function(start_line) {
  player_lines <- tournament_data[start_line:(start_line + 1)]
  extract_player_info(player_lines, tournament_data)
})

# Fall back to a typed, zero-row frame when no player lines were found, so
# `results` always has the same columns.
results <- if (length(player_rows) > 0) {
  do.call(rbind, player_rows)
} else {
  data.frame(
    PlayerName = character(),
    PlayerState = character(),
    TotalPoints = numeric(),
    PlayerPreRating = numeric(),
    AvgOppRating = numeric()
  )
}

# Show the resulting data frame
head(results, 10)
##             PlayerName PlayerState TotalPoints PlayerPreRating AvgOppRating
## 1             GARY HUA          ON         6.0            1794         1605
## 2      DAKSHESH DARURI          MI         6.0            1553         1469
## 3         ADITYA BAJAJ          MI         6.0            1384         1564
## 4  PATRICK H SCHILLING          MI         5.5            1716         1574
## 5           HANSHI ZUO          MI         5.5            1655         1501
## 6          HANSEN SONG          OH         5.0            1686         1519
## 7    GARY DEE SWATHELL          MI         5.0            1649         1372
## 8     EZEKIEL HOUGHTON          MI         5.0            1641         1468
## 9          STEFANO LEE          ON         5.0            1411         1523
## 10           ANVIT RAO          MI         5.0            1365         1554
tail(results, 10)
##              PlayerName PlayerState TotalPoints PlayerPreRating AvgOppRating
## 55            ALEX KONG          MI         2.0            1186         1406
## 56         MARISA RICCI          MI         2.0            1153         1414
## 57           MICHAEL LU          MI         2.0            1092         1363
## 58         VIRAJ MOHILE          MI         2.0             917         1391
## 59    SEAN M MC CORMICK          MI         2.0             853         1319
## 60           JULIA SHEN          MI         1.5             967         1330
## 61        JEZZEL FARKAS          ON         1.5             955         1327
## 62        ASHWIN BALAJI          MI         1.0            1530         1186
## 63 THOMAS JOSEPH HOSMER          MI         1.0            1175         1350
## 64               BEN LI          MI         1.0            1163         1263

Write the Results to a CSV File

# Save the results table to "tournament_results.csv"; row.names = FALSE
# leaves the data frame's row names out of the file.
write.csv(results, file = "tournament_results.csv", row.names = FALSE)