library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(dplyr)
library(readr)
Load Data
# Read the raw tournament cross-table from GitHub; readLines() returns a
# character vector with one element per line of the text file.
tournament_data <- readLines(
  con = "https://raw.githubusercontent.com/JaydeeJan/Data-607-Project-1/refs/heads/main/tournamentinfo.txt"
)
# Preview the first 20 lines to inspect the header and the first player blocks.
head(tournament_data, n = 20)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [16] "-----------------------------------------------------------------------------------------"
## [17] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [18] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [19] "-----------------------------------------------------------------------------------------"
## [20] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
# Preview the last 20 lines to check how the file ends.
tail(tournament_data, n = 20)
## [1] " MI | 14700365 / R: 917 -> 941 | |W |B |W |B |W | |B |"
## [2] "-----------------------------------------------------------------------------------------"
## [3] " 59 | SEAN M MC CORMICK |2.0 |L 41|B |L 9|L 40|L 43|W 54|L 44|"
## [4] " MI | 12841036 / R: 853 -> 878 | |W | |B |B |W |W |B |"
## [5] "-----------------------------------------------------------------------------------------"
## [6] " 60 | JULIA SHEN |1.5 |L 33|L 34|D 45|D 42|L 24|H |U |"
## [7] " MI | 14579262 / R: 967 -> 984 | |W |B |B |W |B | | |"
## [8] "-----------------------------------------------------------------------------------------"
## [9] " 61 | JEZZEL FARKAS |1.5 |L 32|L 3|W 54|L 47|D 42|L 30|L 37|"
## [10] " ON | 15771592 / R: 955P11-> 979P18 | |B |W |B |W |B |W |B |"
## [11] "-----------------------------------------------------------------------------------------"
## [12] " 62 | ASHWIN BALAJI |1.0 |W 55|U |U |U |U |U |U |"
## [13] " MI | 15219542 / R: 1530 ->1535 | |B | | | | | | |"
## [14] "-----------------------------------------------------------------------------------------"
## [15] " 63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |"
## [16] " MI | 15057092 / R: 1175 ->1125 | |W |B |W |B |B | | |"
## [17] "-----------------------------------------------------------------------------------------"
## [18] " 64 | BEN LI |1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|"
## [19] " MI | 15006561 / R: 1163 ->1112 | |B |W |W |B |W |B |B |"
## [20] "-----------------------------------------------------------------------------------------"
Initialize the Data Frame That Stores the Results
# Empty template that fixes the output columns and their types; `results`
# stays equal to this template when no player rows are found.
results <- data.frame(
  PlayerName = character(),
  PlayerState = character(),
  TotalPoints = numeric(),
  PlayerPreRating = numeric(),
  AvgOppRating = numeric()
)

# A player block starts on a line whose first field is the pair number:
# leading whitespace, digits, optional whitespace, then a "|" separator.
player_start_lines <- grep("^\\s+\\d+\\s*\\|", tournament_data)

# Extract one record per player. Each block is the start line plus the line
# directly after it. extract_player_info() is defined outside this section; it
# receives the two block lines and the full file (presumably to look up
# opponents' ratings -- confirm against its definition).
player_rows <- lapply(player_start_lines, function(start_line) {
  player_lines <- tournament_data[start_line:(start_line + 1)]
  extract_player_info(player_lines, tournament_data)
})

# Bind every record in a single rbind() call instead of growing `results`
# inside a loop, which re-copies the accumulated data frame on each iteration.
# Leading with the template keeps data-frame dispatch and the empty-input case.
results <- do.call(rbind, c(list(results), player_rows))
# Display the first 10 rows of the per-player summary table.
head(results, n = 10)
## PlayerName PlayerState TotalPoints PlayerPreRating AvgOppRating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
# Display the last 10 rows of the per-player summary table.
tail(results, n = 10)
## PlayerName PlayerState TotalPoints PlayerPreRating AvgOppRating
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
Write the Results to a CSV File
# Save the summary table as a CSV file in the working directory; row names are
# left out so the file contains only the data columns.
write.csv(
  x = results,
  file = "tournament_results.csv",
  row.names = FALSE
)