Project 1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
# Download the tournament text file and keep it as a character vector with one
# element per line. warn = FALSE silences readLines()' end-of-file warnings.
tournament_url <- "https://raw.githubusercontent.com/Jeovany97/Data-607/refs/heads/main/Project%201/tournamentinfo.txt"
data <- readLines(con = tournament_url, warn = FALSE)

# Keep only the lines that are not pure runs of dashes (the separator rows),
# then discard the first two remaining lines, which are the header rows.
clean_data <- str_subset(data, "^-+$", negate = TRUE)
clean_data <- clean_data[-seq_len(2)]
head(clean_data)
[1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
[2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
[3] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
[4] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
[5] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
[6] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
# Storing information for each line
# Every player occupies two consecutive rows of clean_data: the first row holds
# the pair number, name, total points and per-round results; the second holds
# the state, an ID and the ratings. Split them into two parallel vectors.
#
# A parity mask is used instead of seq(from, length(x), by = 2) because seq()
# fails with "wrong sign in 'by' argument" when there are fewer than two rows.
# The check below also stops early if the rows do not pair up, rather than
# letting the two vectors end up with different lengths.
stopifnot(length(clean_data) %% 2 == 0)
is_first_row <- seq_along(clean_data) %% 2 == 1
name_point <- clean_data[is_first_row]
state_rating <- clean_data[!is_first_row]
name_point[1]
[1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
state_rating[1]
[1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
# Finding data using Regex
# Player name: the whole second pipe-delimited field of the first row, trimmed.
# Taking the field itself (instead of matching runs of two or more capital
# letters) keeps names with single-letter initials, hyphens or apostrophes
# intact; the previous pattern cut such names short (the fourth player was
# reported as just "PATRICK").
name <- str_trim(str_extract(name_point, "(?<=\\|)[^|]+"))
# State code: the first pair of consecutive capital letters on the second row.
state <- str_trim(str_extract(state_rating, "[A-Z]{2}"))
# Total points: the first decimal number on the first row (e.g. "6.0").
total_points <- as.numeric(str_extract(name_point, "\\d+\\.\\d+"))
# Pre-rating: the digits that follow "R:" (the value shown before "->").
pre_rating <- as.numeric(str_extract(state_rating, "(?<=R:\\s{0,5})\\d+"))
name[1]
[1] "GARY HUA"
state[1]
[1] "ON"
total_points[1]
[1] 6
pre_rating[1]
[1] 1794
# Inputs for the average-opponent-rating calculation.
# Pair number of each player: the first run of digits on the first row.
player_id <- name_point |>
  str_extract("\\d+") |>
  as.numeric()
# Pair numbers of the opponents faced: every number that follows a W, L or D
# result code. One character vector per player, collected in a list.
opponents <- name_point |>
  str_extract_all("(?<=[WLD]\\s{1,5})\\d+")
# Lookup table mapping each player's pair number to that player's pre-rating.
opponents_id <- data.frame(
  id = player_id,
  rating = pre_rating
)
player_id[1]
[1] 1
opponents[1]
[[1]]
[1] "39" "21" "18" "14" "7"  "12" "4" 
opponents_id[1, ]
  id rating
1  1   1794
# Calculating average opponent rating
# For every player, look up the pre-rating of each opponent faced and average
# them. vapply() with a numeric(1) template guarantees a plain numeric vector
# with one value per player; sapply() would return a list for empty input.
avg_opp_rating <- vapply(opponents, function(opp_list) {
  # Convert extracted IDs to numeric
  ids <- as.numeric(opp_list)
  # Find their ratings in our lookup table (NA for IDs that are not present)
  opp_ratings <- opponents_id$rating[match(ids, opponents_id$id)]
  # Return the mean (rounded to nearest integer); NaN if no rating is available
  round(mean(opp_ratings, na.rm = TRUE))
}, numeric(1))
avg_opp_rating[1]
[1] 1605
# Create the csv file
# Assemble the report table, one row per player.
# check.names = FALSE keeps the column headings exactly as written below. By
# default data.frame() runs them through make.names(), which had turned
# "Player’s Name" into "Player.s.Name" (and likewise for the other columns).
final_df <- data.frame(
  `Player’s Name` = name,
  `Player’s State` = state,
  `Total Number of Points` = total_points,
  `Player’s Pre-Rating` = pre_rating,
  `Average Pre Chess Rating of Opponents` = avg_opp_rating,
  check.names = FALSE
)

head(final_df)
    Player.s.Name Player.s.State Total.Number.of.Points Player.s.Pre.Rating
1        GARY HUA             ON                    6.0                1794
2 DAKSHESH DARURI             MI                    6.0                1553
3    ADITYA BAJAJ             MI                    6.0                1384
4         PATRICK             MI                    5.5                1716
5      HANSHI ZUO             MI                    5.5                1655
6     HANSEN SONG             OH                    5.0                1686
  Average.Pre.Chess.Rating.of.Opponents
1                                  1605
2                                  1469
3                                  1564
4                                  1574
5                                  1501
6                                  1519
# Save the report table as a CSV file in the working directory, leaving out
# the row-name column.
write.csv(
  x = final_df,
  file = "Project_1_Chess_Tournament.csv",
  row.names = FALSE
)