# stringr supplies the str_* helpers used by the parsing steps of this script.
# NOTE(review): harmless if an earlier, unseen setup chunk already attaches it.
library(stringr)

# Read the tournament report. The file is a fixed-width, pipe-delimited text
# report rather than a real CSV, so each line arrives as a single string in
# column V1. The first three lines (top rule plus two header lines) are skipped.
chess_data <- read.csv(
  "https://raw.githubusercontent.com/Kingtilon1/DATA607/main/tournamentinfo.txt",
  skip = 3,
  header = FALSE
)

# Keep every report line intact and only discard the dashed separator rows.
# Splitting the lines on "-" would also cut hyphenated player names and the
# "->" inside the rating field into separate cells; flattening such a
# multi-column matrix is column-major, which pushes the matches of a split row
# behind those of every other player. A one-column character matrix keeps one
# cell per report line, in file order.
report_lines <- as.character(chess_data[[1]])
is_separator <- str_detect(report_lines, "^-+[[:space:]]*$")
chess_data2 <- matrix(report_lines[!is_separator], ncol = 1)

Store each player's information.

## player names
# Work from the unsplit report lines in chess_data so that each player
# contributes exactly one name, in file order. A player row starts with the
# numeric pair number; the name is the whole second pipe-delimited field, so
# hyphenated or single-word names are kept in full.
name_fields <- str_match(
  as.character(chess_data[[1]]),
  "^[[:space:]]*\\d+[[:space:]]*\\|([^|]*)\\|"
)[, 2]
# Rows that are not player rows produce NA and are dropped; padding is trimmed.
player_names <- str_trim(name_fields[!is.na(name_fields)])

## states
# A state code is two capital letters followed by one whitespace character and
# a pipe. Collect every such token, then cut the trailing " |" off again.
state_tokens <- str_extract_all(chess_data2[, ], "[A-Z][A-Z][[:space:]][\\|]")
state_tokens <- unlist(state_tokens)
state_pieces <- str_split(state_tokens, "[[:space:]][\\|]", simplify = TRUE)
# Each token splits into the code and an empty remainder; keep only the code.
player_states <- state_pieces[, -2]

## total points
# Work from the unsplit report lines in chess_data so that each player
# contributes exactly one score, in file order. Player rows are the ones that
# start with the numeric pair number.
score_lines <- as.character(chess_data[[1]])
score_lines <- score_lines[
  str_detect(score_lines, "^[[:space:]]*\\d+[[:space:]]*\\|")
]
# First decimal number on the row (e.g. "6.0"). Kept as character, and a row
# without a decimal yields NA instead of being dropped, so the result always
# stays aligned with the player rows.
totalPoints <- str_extract(score_lines, "\\d+[.]\\d+")



## player rating
# Work from the unsplit report lines in chess_data. Rating rows are the ones
# carrying a literal "R:" field; the digits that directly follow it (after
# optional padding) are the rating. Any non-digit suffix after those digits is
# ignored.
rating_lines <- as.character(chess_data[[1]])
rating_lines <- rating_lines[str_detect(rating_lines, "R:")]
# One value per rating row, in file order; a row whose "R:" field has no digits
# becomes NA rather than shifting every later player up by one.
player_ratings <- as.numeric(
  str_match(rating_lines, "R:[[:space:]]*(\\d+)")[, 2]
)

## opponent strings
# Work from the unsplit report lines in chess_data so that row i of the result
# always belongs to the i-th player row of the report. Player rows are the ones
# that start with the numeric pair number.
pairing_lines <- as.character(chess_data[[1]])
pairing_lines <- pairing_lines[
  str_detect(pairing_lines, "^[[:space:]]*\\d+[[:space:]]*\\|")
]

# The round results form a run of "|<letter><spaces><optional number>" cells
# closed by a final "|". Opponent_Info holds that run, one string per player.
Opponent_Info <- str_extract(
  pairing_lines,
  "(\\|[A-Z][[:space:]]+\\d*)+\\|"
)
# A player row without any round cells becomes "" so no NA reaches the matrix.
Opponent_Info[is.na(Opponent_Info)] <- ""

# Character matrix of opponent pair numbers: one row per player, left-packed
# and padded with "" where a round has no numbered opponent. Rows are never
# dropped, so the matrix stays aligned with the other per-player vectors.
Opponents_Data <- str_extract_all(Opponent_Info, "\\d+", simplify = TRUE)

Now let's calculate each player's average opponent rating and create a data frame to store the results.

# Translate every opponent pair number into that opponent's rating.
# Opponents_Data is a character matrix padded with "": as.integer() turns the
# padding into NA, and indexing player_ratings with NA (or with a number past
# its end) yields NA, so empty cells simply stay NA in the result.
opponent_ids <- as.integer(Opponents_Data)

# as.integer() drops the dimensions; matrix() restores them in the same
# column-major order, so cell [i, j] still refers to Opponents_Data[i, j].
opponent_ratings <- matrix(
  player_ratings[opponent_ids],
  nrow = nrow(Opponents_Data),
  ncol = ncol(Opponents_Data)
)

# Per-player mean over the opponents actually faced (NA cells are ignored).
averages_of_ratings <- rowMeans(opponent_ratings, na.rm = TRUE)

# Assemble the per-player summary table.
# data.frame() silently recycles a column whose length divides the longest
# one, which would hide a parsing mismatch, so require one value per player in
# every column before building the table.
stopifnot(
  length(player_states) == length(player_names),
  length(totalPoints) == length(player_names),
  length(player_ratings) == length(player_names),
  length(averages_of_ratings) == length(player_names)
)

# check.names (the default) rewrites the quoted names into syntactic ones,
# e.g. "Player Name" becomes Player.Name in the resulting data frame.
tourn_results <- data.frame(
  "Player Name" = player_names,
  "State" = player_states,
  "Points" = totalPoints,
  "Player_Rating" = player_ratings,
  "Opponent Avg Player_Rating" = round(averages_of_ratings)
)

Let's view the new data frame holding the organized data.

# Preview the first six rows of the assembled table.
head(tourn_results, n = 6L)
##           Player.Name State Points Player_Rating Opponent.Avg.Player_Rating
## 1            GARY HUA    ON    6.0          1794                       1605
## 2     DAKSHESH DARURI    MI    6.0          1553                       1469
## 3        ADITYA BAJAJ    MI    6.0          1384                       1564
## 4 PATRICK H SCHILLING    MI    5.5          1716                       1574
## 5          HANSHI ZUO    MI    5.5          1655                       1501
## 6         HANSEN SONG    OH    5.0          1686                       1519

Save the results to a CSV file.

# Export the table to the working directory, leaving out the row-index column.
write.csv(x = tourn_results, file = "finished_results.csv", row.names = FALSE)