library(stringr)
library(DT)

# Location of the raw tournament cross-table text file.
url <- "https://raw.githubusercontent.com/MRobinson112/Data_607_Project-1/main/tournamentinfo.txt"

# read.csv() is used here as a line reader: as long as no line contains a
# comma, every text line becomes one row of a single column. skip = 3 drops
# the first three lines of the file (presumably the banner/header rows).
chess_data <- read.csv(url, skip = 3, header = FALSE)

# Keep every tournament line intact, one line per row of a one-column matrix.
# Splitting the lines on '-' produced a wide matrix, and flattening it with
# `data_split[, ]` walks the matrix column by column: any text that followed a
# hyphen (for example the rest of a line containing a hyphenated surname) was
# moved behind all first fragments, so the values extracted from it no longer
# lined up with their player. With a single column, `data_split[, ]` yields
# the lines in file order.
data_split <- matrix(as.character(chess_data[[1]]), ncol = 1)

# Retrieve player names: two or more alphabetic words joined by a single
# space, hyphen or apostrophe. Allowing '-' and "'" as joiners keeps names
# such as "ANNE-MARIE O'NEIL" in one piece instead of cutting them at the
# punctuation. Round cells ("W  39") never match because their letter is
# followed by more than one space and then digits.
# Every match contains at least three characters, so no empty-string filter
# is needed afterwards.
PlayerNames <- unlist(str_extract_all(
  data_split[, ],
  "[[:alpha:]]+([[:space:]'\\-][[:alpha:]]+)+"
))

# Retrieve player states: a two-letter upper-case code at the very start of
# an entry (after optional padding), followed by one whitespace character and
# '|'. Anchoring at the start prevents a false hit on a name field that
# happens to end one space before its '|' delimiter.
# The look-ahead keeps the " |" delimiter out of the match, so no split step
# is required and an input without any state code simply yields an empty
# vector.
PlayerStates <- unlist(str_extract_all(
  data_split[, ],
  "^[[:space:]]*[A-Z][A-Z](?=[[:space:]]\\|)"
))

# Drop the leading padding captured before the state code
PlayerStates <- str_trim(PlayerStates)

# Total points: collect every decimal number (digits, a dot, digits) found in
# the tournament text and flatten the per-entry matches into one vector.
TotalPoints <- unlist(
  str_extract_all(string = data_split[, ], pattern = "(\\d+)[.](\\d+)")
)

# Discard zero-length strings, keeping only real matches
TotalPoints <- TotalPoints[nzchar(TotalPoints)]

# Extract pre-ratings: the run of digits that directly follows "R:".
# Only those digits are kept, so a provisional rating written like "1641P17"
# contributes the single value 1641. Pulling every digit run out of the whole
# alphanumeric token would also emit the trailing 17 as if it were another
# player's rating and shift every later rating by one position.
PreRatings <- str_extract(
  unlist(str_extract_all(data_split[, ], "R:[[:space:]]*\\d+")),
  "\\d+"
)

# Convert the extracted digit strings to numeric
PreRatings <- as.numeric(PreRatings)

# Pattern for a run of round cells: '|', a result letter, padding, an
# optional opponent pair number and the closing '|', repeated.
round_run_pattern <- "([\\|][A-Z]([[:space:]]+)\\d*[\\|])([A-Z]([[:space:]]+)\\d*[\\|])*"

# Extract opponent data (all round-cell runs found anywhere in the text)
opponent_data <- unlist(str_extract_all(data_split[, ], round_run_pattern))

# Flat vector of every opponent pair number, kept as character strings
Opponents <- unlist(str_extract_all(opponent_data, "\\d+"))

# data_split holds one row per tournament line. A player's result line is the
# one that starts with the pair number followed by '|'.
player_rows <- which(
  str_detect(data_split[, 1], "^[[:space:]]*\\d+[[:space:]]*\\|")
)

# Opponent pair numbers grouped per player, in file order. Cells without a
# number (byes, unplayed rounds) contribute nothing.
opponents_by_player <- lapply(player_rows, function(row) {
  round_runs <- unlist(str_extract_all(data_split[row, ], round_run_pattern))
  as.integer(unlist(str_extract_all(round_runs, "\\d+")))
})

# Determine the mean pre-rating of the opponents of each player: one value
# per player, not one value per opponent entry. Pair numbers index
# PreRatings directly, which assumes players are listed in pair-number order
# starting at 1. A player without any rated game gets NA rather than NaN.
RatingAverages <- vapply(opponents_by_player, function(opponent_ids) {
  if (length(opponent_ids) == 0) {
    return(NA_real_)
  }
  mean(PreRatings[opponent_ids])
}, numeric(1))

# Make sure all components that go into the result table have the same
# length. The comparison uses the vectors that are actually truncated below
# (RatingAverages rather than the raw opponent list).
component_lengths <- c(
  PlayerNames = length(PlayerNames),
  PlayerStates = length(PlayerStates),
  TotalPoints = length(TotalPoints),
  PreRatings = length(PreRatings),
  RatingAverages = length(RatingAverages)
)
min_length <- min(component_lengths)

# Differing lengths mean at least one extraction step is misaligned; say so
# instead of silently cutting the data.
if (any(component_lengths != min_length)) {
  warning(
    "Extracted components differ in length (",
    paste(
      names(component_lengths), component_lengths,
      sep = " = ", collapse = ", "
    ),
    "); truncating all of them to ", min_length, ".",
    call. = FALSE
  )
}

# seq_len() yields an empty index when min_length is 0, whereas 1:0 would
# select element 1.
keep <- seq_len(min_length)
PlayerNames <- PlayerNames[keep]
PlayerStates <- PlayerStates[keep]
TotalPoints <- TotalPoints[keep]
PreRatings <- PreRatings[keep]
RatingAverages <- RatingAverages[keep]

# Assemble the TournamentResults data frame directly under its final,
# human-readable column labels. check.names = FALSE keeps the labels exactly
# as written (spaces, apostrophe and hyphen included).
TournamentResults <- data.frame(
  `Player Name` = PlayerNames,
  `Player State` = PlayerStates,
  `Total Number of Points` = as.numeric(TotalPoints),
  `Player's Pre-Rating` = PreRatings,
  `Average Pre Chess Rating of Opponents` = RatingAverages,
  check.names = FALSE
)

# Write the data to a CSV file (without row names).
output_path <- "~/Data_607/Outcome.csv"

# write.csv() cannot create missing directories, so make sure the target
# folder exists first; showWarnings = FALSE keeps this quiet when it already
# does.
dir.create(dirname(output_path), recursive = TRUE, showWarnings = FALSE)
write.csv(TournamentResults, file = output_path, row.names = FALSE)