library(dplyr)
library(readr)
# Read chessdata.txt into a character vector, one element per line.
chess_data <- readLines("chessdata.txt")

# Positions of the lines that open a player record: optional leading
# whitespace, a run of digits, whitespace, then a pipe.
player_start_indices <- which(grepl("^\\s*\\d+\\s+\\|", chess_data))
#' Parse one player's record out of the raw table lines.
#'
#' @param player_lines Character vector holding one player's lines. Only the
#'   first two elements are read: element 1 is the pipe-delimited row that
#'   starts with the player's number, followed by the name, the total points
#'   and the per-round cells; element 2 is the row that starts with the
#'   two-letter state code and contains the "R: <digits>" rating.
#' @return A list with elements `name` (character), `state` (character),
#'   `total_points` (numeric), `pre_rating` (numeric) and `opponent_ids`
#'   (numeric vector of length 7, `NA` where a round cell has no digits).
parse_player_data <- function(player_lines) {
  # Read the columns by position instead of using a name-shaped regex. A
  # pattern of exactly two alphabetic words truncates longer names and fails
  # to match at all when the name contains hyphens, apostrophes or periods.
  fields <- strsplit(player_lines[1], "|", fixed = TRUE)[[1]]

  name <- trimws(fields[2])
  total_points <- as.numeric(trimws(fields[3]))

  # Cells 4 to 10 are the seven round results; keep only their digits. A cell
  # with no digits becomes "" and therefore NA.
  opponent_ids <- as.numeric(gsub("[^0-9]", "", fields[4:10]))

  state <- sub("^\\s*([A-Z]{2})\\s+\\|.*", "\\1", player_lines[2])

  # Only the first run of digits after "R:" is kept, so any suffix attached
  # to the rating (e.g. "1641P17") is dropped.
  pre_rating <- as.numeric(sub("^.*R:\\s*(\\d+).*", "\\1", player_lines[2]))

  list(
    name = name,
    state = state,
    total_points = total_points,
    pre_rating = pre_rating,
    opponent_ids = opponent_ids
  )
}
# Cut the raw lines into one chunk per player and parse each chunk. A chunk
# runs from its start line up to the line before the next start line; the
# final chunk runs to the end of the file.
players_data <- lapply(seq_along(player_start_indices), function(i) {
  start_index <- player_start_indices[i]
  # The condition is a single value, so a plain if/else is used rather than
  # the vectorised ifelse().
  end_index <- if (i == length(player_start_indices)) {
    length(chess_data)
  } else {
    player_start_indices[i + 1] - 1
  }
  parse_player_data(chess_data[start_index:end_index])
})

# Pre-ratings in file order. vapply() guarantees a numeric vector even when
# there are no players, and errors if a record lacks a scalar rating.
# NOTE(review): the lookup is positional, so opponent ids are assumed to equal
# each player's position in the file - confirm against the input format.
player_rating_lookup <- vapply(
  players_data,
  function(player) player$pre_rating,
  numeric(1)
)
#' Mean rating of the opponents a player faced.
#'
#' @param opponent_ids Numeric vector of positions into `rating_lookup`.
#'   `NA` entries and positions outside the lookup contribute no rating.
#' @param rating_lookup Numeric vector of ratings indexed by opponent id.
#'   Defaults to the script-level `player_rating_lookup`, so existing
#'   one-argument calls behave as before.
#' @return The mean of the ratings that were found, or `NaN` when none of the
#'   ids resolves to a rating.
calculate_avg_opponent_rating <- function(opponent_ids,
                                          rating_lookup = player_rating_lookup) {
  opponent_ratings <- rating_lookup[opponent_ids]
  mean(opponent_ratings, na.rm = TRUE)
}
# Assemble one row per player. vapply() pins each column's type and length,
# so a malformed record raises an error instead of silently producing a list
# column.
players_df <- data.frame(
  Player_Name = vapply(players_data, `[[`, character(1), "name"),
  State = vapply(players_data, `[[`, character(1), "state"),
  Total_Points = vapply(players_data, `[[`, numeric(1), "total_points"),
  Pre_Rating = vapply(players_data, `[[`, numeric(1), "pre_rating"),
  Avg_Opponent_Rating = vapply(
    players_data,
    function(player) calculate_avg_opponent_rating(player$opponent_ids),
    numeric(1)
  )
)

# Write the summary table to chess_elo_project1.csv.
write_csv(players_df, "chess_elo_project1.csv")