This project analyzes chess tournament results from a text file. The goal is to extract relevant player information (name, state, total points, and pre-tournament rating) and to calculate the average pre-tournament rating of each player’s opponents. The data is then compiled into a clean CSV file for future analysis or database import.
# Fetch the raw tournament report as a character vector, one element per line.
lines <- readLines(
  'https://raw.githubusercontent.com/vincent-usny/project1/refs/heads/main/project1.txt',
  warn = FALSE
)
# Discard the first four lines so that `data` starts at the fifth line
# (presumably the first four are the table header -- confirm against the file).
data <- lines[-seq_len(4)]
# Player names.
# Every third line of `data`, starting with the first, is a player line; it is
# split on "|" and the second field, trimmed of whitespace, is the name.
players_lines <- data[seq(from = 1, to = length(data), by = 3)]
parts_line1 <- strsplit(players_lines, '\\|')
names <- vapply(
  parts_line1,
  function(fields) trimws(fields[2]),
  character(1)
)
# Player states.
# Every third line of `data`, starting with the second, is a state line; it is
# split on "|" and the first field, trimmed of whitespace, is the state.
state_lines <- data[seq(from = 2, to = length(data), by = 3)]
parts_line2 <- strsplit(state_lines, '\\|')
states <- vapply(
  parts_line2,
  function(fields) trimws(fields[1]),
  character(1)
)
# Total points.
# The third "|"-delimited field of each player line holds the score as text.
# It is converted to numeric so the column sorts and sums as a number and is
# written to the CSV unquoted, consistent with the numeric rating columns.
total_points <- as.numeric(vapply(
  parts_line1,
  function(fields) trimws(fields[3]),
  character(1)
))
# Pre-tournament rating.
# The second "|"-delimited field of each state line contains the text "R:"
# followed by the rating; the first run of digits after "R:" is captured.
ratings <- vapply(parts_line2, function(fields) fields[2], character(1))
# sub() is vectorised, so the whole column is converted in a single call.
pre_rating <- as.numeric(sub(".*R: *(\\d+).*", "\\1", ratings))
# Average pre-tournament rating of each player's opponents.

# Pair number of every player: the first "|"-delimited field of the player line.
pair_nums <- as.numeric(vapply(
  parts_line1,
  function(fields) fields[1],
  character(1)
))

# Pre-rating keyed by pair number (the names are the pair numbers as strings).
pre_rating_lookup <- setNames(pre_rating, pair_nums)

avg_opp_rating <- vapply(parts_line1, function(fields) {
  # Fields 4 to 10 of a player line hold the per-round results: a result
  # letter followed by whitespace and the opponent's pair number.
  rounds <- trimws(fields[4:10])

  # Only rounds whose result letter is W, L or D are counted; every other
  # code (and any missing field) is skipped.
  played <- rounds[substr(rounds, 1, 1) %in% c("W", "L", "D")]
  if (length(played) == 0) {
    # No countable games: return NA explicitly instead of relying on
    # mean(NULL), which yields NA together with a warning.
    return(NA_real_)
  }

  # The opponent's pair number is the second whitespace-separated token.
  opp_nums <- vapply(
    strsplit(played, "\\s+"),
    function(tokens) as.numeric(tokens[2]),
    numeric(1)
  )

  # Index the lookup by NAME (pair number), not by position: positional
  # indexing is only correct when pair numbers are exactly 1..n in file order.
  mean(pre_rating_lookup[as.character(opp_nums)])
}, numeric(1))

# Report the averages as whole rating points.
avg_opp_rating <- round(avg_opp_rating)
# Assemble the final table, one row per player, from the vectors built above,
# then preview its first six rows.
chess_df <- data.frame(
  Player         = names,
  State          = states,
  Total_points   = total_points,
  Pre_rating     = pre_rating,
  Avg_opp_rating = avg_opp_rating
)
head(chess_df, n = 6L)
## Player State Total_points Pre_rating Avg_opp_rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
# Save the table as CSV in the working directory; row.names = FALSE leaves out
# the row-index column.
write.csv(x = chess_df, file = 'chess_tournament.csv', row.names = FALSE)
The tournament data was successfully extracted and cleaned, producing a new dataset that contains each player’s name, state, total points, pre-tournament rating, and the average pre-tournament rating of that player’s opponents. The CSV file can now be imported into MySQL or other data tools.