# Read the raw tournament cross-table, one element per line of text.
# warn = FALSE: the file has no trailing newline, which otherwise triggers an
# "incomplete final line" warning on every run; the last line is still read.
rawdata_lines <- readLines("tournamentinfo.txt", warn = FALSE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Parse the raw cross-table into one row per player ----
#
# Keep only the player rows: drop dashed separators, blank lines, and the two
# header rows. The header patterns are anchored to the first column so that a
# player row that merely contains "Pair" or "Num" somewhere is not discarded.
noise_pattern <- "^-{3,}|^\\s*(Pair|Num)\\s*\\||^\\s*$"
player_lines <- rawdata_lines[!grepl(noise_pattern, rawdata_lines)]
stopifnot(length(player_lines) %% 2 == 0) # two lines per player

# Group into pairs of lines (1 header line + 1 info line per player).
chunks <- split(
  player_lines,
  rep(seq_len(length(player_lines) / 2), each = 2)
)

# Parse each two-line chunk into a one-row tibble by splitting on '|'.
players_list <- lapply(chunks, function(chunk) {
  # Split on '|' and trim the surrounding whitespace of every field.
  p1_fields <- str_trim(str_split(chunk[1], "\\|")[[1]])
  p2_fields <- str_trim(str_split(chunk[2], "\\|")[[1]])

  # Line 1: pair number (field 1), name (field 2), total points (field 3).
  pair_num <- as.integer(str_extract(p1_fields[1], "\\d+"))
  name <- p1_fields[2]
  total_pts <- as.numeric(str_extract(p1_fields[3], "\\d+\\.?\\d*"))

  # Round fields are everything after the first three. A field without digits
  # has no opponent number and is dropped. str_extract() is vectorised, so the
  # result is always a character vector (sapply() returns a list on empty
  # input).
  rounds <- p1_fields[-(1:3)]
  opp_nums <- as.integer(na.omit(str_extract(rounds, "\\d+")))

  # Line 2: state is the first field (empty -> NA).
  state_raw <- p2_fields[1]
  state <- if (is.na(state_raw) || !nzchar(state_raw)) {
    NA_character_
  } else {
    state_raw
  }

  # Pre-rating: the first run of digits after "R:" in the second field. Any
  # suffix following those digits is ignored.
  rating_field <- if (length(p2_fields) >= 2) p2_fields[2] else ""
  pre_rating <- as.numeric(str_match(rating_field, "R:\\s*(\\d+)")[, 2])

  tibble(
    Pair = pair_num,
    Name = name,
    State = state,
    TotalPoints = total_pts,
    PreRating = pre_rating,
    Opponents = list(opp_nums)
  )
})

players_df <- bind_rows(players_list)

# Fail loudly if a non-player line slipped through the filter: every parsed
# record must carry a pair number.
stopifnot(!anyNA(players_df$Pair))
# Lookup table: pre-rating keyed by pair number (names are the pair numbers
# as character strings).
pre_by_pair <- players_df$PreRating
names(pre_by_pair) <- as.character(players_df$Pair)

# Average pre-rating of the opponents a player faced. Opponents missing from
# the lookup are ignored; a player with no opponents gets NA.
# NOTE(review): ceiling() always rounds the average UP to the next integer;
# confirm this is intended rather than round().
avg_opponent_rating <- function(opp) {
  opp <- unlist(opp)
  if (length(opp) == 0) {
    return(NA_real_)
  }
  ceiling(mean(pre_by_pair[as.character(opp)], na.rm = TRUE))
}

players_df$OppAvgPre <- vapply(
  players_df$Opponents,
  avg_opponent_rating,
  numeric(1),
  USE.NAMES = FALSE
)

# Input data
summary_cols <- c(
  "Pair", "Name", "State", "TotalPoints", "PreRating", "OppAvgPre"
)
Chess_tournamentPlayersinfo_df <- players_df[, summary_cols]
# To perform these calculations we use the Elo expected-score formula:
#   ExpectedScore = 1 / (1 + 10^((Rb - Ra) / 400))
# where Rb is the opponent's rating and Ra is the player's rating.
# Expected score per game from the Elo formula, using the average opponent
# pre-rating as the opposing rating: 1 / (1 + 10^((Rb - Ra) / 400)).
rating_gap <- players_df$OppAvgPre - players_df$PreRating
players_df$ExpectedPerGame <- 1 / (1 + 10^(rating_gap / 400))

# Expected score over the games each player actually played. The number of
# games is the number of opponents parsed for that player, not a fixed 7:
# scaling by 7 inflates the expectation for anyone who faced fewer opponents.
games_played <- lengths(players_df$Opponents)
players_df$ExpectedTotal <- round(players_df$ExpectedPerGame * games_played, 2)

# Difference between actual and expected score (positive = over-performed).
# NOTE(review): TotalPoints may include points from rounds that had no
# opponent; confirm whether those should be excluded from the comparison.
players_df$Diff <- round(players_df$TotalPoints - players_df$ExpectedTotal, 2)

report_cols <- c(
  "Name", "State", "TotalPoints", "PreRating", "OppAvgPre",
  "ExpectedTotal", "Diff"
)
head(players_df[, report_cols], 10)
# Five biggest over-performers: largest (actual - expected) first.
# Rows with a missing Diff sort to the end.
overperformers <- players_df %>%
  arrange(desc(Diff))
top5_over <- overperformers %>%
  head(5)
# Final results
top5_over %>%
  select(Name, TotalPoints, ExpectedTotal, Diff)

# Five biggest under-performers: smallest (actual - expected) first.
underperformers <- players_df %>%
  arrange(Diff)
top5_under <- underperformers %>%
  head(5)
# Final results
top5_under %>%
  select(Name, TotalPoints, ExpectedTotal, Diff)