We will first recreate the chess tournament player information from Project 1.

# Read the raw tournament report into a character vector, one element per line.
rawdata_lines <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'

Parse and extract the chess tournament players' information, as in Project 1.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)

# Keep only the player rows: discard dashed separator lines, any line that
# contains "Pair" or "Num" (the header rows), and blank lines.

player_lines <- grep(
  "^-{3,}|Pair|Num|^\\s*$",
  rawdata_lines,
  value = TRUE,
  invert = TRUE
)

# Each player is described by two consecutive lines, so the count must be even.
stopifnot(length(player_lines) %% 2 == 0)

# Bucket consecutive line pairs: lines 1-2 -> "1", lines 3-4 -> "2", ...

chunks <- split(player_lines, ceiling(seq_along(player_lines) / 2))

# Parse each two-line player chunk into a one-row tibble by splitting on '|'.
#
# chunk[1] fields: pair number | name | total points | one field per round
# chunk[2] fields: state | field containing "R: <pre-rating>" | ...
#
# Each element of the result is a tibble with columns Pair, Name, State,
# TotalPoints, PreRating and Opponents (a list-column holding the integer pair
# numbers found in the round fields).

players_list <- lapply(chunks, function(chunk) {
  # split on '|' and trim whitespace
  p1_fields <- str_trim(str_split(chunk[1], "\\|")[[1]])
  p2_fields <- str_trim(str_split(chunk[2], "\\|")[[1]])

  # pair number (first field), name (second), total points (third)
  pair_num <- as.integer(str_extract(p1_fields[1], "\\d+"))
  name <- p1_fields[2]
  total_pts <- as.numeric(str_extract(p1_fields[3], "\\d+\\.?\\d*"))

  # rounds fields: everything after the first three fields
  rounds <- p1_fields[-seq_len(3)]

  # str_extract() is vectorised, so no sapply() wrapper is needed; round
  # fields without a number (no opponent) yield NA and are dropped.
  opp_nums <- as.integer(str_extract(rounds, "\\d+"))
  opp_nums <- opp_nums[!is.na(opp_nums)]

  # state = first field of line 2 (empty or missing -> NA); scalar if/else
  # instead of ifelse(), which is meant for vectors
  state_raw <- p2_fields[1]
  state <- if (is.na(state_raw) || !nzchar(state_raw)) {
    NA_character_
  } else {
    state_raw
  }

  # pre-rating: first integer after "R:" in the second field of line 2
  rating_field <- if (length(p2_fields) >= 2) p2_fields[2] else ""
  pre_rating <- str_extract(rating_field, "R:\\s*\\d+")
  pre_rating <- as.numeric(str_remove(pre_rating, "R:\\s*"))

  tibble(
    Pair = pair_num,
    Name = name,
    State = state,
    TotalPoints = total_pts,
    PreRating = pre_rating,
    Opponents = list(opp_nums)
  )
})

# Stack the per-player one-row tibbles into a single table.
players_df <- players_list %>% bind_rows()

# Lookup of pre-ratings keyed by pair number: a numeric vector whose names are
# the pair numbers (as character), so it can be indexed by name.

pre_by_pair <- players_df$PreRating
names(pre_by_pair) <- players_df$Pair

# Average pre-rating of the opponents each player faced. Players with no
# recorded opponents get NA; opponents missing from the lookup are ignored
# via na.rm = TRUE.
# NOTE(review): ceiling() always rounds the mean up to the next integer;
# confirm that this is intended rather than round().

players_df <- players_df %>%
  mutate(
    OppAvgPre = vapply(
      Opponents,
      function(opp_ids) {
        if (length(opp_ids) == 0) {
          return(NA_real_)
        }
        ceiling(mean(pre_by_pair[as.character(opp_ids)], na.rm = TRUE))
      },
      numeric(1),
      USE.NAMES = FALSE
    )
  )


# Input data: keep only the per-player summary columns (the Opponents
# list-column is left out).
Chess_tournamentPlayersinfo_df <- players_df[
  c("Pair", "Name", "State", "TotalPoints", "PreRating", "OppAvgPre")
]

Let's calculate every player's expected total score and the difference from their actual total score.

To perform these calculations, we will use the Elo rating system's expected-score formula, as follows: ExpectedScore = 1 / (1 + 10^((Rb - Ra) / 400)), where Rb is the opponent's rating and Ra is the player's rating.

# Expected score per game (Elo formula), using the average opponent pre-rating
# as the opponent rating Rb and the player's own pre-rating as Ra.
players_df$ExpectedPerGame <-
  1 / (1 + 10^((players_df$OppAvgPre - players_df$PreRating) / 400))

# Expected score over the games each player actually played. Opponents only
# holds rounds with an opponent number, so rounds without one are not counted;
# a fixed multiplier of 7 would overstate the expected total for those players.
games_played <- lengths(players_df$Opponents)
players_df$ExpectedTotal <- round(players_df$ExpectedPerGame * games_played, 2)

# Difference between actual and expected.
# NOTE(review): TotalPoints is the reported total and may include points from
# rounds without an opponent; confirm whether those should be excluded.
players_df$Diff <- round(players_df$TotalPoints - players_df$ExpectedTotal, 2)

head(
  players_df[, c(
    "Name", "State", "TotalPoints", "PreRating", "OppAvgPre",
    "ExpectedTotal", "Diff"
  )],
  10
)

The five players who most overperformed relative to their expected score.

# Rank players from largest to smallest Diff (missing values sort last).

overperformers <- players_df %>%
  arrange(desc(Diff))
top5_over <- overperformers %>%
  head(5)

# Final results: the five largest positive gaps between actual and expected.
top5_over %>%
  select(Name, TotalPoints, ExpectedTotal, Diff)

The five players who most underperformed relative to their expected score.

# Rank players from smallest to largest Diff (missing values sort last).

underperformers <- players_df %>%
  arrange(Diff)
top5_under <- underperformers %>%
  head(5)

# Final results: the five players furthest below their expected score.
top5_under %>%
  select(Name, TotalPoints, ExpectedTotal, Diff)