Load and Clean Data

I used some of the code from project 1 to load and clean the data frame:

raw <- readLines(url("https://raw.githubusercontent.com/ShanaFarber/cuny-sps/master/DATA_607/project1/player_stats.txt"))

player_names <- unlist(str_extract_all(raw[-c(1, 2, 3, 4)], "([A-Z])+\\s([A-Z](\\s)?)*([A-Z])+"))

totals <- unlist(str_extract_all(raw, "\\d\\.\\d"))

pre_ratings <- unlist(str_extract_all(raw[-c(1, 2, 3, 4)], "\\d+(P\\d+)?(\\s+)?\\->"))

pre_ratings <- str_remove(pre_ratings, "(P\\d+)?(\\s+)?->")

# include player ID for future reference
chess_stats <- data.frame(player_id = c(1:64), player_name = player_names, total = totals,
    player_start_rating = pre_ratings)

chess_stats$total <- as.numeric(chess_stats$total)
chess_stats$player_start_rating <- as.numeric(chess_stats$player_start_rating)

rounds <- unlist(str_extract_all(unlist((str_extract_all(str_remove(raw, "^\\s+[A-Z].+"),
    "\\|[WLDXUHB]\\s+(\\d+)?"))), "[WLDXUHB]\\s+(\\d+)?"))

num_players <- length(player_names)

num_rounds <- length(rounds)/num_players

# create a seven column matrix from rounds to have a data frame of the results
# of each round for each player
rounds <- matrix(rounds, byrow = T, ncol = num_rounds)

full_stats <- chess_stats %>%
    cbind(rounds) %>%
    pivot_longer(cols = c("1", "2", "3", "4", "5", "6", "7"), names_to = "round",
        values_to = "results")

full_stats <- full_stats %>%
    mutate(opp_player_id = as.integer(str_extract(results, "\\d+")))

ELO Formulas

The formula for calculating the expected score is as follows:

\[ E_{A} = {1 \over {1 + 10^{(R_{B} - R_{A})/400}}} \]

The formula for calculating a player's new rating based on performance is:

\[ \text{new rating} = \text{rating} + 32(\text{score} - \text{expected score}) \]

Formulas found here.

Using the first formula, I created a function to calculate the expected score for a player:

expectedScore <- function(a_rating, b_rating) {
    expected_score <- 1/(1 + 10^((b_rating - a_rating)/400))

    return(expected_score)
}
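
As a quick sanity check (using an illustrative rating of 1500, not a value from the data), two equally rated players should each have an expected score of 0.5:

expectedScore(1500, 1500)

## [1] 0.5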

I created another function to calculate the new rating of each player after each round:

newRating <- function(rating, score, score_expected) {
    new_rating <- rating + 32 * (score - score_expected)

    return(round(new_rating, 0))
}
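
To illustrate both functions together with hypothetical ratings (not a pairing taken from the data): a player rated 1794 who beats an opponent rated 1553 has an expected score of roughly 0.80, so the win gains about 32 × (1 − 0.80) ≈ 6 points:

e_a <- expectedScore(1794, 1553)    # roughly 0.80
newRating(1794, 1, e_a)             # 1794 + 32 * (1 - 0.80) = 1800 after rounding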

ELO Calculations

In order to perform the calculations, the following information is required:

- Player name
- Player's pre-rating
- Player's opponents
- The results of each round

After each round, the ratings of each player and their opponent must be adjusted based on the results of the previous round.

The scores for each round are awarded as follows: 1 point for a win (W), 0 points for a loss (L), and 0.5 points for a draw (D).

I created a data frame with these values:

# score values
result_score <- data.frame(results = c("W", "L", "D"), actual_score = c(1, 0, 0.5))

# combining with original data frame to have actual result of each round
full_stats <- full_stats %>%
    mutate(results = str_extract(results, "[WLD]")) %>%
    left_join(result_score, by = "results")

In order to do the overall calculation, I first made a separate data frame for each round to calculate the new player ratings after that round. For each round, I applied expectedScore() to get the expected score and then newRating() to get the new rating for each player. I then updated the values in opp_ratings to reflect the rating changes from that round.

# initial opponent ratings
opp_ratings <- data.frame(opp_player_id = c(1:64), opp_start_rating = chess_stats$player_start_rating)

# round one expected result and new player ratings
round_one_results <- full_stats %>%
    filter(round == 1) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(expected_score = expectedScore(player_start_rating, opp_start_rating),
        player_new_rating = newRating(player_start_rating, actual_score, expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_one_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_one_results$player_new_rating[i]
    }
}

# round two expected result and new player ratings
round_two_results <- full_stats %>%
    filter(round == 2) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(player_start_rating = round_one_results$player_new_rating, expected_score = expectedScore(player_start_rating,
        opp_start_rating), player_new_rating = newRating(player_start_rating, actual_score,
        expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_two_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_two_results$player_new_rating[i]
    }
}

# round three expected result and new player ratings
round_three_results <- full_stats %>%
    filter(round == 3) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(player_start_rating = round_two_results$player_new_rating, expected_score = expectedScore(player_start_rating,
        opp_start_rating), player_new_rating = newRating(player_start_rating, actual_score,
        expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_three_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_three_results$player_new_rating[i]
    }
}

# round four expected result and new player ratings
round_four_results <- full_stats %>%
    filter(round == 4) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(player_start_rating = round_three_results$player_new_rating, expected_score = expectedScore(player_start_rating,
        opp_start_rating), player_new_rating = newRating(player_start_rating, actual_score,
        expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_four_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_four_results$player_new_rating[i]
    }
}

# round five expected result and new player ratings
round_five_results <- full_stats %>%
    filter(round == 5) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(player_start_rating = round_four_results$player_new_rating, expected_score = expectedScore(player_start_rating,
        opp_start_rating), player_new_rating = newRating(player_start_rating, actual_score,
        expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_five_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_five_results$player_new_rating[i]
    }
}

# round six expected result and new player ratings
round_six_results <- full_stats %>%
    filter(round == 6) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(player_start_rating = round_five_results$player_new_rating, expected_score = expectedScore(player_start_rating,
        opp_start_rating), player_new_rating = newRating(player_start_rating, actual_score,
        expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_six_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_six_results$player_new_rating[i]
    }
}

# round seven expected result and new player ratings
round_seven_results <- full_stats %>%
    filter(round == 7) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(player_start_rating = round_six_results$player_new_rating, expected_score = expectedScore(player_start_rating,
        opp_start_rating), player_new_rating = newRating(player_start_rating, actual_score,
        expected_score))

# adjust opponent player rating for next round
for (i in 1:64) {
    if (!is.na(round_seven_results$player_new_rating[i])) {
        opp_ratings$opp_start_rating[i] <- round_seven_results$player_new_rating[i]
    }
}
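
The seven blocks above repeat the same steps, with only the round number changing. As a minimal sketch (assuming each round's rows stay ordered by player_id from 1 to 64, as they do above), the per-round update could be collapsed into a single loop. Note that this sketch carries each player's latest non-missing rating forward, which differs slightly from the blocks above when a player sat out an earlier round:

all_round_results <- list()
current_ratings <- chess_stats$player_start_rating

for (r in 1:7) {
    # opponents' ratings entering round r
    opp_ratings_r <- data.frame(opp_player_id = 1:64, opp_start_rating = current_ratings)

    round_results <- full_stats %>%
        filter(round == as.character(r)) %>%
        left_join(opp_ratings_r, by = "opp_player_id") %>%
        mutate(player_start_rating = current_ratings,
               expected_score = expectedScore(player_start_rating, opp_start_rating),
               player_new_rating = newRating(player_start_rating, actual_score, expected_score))

    # carry the new rating forward, keeping the old one when a round was not played
    current_ratings <- ifelse(is.na(round_results$player_new_rating),
                              current_ratings, round_results$player_new_rating)

    all_round_results[[r]] <- round_results
}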

Using the post-ratings from the initial raw data, I checked my calculations by comparing them with each player's player_new_rating after round 7:

post_ratings <- unlist(str_extract_all(raw[-c(1, 2, 3, 4)], "->(\\s+)?\\d+(P\\d+)?"))
post_ratings <- str_remove(post_ratings, "->(\\s+)?")
post_ratings <- str_remove(post_ratings, "(P\\d+)")

compare_ratings <- data.frame(player_id = c(1:64), pre_rating = pre_ratings, post_seven_rating = opp_ratings$opp_start_rating,
    actual_post_rating = post_ratings)

knitr::kable(head(compare_ratings))

| player_id | pre_rating | post_seven_rating | actual_post_rating |
|---|---|---|---|
| 1 | 1794 | 1820 | 1817 |
| 2 | 1553 | 1621 | 1663 |
| 3 | 1384 | 1507 | 1640 |
| 4 | 1716 | 1742 | 1744 |
| 5 | 1655 | 1691 | 1690 |
| 6 | 1686 | 1688 | 1687 |

I notice that my new rating for each player does not exactly match the post-rating from the cross table. For example, looking at player 3's pre- and post-ratings together with the results of each match, we see that he won five of six rounds and lost one, yet his rating increased by 256 points. According to the explanation in the video and the K value of 32 chosen for calculating the new rating, the rating could increase by at most 32(5) = 160 points for the games won. I am therefore unsure how the actual post-rating was calculated and whether a different K factor was used for different players.

Based on this Wikipedia article, the K factor changes depending on the number of games a player has already played, so it is possible that the K factors used in the actual chess cross table differ for each player based on the number of games they have played.
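
If a per-player K were desired, the newRating() function above could be generalized to take K as an argument instead of hard-coding 32. A minimal sketch (the K values actually used for the cross table are not known here):

# hypothetical generalization of newRating() with K as a parameter
newRatingK <- function(rating, score, score_expected, k = 32) {
    round(rating + k * (score - score_expected), 0)
}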

Moving on, I bound these tables into a data frame of the results of all rounds:

all_rounds <- rbind(round_one_results, round_two_results, round_three_results, round_four_results,
    round_five_results, round_six_results, round_seven_results)

I used summarize() to calculate the overall expected score for each player:

# table of expected scores
expected_scores <- all_rounds %>%
    filter(!is.na(results)) %>%
    group_by(player_name) %>%
    summarize(expected_score = sum(expected_score))
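
Since the two sides of any game have expected scores that sum to exactly 1, the expected scores summed over all players should be close to the total number of games played (assuming every opponent is among the 64 listed players), which gives a quick sanity check:

# sanity check: the total expected score should roughly equal the number of games played
sum(expected_scores$expected_score)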

I then joined this to a table of the players' actual scores:

# create table of expected scores
expected_scores <- all_rounds %>%
    mutate(expected_scores = expectedScore(player_start_rating, opp_start_rating)) %>%
    group_by(player_name) %>%
    summarize(expected_score = sum(expected_scores))

# create table of actual scores
actual_scores <- read.csv(url("https://raw.githubusercontent.com/ShanaFarber/cuny-sps/master/DATA_607/project1/player_stats.csv")) %>%
    transmute(player_name, actual_score = total)

compare_scores <- actual_scores %>%
    left_join(expected_scores, by = "player_name") %>%
    mutate(score_diff = actual_score - expected_score)

Conclusion

Who were the five players who most over-performed relative to their expected score?

overperformed <- compare_scores %>%
    arrange(desc(score_diff))

knitr::kable(head(overperformed, n = 5))

| player_name | actual_score | expected_score | score_diff |
|---|---|---|---|
| ADITYA BAJAJ | 6.0 | 2.1595274 | 3.840473 |
| JACOB ALEXANDER LAVALLEY | 3.0 | 0.0521352 | 2.947865 |
| ZACHARY JAMES HOUGHTON | 4.5 | 1.6542121 | 2.845788 |
| ANVIT RAO | 5.0 | 2.1878462 | 2.812154 |
| STEFANO LEE | 5.0 | 2.4909780 | 2.509022 |

Who were the five players who most under-performed relative to their expected score?

underperformed <- compare_scores %>%
    arrange(score_diff)

knitr::kable(head(underperformed, n = 5))

| player_name | actual_score | expected_score | score_diff |
|---|---|---|---|
| LOREN SCHWIEBERT | 3.5 | 6.089633 | -2.589633 |
| GEORGE AVERY JONES | 3.5 | 5.827445 | -2.327445 |
| JARED GE | 3.0 | 4.746449 | -1.746449 |
| BEN LI | 1.0 | 2.425461 | -1.425461 |
| RISHI SHETTY | 3.5 | 4.907391 | -1.407391 |

Original Way

Initially, I had done the problem as shown below. However, this approach does not account for the change in player ratings based on the results of each round.

opp_ratings <- data.frame(opp_player_id = c(1:64), opp_start_rating = chess_stats$player_start_rating)

expected_scores <- full_stats %>%
    filter(!is.na(actual_score)) %>%
    left_join(opp_ratings, by = "opp_player_id") %>%
    mutate(expected_score = expectedScore(player_start_rating, opp_start_rating)) %>%
    group_by(player_name) %>%
    summarize(expected_score = sum(expected_score))

compare_scores <- actual_scores %>%
    left_join(expected_scores, by = "player_name") %>%
    mutate(score_diff = actual_score - expected_score)

# 5 most over-performed
overperformed <- compare_scores %>%
    arrange(desc(score_diff))

knitr::kable(head(overperformed, n = 5))

| player_name | actual_score | expected_score | score_diff |
|---|---|---|---|
| ADITYA BAJAJ | 6.0 | 1.9450879 | 4.054912 |
| ZACHARY JAMES HOUGHTON | 4.5 | 1.3733089 | 3.126691 |
| ANVIT RAO | 5.0 | 1.9448541 | 3.055146 |
| JACOB ALEXANDER LAVALLEY | 3.0 | 0.0432498 | 2.956750 |
| AMIYATOSH PWNANANDAM | 3.5 | 0.7734529 | 2.726547 |

# 5 most under-performed
underperformed <- compare_scores %>%
    arrange(score_diff)

knitr::kable(head(underperformed, n = 5))

| player_name | actual_score | expected_score | score_diff |
|---|---|---|---|
| LOREN SCHWIEBERT | 3.5 | 6.275650 | -2.775650 |
| GEORGE AVERY JONES | 3.5 | 6.018220 | -2.518220 |
| JARED GE | 3.0 | 5.010416 | -2.010416 |
| RISHI SHETTY | 3.5 | 5.092465 | -1.592465 |
| JOSHUA DAVID LEE | 3.5 | 4.957890 | -1.457890 |