R Markdown

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.2

Based on the difference in ratings between the chess players and each of their opponents in our Project 1 tournament, calculate each player’s expected score (e.g. 4.3) and the difference from their actual score (e.g. 4.0). List the five players who most overperformed relative to their expected score, and the five players who most underperformed relative to their expected score.

You’ll find some small differences among different implementations of Elo formulas. You may use any reasonably-sourced formula, but please cite your source.

***

The formula used for the expected score is the standard Elo expected-score formula:

$$E_A = \frac{1}{1 + 10^{(R_B - R_A)/400}}$$
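As a quick sanity check of the formula (a minimal sketch using illustrative ratings, not values from the tournament data), a player rated 1400 facing an opponent rated 1500 is expected to score about 0.36 of a point:

# illustrative ratings only (hypothetical values)
R_A <- 1400
R_B <- 1500
1 / (1 + 10^((R_B - R_A)/400))   # approximately 0.36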

# read the .csv file from GitHub

datafram <- read.csv("https://raw.githubusercontent.com/asadny82/Data607/refs/heads/main/tournament.csv")
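A quick structural check of what was read in can help before computing anything (a minimal sketch; it only prints the dimensions and column layout of whatever the .csv contains):

# inspect the imported data frame
glimpse(datafram)
nrow(datafram)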

Function for calculating the expected score:

expected_score <- function(player_rating, oppnt_rating) {
  # Elo expected score for the player against one opponent
  result <- 1 / (1 + 10^((oppnt_rating - player_rating)/400))
  return(result)
}
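A couple of spot checks on the helper (illustrative ratings, not tournament values): equally rated players should each expect 0.5, and the two sides of any pairing should have expected scores that sum to 1.

expected_score(1400, 1400)                                # 0.5 for equal ratings
expected_score(1400, 1500) + expected_score(1500, 1400)   # sums to 1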

# use the function in a loop and store each player's total expected score in the dataset

for (i in 1:nrow(datafram)) {
  oppnts <- datafram[i, 5:11]                # opponent pair numbers for the seven rounds
  ratings <- c()
  player_rating <- datafram[i, 'pre_rating']
  total_exp_score <- 0
  # collect the pre-tournament ratings of the opponents actually played
  for (j in seq_along(oppnts)) {
    if (!is.na(oppnts[[j]])) {
      ratings <- c(ratings, datafram[oppnts[[j]], 'pre_rating'])
    }
  }
  # sum the per-game expected scores against those opponents
  for (j in seq_along(ratings)) {
    if (!is.na(ratings[[j]])) {
      exp_score <- expected_score(player_rating, ratings[[j]])
      total_exp_score <- total_exp_score + exp_score
    }
  }

  datafram[i, 'total_exp_score'] <- total_exp_score
}
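The same totals can also be computed without the nested loops. The sketch below assumes, as the loop above does, that columns 5 through 11 hold the opponents' pair numbers (i.e., row indices into the same data frame); total_exp_score_check is a hypothetical name used only for this comparison.

# loop-free alternative: one expected-score total per player
total_exp_score_check <- sapply(1:nrow(datafram), function(i) {
  opp_rows <- na.omit(unlist(datafram[i, 5:11]))     # opponents actually played
  sum(expected_score(datafram$pre_rating[i],
                     datafram$pre_rating[opp_rows]))
})

# should match the column filled in by the loop
all.equal(total_exp_score_check, datafram$total_exp_score)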

# calculate the score difference between total_pts and total_exp_score

datafram <- datafram %>%
  mutate(score_difference = total_pts - total_exp_score)
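As a rough consistency check (a hedged sketch, not part of the assignment): every played game distributes one point between its two players, and the two expected scores of a pairing also sum to one, so the field-wide totals of actual and expected points should be close, differing mainly because of byes, forfeits, and unplayed rounds.

# field-wide totals should be in the same ballpark
sum(datafram$total_pts)
sum(datafram$total_exp_score)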

Arrange the data frame to list the five players who most overperformed and the five who most underperformed relative to their expected scores.

datafram %>%
  arrange(desc(score_difference)) %>%
  select(!matches('oppnt[0-7]')) %>%
  head(5)
##                       name state total_pts pre_rating avg_oppnt_rtg
## 1             ADITYA BAJAJ    MI       6.0       1384      1563.571
## 2   ZACHARY JAMES HOUGHTON    MI       4.5       1220      1483.857
## 3                ANVIT RAO    MI       5.0       1365      1554.143
## 4 JACOB ALEXANDER LAVALLEY    MI       3.0        377      1357.714
## 5     AMIYATOSH PWNANANDAM    MI       3.5        980      1384.800
##   total_exp_score score_difference
## 1      1.94508791         4.054912
## 2      1.37330887         3.126691
## 3      1.94485405         3.055146
## 4      0.04324981         2.956750
## 5      0.77345290         2.726547
datafram %>%
  arrange(score_difference) %>%
  select(!matches('oppnt[0-7]')) %>%
  head(5)
##                 name state total_pts pre_rating avg_oppnt_rtg total_exp_score
## 1   LOREN SCHWIEBERT    MI       3.5       1745      1363.286        6.275650
## 2 GEORGE AVERY JONES    ON       3.5       1522      1144.143        6.018220
## 3           JARED GE    MI       3.0       1332      1149.857        5.010416
## 4       RISHI SHETTY    MI       3.5       1494      1259.857        5.092465
## 5   JOSHUA DAVID LEE    MI       3.5       1438      1149.714        4.957890
##   score_difference
## 1        -2.775650
## 2        -2.518220
## 3        -2.010416
## 4        -1.592465
## 5        -1.457890
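An equivalent, slightly more idiomatic dplyr spelling of the same two queries (a sketch only; it reuses the columns already in datafram and should return the same players):

# top five overperformers
datafram %>%
  slice_max(score_difference, n = 5) %>%
  select(name, total_pts, total_exp_score, score_difference)

# top five underperformers
datafram %>%
  slice_min(score_difference, n = 5) %>%
  select(name, total_pts, total_exp_score, score_difference)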

Plot a graph with actual and expected scores.

ggplot(datafram, aes(total_pts, total_exp_score)) +
  geom_smooth(method = 'lm', formula = y ~ x, color = 'green', alpha = 0.7) +
  geom_smooth(method = 'loess', formula = y ~ x, color = 'blue', alpha = 0.9) +
  geom_point()
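One possible extension (a hedged sketch, not part of the original output): add a y = x reference line and label the players with the largest absolute deviation, so the biggest over- and underperformers are visible directly on the plot; top_dev below is a hypothetical helper object.

# players with the largest |actual - expected| gap
top_dev <- datafram %>%
  arrange(desc(abs(score_difference))) %>%
  head(5)

ggplot(datafram, aes(total_pts, total_exp_score)) +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +   # actual == expected
  geom_point() +
  geom_text(data = top_dev, aes(label = name), size = 2.5, vjust = -0.8)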