library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.2
Based on difference in ratings between the chess players and each of their opponents in our Project 1 tournament, calculate each player’s expected score (e.g. 4.3) and the difference from their actual score (e.g 4.0). List the five players who most overperformed relative to their expected score, and the five players that most underperformed relative to their expected score.
You’ll find some small differences in different implementation of ELO formulas. You may use any reasonably-sourced formula, but please cite your source. ***
The formula use for expected score: EA=11+10(RB−RA)/400
#read .csv file from github
datafram<- read.csv("https://raw.githubusercontent.com/asadny82/Data607/refs/heads/main/tournament.csv")
function Calculating expected score:
expected_score <- function(player_rating, oppnt_rating) {
result <- 1 / (1 + 10^((oppnt_rating - player_rating)/400))
return(result)
}
#use the function, use lood and enter the score in dataset
for (i in 1:nrow(datafram)) {
oppnts <- datafram[i,5:11]
ratings <- c()
player_rating <- datafram[i,'pre_rating']
total_exp_score <- 0
for (j in 1:length(oppnts)) {
if (!is.na(oppnts[[j]])) {
ratings <- c(ratings, datafram[oppnts[[j]], 'pre_rating'])
}
}
for (j in 1:length(ratings)) {
if (!is.na(ratings[[j]])) {
exp_score <- expected_score(player_rating, ratings[[j]])
total_exp_score <- total_exp_score + exp_score
}
}
datafram[i, 'total_exp_score'] <- total_exp_score
}
#calculate the score different between total_pts and total_exp_score
datafram <- datafram %>%
mutate(score_difference = total_pts - total_exp_score)
datafram %>%
arrange(desc(score_difference)) %>%
select(!matches('oppnt[0-7]')) %>%
head(5)
## name state total_pts pre_rating avg_oppnt_rtg
## 1 ADITYA BAJAJ MI 6.0 1384 1563.571
## 2 ZACHARY JAMES HOUGHTON MI 4.5 1220 1483.857
## 3 ANVIT RAO MI 5.0 1365 1554.143
## 4 JACOB ALEXANDER LAVALLEY MI 3.0 377 1357.714
## 5 AMIYATOSH PWNANANDAM MI 3.5 980 1384.800
## total_exp_score score_difference
## 1 1.94508791 4.054912
## 2 1.37330887 3.126691
## 3 1.94485405 3.055146
## 4 0.04324981 2.956750
## 5 0.77345290 2.726547
datafram %>%
arrange(score_difference) %>%
select(!matches('oppnt[0-7]')) %>%
head(5)
## name state total_pts pre_rating avg_oppnt_rtg total_exp_score
## 1 LOREN SCHWIEBERT MI 3.5 1745 1363.286 6.275650
## 2 GEORGE AVERY JONES ON 3.5 1522 1144.143 6.018220
## 3 JARED GE MI 3.0 1332 1149.857 5.010416
## 4 RISHI SHETTY MI 3.5 1494 1259.857 5.092465
## 5 JOSHUA DAVID LEE MI 3.5 1438 1149.714 4.957890
## score_difference
## 1 -2.775650
## 2 -2.518220
## 3 -2.010416
## 4 -1.592465
## 5 -1.457890
ggplot(datafram, aes(total_pts,total_exp_score)) +
geom_smooth(method = 'lm', formula = 'y~x', color = 'green', alpha = 0.7) +
geom_smooth(method = 'loess', formula = 'y~x', color = 'blue', alpha = 0.9) +
geom_point()