Import table & get it formatted

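Need to import the text file in some way. This will take some experimenting, but we can start by using the row-separator line of hyphens (`-----------------------------------------------------------------------------------------`) as the delimiter.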

# Read the raw tournament text file, using the long row-separator line of
# hyphens as the delimiter; the first line of the file supplies column names.
chess <- read_delim(
  file = "https://raw.githubusercontent.com/jacshap/Data607/refs/heads/main/tournamentinfo.txt",
  delim = "-----------------------------------------------------------------------------------------",
  col_names = TRUE,
  show_col_types = FALSE
)

## New names:
## • `` -> `...1`
## • `` -> `...2`

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

# To fix the parsing problems: name the first column "base", keep only that
# column (dropping the second one), and omit the rows where it is NA.
chess <- chess %>%
  rename(base = 1) %>%
  select(base) %>%
  na.omit()

Now we've read in the file by row and need to split each row on the “|” delimiter. Retitle the columns, drop the last column (R kept an extra field after the final “|” that follows Round 7), and drop the first two rows because those used to be the headers.

library(tidyr)

# Split every raw line on "|" into named columns. The text after the final
# "|" lands in "extra" (too_many = "merge"), which is then dropped as the last
# column; the first two rows (the old header lines) are removed as well.
chess <- chess %>%
  separate_wider_delim(
    cols = base,
    delim = "|",
    names = c(
      "Pair_Num", "Player_Name", "Total_Pts",
      "Round_1", "Round_2", "Round_3", "Round_4",
      "Round_5", "Round_6", "Round_7", "extra"
    ),
    too_many = "merge"
  ) %>%
  select(-last_col()) %>%
  slice(-(1:2))
head(chess)

Let's get each player's state and other info from the second line into the first line with the player's name.

# Need to combine two rows at a time: give every consecutive pair of rows a
# shared Group id (rows 1-2 -> 1, rows 3-4 -> 2, ...).
# Googled "r combine two rows at a time in dataframe"
chess_manip <- chess %>%
  mutate(Group = ceiling(row_number() / 2))

# Collapse each pair of rows into a single row: for every column, paste the
# two values together separated by a comma ("<first line>,<second line>").
chess_manip <- chess_manip %>%
  group_by(Group) %>%
  summarise(across(Pair_Num:Round_7, ~ paste(.x, collapse = ",")))

print(chess_manip)

## # A tibble: 64 × 11
##    Group Pair_Num  Player_Name Total_Pts Round_1 Round_2 Round_3 Round_4 Round_5
##    <dbl> <chr>     <chr>       <chr>     <chr>   <chr>   <chr>   <chr>   <chr>  
##  1     1 "    1 ,… " GARY HUA… "6.0  ,N… "W  39… "W  21… "W  18… "W  14… "W   7…
##  2     2 "    2 ,… " DAKSHESH… "6.0  ,N… "W  63… "W  58… "L   4… "W  17… "W  16…
##  3     3 "    3 ,… " ADITYA B… "6.0  ,N… "L   8… "W  61… "W  25… "W  21… "W  11…
##  4     4 "    4 ,… " PATRICK … "5.5  ,N… "W  23… "D  28… "W   2… "W  26… "D   5…
##  5     5 "    5 ,… " HANSHI Z… "5.5  ,N… "W  45… "W  37… "D  12… "D  13… "D   4…
##  6     6 "    6 ,… " HANSEN S… "5.0  ,N… "W  34… "D  29… "L  11… "W  35… "D  10…
##  7     7 "    7 ,… " GARY DEE… "5.0  ,N… "W  57… "W  46… "W  13… "W  11… "L   1…
##  8     8 "    8 ,… " EZEKIEL … "5.0  ,N… "W   3… "W  32… "L  14… "L   9… "W  47…
##  9     9 "    9 ,… " STEFANO … "5.0  ,N… "W  25… "L  18… "W  59… "W   8… "W  26…
## 10    10 "   10 ,… " ANVIT RA… "5.0  ,N… "D  16… "L  19… "W  55… "W  31… "D   6…
## # ℹ 54 more rows
## # ℹ 2 more variables: Round_6 <chr>, Round_7 <chr>

Let’s rename Group to Pair_Num, Pair_Num to State & get rid of stuff before comma, and for columns of Total Points through Round 7 let’s get rid of the stuff after the comma.

NOTE: This method does not keep post-game ratings or N values

# Rename columns: the old Pair_Num (which holds "<number>,<state>") becomes
# State, and the Group id becomes the new Pair_Num.
chess_manip <- chess_manip %>%
  rename(State = Pair_Num, Pair_Num = Group)

# Split State on the comma and keep the 2nd piece.
# vapply() is used instead of sapply() so the result is always a character
# vector with one value per row.
chess_manip <- chess_manip %>%
  mutate(State = vapply(strsplit(State, ","), `[`, character(1), 2))

# Same idea for Total_Pts and the Round columns, but keep the 1st piece
# (the part before the comma). across() applies it to every column at once.
chess_manip <- chess_manip %>%
  mutate(across(
    Total_Pts:Round_7,
    ~ vapply(strsplit(.x, ","), `[`, character(1), 1)
  ))


# Split the Round columns on whitespace and keep the 2nd piece, i.e. who they
# played.
#
# A first attempt split on a single space:
#chess_manip %>% mutate(across(Round_1:Round_7, ~ sapply(strsplit(.x, " "), `[`,2)))
# That did not keep who they played, only a blank. Splitting on a run of
# spaces (" +") works. Entries with no 2nd piece come out as NA.
chess_manip <- chess_manip %>%
  mutate(across(
    Round_1:Round_7,
    ~ vapply(strsplit(.x, " +"), `[`, character(1), 2)
  ))


# Split the Player_Name column on the comma: the first piece stays in
# Player_Name, the second piece goes into a new Pre_Game_Rank column.
chess_manip <- chess_manip %>%
  separate_wider_delim(
    cols = Player_Name,
    delim = ",",
    names = c("Player_Name", "Pre_Game_Rank")
  )

# Now reduce Pre_Game_Rank to the text to the right of "R: ".
# vapply() guarantees one character value per row.
chess_manip <- chess_manip %>%
  mutate(
    Pre_Game_Rank = vapply(
      strsplit(Pre_Game_Rank, "R: "), `[`, character(1), 2
    )
  )

# Pre_Game_Rank now holds everything to the right of "R: ".
# Where the value contains a "P", keep only the text before the first "P".
chess_manip <- chess_manip %>%
  mutate(Pre_Game_Rank = if_else(
    str_detect(Pre_Game_Rank, "P"),
    vapply(strsplit(Pre_Game_Rank, "P"), `[`, character(1), 1),
    Pre_Game_Rank
  ))

# Got rid of the P stuff. Next, cut everything after the rating itself.
# Earlier tries that split on spaces straight away either did not work or got
# rid of some values:
#chess_manip %>% mutate(Pre_Game_Rank, sapply(strsplit(Pre_Game_Rank, ' +'), `[`,1))
#chess_manip %>% mutate(Pre_Game_Rank = if_else(str_detect(Pre_Game_Rank,' '), sapply(strsplit(Pre_Game_Rank, ' '), `[`, 1), Pre_Game_Rank))
# So first get rid of the spaces on the left, then try again.
chess_manip <- chess_manip %>%
  mutate(across(Pair_Num:Round_7, ~ trimws(.x, which = "left")))

# Where the value still contains spaces, keep only the text before the first
# run of spaces; values without spaces are already done and are left alone.
chess_manip <- chess_manip %>%
  mutate(Pre_Game_Rank = if_else(
    str_detect(Pre_Game_Rank, " +"),
    vapply(strsplit(Pre_Game_Rank, " +"), `[`, character(1), 1),
    Pre_Game_Rank
  ))

# Success!! Display the result to check it.
chess_manip

Probably also a good idea to clean up extra spaces

# Strip trailing and leading whitespace from every column in one pass.
chess_manip <- chess_manip %>%
  mutate(across(Pair_Num:Round_7, ~ trimws(.x, which = "both")))
print(chess_manip)

## # A tibble: 64 × 12
##    Pair_Num State Player_Name    Pre_Game_Rank Total_Pts Round_1 Round_2 Round_3
##    <chr>    <chr> <chr>          <chr>         <chr>     <chr>   <chr>   <chr>  
##  1 1        ON    GARY HUA       1794          6.0       39      21      18     
##  2 2        MI    DAKSHESH DARU… 1553          6.0       63      58      4      
##  3 3        MI    ADITYA BAJAJ   1384          6.0       8       61      25     
##  4 4        MI    PATRICK H SCH… 1716          5.5       23      28      2      
##  5 5        MI    HANSHI ZUO     1655          5.5       45      37      12     
##  6 6        OH    HANSEN SONG    1686          5.0       34      29      11     
##  7 7        MI    GARY DEE SWAT… 1649          5.0       57      46      13     
##  8 8        MI    EZEKIEL HOUGH… 1641          5.0       3       32      14     
##  9 9        ON    STEFANO LEE    1411          5.0       25      18      59     
## 10 10       MI    ANVIT RAO      1365          5.0       16      19      55     
## # ℹ 54 more rows
## # ℹ 4 more variables: Round_4 <chr>, Round_5 <chr>, Round_6 <chr>,
## #   Round_7 <chr>

Calculate expected score

Prompt: “Based on difference in ratings between the chess players and each of their opponents in our Project 1 tournament, calculate each player’s expected score (e.g. 4.3) and the difference from their actual score (e.g 4.0). List the five players who most overperformed relative to their expected score, and the five players that most underperformed relative to their expected score.”

I’m interpreting this as asking across the tournament what is the expected score per game and add those up and that’s the total expected score. Then subtract that from the actual score and see top 5 and bottom 5.

Expected Score of Player A: E(A) = 1/(1+10^((Rating_B - Rating_A)/400)) (source: https://en.wikipedia.org/wiki/Elo_rating_system#:~:text=A%20player’s%20expected%20score%20is,and%200%25%20chance%20of%20drawing.)

First chunk attempt - tried to be clean and do it in dataframe format. Kept getting stuck and couldn’t troubleshoot.

# First attempt: collect the per-round expected scores as rows of a data
# frame (E_df), one row per player.
# NOTE(review): per the notes inside this chunk, it gets stuck at row 12,
# where Round_5 is NA. It is kept as a record of the troubleshooting.

# Test case: find who the player with Pair_Num 1 faced in Round_1, then pull
# the Pre_Game_Rank of the row whose Pair_Num matches that opponent.
round1 <- chess_manip %>% filter(Pair_Num == 1) %>% pull(Round_1)
pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
print(pre_game1)

# To make it formulaic, take all values for the rounds per person, put them into lists, and then mutate the lists into columns.

# Prep the accumulators for the for loop (couldn't figure out how to do it formulaically with string manipulation in Project 1)

# E1 <- c()
# E2 <- c()
# E3 <- c()
# E4 <- c()
# E5 <- c()
# E6 <- c()
# E7 <- c()
player_rating_list<-c()
pre_game1_list<-c()
pre_game7_list<-c()
# Empty data frame with one numeric column per round; the loop appends one row per player.
E_df <- data.frame(E1 = numeric(0), E2 = numeric(0), E3 = numeric(0), E4 = numeric(0), E5 = numeric(0), E6 = numeric(0), E7 = numeric(0))
pre_game5_list <- c()

# For each row, the loop gets the player's pre-game rating, the opponent's Pair_Num for each round (e.g., round1), and pulls that opponent's pre-game rating (e.g., pre_game1). All ratings are converted to numeric after pulling.
# It then computes the expected value for each round and appends it. Plan for after the loop: mutate the results onto chess_manip, add a sum column for the total expected score, compare to the actual score, then take the top 5 and bottom 5.

for (i in 1:nrow(chess_manip)){
  # Player's own pre-game rating
  player_rating <- chess_manip %>% filter(Pair_Num == i) %>% pull(Pre_Game_Rank)
  player_rating <- as.numeric(player_rating)
  # Rounds 1-7: opponent's Pair_Num, then that opponent's pre-game rating
  round1 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_1)
  pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
  pre_game1 <- as.numeric(pre_game1)
  round2 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_2)
  pre_game2 <- chess_manip %>% filter(Pair_Num == round2) %>% pull(Pre_Game_Rank)
  pre_game2 <- as.numeric(pre_game2)
  round3 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_3)
  pre_game3 <- chess_manip %>% filter(Pair_Num == round3) %>% pull(Pre_Game_Rank)
  pre_game3 <- as.numeric(pre_game3)
  round4 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_4)
  pre_game4 <- chess_manip %>% filter(Pair_Num == round4) %>% pull(Pre_Game_Rank)
  pre_game4 <- as.numeric(pre_game4)
  round5 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_5)
  pre_game5 <- chess_manip %>% filter(Pair_Num == round5) %>% pull(Pre_Game_Rank)
  # Testing: turn NA ratings into 0 (only done for round 5 here)
  pre_game5[is.na(pre_game5)] <- 0
  pre_game5 <- as.numeric(pre_game5)
  round6 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_6)
  pre_game6 <- chess_manip %>% filter(Pair_Num == round6) %>% pull(Pre_Game_Rank)
  pre_game6 <- as.numeric(pre_game6)
  round7 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_7)
  pre_game7 <- chess_manip %>% filter(Pair_Num == round7) %>% pull(Pre_Game_Rank)
  pre_game7 <- as.numeric(pre_game7)
  
  # Expected score per round: 1 / (1 + 10^((opponent rating - player rating) / 400))
  new_E1 <- 1/(1+10^((pre_game1 - player_rating)/400))
  # Trying to make NA values 0 to at least get them into the data frame
  new_E1[is.na(new_E1)] <- 0
  new_E2 <- 1/(1+10^((pre_game2 - player_rating)/400))
  new_E3 <- 1/(1+10^((pre_game3 - player_rating)/400))
  new_E4 <- 1/(1+10^((pre_game4 - player_rating)/400))
  new_E5 <- 1/(1+10^((pre_game5 - player_rating)/400))
  new_E5[is.na(new_E5)] <- 0
  new_E6 <- 1/(1+10^((pre_game6 - player_rating)/400))
  new_E7 <- 1/(1+10^((pre_game7 - player_rating)/400))
  
  new_E_row <- data.frame(E1 = new_E1, E2 = new_E2, E3 = new_E3, E4 = new_E4, E5 = new_E5, E6 = new_E6, E7 = new_E7)
  # Getting stuck at row 12 because Round 5 has an NA: the value comes through as an empty numeric vector (numeric(0))
  E_df <- rbind(E_df, new_E_row)
  
  # E1 <- c(E1, new_E1)
  # E2 <- c(E2, new_E2)
  # E3 <- c(E3, new_E3)
  # E4 <- c(E4, new_E4)
  # E5 <- c(E5, new_E5)
  # E6 <- c(E6, new_E6)
  # E7 <- c(E7, new_E7)
  
  # Diagnostic lists of the ratings that were pulled
  player_rating_list<-c(player_rating_list, player_rating)
  pre_game1_list<-c(pre_game1_list, pre_game1)
  pre_game7_list<-c(pre_game7_list, pre_game7)
  pre_game5_list<-c(pre_game5_list, pre_game5)
}


# NOTE(review): E1 is not assigned anywhere in this chunk (its accumulator
# lines are commented out), so this print relies on E1 existing from elsewhere.
print(E1)
print(pre_game1_list)
print(player_rating_list)
# Sanity check: the first E1 should be .887035727
1/(1+10^((1436 - 1794)/400))


# NOTE(review): E7 is likewise not assigned in this chunk.
print(E7)
# Double sanity check that the 49th row of E7 should be 0.363923100
# NOTE(review): the expression below uses an opponent rating of 0 against a
# player rating of 1291; confirm it is the intended check for that value.
1/(1+10^((0 - 1291)/400))
# Hmm, this didn't work; let's make lists of player_rating, pre_game1, and pre_game7 to diagnose
print(player_rating_list) #works correctly
print(pre_game7_list) #saying the 49th value is 1283
# Aha! So the list isn't being populated if there is an NA value for Round_7
# Let's change from list format to dataframe to maybe fix this? At least visualize what's happening

print(pre_game5_list)
# OK, it's getting stuck at row 12. Let's just use the same method from Project 1 and put the values in lists; I may have overcomplicated it by trying to simplify --> putting that in another chunk

2nd chunk - tried to do lists but got stuck at row 12 with an NA value in Round 5. Diagnosed and fixed in the next chunk.

# Second attempt: accumulate the per-round expected scores in plain vectors
# (E1..E7), appending one value per player inside the loop.
# NOTE(review): per the later notes in this file, a lookup for an unplayed
# round comes back as character(0); appending a zero-length value with c()
# adds nothing, so the vectors stop lining up with the player rows.

# Test case: find who the player with Pair_Num 1 faced in Round_1, then pull
# the Pre_Game_Rank of the row whose Pair_Num matches that opponent.
round1 <- chess_manip %>% filter(Pair_Num == 1) %>% pull(Round_1)
pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
print(pre_game1)

# To make it formulaic, take all values for the rounds per person, put them into lists, and then mutate the lists into columns.

# Prep the accumulators for the for loop (couldn't figure out how to do it formulaically with string manipulation in Project 1)

E1 <- c()
E2 <- c()
E3 <- c()
E4 <- c()
E5 <- c()
E6 <- c()
E7 <- c()
player_rating_list<-c()
pre_game1_list<-c()
pre_game7_list<-c()
#E_df <- data.frame(E1 = numeric(0), E2 = numeric(0), E3 = numeric(0), E4 = numeric(0), E5 = numeric(0), E6 = numeric(0), E7 = numeric(0))
# E_list is initialised here but not used again within this chunk.
E_list <- c()
pre_game5_list <- c()

# For each row, the loop gets the player's pre-game rating, the opponent's Pair_Num for each round (e.g., round1), and pulls that opponent's pre-game rating (e.g., pre_game1). All ratings are converted to numeric after pulling.
# It then computes the expected value for each round and appends it to a vector. Plan for after the loop: mutate the vectors onto chess_manip, add a sum column for the total expected score, compare to the actual score, then take the top 5 and bottom 5.

# Troubleshooting: filter rows 1 to 63 to see how rounds 6 and 7 are handled
# (the loop below still iterates over chess_manip, not this filtered copy)
filt_chess_manip <- chess_manip %>% slice(1:63)

for (i in 1:nrow(chess_manip)){
  # Player's own pre-game rating (still character at this point)
  player_rating <- chess_manip %>% filter(Pair_Num == i) %>% pull(Pre_Game_Rank)
  
  # Rounds 1-7: opponent's Pair_Num, then that opponent's pre-game rating
  round1 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_1)
  pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
  
  round2 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_2)
  pre_game2 <- chess_manip %>% filter(Pair_Num == round2) %>% pull(Pre_Game_Rank)
  
  round3 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_3)
  pre_game3 <- chess_manip %>% filter(Pair_Num == round3) %>% pull(Pre_Game_Rank)
  
  round4 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_4)
  pre_game4 <- chess_manip %>% filter(Pair_Num == round4) %>% pull(Pre_Game_Rank)
  
  round5 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_5)
  pre_game5 <- chess_manip %>% filter(Pair_Num == round5) %>% pull(Pre_Game_Rank)
  
  round6 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_6)
  pre_game6 <- chess_manip %>% filter(Pair_Num == round6) %>% pull(Pre_Game_Rank)
  
  round7 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_7)
  pre_game7 <- chess_manip %>% filter(Pair_Num == round7) %>% pull(Pre_Game_Rank)
  
  # Put the pulled (character) ratings in one vector for troubleshooting
  pre_game_char <- c(pre_game1, pre_game2, pre_game3, pre_game4, pre_game5, pre_game6, pre_game7)
  
  # Make everything numeric
  player_rating <- as.numeric(player_rating)
  pre_game1 <- as.numeric(pre_game1)
  pre_game2 <- as.numeric(pre_game2)
  pre_game3 <- as.numeric(pre_game3)
  pre_game4 <- as.numeric(pre_game4)
  pre_game5 <- as.numeric(pre_game5)
  pre_game6 <- as.numeric(pre_game6)
  pre_game7 <- as.numeric(pre_game7)
  # Abandoned idea: substitute a huge rating when the value is "NA"
  # player_rating <- if_else(player_rating =="NA", 1000000000, as.numeric(player_rating))
  # pre_game1 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game1))
  # pre_game2 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game2))
  # pre_game3 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game3))
  # pre_game4 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game4))
  # pre_game5 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game5))
  # pre_game6 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game6))
  # pre_game7 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game7))

  # Expected score per round: 1 / (1 + 10^((opponent rating - player rating) / 400))
  new_E1 <- 1/(1+10^((pre_game1 - player_rating)/400))
  new_E2 <- 1/(1+10^((pre_game2 - player_rating)/400))
  new_E3 <- 1/(1+10^((pre_game3 - player_rating)/400))
  new_E4 <- 1/(1+10^((pre_game4 - player_rating)/400))
  new_E5 <- 1/(1+10^((pre_game5 - player_rating)/400))
  new_E6 <- 1/(1+10^((pre_game6 - player_rating)/400))
  new_E7 <- 1/(1+10^((pre_game7 - player_rating)/400))
  
  
  # Append this player's values to the per-round vectors
  E1 <- c(E1, new_E1)
  E2 <- c(E2, new_E2)
  E3 <- c(E3, new_E3)
  E4 <- c(E4, new_E4)
  E5 <- c(E5, new_E5)
  E6 <- c(E6, new_E6)
  E7 <- c(E7, new_E7)
  
  # Diagnostic lists of the ratings that were pulled
  player_rating_list<-c(player_rating_list, player_rating)
  pre_game1_list<-c(pre_game1_list, pre_game1)
  pre_game7_list<-c(pre_game7_list, pre_game7)
  pre_game5_list<-c(pre_game5_list, pre_game5)
}


print(E1)
print(pre_game1_list)
print(player_rating_list)
# Sanity check: the first E1 should be .887035727
1/(1+10^((1436 - 1794)/400))


print(E7)
# Double sanity check that the 49th row of E7 should be 0.363923100
# NOTE(review): the expression below uses an opponent rating of 0 against a
# player rating of 1291; confirm it is the intended check for that value.
1/(1+10^((0 - 1291)/400))
# Hmm, this didn't work; let's make lists of player_rating, pre_game1, and pre_game7 to diagnose
print(player_rating_list) #works correctly
print(pre_game7_list) #saying the 49th value is 1283
# Aha! So the list isn't being populated if there is an NA value for Round_7
# Let's change from list format to dataframe to maybe fix this? At least visualize what's happening

print(pre_game5_list)
# OK, it's getting stuck at row 12. Let's just use the same method from Project 1 and put the values in lists; I may have overcomplicated it by trying to simplify --> putting that in another chunk

3rd chunk attempt - troubleshooting 2nd chunk

# Test case: find who the player with Pair_Num 1 faced in Round_1, then pull
# the Pre_Game_Rank of the row whose Pair_Num matches that opponent.
round1 <- pull(filter(chess_manip, Pair_Num == 1), Round_1)
pre_game1 <- pull(filter(chess_manip, Pair_Num == round1), Pre_Game_Rank)
print(pre_game1)

## [1] "1436"

# Compute every player's expected score for each of the seven rounds.
#
# For each player, look up the player's own pre-game rating, then for every
# round look up the opponent's pre-game rating via the opponent's Pair_Num and
# apply the expected-score formula. Results:
#   E1 .. E7            numeric vectors, one expected score per player
#   player_rating_list  the players' own pre-game ratings (character)
#   pre_game1_list, pre_game5_list, pre_game7_list
#                       opponent ratings (character) for rounds 1, 5 and 7,
#                       kept for troubleshooting
#
# Fix for the earlier attempts: when a round was not played, the opponent
# lookup returns character(0), so nothing was appended for that player. A base
# if/else now replaces the empty result with a very large placeholder rating,
# which makes the expected score for that round come out as 0 and keeps
# exactly one value per player per round.

# Placeholder rating used when there is no opponent for a round.
no_game_rating <- "1000000"
round_cols <- paste0("Round_", 1:7)

# Replace NA entries with "0" so that every round entry has a character value
# to look up; "0" matches no Pair_Num, so the lookup comes back empty.
filt_chess_manip <- chess_manip %>% replace(is.na(.), "0")
n_players <- nrow(filt_chess_manip)

# Pull the pre-game rating (as text) of the player whose Pair_Num equals
# `opponent_id`. Returns the placeholder when the lookup is empty or NA.
lookup_opponent_rating <- function(opponent_id) {
  rating <- filt_chess_manip %>%
    filter(Pair_Num == opponent_id) %>%
    pull(Pre_Game_Rank)
  # Base if/else on purpose: dplyr::if_else() cannot handle a zero-length
  # value here.
  if (length(which(!is.na(rating))) == 0) {
    no_game_rating
  } else {
    rating
  }
}

# Expected score of a player against an opponent:
# 1 / (1 + 10^((opponent rating - player rating) / 400)).
expected_score <- function(player_rating, opponent_rating) {
  1 / (1 + 10^((opponent_rating - player_rating) / 400))
}

# Preallocate the results instead of growing vectors inside the loop:
# one row per player, one column per round.
player_rating_list <- character(n_players)
opponent_ratings <- matrix(
  NA_character_, nrow = n_players, ncol = length(round_cols)
)
expected <- matrix(NA_real_, nrow = n_players, ncol = length(round_cols))

for (i in seq_len(n_players)) {
  player_row <- filt_chess_manip %>% filter(Pair_Num == i)
  player_rating_list[i] <- player_row %>% pull(Pre_Game_Rank)

  for (r in seq_along(round_cols)) {
    opponent_ratings[i, r] <- lookup_opponent_rating(
      player_row[[round_cols[r]]]
    )
    # Ratings are stored as text, so convert them just before the arithmetic.
    expected[i, r] <- expected_score(
      as.numeric(player_rating_list[i]),
      as.numeric(opponent_ratings[i, r])
    )
  }
}

# Expose the per-round expected scores under the names used further down.
E1 <- expected[, 1]
E2 <- expected[, 2]
E3 <- expected[, 3]
E4 <- expected[, 4]
E5 <- expected[, 5]
E6 <- expected[, 6]
E7 <- expected[, 7]

# Troubleshooting views of the opponent ratings for rounds 1, 5 and 7.
pre_game1_list <- opponent_ratings[, 1]
pre_game5_list <- opponent_ratings[, 5]
pre_game7_list <- opponent_ratings[, 7]


print(E1)

##  [1] 0.887035727 0.898068279 0.185516412 0.884119356 0.915089111 0.839175318
##  [7] 0.961072531 0.814483588 0.127565396 0.201685256 0.840723043 0.870500258
## [13] 0.856959139 0.876229319 0.121295049 0.798314744 0.805627694 0.797386314
## [19] 0.878704951 0.805627694 0.833662469 0.905212604 0.115880644 0.167940159
## [25] 0.872434604 0.839950695 0.957476582 0.832059841 0.958633151 0.967043138
## [31] 0.965158361 0.942548209 0.941288554 0.160824682 0.997779072 0.143040861
## [37] 0.000000000 0.159276957 0.112964273 0.194372306 0.959536674 0.129499742
## [43] 0.166337531 0.000000000 0.084910889 0.002220928 0.202613686 0.194372306
## [49] 0.160049305 0.041366849 0.042523418 0.032956862 0.000000000 0.123770681
## [55] 0.121295049 0.000000000 0.038927469 0.034841639 0.040463326 0.058711446
## [61] 0.057451791 0.878704951 0.101931721 0.094787396

print(pre_game1_list)

##  [1] "1436"    "1175"    "1641"    "1363"    "1242"    "1399"    "1092"   
##  [8] "1384"    "1745"    "1604"    "1423"    "1332"    "1355"    "1270"   
## [15] "1564"    "1365"    "1382"    "1362"    "1220"    "1348"    "1283"   
## [22] "1163"    "1716"    "1507"    "1411"    "1291"    "1011"    "1229"   
## [29] "1056"    "935"     "917"     "955"     "967"     "1686"    "377"    
## [36] "1666"    "1000000" "1712"    "1794"    "1595"    "853"     "1663"   
## [43] "1563"    "1000000" "1655"    "1438"    "1600"    "1629"    "1579"   
## [50] "1602"    "1552"    "1522"    "1000000" "1610"    "1530"    "1000000"
## [57] "1649"    "1494"    "1403"    "1449"    "1441"    "1186"    "1553"   
## [64] "1555"

print(player_rating_list)

##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"

#sanity check that first E1 should be .887035727
1/(1+10^((1436 - 1794)/400))

## [1] 0.8870357

print(E7)

##  [1] 0.610402422 0.365256665 0.167137310 0.389597578 0.537347317 0.669969014
##  [7] 0.634743335 0.609032603 0.257464441 0.205418085 0.682570385 0.832862690
## [13] 0.785026737 0.660999091 0.237115020 0.000000000 0.462652683 0.794581915
## [19] 0.390967397 0.742535559 0.330030986 0.767024918 0.996584031 0.232975082
## [25] 0.900673036 0.317429615 0.000000000 0.705781360 0.000000000 0.935986774
## [31] 0.339000909 0.214973263 0.925623716 0.935293499 0.579899760 0.294218640
## [37] 0.535915927 0.762884980 0.767024918 0.232975082 0.000000000 0.736994761
## [43] 0.636076900 0.879926688 0.866556644 0.003415969 0.099326964 0.420100240
## [49] 0.000000000 0.064013226 0.074376284 0.064706501 0.000000000 0.649294947
## [55] 0.363923100 0.263005239 0.000000000 0.133443356 0.120073312 0.000000000
## [61] 0.464084073 0.000000000 0.000000000 0.350705053

#double sanity check that 49th row of E7 should be 0 because they didn't play that round
1/(1+10^((1000000 - 1291)/400))

## [1] 0

# The earlier attempts failed this check, so print the diagnostic lists of player ratings and opponent ratings to confirm the fix
print(player_rating_list) #works correctly

##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"

print(pre_game7_list) # earlier the 49th value was 1283; now it is the "1000000" placeholder, which gives an expected score of 0

##  [1] "1716"    "1649"    "1663"    "1794"    "1629"    "1563"    "1553"   
##  [8] "1564"    "1595"    "1600"    "1579"    "1384"    "1441"    "1494"   
## [15] "1423"    "1000000" "1655"    "1365"    "1641"    "1411"    "1686"   
## [22] "1348"    "377"     "1436"    "1362"    "1712"    "1000000" "1355"   
## [29] "1000000" "1056"    "1610"    "1666"    "1011"    "935"     "1382"   
## [36] "1507"    "955"     "1220"    "1229"    "1555"    "1000000" "1153"   
## [43] "1186"    "853"     "917"     "1363"    "1745"    "1438"    "1000000"
## [50] "1522"    "1449"    "1399"    "1000000" "1163"    "1283"    "1332"   
## [57] "1000000" "1242"    "1199"    "1000000" "980"     "1000000" "1000000"
## [64] "1270"

# Earlier finding: the list wasn't being populated when there was an NA value for Round_7.
# With the placeholder in place, check that Round_5 now has an entry for every player as well.
print(pre_game5_list)

##  [1] "1649"    "1604"    "1712"    "1655"    "1716"    "1365"    "1794"   
##  [8] "1362"    "1579"    "1686"    "1384"    "1000000" "1449"    "1552"   
## [15] "1270"    "1553"    "1363"    "1564"    "1600"    "1507"    "1348"   
## [22] "1000000" "1629"    "967"     "1399"    "1411"    "1610"    "1595"   
## [29] "935"     "1494"    "1522"    "1011"    "1666"    "1745"    "1092"   
## [36] "1000000" "1000000" "1000000" "1199"    "1563"    "1000000" "955"    
## [43] "853"     "1436"    "1175"    "1163"    "1641"    "1000000" "917"    
## [50] "1000000" "1441"    "1602"    "1000000" "1220"    "1000000" "1000000"
## [57] "1438"    "1291"    "1283"    "1229"    "1332"    "1000000" "1242"   
## [64] "377"

#ok, it's getting stuck at row 12. Let's just use the same method from project 1 and put the values in lists; I may have overcomplicated things by trying to simplify --> putting this in another chunk
# After slicing, I'm seeing that the NA values are being pulled as character(0), which is messing things up.

# Fixed - see below

Fixed the double sanity check above. If a value in the chess_manip table was NA, I changed it to 0 to give it a character value. Then, if the pull in the first part returned an empty character vector (character(0)), the dplyr if_else function wouldn’t work, because it needs a false value whose length isn’t 0 — which is exactly what I was trying to correct. I was examining the character(0) variable to see whether it was logically anything and kept getting “logical(0)”; I used https://stackoverflow.com/questions/48626193/logical0-in-if-statement to fix this. Using the base “if else” statement format worked — I put it after each opponent round calculation to make sure it was working. A character(0) meant that they didn’t play, so to make the expected value 0 I replaced character(0) with a big number, and it worked!

Now need to mutate expected values onto chess_manip table and sum

# Attach the per-round expected scores E1..E7 (computed earlier in the file;
# presumably numeric vectors with one entry per player -- confirm) to the
# player table as E_R1..E_R7, then add them up into E_tot.
#
# The total is an element-wise sum of the seven columns, so grouping by
# Pair_Num is unnecessary. Leaving the tibble ungrouped also avoids later
# dplyr verbs silently operating once per player instead of on the whole table.
E_chess_manip <- chess_manip %>%
  mutate(
    E_R1 = E1, E_R2 = E2, E_R3 = E3, E_R4 = E4,
    E_R5 = E5, E_R6 = E6, E_R7 = E7,
    E_tot = E_R1 + E_R2 + E_R3 + E_R4 + E_R5 + E_R6 + E_R7
  )
knitr::kable(E_chess_manip)

Pair_Num	State	Player_Name	Pre_Game_Rank	Total_Pts	Round_1	Round_2	Round_3	Round_4	Round_5	Round_6	Round_7	E_R1	E_R2	E_R3	E_R4	E_R5	E_R6	E_R7	E_tot
1	ON	GARY HUA	1794	6.0	39	21	18	14	7	12	4	0.8870357	0.7907981	0.7533861	0.7425356	0.6973451	0.6800707	0.6104024	5.1615736
2	MI	DAKSHESH DARURI	1553	6.0	63	58	4	17	16	20	7	0.8980683	0.9749402	0.2812432	0.3923389	0.4271277	0.4398499	0.3652567	3.7788248
3	MI	ADITYA BAJAJ	1384	6.0	8	61	25	21	11	13	12	0.1855164	0.9219774	0.1112454	0.2630052	0.1314590	0.1647472	0.1671373	1.9450879
4	MI	PATRICK H SCHILLING	1716	5.5	23	28	2	26	5	19	1	0.8841194	0.7690759	0.7187568	0.6875382	0.5868950	0.7057814	0.3895976	4.7417643
5	MI	HANSHI ZUO	1655	5.5	45	37	12	13	4	14	17	0.9150891	0.9798780	0.4884891	0.4841750	0.4131050	0.5644005	0.5373473	4.3824840
6	OH	HANSEN SONG	1686	5.0	34	29	11	35	10	27	21	0.8391753	0.6185841	0.4626527	0.8065275	0.8638715	0.6838163	0.6699690	4.9445965
7	MI	GARY DEE SWATHELL	1649	5.0	57	46	13	11	1	9	2	0.9610725	0.9993397	0.4755545	0.4103165	0.3026549	0.7973863	0.6347433	4.5810679
8	MI	EZEKIEL HOUGHTON	1641	5.0	3	32	14	9	47	28	19	0.8144836	0.7597469	0.5444946	0.7898442	0.8328627	0.6838163	0.6090326	5.0342809
9	ON	STEFANO LEE	1411	5.0	25	18	59	8	26	7	20	0.1275654	0.2520005	0.9612873	0.2101558	0.2754618	0.2026137	0.2574644	2.2865489
10	MI	ANVIT RAO	1365	5.0	16	19	55	31	6	25	18	0.2016853	0.2413054	0.7369948	0.3224394	0.1361285	0.1008826	0.2054181	1.9448541
11	MI	CAMERON WILLIAM MC LEMAN	1712	4.5	38	56	6	7	3	34	26	0.8407230	0.9615010	0.5373473	0.5896835	0.8685410	0.8583646	0.6825704	5.3387308
12	MI	KENNETH J TACK	1663	4.5	42	33	5	38	NA	1	3	0.8705003	0.7741480	0.5115109	0.7992400	0.0000000	0.3199293	0.8328627	4.1081911
13	MI	TORRANCE HENRY JR	1666	4.5	36	27	7	5	33	3	32	0.8569591	0.6584145	0.5244455	0.5158250	0.7771531	0.8352528	0.7850267	4.9530768
14	MI	BRADLEY SHAW	1610	4.5	54	44	8	1	27	5	31	0.8762293	0.9141903	0.4555054	0.2574644	0.5827019	0.4355995	0.6609991	4.1826899
15	MI	ZACHARY JAMES HOUGHTON	1220	4.5	19	16	30	22	54	33	38	0.1212950	0.0988132	0.1495097	0.1269261	0.4285369	0.2111129	0.2371150	1.3733089
16	MI	MIKE NIKITIN	1604	4.0	10	15	NA	39	2	36	NA	0.7983147	0.9011868	0.0000000	0.7245382	0.5728723	0.8074242	0.0000000	3.8043362
17	MI	RONALD GRZEGORCZYK	1629	4.0	48	41	26	2	23	22	5	0.8056277	0.7859966	0.5714631	0.6076611	0.8221846	0.6049129	0.4626527	4.6604987
18	MI	DAVID SUNDEEN	1600	4.0	47	9	1	32	19	38	10	0.7973863	0.7479995	0.2466139	0.7140789	0.5516235	0.7347571	0.7945819	4.5870412
19	MI	DIPANKAR ROY	1564	4.0	15	10	52	28	18	4	8	0.8787050	0.7586946	0.9739366	0.5813015	0.4483765	0.2942186	0.3909674	4.3262002
20	MI	JASON ZHENG	1595	4.0	40	49	23	41	28	2	9	0.8056277	0.8519483	0.7917488	0.7512408	0.6240018	0.5601501	0.7425356	5.1272531
21	ON	DINH DANG BUI	1563	4.0	43	1	47	3	40	39	6	0.8336625	0.2092019	0.7607961	0.7369948	0.7751529	0.6750402	0.3300310	4.3208793
22	MI	EUGENE L MCCLURE	1555	4.0	64	52	28	15	NA	17	40	0.9052126	0.9725887	0.5686414	0.8730739	0.0000000	0.3950871	0.7670249	4.4816286
23	ON	ALAN BUI	1363	4.0	4	43	20	58	17	37	46	0.1158806	0.6131368	0.2082512	0.9287326	0.1778154	0.9006730	0.9965840	3.9410737
24	MI	MICHAEL R ALDRICH	1229	4.0	28	47	43	25	60	44	39	0.1679402	0.3174296	0.4229075	0.0487842	0.8187933	0.5430665	0.2329751	2.5518963
25	MI	LOREN SCHWIEBERT	1745	3.5	9	53	3	24	34	10	47	0.8724346	0.8835283	0.8887546	0.9512158	0.8799267	0.8991174	0.9006730	6.2756504
26	ON	MAX ZHU	1579	3.5	49	40	17	4	9	32	11	0.8399507	0.7907981	0.4285369	0.3124618	0.7245382	0.6887736	0.3174296	4.1024889
27	MI	GAURAV GIDWANI	1552	3.5	51	13	46	37	14	6	NA	0.9574766	0.3415855	0.9988465	0.9641774	0.4172981	0.3161837	0.0000000	3.9955678
28	MI	SOFIA ADINA STANESCU-BELLU	1507	3.5	24	4	22	19	20	8	36	0.8320598	0.2309241	0.4313586	0.4186985	0.3759982	0.3161837	0.7057814	3.3110043
29	MI	CHIEDOZIE OKORIE	1602	3.5	50	6	38	34	52	48	NA	0.9586332	0.3814159	0.7369948	0.7628850	0.9789496	0.7801296	0.0000000	4.5990080
30	ON	GEORGE AVERY JONES	1522	3.5	52	64	15	55	31	61	50	0.9670431	0.8876113	0.8504903	0.8737104	0.5402082	0.9631699	0.9359868	6.0182200
31	MI	RISHI SHETTY	1494	3.5	58	55	64	10	30	50	14	0.9651584	0.8548292	0.8705003	0.6775606	0.4597918	0.9256237	0.3390009	5.0924648
32	ON	JOSHUA PHILIP MATHEWS	1441	3.5	61	8	44	18	51	26	13	0.9425482	0.2402531	0.8010809	0.2859211	0.9223905	0.3112264	0.2149733	3.7183935
33	MI	JADE GE	1449	3.5	60	12	50	36	13	15	51	0.9412886	0.2258520	0.9057054	0.6320700	0.2228469	0.7888871	0.9256237	4.6422736
34	MI	MICHAEL JEFFERY THOMAS	1399	3.5	6	60	37	29	25	11	52	0.1608247	0.9232107	0.9177346	0.2371150	0.1200733	0.1416354	0.9352935	3.4358872
35	MI	JOSHUA DAVID LEE	1438	3.5	46	38	56	6	57	52	48	0.9977791	0.5215733	0.8376155	0.1934725	0.8799267	0.9476236	0.5798998	4.9578904
36	MI	SIDDHARTH JHA	1355	3.5	13	57	51	33	NA	16	28	0.1430409	0.8196458	0.8787050	0.3679300	0.0000000	0.1925758	0.2942186	2.6961161
37	MI	AMIYATOSH PWNANANDAM	980	3.5	NA	5	34	27	NA	23	61	0.0000000	0.0201220	0.0822654	0.0358226	0.0000000	0.0993270	0.5359159	0.7734529
38	MI	BRIAN LIU	1423	3.0	11	35	29	12	NA	18	15	0.1592770	0.4784267	0.2630052	0.2007600	0.0000000	0.2652429	0.7628850	2.1295968
39	MI	JOEL R HENDON	1436	3.0	1	54	40	16	44	21	24	0.1129643	0.7222345	0.6240018	0.2754618	0.7964547	0.3249598	0.7670249	3.6231017
40	MI	FOREST ZHANG	1348	3.0	20	26	39	59	21	56	22	0.1943723	0.2092019	0.3759982	0.9452902	0.2248471	0.7544540	0.2329751	2.9371389
41	MI	KYLE WILLIAM MURPHY	1403	3.0	59	17	58	20	NA	NA	NA	0.9595367	0.2140034	0.9425482	0.2487592	0.0000000	0.0000000	0.0000000	2.3648475
42	MI	JARED GE	1332	3.0	12	50	57	60	61	64	56	0.1294997	0.8304449	0.7992400	0.8910109	0.8975401	0.7256856	0.7369948	5.0104161
43	MI	ROBERT GLEN VASEY	1283	3.0	21	23	24	63	59	46	55	0.1663375	0.3868632	0.5770925	0.6506046	0.9223905	0.9945968	0.6360769	4.3339621
44	MI	JUSTIN D SCHILLING	1199	3.0	NA	14	32	53	39	24	59	0.0000000	0.0858097	0.1989191	0.2466139	0.2035453	0.4569335	0.8799267	2.0717482
45	MI	DEREK YAN	1242	3.0	5	51	60	56	63	55	58	0.0849109	0.7907981	0.8296328	0.6253514	0.5952430	0.5798998	0.8665566	4.3723927
46	MI	JACOB ALEXANDER LAVALLEY	377	3.0	35	7	27	50	64	43	23	0.0022209	0.0006603	0.0011535	0.0196730	0.0107230	0.0054032	0.0034160	0.0432498
47	MI	ERIC WRIGHT	1362	2.5	18	24	21	61	8	51	25	0.2026137	0.6825704	0.2392039	0.9123667	0.1671373	0.8829346	0.0993270	3.1861535
48	MI	DANIEL KHAIN	1382	2.5	17	63	NA	52	NA	29	35	0.1943723	0.7670249	0.0000000	0.9291127	0.0000000	0.2198704	0.4201002	2.5304806
49	MI	MICHAEL J MARTIN	1291	2.5	26	20	63	64	58	NA	NA	0.1600493	0.1480517	0.6609991	0.6763017	0.8959411	0.0000000	0.0000000	2.5413428
50	MI	SHIVAM JHA	1056	2.5	29	42	33	46	NA	31	30	0.0413668	0.1695551	0.0942946	0.9803270	0.0000000	0.0743763	0.0640132	1.4239331
51	MI	TEJAS AYYAGARI	1011	2.5	27	45	36	57	32	47	33	0.0425234	0.2092019	0.1212950	0.3854986	0.0776095	0.1170654	0.0743763	1.0275702
52	MI	ETHAN GUO	935	2.5	30	22	19	48	29	35	34	0.0329569	0.0274113	0.0260634	0.0708873	0.0210504	0.0523764	0.0647065	0.2954521
53	MI	JOSE C YBARRA	1393	2.0	NA	25	NA	44	NA	57	NA	0.0000000	0.1164717	0.0000000	0.7533861	0.0000000	0.8497569	0.0000000	1.7196146
54	MI	LARRY HODGE	1270	2.0	14	39	61	NA	15	59	64	0.1237707	0.2777655	0.8597585	0.0000000	0.5714631	0.9168612	0.6492949	3.3989140
55	MI	ALEX KONG	1186	2.0	62	31	10	30	NA	45	43	0.1212950	0.1451708	0.2630052	0.1262896	0.0000000	0.4201002	0.3639231	1.4397840
56	MI	MARISA RICCI	1153	2.0	NA	11	35	45	NA	40	42	0.0000000	0.0384990	0.1623845	0.3746486	0.0000000	0.2455460	0.2630052	1.0840834
57	MI	MICHAEL LU	1092	2.0	7	36	42	51	35	53	NA	0.0389275	0.1803542	0.2007600	0.6145014	0.1200733	0.1502431	0.0000000	1.3048595
58	MI	VIRAJ MOHILE	917	2.0	31	2	41	23	49	NA	45	0.0348416	0.0250598	0.0574518	0.0712674	0.1040589	0.0000000	0.1334434	0.4261229
59	MI	SEAN M MC CORMICK	853	2.0	41	NA	9	40	43	54	44	0.0404633	0.0000000	0.0387127	0.0547098	0.0776095	0.0831388	0.1200733	0.4147074
60	MI	JULIA SHEN	967	1.5	33	34	45	42	24	NA	NA	0.0587114	0.0767893	0.1703672	0.1089891	0.1812067	0.0000000	0.0000000	0.5960637
61	ON	JEZZEL FARKAS	955	1.5	32	3	54	47	42	30	37	0.0574518	0.0780226	0.1402415	0.0876333	0.1024599	0.0368301	0.4640841	0.9667233
62	MI	ASHWIN BALAJI	1530	1.0	55	NA	NA	NA	NA	NA	NA	0.8787050	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.0000000	0.8787050
63	MI	THOMAS JOSEPH HOSMER	1175	1.0	2	48	49	43	45	NA	NA	0.1019317	0.2329751	0.3390009	0.3493954	0.4047570	0.0000000	0.0000000	1.4280600
64	MI	BEN LI	1163	1.0	22	30	31	49	46	42	54	0.0947874	0.1123887	0.1294997	0.3236983	0.9892770	0.2743144	0.3507051	2.2746706

Calculate the difference between the actual score from the table and the expected score

# Convert the actual score to numeric, compute actual minus expected for each
# player, and sort so the largest positive difference comes first.
E_chess_manip <- E_chess_manip %>%
  mutate(
    Total_Pts = as.numeric(Total_Pts),
    Score_minus_Expected = Total_Pts - E_tot
  ) %>%
  arrange(desc(Score_minus_Expected))

5 players who most overperformed relative to their expected score, and the 5 players that most underperformed relative to their expected score

# Keep only the identifying columns plus the actual score, expected score and
# their difference. E_chess_manip is sorted by descending Score_minus_Expected,
# so the first five rows are the biggest over-performers.
final <- select(
  E_chess_manip,
  Pair_Num, State, Player_Name, Pre_Game_Rank,
  Total_Pts, E_tot, Score_minus_Expected
)
head(final, n = 5)

# Re-sort ascending so the first five rows are the biggest under-performers.
final <- arrange(final, Score_minus_Expected)
head(final, n = 5)

Fin

Assignment 5B: ELO Calculations

Jacob Shapiro

2025-09-28