Import table & get it formatted

Need to import the text file in some way. This will take some experimenting, but we can start by using the file's separator line (a long row of hyphens, "-----…-----") as the delimiter.

# Read the raw tournament report, one element per line of text.
# The file is a fixed-layout text table, not a delimited file: parsing it with
# read_delim() and a dummy all-hyphen delimiter only worked through parse
# failures (hence the "parsing issues" warning) and left a junk second column.
tournament_url <- "https://raw.githubusercontent.com/jacshap/Data607/refs/heads/main/tournamentinfo.txt"
raw_lines <- readLines(tournament_url, warn = FALSE)

# Drop the all-hyphen separator lines and any blank lines. What remains is the
# two header lines followed by the player lines, in file order, in a single
# column named "base" (the name the next step splits on).
is_separator <- grepl("^-+\\s*$", raw_lines)
is_blank <- !nzchar(trimws(raw_lines))
chess <- tibble::tibble(base = raw_lines[!(is_separator | is_blank)])

Now we've read in the file one row per line and need to split each row on the “|” delimiter. Retitle the columns, drop the last column (R keeps an extra field after the final “|” that follows Round 7), and drop the first two rows because those used to be the headers.

library(tidyr)

# Labels for the "|"-separated fields. "extra" catches whatever follows the
# final "|" and is thrown away straight after the split.
field_names <- c(
  "Pair_Num", "Player_Name", "Total_Pts",
  paste0("Round_", 1:7),
  "extra"
)

chess <- chess %>%
  separate_wider_delim(
    cols = base,
    delim = "|",
    names = field_names,
    too_many = "merge"
  ) %>%
  select(-extra) %>%   # "extra" is the last column
  slice(-c(1, 2))      # the first two rows are the former header lines
head(chess)

Let's get each player's state and other info from the second line onto the first line with the player's name.

# Each player occupies two consecutive rows, so rows 1-2 form group 1, rows 3-4
# form group 2, and so on: ceiling(row_number() / 2).
# (An earlier try with n() / 2 gave every row the same value, so it could not
# separate the players; it has been removed because it was overwritten anyway.)
#
# Within each group, paste the two rows of every column together with a comma,
# giving "<first-line value>,<second-line value>" per cell.
chess_manip <- chess %>%
  mutate(Group = ceiling(row_number() / 2)) %>%
  group_by(Group) %>%
  summarise(across(Pair_Num:Round_7, ~ paste(.x, collapse = ",")))

print(chess_manip)
## # A tibble: 64 × 11
##    Group Pair_Num  Player_Name Total_Pts Round_1 Round_2 Round_3 Round_4 Round_5
##    <dbl> <chr>     <chr>       <chr>     <chr>   <chr>   <chr>   <chr>   <chr>  
##  1     1 "    1 ,… " GARY HUA… "6.0  ,N… "W  39… "W  21… "W  18… "W  14… "W   7…
##  2     2 "    2 ,… " DAKSHESH… "6.0  ,N… "W  63… "W  58… "L   4… "W  17… "W  16…
##  3     3 "    3 ,… " ADITYA B… "6.0  ,N… "L   8… "W  61… "W  25… "W  21… "W  11…
##  4     4 "    4 ,… " PATRICK … "5.5  ,N… "W  23… "D  28… "W   2… "W  26… "D   5…
##  5     5 "    5 ,… " HANSHI Z… "5.5  ,N… "W  45… "W  37… "D  12… "D  13… "D   4…
##  6     6 "    6 ,… " HANSEN S… "5.0  ,N… "W  34… "D  29… "L  11… "W  35… "D  10…
##  7     7 "    7 ,… " GARY DEE… "5.0  ,N… "W  57… "W  46… "W  13… "W  11… "L   1…
##  8     8 "    8 ,… " EZEKIEL … "5.0  ,N… "W   3… "W  32… "L  14… "L   9… "W  47…
##  9     9 "    9 ,… " STEFANO … "5.0  ,N… "W  25… "L  18… "W  59… "W   8… "W  26…
## 10    10 "   10 ,… " ANVIT RA… "5.0  ,N… "D  16… "L  19… "W  55… "W  31… "D   6…
## # ℹ 54 more rows
## # ℹ 2 more variables: Round_6 <chr>, Round_7 <chr>

Let’s rename Group to Pair_Num, Pair_Num to State & get rid of stuff before comma, and for columns of Total Points through Round 7 let’s get rid of the stuff after the comma.

NOTE: This method does not keep post-game ratings or N values

# Rename columns: the combined Pair_Num text holds "<pair number>,<state>", so
# it becomes State, and the numeric Group becomes the new Pair_Num.
chess_manip <- chess_manip %>% rename(State = Pair_Num, Pair_Num = Group)

# Helper: split every string in `x` on the regular expression `split` and
# return the n-th piece of each (NA when a string has no n-th piece).
# vapply() always returns a character vector as long as `x`, unlike sapply().
nth_piece <- function(x, split, n) {
  vapply(strsplit(x, split), function(pieces) pieces[n], character(1))
}

# State is the part after the comma.
chess_manip <- chess_manip %>% mutate(State = nth_piece(State, ",", 2))

# Total points and round results are the part before the comma.
chess_manip <- chess_manip %>%
  mutate(across(Total_Pts:Round_7, ~ nth_piece(.x, ",", 1)))

# Round cells look like "W  39": keep the opponent's pair number.
# The split pattern must be " +" (one or more spaces); splitting on a single
# space returns an empty string as the second piece.
# Cells with no opponent number have no second piece and become NA.
chess_manip <- chess_manip %>%
  mutate(across(Round_1:Round_7, ~ nth_piece(.x, " +", 2)))

# Split Player_Name on the comma: the name stays in Player_Name and the rest of
# the combined text (the part containing "R: <rating>") goes to Pre_Game_Rank.
chess_manip <- separate_wider_delim(chess_manip, cols = Player_Name, delim = ",", names = c("Player_Name", "Pre_Game_Rank"))

# Keep only the pre-game rating: the run of digits that follows "R:".
# One anchored match replaces the earlier sequence of splits on "R: ", on "P"
# and on spaces, and stops at the first non-digit whatever follows the rating.
chess_manip <- chess_manip %>%
  mutate(Pre_Game_Rank = str_match(Pre_Game_Rank, "R:\\s*(\\d+)")[, 2])

# Strip leading spaces from every column. trimws() returns character, so the
# numeric Pair_Num becomes a character column here as well.
chess_manip <- chess_manip %>% mutate(across(Pair_Num:Round_7, ~ trimws(.x, which = "left")))

Probably also a good idea to clean up extra spaces

# Strip whitespace from both ends of every column in a single pass.
chess_manip <- chess_manip %>%
  mutate(across(Pair_Num:Round_7, ~ trimws(.x, which = "both")))
print(chess_manip)
## # A tibble: 64 × 12
##    Pair_Num State Player_Name    Pre_Game_Rank Total_Pts Round_1 Round_2 Round_3
##    <chr>    <chr> <chr>          <chr>         <chr>     <chr>   <chr>   <chr>  
##  1 1        ON    GARY HUA       1794          6.0       39      21      18     
##  2 2        MI    DAKSHESH DARU… 1553          6.0       63      58      4      
##  3 3        MI    ADITYA BAJAJ   1384          6.0       8       61      25     
##  4 4        MI    PATRICK H SCH… 1716          5.5       23      28      2      
##  5 5        MI    HANSHI ZUO     1655          5.5       45      37      12     
##  6 6        OH    HANSEN SONG    1686          5.0       34      29      11     
##  7 7        MI    GARY DEE SWAT… 1649          5.0       57      46      13     
##  8 8        MI    EZEKIEL HOUGH… 1641          5.0       3       32      14     
##  9 9        ON    STEFANO LEE    1411          5.0       25      18      59     
## 10 10       MI    ANVIT RAO      1365          5.0       16      19      55     
## # ℹ 54 more rows
## # ℹ 4 more variables: Round_4 <chr>, Round_5 <chr>, Round_6 <chr>,
## #   Round_7 <chr>

Calculate expected score

Prompt: “Based on difference in ratings between the chess players and each of their opponents in our Project 1 tournament, calculate each player’s expected score (e.g. 4.3) and the difference from their actual score (e.g 4.0). List the five players who most overperformed relative to their expected score, and the five players that most underperformed relative to their expected score.”

I’m interpreting this as asking across the tournament what is the expected score per game and add those up and that’s the total expected score. Then subtract that from the actual score and see top 5 and bottom 5.

Expected Score of Player A: E(A) = 1/(1+10^((Rating_B - Rating_A)/400)) (source: https://en.wikipedia.org/wiki/Elo_rating_system#:~:text=A%20player’s%20expected%20score%20is,and%200%25%20chance%20of%20drawing.)

First chunk attempt - tried to be clean and do it in dataframe format. Kept getting stuck and couldn’t troubleshoot.

# ATTEMPT 1 (kept for the record; the third chunk is the working version).
# Goal: for every player, look up each round's opponent, pull that opponent's
# pre-game rating, compute the per-round expected score and collect one row per
# player in E_df. As noted further down, it gets stuck at player 12, whose
# Round_5 has no opponent.

# Test case of finding position of Pair_Num that matches Round_1 and print that position for Pre_Game_Rank for first row
round1 <- chess_manip %>% filter(Pair_Num == 1) %>% pull(Round_1)
pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
print(pre_game1)

# To make it formulaic can try to take all values for the rounds per person and put into lists and then mutate the lists into columns.

# Prep lists for for loop (couldn't figure out how to do it formulaicly with string manipulation in Project 1)

# E1 <- c()
# E2 <- c()
# E3 <- c()
# E4 <- c()
# E5 <- c()
# E6 <- c()
# E7 <- c()
# Diagnostic accumulators: one entry is appended per loop iteration.
player_rating_list<-c()
pre_game1_list<-c()
pre_game7_list<-c()
# Empty data frame that receives one row of E1..E7 per player.
E_df <- data.frame(E1 = numeric(0), E2 = numeric(0), E3 = numeric(0), E4 = numeric(0), E5 = numeric(0), E6 = numeric(0), E7 = numeric(0))
pre_game5_list <- c()

# For each row, loop will get player's pre-game rating, tournament opponent's row number for each round (e.g., round1), and will pull that opponents pre-game rating (pre_game1) based on row position. Also making all opponent pre-game ratings numeric after pulling.
# Then will do expected value for each round and append to a list. Outside the loop will mutate the lists to chess_manip to show what they are, then do a sum column to get total expected score and compare to actual score, then do top 5 and bottom 5.

for (i in 1:nrow(chess_manip)){
  # This player's own pre-game rating.
  player_rating <- chess_manip %>% filter(Pair_Num == i) %>% pull(Pre_Game_Rank)
  player_rating <- as.numeric(player_rating)
  # For each round: roundK is the opponent's pair number, pre_gameK that
  # opponent's pre-game rating. When roundK is NA the second filter() keeps no
  # rows, so pull() returns a zero-length vector rather than NA.
  round1 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_1)
  pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
  pre_game1 <- as.numeric(pre_game1)
  round2 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_2)
  pre_game2 <- chess_manip %>% filter(Pair_Num == round2) %>% pull(Pre_Game_Rank)
  pre_game2 <- as.numeric(pre_game2)
  round3 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_3)
  pre_game3 <- chess_manip %>% filter(Pair_Num == round3) %>% pull(Pre_Game_Rank)
  pre_game3 <- as.numeric(pre_game3)
  round4 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_4)
  pre_game4 <- chess_manip %>% filter(Pair_Num == round4) %>% pull(Pre_Game_Rank)
  pre_game4 <- as.numeric(pre_game4)
  round5 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_5)
  pre_game5 <- chess_manip %>% filter(Pair_Num == round5) %>% pull(Pre_Game_Rank)
  #testing
  # NOTE: this has no effect when pre_game5 is zero-length -- there is no
  # element for is.na() to flag, so the vector stays empty.
  pre_game5[is.na(pre_game5)] <- 0
  pre_game5 <- as.numeric(pre_game5)
  round6 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_6)
  pre_game6 <- chess_manip %>% filter(Pair_Num == round6) %>% pull(Pre_Game_Rank)
  pre_game6 <- as.numeric(pre_game6)
  round7 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_7)
  pre_game7 <- chess_manip %>% filter(Pair_Num == round7) %>% pull(Pre_Game_Rank)
  pre_game7 <- as.numeric(pre_game7)
  
  # Elo expected score: 1 / (1 + 10^((opponent rating - own rating) / 400)).
  # A zero-length opponent rating gives a zero-length result.
  new_E1 <- 1/(1+10^((pre_game1 - player_rating)/400))
  #trying to make NA values 0 to at least get into the data frame
  new_E1[is.na(new_E1)] <- 0
  new_E2 <- 1/(1+10^((pre_game2 - player_rating)/400))
  new_E3 <- 1/(1+10^((pre_game3 - player_rating)/400))
  new_E4 <- 1/(1+10^((pre_game4 - player_rating)/400))
  new_E5 <- 1/(1+10^((pre_game5 - player_rating)/400))
  new_E5[is.na(new_E5)] <- 0
  new_E6 <- 1/(1+10^((pre_game6 - player_rating)/400))
  new_E7 <- 1/(1+10^((pre_game7 - player_rating)/400))
  
  # data.frame() requires all columns to have compatible lengths; a
  # zero-length new_E* next to length-1 columns makes this call fail.
  new_E_row <- data.frame(E1 = new_E1, E2 = new_E2, E3 = new_E3, E4 = new_E4, E5 = new_E5, E6 = new_E6, E7 = new_E7)
  #Getting stuck at row 12 because Round 5 has an NA. It's assigning it as numeric (empty)
  E_df <- rbind(E_df, new_E_row)
  
  # E1 <- c(E1, new_E1)
  # E2 <- c(E2, new_E2)
  # E3 <- c(E3, new_E3)
  # E4 <- c(E4, new_E4)
  # E5 <- c(E5, new_E5)
  # E6 <- c(E6, new_E6)
  # E7 <- c(E7, new_E7)
  
  # Appending a zero-length value with c() adds nothing to the list.
  player_rating_list<-c(player_rating_list, player_rating)
  pre_game1_list<-c(pre_game1_list, pre_game1)
  pre_game7_list<-c(pre_game7_list, pre_game7)
  pre_game5_list<-c(pre_game5_list, pre_game5)
}


# NOTE: E1 and E7 are not assigned anywhere in this chunk (their assignments
# are commented out above), so print(E1) / print(E7) only work if those
# objects already exist in the session.
print(E1)
print(pre_game1_list)
print(player_rating_list)
#sanity check that first E1 should be .887035727
1/(1+10^((1436 - 1794)/400))


print(E7)
#double sanity check that 49th row of E7 should be 0.363923100
# NOTE: the expression below uses an opponent rating of 0 and evaluates to
# roughly 0.9994, so it does not reproduce the 0.363923100 quoted above.
1/(1+10^((0 - 1291)/400))
#hmm this didn't work, let's make lists of player_rating, pre_game1, and pre_game7 to diagnose
print(player_rating_list) #works correctly
print(pre_game7_list) #saying the 49th value is 1283
# Aha! So the list isn't being populated if there is an NA value for Round_7
# Let's change from list format to dataframe to maybe fix this? At least visualize what's happening

print(pre_game5_list)
#ok, it's getting stuck at row 12. Let's just do same method from project 1 and put in lists, I may have overcomplicated by trying to simplify --> putting in another chunk

2nd chunk - tried to use lists but got stuck at row 12, which has an NA value in Round 5. Diagnosed and fixed in the next chunk.

# ATTEMPT 2 (kept for the record; the third chunk is the working version).
# Same per-round opponent lookups as the first attempt, but the expected
# scores are appended to plain vectors E1..E7 instead of a data frame. A round
# without an opponent produces a zero-length value, and appending that with
# c() adds nothing, so the affected vectors end up shorter than the number of
# players and later entries shift to earlier positions.

# Test case of finding position of Pair_Num that matches Round_1 and print that position for Pre_Game_Rank for first row
round1 <- chess_manip %>% filter(Pair_Num == 1) %>% pull(Round_1)
pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
print(pre_game1)

# To make it formulaic can try to take all values for the rounds per person and put into lists and then mutate the lists into columns.

# Prep lists for for loop (couldn't figure out how to do it formulaicly with string manipulation in Project 1)

# One accumulator per round; each is grown by one entry per loop iteration.
E1 <- c()
E2 <- c()
E3 <- c()
E4 <- c()
E5 <- c()
E6 <- c()
E7 <- c()
# Diagnostic accumulators.
player_rating_list<-c()
pre_game1_list<-c()
pre_game7_list<-c()
#E_df <- data.frame(E1 = numeric(0), E2 = numeric(0), E3 = numeric(0), E4 = numeric(0), E5 = numeric(0), E6 = numeric(0), E7 = numeric(0))
# E_list is created here but not used again in this chunk.
E_list <- c()
pre_game5_list <- c()

# For each row, loop will get player's pre-game rating, tournament opponent's row number for each round (e.g., round1), and will pull that opponents pre-game rating (pre_game1) based on row position. Also making all opponent pre-game ratings numeric after pulling.
# Then will do expected value for each round and append to a list. Outside the loop will mutate the lists to chess_manip to show what they are, then do a sum column to get total expected score and compare to actual score, then do top 5 and bottom 5.

#Troubleshooting: filter rows 1 to 63 to see how round 6 and 7 are handled
# NOTE: filt_chess_manip is built here, but the loop below still reads from
# chess_manip, so this subset does not affect the results of this chunk.
filt_chess_manip <- chess_manip %>% slice(1:63)

for (i in 1:nrow(chess_manip)){
  # This player's own pre-game rating (still character at this point).
  player_rating <- chess_manip %>% filter(Pair_Num == i) %>% pull(Pre_Game_Rank)
  
  # For each round: roundK is the opponent's pair number, pre_gameK that
  # opponent's pre-game rating. When roundK is NA the second filter() keeps no
  # rows and pull() returns a zero-length vector.
  round1 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_1)
  pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
  
  round2 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_2)
  pre_game2 <- chess_manip %>% filter(Pair_Num == round2) %>% pull(Pre_Game_Rank)
  
  round3 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_3)
  pre_game3 <- chess_manip %>% filter(Pair_Num == round3) %>% pull(Pre_Game_Rank)
  
  round4 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_4)
  pre_game4 <- chess_manip %>% filter(Pair_Num == round4) %>% pull(Pre_Game_Rank)
  
  round5 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_5)
  pre_game5 <- chess_manip %>% filter(Pair_Num == round5) %>% pull(Pre_Game_Rank)
  
  round6 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_6)
  pre_game6 <- chess_manip %>% filter(Pair_Num == round6) %>% pull(Pre_Game_Rank)
  
  round7 <- chess_manip %>% filter(Pair_Num == i) %>% pull(Round_7)
  pre_game7 <- chess_manip %>% filter(Pair_Num == round7) %>% pull(Pre_Game_Rank)
  
  # put in list for troubleshooting
  # (pre_game_char is overwritten on every iteration and not read again in
  # this chunk; it is only useful when inspected interactively.)
  pre_game_char <- c(pre_game1, pre_game2, pre_game3, pre_game4, pre_game5, pre_game6, pre_game7)
  
  # make numeric
  player_rating <- as.numeric(player_rating)
  pre_game1 <- as.numeric(pre_game1)
  pre_game2 <- as.numeric(pre_game2)
  pre_game3 <- as.numeric(pre_game3)
  pre_game4 <- as.numeric(pre_game4)
  pre_game5 <- as.numeric(pre_game5)
  pre_game6 <- as.numeric(pre_game6)
  pre_game7 <- as.numeric(pre_game7)
  # player_rating <- if_else(player_rating =="NA", 1000000000, as.numeric(player_rating))
  # pre_game1 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game1))
  # pre_game2 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game2))
  # pre_game3 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game3))
  # pre_game4 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game4))
  # pre_game5 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game5))
  # pre_game6 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game6))
  # pre_game7 <- if_else(player_rating =="NA", 1000000000, as.numeric(pre_game7))

  # Elo expected score: 1 / (1 + 10^((opponent rating - own rating) / 400)).
  # A zero-length opponent rating gives a zero-length result.
  new_E1 <- 1/(1+10^((pre_game1 - player_rating)/400))
  new_E2 <- 1/(1+10^((pre_game2 - player_rating)/400))
  new_E3 <- 1/(1+10^((pre_game3 - player_rating)/400))
  new_E4 <- 1/(1+10^((pre_game4 - player_rating)/400))
  new_E5 <- 1/(1+10^((pre_game5 - player_rating)/400))
  new_E6 <- 1/(1+10^((pre_game6 - player_rating)/400))
  new_E7 <- 1/(1+10^((pre_game7 - player_rating)/400))
  
  
  # c(x, <zero-length value>) returns x unchanged: nothing is appended for a
  # round without an opponent, which is how the vectors fall out of step with
  # the player rows.
  E1 <- c(E1, new_E1)
  E2 <- c(E2, new_E2)
  E3 <- c(E3, new_E3)
  E4 <- c(E4, new_E4)
  E5 <- c(E5, new_E5)
  E6 <- c(E6, new_E6)
  E7 <- c(E7, new_E7)
  
  player_rating_list<-c(player_rating_list, player_rating)
  pre_game1_list<-c(pre_game1_list, pre_game1)
  pre_game7_list<-c(pre_game7_list, pre_game7)
  pre_game5_list<-c(pre_game5_list, pre_game5)
}


print(E1)
print(pre_game1_list)
print(player_rating_list)
#sanity check that first E1 should be .887035727
1/(1+10^((1436 - 1794)/400))


print(E7)
#double sanity check that 49th row of E7 should be 0.363923100
# NOTE: the expression below uses an opponent rating of 0 and evaluates to
# roughly 0.9994, so it does not reproduce the 0.363923100 quoted above.
1/(1+10^((0 - 1291)/400))
#hmm this didn't work, let's make lists of player_rating, pre_game1, and pre_game7 to diagnose
print(player_rating_list) #works correctly
print(pre_game7_list) #saying the 49th value is 1283
# Aha! So the list isn't being populated if there is an NA value for Round_7
# Let's change from list format to dataframe to maybe fix this? At least visualize what's happening

print(pre_game5_list)
#ok, it's getting stuck at row 12. Let's just do same method from project 1 and put in lists, I may have overcomplicated by trying to simplify --> putting in another chunk

3rd chunk attempt - troubleshooting 2nd chunk

# Test case: find the row whose Pair_Num matches player 1's Round_1 opponent
# and print that opponent's Pre_Game_Rank.
round1 <- chess_manip %>% filter(Pair_Num == 1) %>% pull(Round_1)
pre_game1 <- chess_manip %>% filter(Pair_Num == round1) %>% pull(Pre_Game_Rank)
print(pre_game1)
## [1] "1436"

# Working version of the expected-score calculation.
# For every player and every round: look up the opponent's pre-game rating and
# compute the Elo expected score 1 / (1 + 10^((opponent - player) / 400)).
# A round with no opponent gets an expected score of exactly 0.
#
# Results (one entry per player, in Pair_Num order 1..n):
#   E1 .. E7            numeric expected score for rounds 1-7
#   player_rating_list  each player's own pre-game rating (character)
#   pre_game1_list, pre_game5_list, pre_game7_list
#                       opponent ratings for rounds 1, 5 and 7 (character),
#                       kept for the diagnostics printed below
#
# The lookups are done for all players at once with match() instead of
# filtering the table fifteen times per player inside a loop and growing the
# result vectors one element at a time.

# Give every NA cell a character value ("0"). Pair numbers start at 1, so "0"
# never matches a player and simply means "no opponent this round".
filt_chess_manip <- chess_manip %>% replace(is.na(.), "0")

# Placeholder rating used when there is no opponent. It is large enough that
# 10^((placeholder - rating) / 400) overflows to Inf, so the expected score
# 1 / (1 + Inf) evaluates to exactly 0.
no_game_rating <- "1000000"

pair_nums <- filt_chess_manip$Pair_Num
ratings <- filt_chess_manip$Pre_Game_Rank

# Row of each player 1..n, so results come out in Pair_Num order even if the
# table is not sorted. Every pair number must be present exactly as expected.
player_rows <- match(as.character(seq_len(nrow(filt_chess_manip))), pair_nums)
stopifnot(!anyNA(player_rows))

player_rating_list <- ratings[player_rows]

# Opponent pre-game ratings (character) for one round column; rounds without a
# matching opponent, or with a missing rating, get the placeholder.
opponent_ratings_for <- function(round_col) {
  opponent_ids <- filt_chess_manip[[round_col]][player_rows]
  found <- ratings[match(opponent_ids, pair_nums)]
  found[is.na(found)] <- no_game_rating
  found
}

# Elo expected score of `player` against `opponent` (both given as character
# or numeric ratings; vectorised).
expected_score <- function(opponent, player) {
  1 / (1 + 10^((as.numeric(opponent) - as.numeric(player)) / 400))
}

round_cols <- paste0("Round_", 1:7)
opponent_ratings <- lapply(round_cols, opponent_ratings_for)
names(opponent_ratings) <- round_cols
expected <- lapply(opponent_ratings, expected_score, player = player_rating_list)

E1 <- expected[["Round_1"]]
E2 <- expected[["Round_2"]]
E3 <- expected[["Round_3"]]
E4 <- expected[["Round_4"]]
E5 <- expected[["Round_5"]]
E6 <- expected[["Round_6"]]
E7 <- expected[["Round_7"]]

# Diagnostic copies of the opponent ratings that are printed below.
pre_game1_list <- opponent_ratings[["Round_1"]]
pre_game5_list <- opponent_ratings[["Round_5"]]
pre_game7_list <- opponent_ratings[["Round_7"]]


print(E1)
##  [1] 0.887035727 0.898068279 0.185516412 0.884119356 0.915089111 0.839175318
##  [7] 0.961072531 0.814483588 0.127565396 0.201685256 0.840723043 0.870500258
## [13] 0.856959139 0.876229319 0.121295049 0.798314744 0.805627694 0.797386314
## [19] 0.878704951 0.805627694 0.833662469 0.905212604 0.115880644 0.167940159
## [25] 0.872434604 0.839950695 0.957476582 0.832059841 0.958633151 0.967043138
## [31] 0.965158361 0.942548209 0.941288554 0.160824682 0.997779072 0.143040861
## [37] 0.000000000 0.159276957 0.112964273 0.194372306 0.959536674 0.129499742
## [43] 0.166337531 0.000000000 0.084910889 0.002220928 0.202613686 0.194372306
## [49] 0.160049305 0.041366849 0.042523418 0.032956862 0.000000000 0.123770681
## [55] 0.121295049 0.000000000 0.038927469 0.034841639 0.040463326 0.058711446
## [61] 0.057451791 0.878704951 0.101931721 0.094787396
print(pre_game1_list)
##  [1] "1436"    "1175"    "1641"    "1363"    "1242"    "1399"    "1092"   
##  [8] "1384"    "1745"    "1604"    "1423"    "1332"    "1355"    "1270"   
## [15] "1564"    "1365"    "1382"    "1362"    "1220"    "1348"    "1283"   
## [22] "1163"    "1716"    "1507"    "1411"    "1291"    "1011"    "1229"   
## [29] "1056"    "935"     "917"     "955"     "967"     "1686"    "377"    
## [36] "1666"    "1000000" "1712"    "1794"    "1595"    "853"     "1663"   
## [43] "1563"    "1000000" "1655"    "1438"    "1600"    "1629"    "1579"   
## [50] "1602"    "1552"    "1522"    "1000000" "1610"    "1530"    "1000000"
## [57] "1649"    "1494"    "1403"    "1449"    "1441"    "1186"    "1553"   
## [64] "1555"
print(player_rating_list)
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"
#sanity check that first E1 should be .887035727
1/(1+10^((1436 - 1794)/400))
## [1] 0.8870357
print(E7)
##  [1] 0.610402422 0.365256665 0.167137310 0.389597578 0.537347317 0.669969014
##  [7] 0.634743335 0.609032603 0.257464441 0.205418085 0.682570385 0.832862690
## [13] 0.785026737 0.660999091 0.237115020 0.000000000 0.462652683 0.794581915
## [19] 0.390967397 0.742535559 0.330030986 0.767024918 0.996584031 0.232975082
## [25] 0.900673036 0.317429615 0.000000000 0.705781360 0.000000000 0.935986774
## [31] 0.339000909 0.214973263 0.925623716 0.935293499 0.579899760 0.294218640
## [37] 0.535915927 0.762884980 0.767024918 0.232975082 0.000000000 0.736994761
## [43] 0.636076900 0.879926688 0.866556644 0.003415969 0.099326964 0.420100240
## [49] 0.000000000 0.064013226 0.074376284 0.064706501 0.000000000 0.649294947
## [55] 0.363923100 0.263005239 0.000000000 0.133443356 0.120073312 0.000000000
## [61] 0.464084073 0.000000000 0.000000000 0.350705053
#double sanity check that 49th row of E7 should be 0 because they didn't play that round
1/(1+10^((1000000 - 1291)/400))
## [1] 0
# In the earlier attempts this check failed; printing the lists of player_rating and pre_game7 confirms the values now line up
print(player_rating_list) #works correctly
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"
print(pre_game7_list) # was saying the 49th value is 1283; now shows the "1000000" placeholder, which gives an expected score of 0
##  [1] "1716"    "1649"    "1663"    "1794"    "1629"    "1563"    "1553"   
##  [8] "1564"    "1595"    "1600"    "1579"    "1384"    "1441"    "1494"   
## [15] "1423"    "1000000" "1655"    "1365"    "1641"    "1411"    "1686"   
## [22] "1348"    "377"     "1436"    "1362"    "1712"    "1000000" "1355"   
## [29] "1000000" "1056"    "1610"    "1666"    "1011"    "935"     "1382"   
## [36] "1507"    "955"     "1220"    "1229"    "1555"    "1000000" "1153"   
## [43] "1186"    "853"     "917"     "1363"    "1745"    "1438"    "1000000"
## [50] "1522"    "1449"    "1399"    "1000000" "1163"    "1283"    "1332"   
## [57] "1000000" "1242"    "1199"    "1000000" "980"     "1000000" "1000000"
## [64] "1270"
# Aha! In the earlier attempts the list wasn't being populated when there was an NA value for Round_7.
# With the placeholder in place, every unplayed round now has an entry ("1000000"), so the list stays aligned with the players.
print(pre_game5_list)
##  [1] "1649"    "1604"    "1712"    "1655"    "1716"    "1365"    "1794"   
##  [8] "1362"    "1579"    "1686"    "1384"    "1000000" "1449"    "1552"   
## [15] "1270"    "1553"    "1363"    "1564"    "1600"    "1507"    "1348"   
## [22] "1000000" "1629"    "967"     "1399"    "1411"    "1610"    "1595"   
## [29] "935"     "1494"    "1522"    "1011"    "1666"    "1745"    "1092"   
## [36] "1000000" "1000000" "1000000" "1199"    "1563"    "1000000" "955"    
## [43] "853"     "1436"    "1175"    "1163"    "1641"    "1000000" "917"    
## [50] "1000000" "1441"    "1602"    "1000000" "1220"    "1000000" "1000000"
## [57] "1438"    "1291"    "1283"    "1229"    "1332"    "1000000" "1242"   
## [64] "377"
# OK, it's getting stuck at row 12. Let's use the same method as Project 1 and put the values in lists; I may have overcomplicated things by trying to simplify --> putting it in another chunk.
# After slicing, I can see the NA values are being pulled as character(0), which is breaking things.

# Fixed - see below 

Fixed the double sanity check above. If a value in the chess_manip table was NA, I changed it to 0 to give it a character value. However, when the pull in the first part returned an empty character vector (character(0)), the dplyr if_else function wouldn’t work because it needs a false value whose length isn’t 0, which is exactly what I was trying to correct. When I examined the character(0) variable to see whether it was logically anything, I kept getting “logical(0)”; I used https://stackoverflow.com/questions/48626193/logical0-in-if-statement to fix this. Using the base “if else” statement format worked, and I put it after each opponent round calculation to make sure it was working. A character(0) meant that the player didn’t play that round, so to make the expected value 0 I replaced character(0) with a very large number (1000000), and it worked!

Now we need to mutate the expected values onto the chess_manip table and sum them.

# Attach the per-round expected scores (E1..E7, computed earlier) to the player
# table as E_R1..E_R7, then total them into E_tot for each player.
#
# The total uses vectorised `+`, so every row gets its own sum directly.
# The previous group_by(Pair_Num) + sum(...) approach only produced a per-row
# total while every Pair_Num was unique, and it left the table grouped for all
# later steps.
E_chess_manip <- chess_manip %>%
  mutate(
    E_R1 = E1,
    E_R2 = E2,
    E_R3 = E3,
    E_R4 = E4,
    E_R5 = E5,
    E_R6 = E6,
    E_R7 = E7
  ) %>%
  mutate(E_tot = E_R1 + E_R2 + E_R3 + E_R4 + E_R5 + E_R6 + E_R7)
knitr::kable(E_chess_manip)
Pair_Num State Player_Name Pre_Game_Rank Total_Pts Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7 E_R1 E_R2 E_R3 E_R4 E_R5 E_R6 E_R7 E_tot
1 ON GARY HUA 1794 6.0 39 21 18 14 7 12 4 0.8870357 0.7907981 0.7533861 0.7425356 0.6973451 0.6800707 0.6104024 5.1615736
2 MI DAKSHESH DARURI 1553 6.0 63 58 4 17 16 20 7 0.8980683 0.9749402 0.2812432 0.3923389 0.4271277 0.4398499 0.3652567 3.7788248
3 MI ADITYA BAJAJ 1384 6.0 8 61 25 21 11 13 12 0.1855164 0.9219774 0.1112454 0.2630052 0.1314590 0.1647472 0.1671373 1.9450879
4 MI PATRICK H SCHILLING 1716 5.5 23 28 2 26 5 19 1 0.8841194 0.7690759 0.7187568 0.6875382 0.5868950 0.7057814 0.3895976 4.7417643
5 MI HANSHI ZUO 1655 5.5 45 37 12 13 4 14 17 0.9150891 0.9798780 0.4884891 0.4841750 0.4131050 0.5644005 0.5373473 4.3824840
6 OH HANSEN SONG 1686 5.0 34 29 11 35 10 27 21 0.8391753 0.6185841 0.4626527 0.8065275 0.8638715 0.6838163 0.6699690 4.9445965
7 MI GARY DEE SWATHELL 1649 5.0 57 46 13 11 1 9 2 0.9610725 0.9993397 0.4755545 0.4103165 0.3026549 0.7973863 0.6347433 4.5810679
8 MI EZEKIEL HOUGHTON 1641 5.0 3 32 14 9 47 28 19 0.8144836 0.7597469 0.5444946 0.7898442 0.8328627 0.6838163 0.6090326 5.0342809
9 ON STEFANO LEE 1411 5.0 25 18 59 8 26 7 20 0.1275654 0.2520005 0.9612873 0.2101558 0.2754618 0.2026137 0.2574644 2.2865489
10 MI ANVIT RAO 1365 5.0 16 19 55 31 6 25 18 0.2016853 0.2413054 0.7369948 0.3224394 0.1361285 0.1008826 0.2054181 1.9448541
11 MI CAMERON WILLIAM MC LEMAN 1712 4.5 38 56 6 7 3 34 26 0.8407230 0.9615010 0.5373473 0.5896835 0.8685410 0.8583646 0.6825704 5.3387308
12 MI KENNETH J TACK 1663 4.5 42 33 5 38 NA 1 3 0.8705003 0.7741480 0.5115109 0.7992400 0.0000000 0.3199293 0.8328627 4.1081911
13 MI TORRANCE HENRY JR 1666 4.5 36 27 7 5 33 3 32 0.8569591 0.6584145 0.5244455 0.5158250 0.7771531 0.8352528 0.7850267 4.9530768
14 MI BRADLEY SHAW 1610 4.5 54 44 8 1 27 5 31 0.8762293 0.9141903 0.4555054 0.2574644 0.5827019 0.4355995 0.6609991 4.1826899
15 MI ZACHARY JAMES HOUGHTON 1220 4.5 19 16 30 22 54 33 38 0.1212950 0.0988132 0.1495097 0.1269261 0.4285369 0.2111129 0.2371150 1.3733089
16 MI MIKE NIKITIN 1604 4.0 10 15 NA 39 2 36 NA 0.7983147 0.9011868 0.0000000 0.7245382 0.5728723 0.8074242 0.0000000 3.8043362
17 MI RONALD GRZEGORCZYK 1629 4.0 48 41 26 2 23 22 5 0.8056277 0.7859966 0.5714631 0.6076611 0.8221846 0.6049129 0.4626527 4.6604987
18 MI DAVID SUNDEEN 1600 4.0 47 9 1 32 19 38 10 0.7973863 0.7479995 0.2466139 0.7140789 0.5516235 0.7347571 0.7945819 4.5870412
19 MI DIPANKAR ROY 1564 4.0 15 10 52 28 18 4 8 0.8787050 0.7586946 0.9739366 0.5813015 0.4483765 0.2942186 0.3909674 4.3262002
20 MI JASON ZHENG 1595 4.0 40 49 23 41 28 2 9 0.8056277 0.8519483 0.7917488 0.7512408 0.6240018 0.5601501 0.7425356 5.1272531
21 ON DINH DANG BUI 1563 4.0 43 1 47 3 40 39 6 0.8336625 0.2092019 0.7607961 0.7369948 0.7751529 0.6750402 0.3300310 4.3208793
22 MI EUGENE L MCCLURE 1555 4.0 64 52 28 15 NA 17 40 0.9052126 0.9725887 0.5686414 0.8730739 0.0000000 0.3950871 0.7670249 4.4816286
23 ON ALAN BUI 1363 4.0 4 43 20 58 17 37 46 0.1158806 0.6131368 0.2082512 0.9287326 0.1778154 0.9006730 0.9965840 3.9410737
24 MI MICHAEL R ALDRICH 1229 4.0 28 47 43 25 60 44 39 0.1679402 0.3174296 0.4229075 0.0487842 0.8187933 0.5430665 0.2329751 2.5518963
25 MI LOREN SCHWIEBERT 1745 3.5 9 53 3 24 34 10 47 0.8724346 0.8835283 0.8887546 0.9512158 0.8799267 0.8991174 0.9006730 6.2756504
26 ON MAX ZHU 1579 3.5 49 40 17 4 9 32 11 0.8399507 0.7907981 0.4285369 0.3124618 0.7245382 0.6887736 0.3174296 4.1024889
27 MI GAURAV GIDWANI 1552 3.5 51 13 46 37 14 6 NA 0.9574766 0.3415855 0.9988465 0.9641774 0.4172981 0.3161837 0.0000000 3.9955678
28 MI SOFIA ADINA STANESCU-BELLU 1507 3.5 24 4 22 19 20 8 36 0.8320598 0.2309241 0.4313586 0.4186985 0.3759982 0.3161837 0.7057814 3.3110043
29 MI CHIEDOZIE OKORIE 1602 3.5 50 6 38 34 52 48 NA 0.9586332 0.3814159 0.7369948 0.7628850 0.9789496 0.7801296 0.0000000 4.5990080
30 ON GEORGE AVERY JONES 1522 3.5 52 64 15 55 31 61 50 0.9670431 0.8876113 0.8504903 0.8737104 0.5402082 0.9631699 0.9359868 6.0182200
31 MI RISHI SHETTY 1494 3.5 58 55 64 10 30 50 14 0.9651584 0.8548292 0.8705003 0.6775606 0.4597918 0.9256237 0.3390009 5.0924648
32 ON JOSHUA PHILIP MATHEWS 1441 3.5 61 8 44 18 51 26 13 0.9425482 0.2402531 0.8010809 0.2859211 0.9223905 0.3112264 0.2149733 3.7183935
33 MI JADE GE 1449 3.5 60 12 50 36 13 15 51 0.9412886 0.2258520 0.9057054 0.6320700 0.2228469 0.7888871 0.9256237 4.6422736
34 MI MICHAEL JEFFERY THOMAS 1399 3.5 6 60 37 29 25 11 52 0.1608247 0.9232107 0.9177346 0.2371150 0.1200733 0.1416354 0.9352935 3.4358872
35 MI JOSHUA DAVID LEE 1438 3.5 46 38 56 6 57 52 48 0.9977791 0.5215733 0.8376155 0.1934725 0.8799267 0.9476236 0.5798998 4.9578904
36 MI SIDDHARTH JHA 1355 3.5 13 57 51 33 NA 16 28 0.1430409 0.8196458 0.8787050 0.3679300 0.0000000 0.1925758 0.2942186 2.6961161
37 MI AMIYATOSH PWNANANDAM 980 3.5 NA 5 34 27 NA 23 61 0.0000000 0.0201220 0.0822654 0.0358226 0.0000000 0.0993270 0.5359159 0.7734529
38 MI BRIAN LIU 1423 3.0 11 35 29 12 NA 18 15 0.1592770 0.4784267 0.2630052 0.2007600 0.0000000 0.2652429 0.7628850 2.1295968
39 MI JOEL R HENDON 1436 3.0 1 54 40 16 44 21 24 0.1129643 0.7222345 0.6240018 0.2754618 0.7964547 0.3249598 0.7670249 3.6231017
40 MI FOREST ZHANG 1348 3.0 20 26 39 59 21 56 22 0.1943723 0.2092019 0.3759982 0.9452902 0.2248471 0.7544540 0.2329751 2.9371389
41 MI KYLE WILLIAM MURPHY 1403 3.0 59 17 58 20 NA NA NA 0.9595367 0.2140034 0.9425482 0.2487592 0.0000000 0.0000000 0.0000000 2.3648475
42 MI JARED GE 1332 3.0 12 50 57 60 61 64 56 0.1294997 0.8304449 0.7992400 0.8910109 0.8975401 0.7256856 0.7369948 5.0104161
43 MI ROBERT GLEN VASEY 1283 3.0 21 23 24 63 59 46 55 0.1663375 0.3868632 0.5770925 0.6506046 0.9223905 0.9945968 0.6360769 4.3339621
44 MI JUSTIN D SCHILLING 1199 3.0 NA 14 32 53 39 24 59 0.0000000 0.0858097 0.1989191 0.2466139 0.2035453 0.4569335 0.8799267 2.0717482
45 MI DEREK YAN 1242 3.0 5 51 60 56 63 55 58 0.0849109 0.7907981 0.8296328 0.6253514 0.5952430 0.5798998 0.8665566 4.3723927
46 MI JACOB ALEXANDER LAVALLEY 377 3.0 35 7 27 50 64 43 23 0.0022209 0.0006603 0.0011535 0.0196730 0.0107230 0.0054032 0.0034160 0.0432498
47 MI ERIC WRIGHT 1362 2.5 18 24 21 61 8 51 25 0.2026137 0.6825704 0.2392039 0.9123667 0.1671373 0.8829346 0.0993270 3.1861535
48 MI DANIEL KHAIN 1382 2.5 17 63 NA 52 NA 29 35 0.1943723 0.7670249 0.0000000 0.9291127 0.0000000 0.2198704 0.4201002 2.5304806
49 MI MICHAEL J MARTIN 1291 2.5 26 20 63 64 58 NA NA 0.1600493 0.1480517 0.6609991 0.6763017 0.8959411 0.0000000 0.0000000 2.5413428
50 MI SHIVAM JHA 1056 2.5 29 42 33 46 NA 31 30 0.0413668 0.1695551 0.0942946 0.9803270 0.0000000 0.0743763 0.0640132 1.4239331
51 MI TEJAS AYYAGARI 1011 2.5 27 45 36 57 32 47 33 0.0425234 0.2092019 0.1212950 0.3854986 0.0776095 0.1170654 0.0743763 1.0275702
52 MI ETHAN GUO 935 2.5 30 22 19 48 29 35 34 0.0329569 0.0274113 0.0260634 0.0708873 0.0210504 0.0523764 0.0647065 0.2954521
53 MI JOSE C YBARRA 1393 2.0 NA 25 NA 44 NA 57 NA 0.0000000 0.1164717 0.0000000 0.7533861 0.0000000 0.8497569 0.0000000 1.7196146
54 MI LARRY HODGE 1270 2.0 14 39 61 NA 15 59 64 0.1237707 0.2777655 0.8597585 0.0000000 0.5714631 0.9168612 0.6492949 3.3989140
55 MI ALEX KONG 1186 2.0 62 31 10 30 NA 45 43 0.1212950 0.1451708 0.2630052 0.1262896 0.0000000 0.4201002 0.3639231 1.4397840
56 MI MARISA RICCI 1153 2.0 NA 11 35 45 NA 40 42 0.0000000 0.0384990 0.1623845 0.3746486 0.0000000 0.2455460 0.2630052 1.0840834
57 MI MICHAEL LU 1092 2.0 7 36 42 51 35 53 NA 0.0389275 0.1803542 0.2007600 0.6145014 0.1200733 0.1502431 0.0000000 1.3048595
58 MI VIRAJ MOHILE 917 2.0 31 2 41 23 49 NA 45 0.0348416 0.0250598 0.0574518 0.0712674 0.1040589 0.0000000 0.1334434 0.4261229
59 MI SEAN M MC CORMICK 853 2.0 41 NA 9 40 43 54 44 0.0404633 0.0000000 0.0387127 0.0547098 0.0776095 0.0831388 0.1200733 0.4147074
60 MI JULIA SHEN 967 1.5 33 34 45 42 24 NA NA 0.0587114 0.0767893 0.1703672 0.1089891 0.1812067 0.0000000 0.0000000 0.5960637
61 ON JEZZEL FARKAS 955 1.5 32 3 54 47 42 30 37 0.0574518 0.0780226 0.1402415 0.0876333 0.1024599 0.0368301 0.4640841 0.9667233
62 MI ASHWIN BALAJI 1530 1.0 55 NA NA NA NA NA NA 0.8787050 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.8787050
63 MI THOMAS JOSEPH HOSMER 1175 1.0 2 48 49 43 45 NA NA 0.1019317 0.2329751 0.3390009 0.3493954 0.4047570 0.0000000 0.0000000 1.4280600
64 MI BEN LI 1163 1.0 22 30 31 49 46 42 54 0.0947874 0.1123887 0.1294997 0.3236983 0.9892770 0.2743144 0.3507051 2.2746706

Calculate the difference between each player's actual score (Total_Pts from the table) and their expected score

# Coerce Total_Pts to numeric, subtract the expected total to get each
# player's over/under-performance, and sort with the largest difference first.
E_chess_manip <- E_chess_manip %>%
  mutate(
    Total_Pts = as.numeric(Total_Pts),
    Score_minus_Expected = Total_Pts - E_tot
  ) %>%
  arrange(desc(Score_minus_Expected))

The 5 players who most overperformed relative to their expected score, and the 5 players who most underperformed relative to their expected score

# Keep only the columns needed for the final report.
final <- select(
  E_chess_manip,
  Pair_Num, State, Player_Name, Pre_Game_Rank,
  Total_Pts, E_tot, Score_minus_Expected
)
# First five rows of the table in its current order.
final %>% head(n = 5)
# Re-sort ascending by Score_minus_Expected and show the first five rows again.
final <- arrange(final, Score_minus_Expected)
final %>% head(n = 5)

Fin