chess_raw <- suppressWarnings(readLines("https://raw.githubusercontent.com/mehtablocker/cuny_607/master/tournamentinfo.txt"))
chess_raw %>% head()
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
# Get rid of useless rows and columns
chess <- chess_raw[-seq(1,length(chess_raw), by=3)]
chess_df <- suppressWarnings(read_delim(chess, "|")) %>% select(-11)
# Clean up attributes and column names
attr(chess_df, "spec") <- NULL
names(chess_df) <- names(chess_df) %>% trimws()
names(chess_df)[grepl("Round", names(chess_df))] <- paste0("opp_round_", 1:7)
chess_df <- chess_df %>% rename(total_pts=Total, tournament_id=Pair) %>% slice(-1)
chess_df <- suppressWarnings(chess_df %>% separate(`Player Name`, sep="/", into=c("player", "rtg")))
chess_df <- chess_df %>% separate(rtg, sep="->", into=c("pre_rtg", "post_rtg"))
chess_df <- chess_df %>% mutate(pre_rtg=parse_number(pre_rtg), post_rtg=parse_number(post_rtg))
# The lead and lag functions are very useful
chess_df <- chess_df %>% mutate(pre_rtg=lead(pre_rtg,1), post_rtg=lead(post_rtg,1), state=lead(tournament_id,1), uscf_id=lead(player,1))
chess_df <- chess_df %>% slice(seq(1, nrow(chess_df), by=2))
chess_df <- suppressWarnings(chess_df %>% mutate_at(vars(starts_with("opp_")), function(x) as.vector(parse_number(x))))
Make sure column values are of the correct data type
chess_df$tournament_id <- chess_df$tournament_id %>% trimws() %>% as.integer()
chess_df$player <- chess_df$player %>% trimws()
chess_df$total_pts <- chess_df$total_pts %>% trimws() %>% as.numeric()
chess_df$state <- chess_df$state %>% trimws()
chess_df$uscf_id <- chess_df$uscf_id %>% trimws() %>% as.integer
Create a new data frame by looping through the current columns of opponents and matching ids
opp_rtg_df <- chess_df %>% select(starts_with("opp_"))
for (j in 1:ncol(opp_rtg_df)){
for (i in 1:nrow(opp_rtg_df)){
if (!is.na(opp_rtg_df[i,j])){
opp_rtg_df[i,j] <- chess_df$pre_rtg[chess_df$tournament_id==unlist(opp_rtg_df[i,j])]
}
}
}
names(opp_rtg_df) <- paste0("rtg_", names(opp_rtg_df))
# Bind the opponent ratings and calculate average rating
chess_df <- chess_df %>% bind_cols(opp_rtg_df) %>% mutate(avg_opp_rtg=round(rowMeans(opp_rtg_df, na.rm=T), 0))
Observe finished table and write to disk
chess_df_final <- chess_df %>% select(player, state, total_pts, pre_rtg, avg_opp_rtg)
knitr::kable(chess_df_final %>% head())
| player | state | total_pts | pre_rtg | avg_opp_rtg |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
# Make sure you know which directory you are in!
write.csv(chess_df_final, file="chess.csv", row.names=F)