Load the packages for later use
library(dplyr) # Selecting and filtering
library(magrittr) # Pipes
library(knitr) # kable
library(stringr) # String manipulation
Read file as a string
RawFile <- paste(readLines("tournamentinfo.txt"), collapse = "\n")
# This is a partial view of how the string looks like
paste(readLines("tournamentinfo.txt", n=20), collapse = "\n")
## [1] "-----------------------------------------------------------------------------------------\n Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| \n Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | \n-----------------------------------------------------------------------------------------\n 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|\n ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |\n-----------------------------------------------------------------------------------------\n 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|\n MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |\n-----------------------------------------------------------------------------------------\n 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|\n MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |\n-----------------------------------------------------------------------------------------\n 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|\n MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |\n-----------------------------------------------------------------------------------------\n 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|\n MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |\n-----------------------------------------------------------------------------------------\n 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
Clean up and create a table for each player per row
# remove unnecessary characters and merge 2 lines of player stats into one
splitRawFile <- gsub("\n\\s+|--+","",RawFile) %>% # remove lines containing #----------#
gsub("\\|\\s*\n", "\n", .) %>% # merge each player stats into 1 line
strsplit(.,"\n") # split the file containing one player per line
# read into a table
df_chess <- read.table(textConnection(splitRawFile[[1]]), sep="|", skip = 1)
kable(head(df_chess))
| V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 | V13 | V14 | V15 | V16 | V17 | V18 | V19 | V20 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | GARY HUA | 6.0 | W 39 | W 21 | W 18 | W 14 | W 7 | D 12 | D 4 | ON | 15445895 / R: 1794 ->1817 | N:2 | W | B | W | B | W | B | W |
| 2 | DAKSHESH DARURI | 6.0 | W 63 | W 58 | L 4 | W 17 | W 16 | W 20 | W 7 | MI | 14598900 / R: 1553 ->1663 | N:2 | B | W | B | W | B | W | B |
| 3 | ADITYA BAJAJ | 6.0 | L 8 | W 61 | W 25 | W 21 | W 11 | W 13 | W 12 | MI | 14959604 / R: 1384 ->1640 | N:2 | W | B | W | B | W | B | W |
| 4 | PATRICK H SCHILLING | 5.5 | W 23 | D 28 | W 2 | W 26 | D 5 | W 19 | D 1 | MI | 12616049 / R: 1716 ->1744 | N:2 | W | B | W | B | W | B | B |
| 5 | HANSHI ZUO | 5.5 | W 45 | W 37 | D 12 | D 13 | D 4 | W 14 | W 17 | MI | 14601533 / R: 1655 ->1690 | N:2 | B | W | B | W | B | W | B |
| 6 | HANSEN SONG | 5.0 | W 34 | D 29 | L 11 | W 35 | D 10 | W 27 | W 21 | OH | 15055204 / R: 1686 ->1687 | N:3 | W | B | W | B | B | W | B |
Get subset of columns that are needed
# Only need first 12 columns of data and give them names
cnames <- c("PlayerNum","Player","State","TotalPoints", "R1","R2","R3","R4","R5","R6","R7","Rating")
df_chess_subset <- df_chess[c(1,2,11,3:10,12)]
colnames(df_chess_subset) <- cnames
kable(head(df_chess_subset))
| PlayerNum | Player | State | TotalPoints | R1 | R2 | R3 | R4 | R5 | R6 | R7 | Rating |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | GARY HUA | ON | 6.0 | W 39 | W 21 | W 18 | W 14 | W 7 | D 12 | D 4 | 15445895 / R: 1794 ->1817 |
| 2 | DAKSHESH DARURI | MI | 6.0 | W 63 | W 58 | L 4 | W 17 | W 16 | W 20 | W 7 | 14598900 / R: 1553 ->1663 |
| 3 | ADITYA BAJAJ | MI | 6.0 | L 8 | W 61 | W 25 | W 21 | W 11 | W 13 | W 12 | 14959604 / R: 1384 ->1640 |
| 4 | PATRICK H SCHILLING | MI | 5.5 | W 23 | D 28 | W 2 | W 26 | D 5 | W 19 | D 1 | 12616049 / R: 1716 ->1744 |
| 5 | HANSHI ZUO | MI | 5.5 | W 45 | W 37 | D 12 | D 13 | D 4 | W 14 | W 17 | 14601533 / R: 1655 ->1690 |
| 6 | HANSEN SONG | OH | 5.0 | W 34 | D 29 | L 11 | W 35 | D 10 | W 27 | W 21 | 15055204 / R: 1686 ->1687 |
Trim leading and trailing whitespace in Player names and State
df_chess_subset$State <- str_trim(df_chess_subset$State)
df_chess_subset$Player <- str_trim(df_chess_subset$Player)
Clean up Rating column
df_chess_subset$Rating <- as.character(gsub("^.*R:\\s+(\\d+)(\\D|\\s).*$","\\1",df_chess_subset$Rating))
kable(head(df_chess_subset))
| PlayerNum | Player | State | TotalPoints | R1 | R2 | R3 | R4 | R5 | R6 | R7 | Rating |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | GARY HUA | ON | 6.0 | W 39 | W 21 | W 18 | W 14 | W 7 | D 12 | D 4 | 1794 |
| 2 | DAKSHESH DARURI | MI | 6.0 | W 63 | W 58 | L 4 | W 17 | W 16 | W 20 | W 7 | 1553 |
| 3 | ADITYA BAJAJ | MI | 6.0 | L 8 | W 61 | W 25 | W 21 | W 11 | W 13 | W 12 | 1384 |
| 4 | PATRICK H SCHILLING | MI | 5.5 | W 23 | D 28 | W 2 | W 26 | D 5 | W 19 | D 1 | 1716 |
| 5 | HANSHI ZUO | MI | 5.5 | W 45 | W 37 | D 12 | D 13 | D 4 | W 14 | W 17 | 1655 |
| 6 | HANSEN SONG | OH | 5.0 | W 34 | D 29 | L 11 | W 35 | D 10 | W 27 | W 21 | 1686 |
Create another data frame containing only the Player number and Rating
df_chess_rating <- df_chess_subset[c("PlayerNum","Rating")]
head(df_chess_rating)
## PlayerNum Rating
## 1 1 1794
## 2 2 1553
## 3 3 1384
## 4 4 1716
## 5 5 1655
## 6 6 1686
Replace Player number in R1:R7 to their Ratings, respectively
df_chess_subset[5:11] <- lapply(df_chess_subset[5:11], function(x)
df_chess_rating[as.numeric(str_extract_all(x,"\\d{1,}")),"Rating"])
kable(head(df_chess_subset))
| PlayerNum | Player | State | TotalPoints | R1 | R2 | R3 | R4 | R5 | R6 | R7 | Rating |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | GARY HUA | ON | 6.0 | 1436 | 1563 | 1600 | 1610 | 1649 | 1663 | 1716 | 1794 |
| 2 | DAKSHESH DARURI | MI | 6.0 | 1175 | 917 | 1716 | 1629 | 1604 | 1595 | 1649 | 1553 |
| 3 | ADITYA BAJAJ | MI | 6.0 | 1641 | 955 | 1745 | 1563 | 1712 | 1666 | 1663 | 1384 |
| 4 | PATRICK H SCHILLING | MI | 5.5 | 1363 | 1507 | 1553 | 1579 | 1655 | 1564 | 1794 | 1716 |
| 5 | HANSHI ZUO | MI | 5.5 | 1242 | 980 | 1663 | 1666 | 1716 | 1610 | 1629 | 1655 |
| 6 | HANSEN SONG | OH | 5.0 | 1399 | 1602 | 1712 | 1438 | 1365 | 1552 | 1563 | 1686 |
Now calculate the average and extract columns that are needed
df_chess_avg <- df_chess_subset %>%
group_by(Player) %>%
mutate(Avg = round(mean(as.numeric(c(R1,R2,R3,R4,R5,R6,R7)), na.rm=T))) %>%
select(Player, State, TotalPoints, as.numeric(Rating), Avg)
kable(head(df_chess_avg))
| Player | State | TotalPoints | Rating | Avg |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
Write data frame to .csv file and read it from there as well
# Write .csv
write.csv(df_chess_avg, file = "chess.csv", row.names = FALSE, quote = FALSE)
# Read .csv
kable(head(read.csv(file = "chess.csv", row.names = 1)))
| State | TotalPoints | Rating | Avg | |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |