Load the packages for later use

library(dplyr) # Selecting and filtering
library(magrittr) # Pipes
library(knitr) # kable
library(stringr) # String manipulation

Read file as a string

RawFile <- paste(readLines("tournamentinfo.txt"), collapse = "\n")

# This is a partial view of how the string looks like
paste(readLines("tournamentinfo.txt", n=20), collapse = "\n")
## [1] "-----------------------------------------------------------------------------------------\n Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| \n Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | \n-----------------------------------------------------------------------------------------\n    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|\n   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |\n-----------------------------------------------------------------------------------------\n    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|\n   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |\n-----------------------------------------------------------------------------------------\n    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|\n   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |\n-----------------------------------------------------------------------------------------\n    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|\n   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |\n-----------------------------------------------------------------------------------------\n    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|\n   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |\n-----------------------------------------------------------------------------------------\n    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"

Clean up and create a table for each player per row

# remove unnecessary characters and merge 2 lines of player stats into one
splitRawFile <- gsub("\n\\s+|--+","",RawFile) %>% # remove lines containing #----------#
                gsub("\\|\\s*\n", "\n", .) %>% # merge each player stats into 1 line
                strsplit(.,"\n") # split the file containing one player per line

# read into a table
df_chess <- read.table(textConnection(splitRawFile[[1]]), sep="|", skip = 1)
kable(head(df_chess))
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W
2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663 N:2 B W B W B W B
3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640 N:2 W B W B W B W
4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 N:2 W B W B W B B
5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690 N:2 B W B W B W B
6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687 N:3 W B W B B W B

Get subset of columns that are needed

# Only need first 12 columns of data and give them names
cnames <- c("PlayerNum","Player","State","TotalPoints", "R1","R2","R3","R4","R5","R6","R7","Rating")
df_chess_subset <- df_chess[c(1,2,11,3:10,12)]
colnames(df_chess_subset) <- cnames
kable(head(df_chess_subset))
PlayerNum Player State TotalPoints R1 R2 R3 R4 R5 R6 R7 Rating
1 GARY HUA ON 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4 15445895 / R: 1794 ->1817
2 DAKSHESH DARURI MI 6.0 W 63 W 58 L 4 W 17 W 16 W 20 W 7 14598900 / R: 1553 ->1663
3 ADITYA BAJAJ MI 6.0 L 8 W 61 W 25 W 21 W 11 W 13 W 12 14959604 / R: 1384 ->1640
4 PATRICK H SCHILLING MI 5.5 W 23 D 28 W 2 W 26 D 5 W 19 D 1 12616049 / R: 1716 ->1744
5 HANSHI ZUO MI 5.5 W 45 W 37 D 12 D 13 D 4 W 14 W 17 14601533 / R: 1655 ->1690
6 HANSEN SONG OH 5.0 W 34 D 29 L 11 W 35 D 10 W 27 W 21 15055204 / R: 1686 ->1687

Trim leading and trailing whitespace in Player names and State

df_chess_subset$State <- str_trim(df_chess_subset$State)
df_chess_subset$Player <- str_trim(df_chess_subset$Player)

Clean up Rating column

df_chess_subset$Rating <- as.character(gsub("^.*R:\\s+(\\d+)(\\D|\\s).*$","\\1",df_chess_subset$Rating))
kable(head(df_chess_subset))
PlayerNum Player State TotalPoints R1 R2 R3 R4 R5 R6 R7 Rating
1 GARY HUA ON 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4 1794
2 DAKSHESH DARURI MI 6.0 W 63 W 58 L 4 W 17 W 16 W 20 W 7 1553
3 ADITYA BAJAJ MI 6.0 L 8 W 61 W 25 W 21 W 11 W 13 W 12 1384
4 PATRICK H SCHILLING MI 5.5 W 23 D 28 W 2 W 26 D 5 W 19 D 1 1716
5 HANSHI ZUO MI 5.5 W 45 W 37 D 12 D 13 D 4 W 14 W 17 1655
6 HANSEN SONG OH 5.0 W 34 D 29 L 11 W 35 D 10 W 27 W 21 1686

Create another data frame containing only the Player number and Rating

df_chess_rating <- df_chess_subset[c("PlayerNum","Rating")]
head(df_chess_rating)
##   PlayerNum Rating
## 1         1   1794
## 2         2   1553
## 3         3   1384
## 4         4   1716
## 5         5   1655
## 6         6   1686

Replace Player number in R1:R7 to their Ratings, respectively

df_chess_subset[5:11] <- lapply(df_chess_subset[5:11], function(x) 
                                df_chess_rating[as.numeric(str_extract_all(x,"\\d{1,}")),"Rating"])
kable(head(df_chess_subset))
PlayerNum Player State TotalPoints R1 R2 R3 R4 R5 R6 R7 Rating
1 GARY HUA ON 6.0 1436 1563 1600 1610 1649 1663 1716 1794
2 DAKSHESH DARURI MI 6.0 1175 917 1716 1629 1604 1595 1649 1553
3 ADITYA BAJAJ MI 6.0 1641 955 1745 1563 1712 1666 1663 1384
4 PATRICK H SCHILLING MI 5.5 1363 1507 1553 1579 1655 1564 1794 1716
5 HANSHI ZUO MI 5.5 1242 980 1663 1666 1716 1610 1629 1655
6 HANSEN SONG OH 5.0 1399 1602 1712 1438 1365 1552 1563 1686

Now calculate the average and extract columns that are needed

df_chess_avg <- df_chess_subset %>% 
                group_by(Player) %>% 
                mutate(Avg = round(mean(as.numeric(c(R1,R2,R3,R4,R5,R6,R7)), na.rm=T))) %>%
                select(Player, State, TotalPoints, as.numeric(Rating), Avg)

kable(head(df_chess_avg))
Player State TotalPoints Rating Avg
GARY HUA ON 6.0 1794 1605
DAKSHESH DARURI MI 6.0 1553 1469
ADITYA BAJAJ MI 6.0 1384 1564
PATRICK H SCHILLING MI 5.5 1716 1574
HANSHI ZUO MI 5.5 1655 1501
HANSEN SONG OH 5.0 1686 1519

Write data frame to .csv file and read it from there as well

# Write .csv
write.csv(df_chess_avg, file = "chess.csv", row.names = FALSE, quote = FALSE)

# Read .csv
kable(head(read.csv(file = "chess.csv", row.names = 1)))
State TotalPoints Rating Avg
GARY HUA ON 6.0 1794 1605
DAKSHESH DARURI MI 6.0 1553 1469
ADITYA BAJAJ MI 6.0 1384 1564
PATRICK H SCHILLING MI 5.5 1716 1574
HANSHI ZUO MI 5.5 1655 1501
HANSEN SONG OH 5.0 1686 1519