Chess Tournament Results

In this project, a text file with chess tournament results is given. The information in the file has some structure.

tournament.txt - File structure

tournament.txt - File structure

This RMarkdown doc shows how to generate a .CSV file with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605

1605 was calculated by summing the pre-tournament ratings of his opponents (1436, 1563, 1600, 1610, 1649, 1663, 1716) and dividing by the total number of games played (7), then rounding to the nearest integer.

Load Packages

library(stringr)

Read File

# Read the raw tournament cross-table, one character string per line of the file.
# `warn = FALSE` is spelled out in full: `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
txt <- readLines("tournamentinfo.txt", warn = FALSE)
# The first lines show the layout: a dashed rule, a two-line header, and then
# two lines per player (results row, then state/rating row).
head(txt)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
# The last lines show that every player record is followed by a dashed rule.
tail(txt)
## [1] "   63 | THOMAS JOSEPH HOSMER            |1.0  |L   2|L  48|D  49|L  43|L  45|H    |U    |"
## [2] "   MI | 15057092 / R: 1175   ->1125     |     |W    |B    |W    |B    |B    |     |     |"
## [3] "-----------------------------------------------------------------------------------------"
## [4] "   64 | BEN LI                          |1.0  |L  22|D  30|L  31|D  49|L  46|L  42|L  54|"
## [5] "   MI | 15006561 / R: 1163   ->1112     |     |B    |W    |W    |B    |W    |B    |B    |"
## [6] "-----------------------------------------------------------------------------------------"

Extract the information

# ---- Player names ----
# First isolate the "<pair number> | <NAME> |" cell of each results row, then
# keep only the run of letters, hyphens and spaces and trim the padding.
name_cells <- str_extract_all(txt, "\\d+ \\| [[:alpha:]- ]{2,}\\s+\\|")
name_parts <- str_extract_all(unlist(name_cells), "[[:alpha:]- ]{2,}")
names <- str_trim(unlist(name_parts))
names[27:28]
## [1] "GAURAV GIDWANI"             "SOFIA ADINA STANESCU-BELLU"

# ---- Pre-tournament ratings ----
# Match "R: <digits>" and capture only the digits.
pattern <-"R:\\s+(\\d+)"
pre_rates <- unlist(str_extract_all(txt, pattern))
head(pre_rates)
## [1] "R: 1794" "R: 1553" "R: 1384" "R: 1716" "R: 1655" "R: 1686"
# Replace each match by its first capture group and convert to integer.
pre_rates <- as.integer(gsub(pattern, "\\1", pre_rates))
pre_rates
##  [1] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 1712 1663 1666 1610
## [15] 1220 1604 1629 1600 1564 1595 1563 1555 1363 1229 1745 1579 1552 1507
## [29] 1602 1522 1494 1441 1449 1399 1438 1355  980 1423 1436 1348 1403 1332
## [43] 1283 1199 1242  377 1362 1382 1291 1056 1011  935 1393 1270 1186 1153
## [57] 1092  917  853  967  955 1530 1175 1163

# ---- Player states ----
# A state is a two-letter upper-case code followed by " |".
state_cells <- unlist(str_extract_all(txt, " [[:upper:]]{2} \\|"))
states <- unlist(str_extract_all(state_cells, "[[:upper:]]{2}"))
states
##  [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI"
## [15] "MI" "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI"
## [29] "MI" "ON" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [43] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [57] "MI" "MI" "MI" "MI" "ON" "MI" "MI" "MI"

# ---- Total points and rounds 1-4 ----
# pattern1 matches the whole "points | round 1 | ... | round 7 |" tail of a
# results row. Its nine capture groups are: total points (1), then the result
# letter and opponent number for rounds 1 to 4 (2-9). Rounds 5 to 7 are matched
# but not captured.
pattern1 <-"(\\d\\.\\d)\\s+\\|([[:upper:]])\\s+(\\d{0,2}?)\\|([[:upper:]])\\s+(\\d{0,2}?)\\|([[:upper:]])\\s+(\\d{0,2}?)\\|([[:upper:]])\\s+(\\d{0,2}?)\\|[[:upper:]]\\s+\\d{0,2}?\\|[[:upper:]]\\s+\\d{0,2}?\\|[[:upper:]]\\s+\\d{0,2}?\\|"
scores1 <- unlist(str_extract_all(txt,pattern1))
scores1[1:5]
## [1] "6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [4] "5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [5] "5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"

# Group 1: total points.
pts <- as.numeric(gsub(pattern1, "\\1", scores1))
pts[1:5]
## [1] 6.0 6.0 6.0 5.5 5.5

# Groups 2-3: round 1 result letter and opponent number.
r1_status <- gsub(pattern1, "\\2", scores1)
r1_status[1:5]
## [1] "W" "W" "L" "W" "W"
r1_opponent <- as.integer(gsub(pattern1, "\\3", scores1))
r1_opponent[1:5]
## [1] 39 63  8 23 45

# Groups 4-5: round 2. A round without an opponent number yields NA.
r2_status <- gsub(pattern1, "\\4", scores1)
r2_opponent <- as.integer(gsub(pattern1, "\\5", scores1))
tail(r2_opponent)
## [1] NA 34  3 NA 48 30

# Groups 6-7: round 3.
r3_status <- gsub(pattern1, "\\6", scores1)
r3_opponent <- as.integer(gsub(pattern1, "\\7", scores1))

# Groups 8-9: round 4.
r4_status <- gsub(pattern1, "\\8", scores1)
r4_opponent <- as.integer(gsub(pattern1, "\\9", scores1))

Because we can only use backreferences \1 through \9 in the replacement text, a new pattern is used to include backreferences for rounds 5 to 7 in each string.

# pattern2 matches the same "points | round 1 | ... | round 7 |" text, but its
# capture groups sit on the last three rounds: result letter and opponent
# number for round 5 (groups 1-2), round 6 (3-4) and round 7 (5-6).
pattern2 <-"\\d\\.\\d\\s+\\|[[:upper:]]\\s+\\d{0,2}?\\|[[:upper:]]\\s+\\d{0,2}?\\|[[:upper:]]\\s+\\d{0,2}?\\|[[:upper:]]\\s+\\d{0,2}?\\|([[:upper:]])\\s+(\\d{0,2}?)\\|([[:upper:]])\\s+(\\d{0,2}?)\\|([[:upper:]])\\s+(\\d{0,2}?)\\|"
scores2 <- unlist(str_extract_all(txt,pattern2))

# Round 5.
r5_status <- gsub(pattern2, "\\1", scores2)
r5_opponent <- as.integer(gsub(pattern2, "\\2", scores2))
head(r5_status)
## [1] "W" "W" "W" "D" "D" "D"

# Round 6.
r6_status <- gsub(pattern2, "\\3", scores2)
r6_opponent <- as.integer(gsub(pattern2, "\\4", scores2))

# Round 7. A round without an opponent number yields NA.
r7_status <- gsub(pattern2, "\\5", scores2)
r7_opponent <- as.integer(gsub(pattern2, "\\6", scores2))
tail(r7_opponent)
## [1] 44 NA 37 NA NA 54

Data Frame

# Assemble one row per player: identity, total points, the opponent number
# faced in each of the seven rounds (NA when no opponent), and the pre-rating.
# "Pre rating" is not a syntactic name, so data.frame() stores the column as
# `Pre.rating` (see the printed header below).
# `stringsAsFactors = FALSE` is written in full because `F` is a reassignable
# variable rather than a reserved word.
df <- data.frame(
  "Name" = names,
  "State" = states,
  "Total.Pts" = pts,
  "R1.Player" = r1_opponent,
  "R2.Player" = r2_opponent,
  "R3.Player" = r3_opponent,
  "R4.Player" = r4_opponent,
  "R5.Player" = r5_opponent,
  "R6.Player" = r6_opponent,
  "R7.Player" = r7_opponent,
  "Pre rating" = pre_rates,
  stringsAsFactors = FALSE
)
head(df)
##                  Name State Total.Pts R1.Player R2.Player R3.Player
## 1            GARY HUA    ON       6.0        39        21        18
## 2     DAKSHESH DARURI    MI       6.0        63        58         4
## 3        ADITYA BAJAJ    MI       6.0         8        61        25
## 4 PATRICK H SCHILLING    MI       5.5        23        28         2
## 5          HANSHI ZUO    MI       5.5        45        37        12
## 6         HANSEN SONG    OH       5.0        34        29        11
##   R4.Player R5.Player R6.Player R7.Player Pre.rating
## 1        14         7        12         4       1794
## 2        17        16        20         7       1553
## 3        21        11        13        12       1384
## 4        26         5        19         1       1716
## 5        13         4        14        17       1655
## 6        35        10        27        21       1686
# Confirm the column types: text for name/state, numbers everywhere else.
sapply(df, class)
##        Name       State   Total.Pts   R1.Player   R2.Player   R3.Player 
## "character" "character"   "numeric"   "integer"   "integer"   "integer" 
##   R4.Player   R5.Player   R6.Player   R7.Player  Pre.rating 
##   "integer"   "integer"   "integer"   "integer"   "integer"
# One row per player, eleven columns.
dim(df)
## [1] 64 11

Average Pre-Ratings of Opponent Players

The following function collects the non-NA opponent numbers for each player and computes the average of those opponents' pre-ratings.

#' Average pre-rating of each player's opponents
#'
#' For every row of `df`, looks up the pre-ratings of the opponents listed in
#' the opponent columns (skipping `NA` entries, i.e. rounds without an
#' opponent) and returns their mean, rounded to the nearest whole number.
#' Opponent numbers are used as row positions into the rating column, so row
#' `k` of `df` must describe player number `k`.
#'
#' @param df A data frame with one row per player.
#' @param opponent_cols Positions (or names) of the columns holding opponent
#'   numbers. Defaults to columns 4 to 10, the layout built in this report.
#' @param rating_col Position (or name) of the pre-rating column. Defaults to
#'   column 11.
#' @return A numeric vector with one value per row of `df`. A player with no
#'   recorded opponents gets `NaN` (the mean of an empty set).
avrg <- function(df, opponent_cols = 4:10, rating_col = 11) {
  stopifnot(is.data.frame(df))

  ratings <- df[[rating_col]]
  # A matrix gives cheap row access; drop = FALSE keeps a single opponent
  # column from collapsing into a plain vector.
  opponents <- unname(as.matrix(df[, opponent_cols, drop = FALSE]))

  # Iterate over the rows actually present instead of a fixed 1:64, and let
  # vapply() allocate the result once rather than growing it with c().
  vapply(
    seq_len(nrow(df)),
    function(i) {
      ids <- opponents[i, ]
      round(mean(ratings[ids[!is.na(ids)]]), 0)
    },
    numeric(1)
  )
}

Now applying the function above to the data frame:

# Compute, for every player, the rounded mean pre-rating of the opponents faced.
averages <- avrg(df)
# Wrap the result in a one-column data frame so it can be bound as "Average".
ExtraCol <-data.frame("Average"=averages)
# Append the new column to the right of the existing table.
df <-cbind(df,ExtraCol)
# Show rows 1-4 plus row 12; row 12 has an NA opponent in round 5, which is
# left out of that player's average.
df[c(1:4,12),]
##                   Name State Total.Pts R1.Player R2.Player R3.Player
## 1             GARY HUA    ON       6.0        39        21        18
## 2      DAKSHESH DARURI    MI       6.0        63        58         4
## 3         ADITYA BAJAJ    MI       6.0         8        61        25
## 4  PATRICK H SCHILLING    MI       5.5        23        28         2
## 12      KENNETH J TACK    MI       4.5        42        33         5
##    R4.Player R5.Player R6.Player R7.Player Pre.rating Average
## 1         14         7        12         4       1794    1605
## 2         17        16        20         7       1553    1469
## 3         21        11        13        12       1384    1564
## 4         26         5        19         1       1716    1574
## 12        38        NA         1         3       1663    1506

Export the Data Frame to a CSV File

# Export the five fields this report is meant to deliver: name, state, total
# points, pre-rating, and average opponent pre-rating. The per-round opponent
# columns are working data and are left out. row.names = FALSE stops
# write.csv() from adding an unnamed leading column of row numbers.
write.csv(
  df[, c("Name", "State", "Total.Pts", "Pre.rating", "Average")],
  "ChessTournment.csv",
  row.names = FALSE
)