Fields: player number, round number (1-7), outcome (W, L, or D), opponent's player number, and opponent's pre-tournament chess rating.
library(stringr)
library(knitr)## Warning: package 'knitr' was built under R version 3.2.5
library(ggplot2)
# Load the raw tournament cross-table as a character vector, one element per
# line of the file. readLines() reports that the file's last line has no
# terminating newline:
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
chess <- readLines(con = "tournamentinfo.txt")

# Quick look at what was read (196 lines, the first being a dashed separator).
str(object = chess)
## chr [1:196] "-----------------------------------------------------------------------------------------" ...
# Player names: take every run of 15-31 letter/space characters (each
# optionally followed by a hyphen) from the raw lines, strip the padding, and
# keep matches 2 through 65.
# NOTE(review): the fixed 2:65 window presumably skips one header match and
# assumes exactly 64 players -- confirm against the input file.
# NOTE(review): the variable shares its name with base::names(); it is left
# unchanged because the data frame built later uses it as a column name.
name_hits <- unlist(str_extract_all(chess, "([[:alpha:] ]-?){15,31}"))
names <- str_trim(name_hits)[2:65]
head(names)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
# State / province code for each player: collect every occurrence of the
# literal codes below and trim the surrounding blanks.
# NOTE(review): only ON, OH and MI are recognised, and the " ON" alternative
# has no trailing space unlike the other two -- confirm this is intentional
# before reusing the script on another tournament file.
state_hits <- str_extract_all(chess, " ON| OH | MI ")
states <- str_trim(unlist(state_hits))
str(states)
## chr [1:64] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" ...
# Total points per player: every "digit . digit" token in the raw lines.
# The dot is matched literally (the previous class `[//.]` also accepted a
# slash), and the result is converted to numeric so the scores can be used in
# arithmetic and do not end up as text/factor levels in the summary table.
point_tokens <- unlist(str_extract_all(chess, "[[:digit:]]\\.[[:digit:]]"))
number_points <- as.numeric(point_tokens)
str(number_points)
## num [1:64] 6 6 6 5.5 5.5 5 5 ...
# Pre-tournament rating per player: grab "R: " plus the next four
# digit-or-space characters, drop the "R: " label, and convert what is left to
# integer.
rating_fields <- unlist(str_extract_all(chess, "R: [[:digit:] ]{4}"))
rating_digits <- sub(pattern = 'R: ', replacement = '', x = rating_fields)
prerating <- as.integer(rating_digits)
str(prerating)
## int [1:64] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 ...
# Per-round results: match the points token followed by 44 characters drawn
# from pipes, result letters, digits and spaces; keep characters 10-47 of each
# match (per the output below this starts at the first round's opponent
# number); then turn the column separators into spaces.
round_fields <- unlist(
  str_extract_all(chess, "[:digit:][//.][:digit:][|DLWUXBH[:digit:] ]{44}")
)
round_window <- str_sub(round_fields, start = 10, end = 47)
opponent <- gsub("\\|", " ", round_window)
str(opponent)
## chr [1:64] "39 W 21 W 18 W 14 W 7 D 12 D 4" ...
# Split each player's round string on the result letters so that every piece
# is one opponent's pair number (blank when the round was not played).
list_opponent <- strsplit(opponent, " W | D | U | H | B | X | L")
head(list_opponent)
## [[1]]
## [1] "39" " 21" " 18" " 14" " 7" " 12" " 4"
##
## [[2]]
## [1] "63" " 58" " 4" " 17" " 16" " 20" " 7"
##
## [[3]]
## [1] " 8" " 61" " 25" " 21" " 11" " 13" " 12"
##
## [[4]]
## [1] "23" " 28" " 2" " 26" " 5" " 19" " 1"
##
## [[5]]
## [1] "45" " 37" " 12" " 13" " 4" " 14" " 17"
##
## [[6]]
## [1] "34" " 29" " 11" " 35" " 10" " 27" " 21"

# Opponent pair numbers as a 7 x n_players numeric matrix (one column per
# player, one row per round). vapply() enforces exactly seven entries per
# player and fails loudly otherwise, whereas sapply() would silently return a
# list. Blank pieces become NA.
matrix1 <- vapply(list_opponent, as.numeric, numeric(7))

# Look up each opponent's pre-rating by pair number; NA indices stay NA.
matrix2 <- matrix(prerating[matrix1], nrow = 7)

# Mean opponent pre-rating per player, ignoring unplayed rounds, rounded to
# the nearest integer. Rounding is done numerically: the previous
# format(..., digits = 4) round-trip produced decimals whenever any mean was
# below 1000, and as.integer() then truncated instead of rounding.
average <- as.integer(round(colMeans(matrix2, na.rm = TRUE)))
str(average)
## int [1:64] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 ...
# Assemble the per-player summary table. stringsAsFactors = FALSE keeps the
# text columns as character on every R version; before R 4.0 the default
# silently converted them to factors.
final_outcomes <- data.frame(
  names, states, number_points, prerating, average,
  stringsAsFactors = FALSE
)

# 64 observations of 5 variables: names, states, number_points,
# prerating (int) and average (int).
str(final_outcomes)
kable(head(final_outcomes))

| names | states | number_points | prerating | average |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
# Each player's own pre-rating minus the mean pre-rating of the opponents they
# faced; a positive value means the player was rated above their opposition.
own_rating <- final_outcomes[["prerating"]]
field_rating <- final_outcomes[["average"]]
difference <- own_rating - field_rating
difference
## [1] 189 84 -180 142 154 167 277 173 -112 -189 244 157 168 95
## [15] -264 218 130 120 138 184 93 255 149 -128 382 72 330 -15
## [29] 288 378 234 62 172 24 288 -33 -405 -116 6 -43 155 182
## [43] 176 -128 90 -981 -30 26 5 -240 -345 -560 48 64 -220 -261
## [57] -271 -474 -466 -363 -372 344 -175 -100
# Export the summary table as CSV (header row, no row names). write.csv() is
# used instead of write.table(sep = ",") because it doubles embedded quote
# characters, the standard CSV convention, rather than backslash-escaping
# them; output is unchanged for values that contain no quotes.
write.csv(final_outcomes, file = "chessoutcome.csv", row.names = FALSE)