library(stringr)
## Warning: package 'stringr' was built under R version 3.5.1
chess <- NULL
opponents.rating.matrix <- NULL
Loading the Chess Tournament Cross Table from the Raw Github URL. Removing the dash lines, with solution by redmode. See: https://stackoverflow.com/questions/21114598/importing-a-text-file-into-r
url <- "https://raw.githubusercontent.com/ZacharyHerold/chinafundnews/master/tournamentinfo.txt"
lines <- readLines(url)
## Warning in readLines(url): incomplete final line found on 'https://
## raw.githubusercontent.com/ZacharyHerold/chinafundnews/master/
## tournamentinfo.txt'
lines <- sapply(lines, gsub, pattern="[-]{2,}|[|]", replacement="")
lines <- c(lines[2], lines[lines!="" & lines!=lines[2]])
head(lines)
## Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## " Pair Player Name TotalRoundRoundRoundRoundRoundRoundRound "
## Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## " Num USCF ID / Rtg (Pre->Post) Pts 1 2 3 4 5 6 7 "
## 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## " 1 GARY HUA 6.0 W 39W 21W 18W 14W 7D 12D 4"
## ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## " ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W "
## 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## " 2 DAKSHESH DARURI 6.0 W 63W 58L 4W 17W 16W 20W 7"
## MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## " MI 14598900 / R: 1553 ->1663 N:2 B W B W B W B "
Extracting out the names with alphabetic characters, allowing for multiple names.
name <- unlist(str_extract_all(lines, "[[:alpha:]]{2,}([[:blank:]][[:alpha:]]{1,}){1,}"))
name <- name[-c(1,2)]
name
## [1] "GARY HUA" "DAKSHESH DARURI"
## [3] "ADITYA BAJAJ" "PATRICK H SCHILLING"
## [5] "HANSHI ZUO" "HANSEN SONG"
## [7] "GARY DEE SWATHELL" "EZEKIEL HOUGHTON"
## [9] "STEFANO LEE" "ANVIT RAO"
## [11] "CAMERON WILLIAM MC LEMAN" "KENNETH J TACK"
## [13] "TORRANCE HENRY JR" "BRADLEY SHAW"
## [15] "ZACHARY JAMES HOUGHTON" "MIKE NIKITIN"
## [17] "RONALD GRZEGORCZYK" "DAVID SUNDEEN"
## [19] "DIPANKAR ROY" "JASON ZHENG"
## [21] "DINH DANG BUI" "EUGENE L MCCLURE"
## [23] "ALAN BUI" "MICHAEL R ALDRICH"
## [25] "LOREN SCHWIEBERT" "MAX ZHU"
## [27] "GAURAV GIDWANI" "SOFIA ADINA STANESCU"
## [29] "CHIEDOZIE OKORIE" "GEORGE AVERY JONES"
## [31] "RISHI SHETTY" "JOSHUA PHILIP MATHEWS"
## [33] "JADE GE" "MICHAEL JEFFERY THOMAS"
## [35] "JOSHUA DAVID LEE" "SIDDHARTH JHA"
## [37] "AMIYATOSH PWNANANDAM" "BRIAN LIU"
## [39] "JOEL R HENDON" "FOREST ZHANG"
## [41] "KYLE WILLIAM MURPHY" "JARED GE"
## [43] "ROBERT GLEN VASEY" "JUSTIN D SCHILLING"
## [45] "DEREK YAN" "JACOB ALEXANDER LAVALLEY"
## [47] "ERIC WRIGHT" "DANIEL KHAIN"
## [49] "MICHAEL J MARTIN" "SHIVAM JHA"
## [51] "TEJAS AYYAGARI" "ETHAN GUO"
## [53] "JOSE C YBARRA" "LARRY HODGE"
## [55] "ALEX KONG" "MARISA RICCI"
## [57] "MICHAEL LU" "VIRAJ MOHILE"
## [59] "SEAN M MC CORMICK" "JULIA SHEN"
## [61] "JEZZEL FARKAS" "ASHWIN BALAJI"
## [63] "THOMAS JOSEPH HOSMER" "BEN LI"
Trimming the lines and taking the first two characters, then subsetting out the strings of two alphabetic characters, indicating the state abbreviations. As with name, removing the first two rows.
trim_lines <- str_trim(lines, side = "both")
state_collect <- str_sub(trim_lines, start = 1, end = 2)
state <- unlist(str_extract_all(state_collect, "[[:alpha:]]{2}"))
state <- state[-c(1,2)]
state
## [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI"
## [15] "MI" "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI"
## [29] "MI" "ON" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [43] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [57] "MI" "MI" "MI" "MI" "ON" "MI" "MI" "MI"
Checking the length of the state vector.
length(state)
## [1] 64
Separating out the digits divided by a decimal point.
points <- unlist(str_extract_all(lines, "[[:digit:]]\\.[[:digit:]]"))
length(points)
## [1] 64
The “R:” characters indicate the player’s ratings.
pre_ratings <- unlist(str_extract_all(lines, "R:( ){1,2}[[:digit:]]{3,4}"))
pre_ratings <- unlist(str_extract_all(pre_ratings, "[[:digit:]]{3,4}"))
pre_ratings
## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175" "1163"
length(pre_ratings)
## [1] 64
COnstructing the chess dataframe.
chess <- data.frame(name, ST = state, points, pre_ratings, stringsAsFactors = FALSE)
head(chess)
## name ST points pre_ratings
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
Retrieving the contest outcome and opponent., looking for Wins (“W”), Losses (“L”), or Draws (“D”).
results <- unlist(str_extract_all(lines, "[WLD][[:blank:]]{2,3}[[:digit:]]{0,2}"))
results
## [1] "W 39" "W 21" "W 18" "W 14" "W 7" "D 12" "D 4" "W "
## [9] "W " "W " "W " "W 63" "W 58" "L 4" "W 17" "W 16"
## [17] "W 20" "W 7" "W " "W " "W " "L 8" "W 61" "W 25"
## [25] "W 21" "W 11" "W 13" "W 12" "W " "W " "W " "W "
## [33] "W 23" "D 28" "W 2" "W 26" "D 5" "W 19" "D 1" "W "
## [41] "W " "W " "W 45" "W 37" "D 12" "D 13" "D 4" "W 14"
## [49] "W 17" "W " "W " "W " "W 34" "D 29" "L 11" "W 35"
## [57] "D 10" "W 27" "W 21" "W " "W " "W " "L " "W 57"
## [65] "W 46" "W 13" "W 11" "L 1" "W 9" "L 2" "W " "W "
## [73] "W " "W " "W 3" "W 32" "L 14" "L 9" "W 47" "W 28"
## [81] "W 19" "W " "W " "W " "W " "W 25" "L 18" "W 59"
## [89] "W 8" "W 26" "L 7" "W 20" "W " "W " "W " "D 16"
## [97] "L 19" "W 55" "W 31" "D 6" "W 25" "W 18" "W " "W "
## [105] "W " "W " "D 38" "W 56" "W 6" "L 7" "L 3" "W 34"
## [113] "W 26" "W " "W " "W " "W 42" "W 33" "D 5" "W 38"
## [121] "D 1" "L 3" "W " "W " "W " "W 36" "W 27" "L 7"
## [129] "D 5" "W 33" "L 3" "W 32" "W " "W " "W " "W "
## [137] "W 54" "W 44" "W 8" "L 1" "D 27" "L 5" "W 31" "W "
## [145] "W " "W " "W " "D 19" "L 16" "W 30" "L 22" "W 54"
## [153] "W 33" "W 38" "W " "W " "W " "D 10" "W 15" "W 39"
## [161] "L 2" "W 36" "W " "W " "W 48" "W 41" "L 26" "L 2"
## [169] "W 23" "W 22" "L 5" "W " "W " "W " "W " "W 47"
## [177] "W 9" "L 1" "W 32" "L 19" "W 38" "L 10" "W " "W "
## [185] "W " "D 15" "W 10" "W 52" "D 28" "W 18" "L 4" "L 8"
## [193] "W " "W " "W " "W " "L 40" "W 49" "W 23" "W 41"
## [201] "W 28" "L 2" "L 9" "W " "W " "W " "W " "W 43"
## [209] "L 1" "W 47" "L 3" "W 40" "W 39" "L 6" "W " "W "
## [217] "W " "W " "W 64" "D 52" "L 28" "W 15" "L 17" "W 40"
## [225] "W " "W " "W " "L 4" "W 43" "L 20" "W 58" "L 17"
## [233] "W 37" "W 46" "W " "W " "W " "L 28" "L 47" "W 43"
## [241] "L 25" "W 60" "W 44" "W 39" "W " "W " "W " "L 9"
## [249] "W 53" "L 3" "W 24" "D 34" "L 10" "W 47" "W " "W "
## [257] "W " "W 49" "W 40" "W 17" "L 4" "L 9" "D 32" "L 11"
## [265] "W " "W " "W " "W " "W 51" "L 13" "W 46" "W 37"
## [273] "D 14" "L 6" "W " "W " "W " "W 24" "D 4" "W 22"
## [281] "D 19" "L 20" "L 8" "D 36" "W " "W " "W " "W "
## [289] "W 50" "D 6" "L 38" "L 34" "W 52" "W 48" "W " "W "
## [297] "W " "L 52" "D 64" "L 15" "W 55" "L 31" "W 61" "W 50"
## [305] "W " "W " "W " "L 58" "D 55" "W 64" "L 10" "W 30"
## [313] "W 50" "L 14" "W " "W " "W " "W 61" "L 8" "W 44"
## [321] "L 18" "W 51" "D 26" "L 13" "W " "W " "W " "W "
## [329] "W 60" "L 12" "W 50" "D 36" "L 13" "L 15" "W 51" "W "
## [337] "W " "W " "L 6" "W 60" "L 37" "W 29" "D 25" "L 11"
## [345] "W 52" "W " "W " "W " "L 46" "L 38" "W 56" "L 6"
## [353] "W 57" "D 52" "W 48" "W " "W " "W " "W " "L 13"
## [361] "W 57" "W 51" "D 33" "L 16" "D 28" "W " "W " "W "
## [369] "L 5" "W 34" "L 27" "L 23" "W 61" "W " "W " "W "
## [377] "D 11" "W 35" "W 29" "L 12" "L 18" "L 15" "W " "W "
## [385] "W " "L 1" "W 54" "W 40" "L 16" "W 44" "L 21" "L 24"
## [393] "W " "W " "W " "W " "W 20" "L 26" "L 39" "W 59"
## [401] "L 21" "W 56" "L 22" "W " "W " "W " "W " "W 59"
## [409] "L 17" "W 58" "L 20" "W " "W " "L 12" "L 50" "L 57"
## [417] "D 60" "D 61" "W 64" "W 56" "W " "W " "W " "L 21"
## [425] "L 23" "L 24" "W 63" "W 59" "L 46" "W 55" "W " "W "
## [433] "W " "W " "L 14" "L 32" "W 53" "L 39" "L 24" "W 59"
## [441] "W " "W " "W " "L 5" "L 51" "D 60" "L 56" "W 63"
## [449] "D 55" "W 58" "W " "W " "W " "W " "W 35" "L 7"
## [457] "L 27" "L 50" "W 64" "W 43" "L 23" "W " "W " "W "
## [465] "W " "L 18" "W 24" "L 21" "W 61" "L 8" "D 51" "L 25"
## [473] "W " "W " "W " "W " "L 17" "W 63" "D 52" "L 29"
## [481] "L 35" "W " "W " "L 26" "L 20" "D 63" "D 64" "W 58"
## [489] "W " "W " "W " "L 29" "W 42" "L 33" "W 46" "L 31"
## [497] "L 30" "W " "W " "W " "L 27" "W 45" "L 36" "W 57"
## [505] "L 32" "D 47" "L 33" "W " "W " "W " "W " "W 30"
## [513] "D 22" "L 19" "D 48" "L 29" "D 35" "L 34" "W " "W "
## [521] "W " "L 25" "L 44" "W 57" "W " "W " "L 14" "L 39"
## [529] "L 61" "L 15" "L 59" "W 64" "W " "W " "W " "L 62"
## [537] "D 31" "L 10" "L 30" "D 45" "L 43" "W " "W " "W "
## [545] "L 11" "L 35" "W 45" "L 40" "L 42" "W " "W " "W "
## [553] "L 7" "L 36" "W 42" "L 51" "L 35" "L 53" "W " "W "
## [561] "W " "W 31" "L 2" "L 41" "L 23" "L 49" "L 45" "W "
## [569] "W " "W " "L 41" "L 9" "L 40" "L 43" "W 54" "L 44"
## [577] "W " "W " "W " "L 33" "L 34" "D 45" "D 42" "L 24"
## [585] "W " "W " "L 32" "L 3" "W 54" "L 47" "D 42" "L 30"
## [593] "L 37" "W " "W " "W " "W 55" "L 2" "L 48" "D 49"
## [601] "L 43" "L 45" "W " "W " "L 22" "D 30" "L 31" "D 49"
## [609] "L 46" "L 42" "L 54" "W " "W " "W "
With 64 players and 7 rounds, there should be 448 total scores. Checking the data, should remove the “W” followed by blanks with no opponent number.
length(results)
## [1] 614
64 * 7
## [1] 448
results.raw <- subset(results, results != "W ")
length(results.raw)
## [1] 409
Just one short. Finding the abberation. The one “B” in line 75.
lines[75:77]
## 37 | AMIYATOSH PWNANANDAM |3.5 |B |L 5|W 34|L 27|H |L 23|W 61|
## " 37 AMIYATOSH PWNANANDAM 3.5 B L 5W 34L 27H L 23W 61"
## MI | 15489571 / R: 980P12->1077P17 | | |B |W |W | |B |W |
## " MI 15489571 / R: 980P12->1077P17 B W W B W "
## 38 | BRIAN LIU |3.0 |D 11|W 35|W 29|L 12|H |L 18|L 15|
## " 38 BRIAN LIU 3.0 D 11W 35W 29L 12H L 18L 15"
results <- unlist(str_extract_all(lines, "[WLDUH][[:blank:]]{2,3}[[:digit:]]{0,2}"))
results.raw <- subset(results, results != "W ")
results.raw
## [1] "W 39" "W 21" "W 18" "W 14" "W 7" "D 12" "D 4" "W 63"
## [9] "W 58" "L 4" "W 17" "W 16" "W 20" "W 7" "L 8" "W 61"
## [17] "W 25" "W 21" "W 11" "W 13" "W 12" "W 23" "D 28" "W 2"
## [25] "W 26" "D 5" "W 19" "D 1" "W 45" "W 37" "D 12" "D 13"
## [33] "D 4" "W 14" "W 17" "W 34" "D 29" "L 11" "W 35" "D 10"
## [41] "W 27" "W 21" "H 15" "L " "W 57" "W 46" "W 13" "W 11"
## [49] "L 1" "W 9" "L 2" "W 3" "W 32" "L 14" "L 9" "W 47"
## [57] "W 28" "W 19" "W 25" "L 18" "W 59" "W 8" "W 26" "L 7"
## [65] "W 20" "D 16" "L 19" "W 55" "W 31" "D 6" "W 25" "W 18"
## [73] "D 38" "W 56" "W 6" "L 7" "L 3" "W 34" "W 26" "W 42"
## [81] "W 33" "D 5" "W 38" "H " "D 1" "L 3" "W 36" "W 27"
## [89] "L 7" "D 5" "W 33" "L 3" "W 32" "W 54" "W 44" "W 8"
## [97] "L 1" "D 27" "L 5" "W 31" "D 19" "L 16" "W 30" "L 22"
## [105] "W 54" "W 33" "W 38" "D 10" "W 15" "H " "W 39" "L 2"
## [113] "W 36" "U " "W 48" "W 41" "L 26" "L 2" "W 23" "W 22"
## [121] "L 5" "W 47" "W 9" "L 1" "W 32" "L 19" "W 38" "L 10"
## [129] "D 15" "W 10" "W 52" "D 28" "W 18" "L 4" "L 8" "L 40"
## [137] "W 49" "W 23" "W 41" "W 28" "L 2" "L 9" "W 43" "L 1"
## [145] "W 47" "L 3" "W 40" "W 39" "L 6" "W 64" "D 52" "L 28"
## [153] "W 15" "H " "L 17" "W 40" "L 4" "W 43" "L 20" "W 58"
## [161] "L 17" "W 37" "W 46" "H " "L 28" "L 47" "W 43" "L 25"
## [169] "W 60" "W 44" "W 39" "L 9" "W 53" "L 3" "W 24" "D 34"
## [177] "L 10" "W 47" "U " "W 49" "W 40" "W 17" "L 4" "L 9"
## [185] "D 32" "L 11" "W 51" "L 13" "W 46" "W 37" "D 14" "L 6"
## [193] "U " "U " "W 24" "D 4" "W 22" "D 19" "L 20" "L 8"
## [201] "D 36" "W 50" "D 6" "L 38" "L 34" "W 52" "W 48" "U "
## [209] "L 52" "D 64" "L 15" "W 55" "L 31" "W 61" "W 50" "L 58"
## [217] "D 55" "W 64" "L 10" "W 30" "W 50" "L 14" "W 61" "L 8"
## [225] "W 44" "L 18" "W 51" "D 26" "L 13" "W 60" "L 12" "W 50"
## [233] "D 36" "L 13" "L 15" "W 51" "L 6" "W 60" "L 37" "W 29"
## [241] "D 25" "L 11" "W 52" "L 46" "L 38" "W 56" "L 6" "W 57"
## [249] "D 52" "W 48" "L 13" "W 57" "W 51" "D 33" "H " "L 16"
## [257] "D 28" "L 5" "W 34" "L 27" "H " "L 23" "W 61" "U "
## [265] "D 11" "W 35" "W 29" "L 12" "H " "L 18" "L 15" "L 1"
## [273] "W 54" "W 40" "L 16" "W 44" "L 21" "L 24" "W 20" "L 26"
## [281] "L 39" "W 59" "L 21" "W 56" "L 22" "W 59" "L 17" "W 58"
## [289] "L 20" "U " "U " "L 12" "L 50" "L 57" "D 60" "D 61"
## [297] "W 64" "W 56" "L 21" "L 23" "L 24" "W 63" "W 59" "L 46"
## [305] "W 55" "L 14" "L 32" "W 53" "L 39" "L 24" "W 59" "L 5"
## [313] "L 51" "D 60" "L 56" "W 63" "D 55" "W 58" "W 35" "L 7"
## [321] "L 27" "L 50" "W 64" "W 43" "L 23" "L 18" "W 24" "L 21"
## [329] "W 61" "L 8" "D 51" "L 25" "L 17" "W 63" "H " "D 52"
## [337] "H " "L 29" "L 35" "L 26" "L 20" "D 63" "D 64" "W 58"
## [345] "H " "U " "L 29" "W 42" "L 33" "W 46" "H " "L 31"
## [353] "L 30" "L 27" "W 45" "L 36" "W 57" "L 32" "D 47" "L 33"
## [361] "W 30" "D 22" "L 19" "D 48" "L 29" "D 35" "L 34" "H "
## [369] "L 25" "H " "L 44" "U " "W 57" "U " "L 14" "L 39"
## [377] "L 61" "L 15" "L 59" "W 64" "L 62" "D 31" "L 10" "L 30"
## [385] "D 45" "L 43" "H " "L 11" "L 35" "W 45" "H " "L 40"
## [393] "L 42" "U " "L 7" "L 36" "W 42" "L 51" "L 35" "L 53"
## [401] "W 31" "L 2" "L 41" "L 23" "L 49" "L 45" "L 41" "L 9"
## [409] "L 40" "L 43" "W 54" "L 44" "L 33" "L 34" "D 45" "D 42"
## [417] "L 24" "H " "U " "L 32" "L 3" "W 54" "L 47" "D 42"
## [425] "L 30" "L 37" "W 55" "U " "U " "U " "U " "U "
## [433] "U " "L 2" "L 48" "D 49" "L 43" "L 45" "H " "U "
## [441] "L 22" "D 30" "L 31" "D 49" "L 46" "L 42" "L 54"
length(results.raw)
## [1] 447
Manually discovering that the extra “B” is in position 408, then inserting it. Would be nice to automate this process somehow.
results.raw <- c(results.raw[1:407],"B", results.raw[408:length(results.raw)])
length(results.raw)
## [1] 448
From the results vector, removing the result so that only the opponent number remains.
opponents <- NULL
opponents <- ifelse(str_detect(results.raw, "[[:digit:]]{1,2}"), unlist(str_extract_all(results.raw, "[[:digit:]]{1,2}")), NA)
opponents
## [1] "39" "21" "18" "14" "7" "12" "4" "63" "58" "4" "17" "16" "20" "7"
## [15] "8" "61" "25" "21" "11" "13" "12" "23" "28" "2" "26" "5" "19" "1"
## [29] "45" "37" "12" "13" "4" "14" "17" "34" "29" "11" "35" "10" "27" "21"
## [43] "15" NA "46" "13" "11" "1" "9" "2" "3" "32" "14" "9" "47" "28"
## [57] "19" "25" "18" "59" "8" "26" "7" "20" "16" "19" "55" "31" "6" "25"
## [71] "18" "38" "56" "6" "7" "3" "34" "26" "42" "33" "5" "38" "1" NA
## [85] "36" "27" "7" "5" "33" "3" "32" "54" "44" "8" "1" "27" "5" "31"
## [99] "19" "16" "30" "22" "54" "33" "38" "10" "15" "39" "2" NA "48" "41"
## [113] "26" NA "23" "22" "5" "47" "9" "1" "32" "19" "38" "10" "15" "10"
## [127] "52" "28" "18" "4" "8" "40" "49" "23" "41" "28" "2" "9" "43" "1"
## [141] "47" "3" "40" "39" "6" "64" "52" "28" "15" "17" "40" "4" "43" NA
## [155] "58" "17" "37" "46" "28" "47" "43" "25" "60" NA "39" "9" "53" "3"
## [169] "24" "34" "10" "47" "49" "40" "17" "4" "9" "32" NA "51" "13" "46"
## [183] "37" "14" "6" "24" "4" "22" "19" "20" "8" "36" NA NA "38" "34"
## [197] "52" "48" "52" "64" "15" "55" "31" "61" "50" "58" "55" NA "10" "30"
## [211] "50" "14" "61" "8" "44" "18" "51" "26" "13" "60" "12" "50" "36" "13"
## [225] "15" "51" "6" "60" "37" "29" "25" "11" "52" "46" "38" "56" "6" "57"
## [239] "52" "48" "13" "57" "51" "33" "16" "28" "5" "34" "27" "23" "61" "11"
## [253] "35" "29" NA "18" "15" "1" "54" "40" NA "44" "21" NA "20" "26"
## [267] "39" "59" NA "56" "22" "59" "17" "58" "20" "12" "50" "57" "60" "61"
## [281] "64" "56" "21" "23" "24" "63" "59" "46" "55" NA NA "53" "39" "24"
## [295] "59" "5" "51" "60" "56" "63" "55" "58" "35" "7" "27" "50" "64" "43"
## [309] "23" "18" "24" "21" "61" "8" "51" "25" "17" "63" "52" "29" "35" "26"
## [323] "20" "63" "64" "58" "29" "42" "33" "46" "31" "30" "27" "45" NA "57"
## [337] NA "47" "33" "30" "22" "19" "48" "29" NA NA "25" "44" "57" "14"
## [351] NA "61" "15" "59" "64" "62" "31" "10" "30" "45" "43" "11" "35" "45"
## [365] "40" "42" "7" NA "42" NA "35" NA "31" NA "41" "23" "49" "45"
## [379] "41" "9" "40" "43" "54" "44" "33" "34" NA "42" "24" "32" NA "54"
## [393] "47" NA "30" "37" "55" "2" "48" "49" "43" "45" "22" "30" "31" "49"
## [407] "46" NA "54" "39" "21" "18" "14" "7" "12" "4" "63" "58" NA NA
## [421] "16" "20" "7" "8" "61" "25" "21" "11" NA NA NA NA NA NA
## [435] "5" "19" "1" "45" "37" NA NA "4" "14" "17" "34" "29" "11" "35"
Checking the class of the variable opponents. Finding it is a character, converting that to a numeric.
class(opponents)
## [1] "character"
opponents <- as.numeric(opponents)
class(opponents)
## [1] "numeric"
Creating an opponents matrix, with 64 rows for the players and 7 columns for the rounds.
opponents.mat <- matrix(opponents, nrow = 64, ncol = 7, byrow = TRUE)
opponents.mat
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 39 21 18 14 7 12 4
## [2,] 63 58 4 17 16 20 7
## [3,] 8 61 25 21 11 13 12
## [4,] 23 28 2 26 5 19 1
## [5,] 45 37 12 13 4 14 17
## [6,] 34 29 11 35 10 27 21
## [7,] 15 NA 46 13 11 1 9
## [8,] 2 3 32 14 9 47 28
## [9,] 19 25 18 59 8 26 7
## [10,] 20 16 19 55 31 6 25
## [11,] 18 38 56 6 7 3 34
## [12,] 26 42 33 5 38 1 NA
## [13,] 36 27 7 5 33 3 32
## [14,] 54 44 8 1 27 5 31
## [15,] 19 16 30 22 54 33 38
## [16,] 10 15 39 2 NA 48 41
## [17,] 26 NA 23 22 5 47 9
## [18,] 1 32 19 38 10 15 10
## [19,] 52 28 18 4 8 40 49
## [20,] 23 41 28 2 9 43 1
## [21,] 47 3 40 39 6 64 52
## [22,] 28 15 17 40 4 43 NA
## [23,] 58 17 37 46 28 47 43
## [24,] 25 60 NA 39 9 53 3
## [25,] 24 34 10 47 49 40 17
## [26,] 4 9 32 NA 51 13 46
## [27,] 37 14 6 24 4 22 19
## [28,] 20 8 36 NA NA 38 34
## [29,] 52 48 52 64 15 55 31
## [30,] 61 50 58 55 NA 10 30
## [31,] 50 14 61 8 44 18 51
## [32,] 26 13 60 12 50 36 13
## [33,] 15 51 6 60 37 29 25
## [34,] 11 52 46 38 56 6 57
## [35,] 52 48 13 57 51 33 16
## [36,] 28 5 34 27 23 61 11
## [37,] 35 29 NA 18 15 1 54
## [38,] 40 NA 44 21 NA 20 26
## [39,] 39 59 NA 56 22 59 17
## [40,] 58 20 12 50 57 60 61
## [41,] 64 56 21 23 24 63 59
## [42,] 46 55 NA NA 53 39 24
## [43,] 59 5 51 60 56 63 55
## [44,] 58 35 7 27 50 64 43
## [45,] 23 18 24 21 61 8 51
## [46,] 25 17 63 52 29 35 26
## [47,] 20 63 64 58 29 42 33
## [48,] 46 31 30 27 45 NA 57
## [49,] NA 47 33 30 22 19 48
## [50,] 29 NA NA 25 44 57 14
## [51,] NA 61 15 59 64 62 31
## [52,] 10 30 45 43 11 35 45
## [53,] 40 42 7 NA 42 NA 35
## [54,] NA 31 NA 41 23 49 45
## [55,] 41 9 40 43 54 44 33
## [56,] 34 NA 42 24 32 NA 54
## [57,] 47 NA 30 37 55 2 48
## [58,] 49 43 45 22 30 31 49
## [59,] 46 NA 54 39 21 18 14
## [60,] 7 12 4 63 58 NA NA
## [61,] 16 20 7 8 61 25 21
## [62,] 11 NA NA NA NA NA NA
## [63,] 5 19 1 45 37 NA NA
## [64,] 4 14 17 34 29 11 35
Replacing the player ID with their ratings, again ensuring it is a numeric value. The matrix is dissolved.
opponents.rating <- as.numeric(pre_ratings[opponents])
opponents.rating
## [1] 1436 1563 1600 1610 1649 1663 1716 1175 917 1716 1629 1604 1595 1649
## [15] 1641 955 1745 1563 1712 1666 1663 1363 1507 1553 1579 1655 1564 1794
## [29] 1242 980 1663 1666 1716 1610 1629 1399 1602 1712 1438 1365 1552 1563
## [43] 1220 NA 377 1666 1712 1794 1411 1553 1384 1441 1610 1411 1362 1507
## [57] 1564 1745 1600 853 1641 1579 1649 1595 1604 1564 1186 1494 1686 1745
## [71] 1600 1423 1153 1686 1649 1384 1399 1579 1332 1449 1655 1423 1794 NA
## [85] 1355 1552 1649 1655 1449 1384 1441 1270 1199 1641 1794 1552 1655 1494
## [99] 1564 1604 1522 1555 1270 1449 1423 1365 1220 1436 1553 NA 1382 1403
## [113] 1579 NA 1363 1555 1655 1362 1411 1794 1441 1564 1423 1365 1220 1365
## [127] 935 1507 1600 1716 1641 1348 1291 1363 1403 1507 1553 1411 1283 1794
## [141] 1362 1384 1348 1436 1686 1163 935 1507 1220 1629 1348 1716 1283 NA
## [155] 917 1629 980 377 1507 1362 1283 1745 967 NA 1436 1411 1393 1384
## [169] 1229 1399 1365 1362 1291 1348 1629 1716 1411 1441 NA 1011 1666 377
## [183] 980 1610 1686 1229 1716 1555 1564 1595 1641 1355 NA NA 1423 1399
## [197] 935 1382 935 1163 1220 1186 1494 955 1056 917 1186 NA 1365 1522
## [211] 1056 1610 955 1641 1199 1600 1011 1579 1666 967 1663 1056 1355 1666
## [225] 1220 1011 1686 967 980 1602 1745 1712 935 377 1423 1153 1686 1092
## [239] 935 1382 1666 1092 1011 1449 1604 1507 1655 1399 1552 1363 955 1712
## [253] 1438 1602 NA 1600 1220 1794 1270 1348 NA 1199 1563 NA 1595 1579
## [267] 1436 853 NA 1153 1555 853 1629 917 1595 1663 1056 1092 967 955
## [281] 1163 1153 1563 1363 1229 1175 853 377 1186 NA NA 1393 1436 1229
## [295] 853 1655 1011 967 1153 1175 1186 917 1438 1649 1552 1056 1163 1283
## [309] 1363 1600 1229 1563 955 1641 1011 1745 1629 1175 935 1602 1438 1579
## [323] 1595 1175 1163 917 1602 1332 1449 377 1494 1522 1552 1242 NA 1092
## [337] NA 1362 1449 1522 1555 1564 1382 1602 NA NA 1745 1199 1092 1610
## [351] NA 955 1220 853 1163 1530 1494 1365 1522 1242 1283 1712 1438 1242
## [365] 1348 1332 1649 NA 1332 NA 1438 NA 1494 NA 1403 1363 1291 1242
## [379] 1403 1411 1348 1283 1270 1199 1449 1399 NA 1332 1229 1441 NA 1270
## [393] 1362 NA 1522 980 1186 1553 1382 1291 1283 1242 1555 1522 1494 1291
## [407] 377 NA 1270 1436 1563 1600 1610 1649 1663 1716 1175 917 NA NA
## [421] 1604 1595 1649 1641 955 1745 1563 1712 NA NA NA NA NA NA
## [435] 1655 1564 1794 1242 980 NA NA 1716 1610 1629 1399 1602 1712 1438
Restructuring the opponent ratings into a matrix.
opponents.rating.matrix <- matrix(opponents.rating, nrow = 64, ncol = 7, byrow = TRUE)
opponents.rating.matrix
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1436 1563 1600 1610 1649 1663 1716
## [2,] 1175 917 1716 1629 1604 1595 1649
## [3,] 1641 955 1745 1563 1712 1666 1663
## [4,] 1363 1507 1553 1579 1655 1564 1794
## [5,] 1242 980 1663 1666 1716 1610 1629
## [6,] 1399 1602 1712 1438 1365 1552 1563
## [7,] 1220 NA 377 1666 1712 1794 1411
## [8,] 1553 1384 1441 1610 1411 1362 1507
## [9,] 1564 1745 1600 853 1641 1579 1649
## [10,] 1595 1604 1564 1186 1494 1686 1745
## [11,] 1600 1423 1153 1686 1649 1384 1399
## [12,] 1579 1332 1449 1655 1423 1794 NA
## [13,] 1355 1552 1649 1655 1449 1384 1441
## [14,] 1270 1199 1641 1794 1552 1655 1494
## [15,] 1564 1604 1522 1555 1270 1449 1423
## [16,] 1365 1220 1436 1553 NA 1382 1403
## [17,] 1579 NA 1363 1555 1655 1362 1411
## [18,] 1794 1441 1564 1423 1365 1220 1365
## [19,] 935 1507 1600 1716 1641 1348 1291
## [20,] 1363 1403 1507 1553 1411 1283 1794
## [21,] 1362 1384 1348 1436 1686 1163 935
## [22,] 1507 1220 1629 1348 1716 1283 NA
## [23,] 917 1629 980 377 1507 1362 1283
## [24,] 1745 967 NA 1436 1411 1393 1384
## [25,] 1229 1399 1365 1362 1291 1348 1629
## [26,] 1716 1411 1441 NA 1011 1666 377
## [27,] 980 1610 1686 1229 1716 1555 1564
## [28,] 1595 1641 1355 NA NA 1423 1399
## [29,] 935 1382 935 1163 1220 1186 1494
## [30,] 955 1056 917 1186 NA 1365 1522
## [31,] 1056 1610 955 1641 1199 1600 1011
## [32,] 1579 1666 967 1663 1056 1355 1666
## [33,] 1220 1011 1686 967 980 1602 1745
## [34,] 1712 935 377 1423 1153 1686 1092
## [35,] 935 1382 1666 1092 1011 1449 1604
## [36,] 1507 1655 1399 1552 1363 955 1712
## [37,] 1438 1602 NA 1600 1220 1794 1270
## [38,] 1348 NA 1199 1563 NA 1595 1579
## [39,] 1436 853 NA 1153 1555 853 1629
## [40,] 917 1595 1663 1056 1092 967 955
## [41,] 1163 1153 1563 1363 1229 1175 853
## [42,] 377 1186 NA NA 1393 1436 1229
## [43,] 853 1655 1011 967 1153 1175 1186
## [44,] 917 1438 1649 1552 1056 1163 1283
## [45,] 1363 1600 1229 1563 955 1641 1011
## [46,] 1745 1629 1175 935 1602 1438 1579
## [47,] 1595 1175 1163 917 1602 1332 1449
## [48,] 377 1494 1522 1552 1242 NA 1092
## [49,] NA 1362 1449 1522 1555 1564 1382
## [50,] 1602 NA NA 1745 1199 1092 1610
## [51,] NA 955 1220 853 1163 1530 1494
## [52,] 1365 1522 1242 1283 1712 1438 1242
## [53,] 1348 1332 1649 NA 1332 NA 1438
## [54,] NA 1494 NA 1403 1363 1291 1242
## [55,] 1403 1411 1348 1283 1270 1199 1449
## [56,] 1399 NA 1332 1229 1441 NA 1270
## [57,] 1362 NA 1522 980 1186 1553 1382
## [58,] 1291 1283 1242 1555 1522 1494 1291
## [59,] 377 NA 1270 1436 1563 1600 1610
## [60,] 1649 1663 1716 1175 917 NA NA
## [61,] 1604 1595 1649 1641 955 1745 1563
## [62,] 1712 NA NA NA NA NA NA
## [63,] 1655 1564 1794 1242 980 NA NA
## [64,] 1716 1610 1629 1399 1602 1712 1438
Taking the mean of the matrix to find the average rating of opponents, after removing the NA values.
chess$opponents.rating <- round(rowMeans(opponents.rating.matrix, na.rm = T, dims = 1))
chess
## name ST points pre_ratings opponents.rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1363
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1467
## 9 STEFANO LEE ON 5.0 1411 1519
## 10 ANVIT RAO MI 5.0 1365 1553
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1471
## 12 KENNETH J TACK MI 4.5 1663 1539
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1393
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1488
## 18 DAVID SUNDEEN MI 4.0 1600 1453
## 19 DIPANKAR ROY MI 4.0 1564 1434
## 20 JASON ZHENG MI 4.0 1595 1473
## 21 DINH DANG BUI ON 4.0 1563 1331
## 22 EUGENE L MCCLURE MI 4.0 1555 1450
## 23 ALAN BUI ON 4.0 1363 1151
## 24 MICHAEL R ALDRICH MI 4.0 1229 1389
## 25 LOREN SCHWIEBERT MI 3.5 1745 1375
## 26 MAX ZHU ON 3.5 1579 1270
## 27 GAURAV GIDWANI MI 3.5 1552 1477
## 28 SOFIA ADINA STANESCU MI 3.5 1507 1483
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1188
## 30 GEORGE AVERY JONES ON 3.5 1522 1167
## 31 RISHI SHETTY MI 3.5 1494 1296
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1422
## 33 JADE GE MI 3.5 1449 1316
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1197
## 35 JOSHUA DAVID LEE MI 3.5 1438 1306
## 36 SIDDHARTH JHA MI 3.5 1355 1449
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1487
## 38 BRIAN LIU MI 3.0 1423 1457
## 39 JOEL R HENDON MI 3.0 1436 1246
## 40 FOREST ZHANG MI 3.0 1348 1178
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1214
## 42 JARED GE MI 3.0 1332 1124
## 43 ROBERT GLEN VASEY MI 3.0 1283 1143
## 44 JUSTIN D SCHILLING MI 3.0 1199 1294
## 45 DEREK YAN MI 3.0 1242 1337
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1443
## 47 ERIC WRIGHT MI 2.5 1362 1319
## 48 DANIEL KHAIN MI 2.5 1382 1213
## 49 MICHAEL J MARTIN MI 2.5 1291 1472
## 50 SHIVAM JHA MI 2.5 1056 1450
## 51 TEJAS AYYAGARI MI 2.5 1011 1202
## 52 ETHAN GUO MI 2.5 935 1401
## 53 JOSE C YBARRA MI 2.0 1393 1420
## 54 LARRY HODGE MI 2.0 1270 1359
## 55 ALEX KONG MI 2.0 1186 1338
## 56 MARISA RICCI MI 2.0 1153 1334
## 57 MICHAEL LU MI 2.0 1092 1331
## 58 VIRAJ MOHILE MI 2.0 917 1383
## 59 SEAN M MC CORMICK MI 2.0 853 1309
## 60 JULIA SHEN MI 1.5 967 1424
## 61 JEZZEL FARKAS ON 1.5 955 1536
## 62 ASHWIN BALAJI MI 1.0 1530 1712
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1447
## 64 BEN LI MI 1.0 1163 1587
Finally, capturing the data into a csv file.
write.csv(chess, file = "chess.csv")