Given a text file with chess tournament results produce a CSV file that shows the player’s name, state, total points, pre-rating, and average pre-rating of their opponents.
Read the txt file as serperate lines with the readLines function.
library(stringr)## Warning: package 'stringr' was built under R version 3.3.3
library(plyr)
library(tidyr)
library(knitr)## Warning: package 'knitr' was built under R version 3.3.3
chess_data <- readLines('chess.txt')## Warning in readLines("chess.txt"): incomplete final line found on
## 'chess.txt'
head(chess_data,10)## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
There are two lines of information for each player. We are going to make a varible for each line and then combine them to make one line for each player.
row1 <- chess_data[seq(5,196,3)]
head(row1)## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
row2 <- chess_data[seq(6,196,3)]
head(row2)## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
combined <- paste (row1, row2,sep = "")
head(combined)## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12| MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1| MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17| MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21| OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
Neccesary information is delimitted by the ‘|’ character (stopping at the 12th ‘|’) so we will make benchmarks to locate where the ‘|’ are so we can parse the data.
w <- str_locate_all(combined,'\\|') #Locate |
b1 <- w[[1]][1] #1st |
b2 <- w[[1]][2] #2nd |
b3 <- w[[1]][3] #3rd |
b4 <- w[[1]][4] #4th |
b5 <- w[[1]][5] #5th |
b6 <- w[[1]][6] #6th |
b7 <- w[[1]][7] #7th |
b8 <- w[[1]][8] #8th |
b9 <- w[[1]][9] #9th |
b10 <- w[[1]][10] #10th |
b11 <- w[[1]][11] #11th |
b12 <- w[[1]][12] #12th |player_name <- str_trim(substr(combined, b1+1, b2-1))
head(player_name,30)## [1] "GARY HUA" "DAKSHESH DARURI"
## [3] "ADITYA BAJAJ" "PATRICK H SCHILLING"
## [5] "HANSHI ZUO" "HANSEN SONG"
## [7] "GARY DEE SWATHELL" "EZEKIEL HOUGHTON"
## [9] "STEFANO LEE" "ANVIT RAO"
## [11] "CAMERON WILLIAM MC LEMAN" "KENNETH J TACK"
## [13] "TORRANCE HENRY JR" "BRADLEY SHAW"
## [15] "ZACHARY JAMES HOUGHTON" "MIKE NIKITIN"
## [17] "RONALD GRZEGORCZYK" "DAVID SUNDEEN"
## [19] "DIPANKAR ROY" "JASON ZHENG"
## [21] "DINH DANG BUI" "EUGENE L MCCLURE"
## [23] "ALAN BUI" "MICHAEL R ALDRICH"
## [25] "LOREN SCHWIEBERT" "MAX ZHU"
## [27] "GAURAV GIDWANI" "SOFIA ADINA STANESCU-BELLU"
## [29] "CHIEDOZIE OKORIE" "GEORGE AVERY JONES"
player_state <- str_trim(substr(combined, b10+1, b11-1))
head(player_state,30)## [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI"
## [15] "MI" "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI"
## [29] "MI" "ON"
player_points <- str_trim(substr(combined, b2+1, b3-1))
head(player_points,30)## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
player_pre <- str_trim(substr(combined, b11+15, b12-13))
head(player_pre,30)## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649"
## [8] "1641P1" "1411" "1365" "1712" "1663" "1666" "1610"
## [15] "1220P1" "1604" "1629" "1600" "1564" "1595" "1563P2"
## [22] "1555" "1363" "1229" "1745" "1579" "1552" "1507"
## [29] "1602P6" "1522"
player_pre <- str_extract(player_pre, '[0-9]+')
head(player_pre,30)## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
player_opp <- str_trim(substr(combined, b3+1, b10))
head(player_opp)## [1] "W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] "W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] "L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] "W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] "W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] "W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
player_opp <- str_extract_all(player_opp, '( |\\d){4}')
head(player_opp)## [[1]]
## [1] " 39" " 21" " 18" " 14" " 7" " 12" " 4"
##
## [[2]]
## [1] " 63" " 58" " 4" " 17" " 16" " 20" " 7"
##
## [[3]]
## [1] " 8" " 61" " 25" " 21" " 11" " 13" " 12"
##
## [[4]]
## [1] " 23" " 28" " 2" " 26" " 5" " 19" " 1"
##
## [[5]]
## [1] " 45" " 37" " 12" " 13" " 4" " 14" " 17"
##
## [[6]]
## [1] " 34" " 29" " 11" " 35" " 10" " 27" " 21"
Since we already have each player’s pre-ranking and their opponent number we index to relate the number to the pre-ranking
player_opp <- as.numeric(unlist(player_opp))
head(player_opp)## [1] 39 21 18 14 7 12
opp_pre <- as.numeric(player_pre[player_opp])
head(opp_pre)## [1] 1436 1563 1600 1610 1649 1663
Using the colMeans function and allowing NAs we can get the averages of the opponent pre-ranking
player_opp_matrix <- matrix(opp_pre, nrow=7, ncol=64)
head(player_opp_matrix)## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1436 1175 1641 1363 1242 1399 1092 1384 1745 1604 1423 1332 1355
## [2,] 1563 917 955 1507 980 1602 377 1441 1600 1564 1153 1449 1552
## [3,] 1600 1716 1745 1553 1663 1712 1666 1610 853 1186 1686 1655 1649
## [4,] 1610 1629 1563 1579 1666 1438 1712 1411 1641 1494 1649 1423 1655
## [5,] 1649 1604 1712 1655 1716 1365 1794 1362 1579 1686 1384 NA 1449
## [6,] 1663 1595 1666 1564 1610 1552 1411 1507 1649 1745 1399 1794 1384
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## [1,] 1270 1564 1365 1382 1362 1220 1348 1283 1163 1716 1507
## [2,] 1199 1604 1220 1403 1411 1365 1291 1794 935 1283 1362
## [3,] 1641 1522 NA 1579 1794 935 1363 1362 1507 1595 1283
## [4,] 1794 1555 1436 1553 1441 1507 1403 1384 1220 917 1745
## [5,] 1552 1270 1553 1363 1564 1600 1507 1348 NA 1629 967
## [6,] 1655 1449 1355 1555 1423 1716 1553 1436 1629 980 1199
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35]
## [1,] 1411 1291 1011 1229 1056 935 917 955 967 1686 377
## [2,] 1393 1348 1666 1716 1686 1163 1186 1641 1663 967 1423
## [3,] 1384 1629 377 1555 1423 1220 1163 1199 1056 980 1153
## [4,] 1229 1716 980 1564 1399 1186 1365 1600 1355 1602 1686
## [5,] 1399 1411 1610 1595 935 1494 1522 1011 1666 1745 1092
## [6,] 1365 1441 1686 1641 1382 955 1056 1579 1220 1712 935
## [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46]
## [1,] 1666 NA 1712 1794 1595 853 1663 1563 NA 1655 1438
## [2,] 1092 1655 1438 1270 1579 1629 1056 1363 1610 1011 1649
## [3,] 1011 1399 1602 1348 1436 917 1092 1229 1441 967 1552
## [4,] 1449 1552 1663 1604 853 1595 967 1175 1393 1153 1056
## [5,] NA NA NA 1199 1563 NA 955 853 1436 1175 1163
## [6,] 1604 1363 1600 1563 1153 NA 1163 377 1229 1186 1283
## [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57]
## [1,] 1600 1629 1579 1602 1552 1522 NA 1610 1530 NA 1649
## [2,] 1229 1175 1595 1332 1242 1555 1745 1436 1494 1712 1355
## [3,] 1563 NA 1175 1449 1355 1564 NA 955 1365 1438 1332
## [4,] 955 935 1163 377 1092 1382 1199 NA 1522 1242 1011
## [5,] 1641 NA 917 NA 1441 1602 NA 1220 NA NA 1438
## [6,] 1011 1602 NA 1494 1362 1438 1092 853 1242 1348 1393
## [,58] [,59] [,60] [,61] [,62] [,63] [,64]
## [1,] 1494 1403 1449 1441 1186 1553 1555
## [2,] 1553 NA 1399 1384 NA 1382 1522
## [3,] 1403 1411 1242 1270 NA 1291 1494
## [4,] 1363 1348 1332 1362 NA 1283 1291
## [5,] 1291 1283 1229 1332 NA 1242 377
## [6,] NA 1270 NA 1522 NA NA 1332
opp_pre_avg <- round(colMeans(player_opp_matrix, na.rm=TRUE),0)
head(opp_pre_avg)## [1] 1605 1469 1564 1574 1501 1519
chess <- data.frame(player_name,player_state,player_points,player_pre,opp_pre_avg)
colnames(chess) <- c("Name", "State","Points","Pre-Ranking","Avg Opponent Pre-Ranking")
kable(chess)| Name | State | Points | Pre-Ranking | Avg Opponent Pre-Ranking |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
| GARY DEE SWATHELL | MI | 5.0 | 1649 | 1372 |
| EZEKIEL HOUGHTON | MI | 5.0 | 1641 | 1468 |
| STEFANO LEE | ON | 5.0 | 1411 | 1523 |
| ANVIT RAO | MI | 5.0 | 1365 | 1554 |
| CAMERON WILLIAM MC LEMAN | MI | 4.5 | 1712 | 1468 |
| KENNETH J TACK | MI | 4.5 | 1663 | 1506 |
| TORRANCE HENRY JR | MI | 4.5 | 1666 | 1498 |
| BRADLEY SHAW | MI | 4.5 | 1610 | 1515 |
| ZACHARY JAMES HOUGHTON | MI | 4.5 | 1220 | 1484 |
| MIKE NIKITIN | MI | 4.0 | 1604 | 1386 |
| RONALD GRZEGORCZYK | MI | 4.0 | 1629 | 1499 |
| DAVID SUNDEEN | MI | 4.0 | 1600 | 1480 |
| DIPANKAR ROY | MI | 4.0 | 1564 | 1426 |
| JASON ZHENG | MI | 4.0 | 1595 | 1411 |
| DINH DANG BUI | ON | 4.0 | 1563 | 1470 |
| EUGENE L MCCLURE | MI | 4.0 | 1555 | 1300 |
| ALAN BUI | ON | 4.0 | 1363 | 1214 |
| MICHAEL R ALDRICH | MI | 4.0 | 1229 | 1357 |
| LOREN SCHWIEBERT | MI | 3.5 | 1745 | 1363 |
| MAX ZHU | ON | 3.5 | 1579 | 1507 |
| GAURAV GIDWANI | MI | 3.5 | 1552 | 1222 |
| SOFIA ADINA STANESCU-BELLU | MI | 3.5 | 1507 | 1522 |
| CHIEDOZIE OKORIE | MI | 3.5 | 1602 | 1314 |
| GEORGE AVERY JONES | ON | 3.5 | 1522 | 1144 |
| RISHI SHETTY | MI | 3.5 | 1494 | 1260 |
| JOSHUA PHILIP MATHEWS | ON | 3.5 | 1441 | 1379 |
| JADE GE | MI | 3.5 | 1449 | 1277 |
| MICHAEL JEFFERY THOMAS | MI | 3.5 | 1399 | 1375 |
| JOSHUA DAVID LEE | MI | 3.5 | 1438 | 1150 |
| SIDDHARTH JHA | MI | 3.5 | 1355 | 1388 |
| AMIYATOSH PWNANANDAM | MI | 3.5 | 980 | 1385 |
| BRIAN LIU | MI | 3.0 | 1423 | 1539 |
| JOEL R HENDON | MI | 3.0 | 1436 | 1430 |
| FOREST ZHANG | MI | 3.0 | 1348 | 1391 |
| KYLE WILLIAM MURPHY | MI | 3.0 | 1403 | 1248 |
| JARED GE | MI | 3.0 | 1332 | 1150 |
| ROBERT GLEN VASEY | MI | 3.0 | 1283 | 1107 |
| JUSTIN D SCHILLING | MI | 3.0 | 1199 | 1327 |
| DEREK YAN | MI | 3.0 | 1242 | 1152 |
| JACOB ALEXANDER LAVALLEY | MI | 3.0 | 377 | 1358 |
| ERIC WRIGHT | MI | 2.5 | 1362 | 1392 |
| DANIEL KHAIN | MI | 2.5 | 1382 | 1356 |
| MICHAEL J MARTIN | MI | 2.5 | 1291 | 1286 |
| SHIVAM JHA | MI | 2.5 | 1056 | 1296 |
| TEJAS AYYAGARI | MI | 2.5 | 1011 | 1356 |
| ETHAN GUO | MI | 2.5 | 935 | 1495 |
| JOSE C YBARRA | MI | 2.0 | 1393 | 1345 |
| LARRY HODGE | MI | 2.0 | 1270 | 1206 |
| ALEX KONG | MI | 2.0 | 1186 | 1406 |
| MARISA RICCI | MI | 2.0 | 1153 | 1414 |
| MICHAEL LU | MI | 2.0 | 1092 | 1363 |
| VIRAJ MOHILE | MI | 2.0 | 917 | 1391 |
| SEAN M MC CORMICK | MI | 2.0 | 853 | 1319 |
| JULIA SHEN | MI | 1.5 | 967 | 1330 |
| JEZZEL FARKAS | ON | 1.5 | 955 | 1327 |
| ASHWIN BALAJI | MI | 1.0 | 1530 | 1186 |
| THOMAS JOSEPH HOSMER | MI | 1.0 | 1175 | 1350 |
| BEN LI | MI | 1.0 | 1163 | 1263 |
write.csv(chess, "chessData.csv", row.names=FALSE)