Load packages.
library(knitr)
library(stringr)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Import text file, identify delimiters, skip first 4 rows.
chess<-read.table("tournamentinfo.txt", sep="|", fill=TRUE, stringsAsFactors = FALSE, skip=4)
#str(chess) #testing
#head(chess) #testing
Combine observations across two rows.
namesrow <- chess[seq(1,nrow(chess),by=3),]
#head(namesrow) #testing
staterow <- chess[seq(2,nrow(chess),by=3),]
#head(staterow) #testing
chess <- cbind(namesrow, staterow)
#head(chess) #testing
Discard unneeded columns.
chess <- subset(chess[, c(1:10,12,13)])
#head(chess) #testing
Use regular expressions to extract player ID and pre-game rating.
chess <- extract(chess, V2.1, c('PID', 'PreRating'), '(.[0-9]{8}) / R: (.[0-9]{3,4})')
#str(chess) #testing
Use regular expressions to split each round into round number and opponent number.
[REVISED to adjust syntax of regular expression.]
chess <- extract (chess, V4, c('Game1', 'Opp1'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V5, c('Game2', 'Opp2'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V6, c('Game3', 'Opp3'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V7, c('Game4', 'Opp4'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V8, c('Game5', 'Opp5'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V9, c('Game6', 'Opp6'), '([A-Z]{0,1})* +([0-9]{0,2})')
chess <- extract (chess, V10, c('Game7', 'Opp7'), '([A-Z]{0,1})* +([0-9]{0,2})')
View result and structure.
knitr::kable(head(chess))
| 1 |
1 |
GARY HUA |
6.0 |
|
39 |
|
21 |
|
18 |
|
14 |
|
7 |
|
12 |
|
4 |
ON |
15445895 |
1794 |
| 4 |
2 |
DAKSHESH DARURI |
6.0 |
|
63 |
|
58 |
|
4 |
|
17 |
|
16 |
|
20 |
|
7 |
MI |
14598900 |
1553 |
| 7 |
3 |
ADITYA BAJAJ |
6.0 |
|
8 |
|
61 |
|
25 |
|
21 |
|
11 |
|
13 |
|
12 |
MI |
14959604 |
1384 |
| 10 |
4 |
PATRICK H SCHILLING |
5.5 |
|
23 |
|
28 |
|
2 |
|
26 |
|
5 |
|
19 |
|
1 |
MI |
12616049 |
1716 |
| 13 |
5 |
HANSHI ZUO |
5.5 |
|
45 |
|
37 |
|
12 |
|
13 |
|
4 |
|
14 |
|
17 |
MI |
14601533 |
1655 |
| 16 |
6 |
HANSEN SONG |
5.0 |
|
34 |
|
29 |
|
11 |
|
35 |
|
10 |
|
27 |
|
21 |
OH |
15055204 |
1686 |
## 'data.frame': 64 obs. of 20 variables:
## $ V1 : chr " 1 " " 2 " " 3 " " 4 " ...
## $ V2 : chr " GARY HUA " " DAKSHESH DARURI " " ADITYA BAJAJ " " PATRICK H SCHILLING " ...
## $ V3 : chr "6.0 " "6.0 " "6.0 " "5.5 " ...
## $ Game1 : chr "" "" "" "" ...
## $ Opp1 : chr "39" "63" "8" "23" ...
## $ Game2 : chr "" "" "" "" ...
## $ Opp2 : chr "21" "58" "61" "28" ...
## $ Game3 : chr "" "" "" "" ...
## $ Opp3 : chr "18" "4" "25" "2" ...
## $ Game4 : chr "" "" "" "" ...
## $ Opp4 : chr "14" "17" "21" "26" ...
## $ Game5 : chr "" "" "" "" ...
## $ Opp5 : chr "7" "16" "11" "5" ...
## $ Game6 : chr "" "" "" "" ...
## $ Opp6 : chr "12" "20" "13" "19" ...
## $ Game7 : chr "" "" "" "" ...
## $ Opp7 : chr "4" "7" "12" "1" ...
## $ V1.1 : chr " ON " " MI " " MI " " MI " ...
## $ PID : chr " 15445895" " 14598900" " 14959604" " 12616049" ...
## $ PreRating: chr "1794" "1553" "1384" "1716" ...
Change required character columns to numeric, view structure.
chess[, c(1,3,5,7,9,11,13,15,17,19,20)] <- sapply(chess[, c(1,3,5,7,9,11,13,15,17,19,20)], as.numeric)
str(chess)
## 'data.frame': 64 obs. of 20 variables:
## $ V1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ V2 : chr " GARY HUA " " DAKSHESH DARURI " " ADITYA BAJAJ " " PATRICK H SCHILLING " ...
## $ V3 : num 6 6 6 5.5 5.5 5 5 5 5 5 ...
## $ Game1 : chr "" "" "" "" ...
## $ Opp1 : num 39 63 8 23 45 34 57 3 25 16 ...
## $ Game2 : chr "" "" "" "" ...
## $ Opp2 : num 21 58 61 28 37 29 46 32 18 19 ...
## $ Game3 : chr "" "" "" "" ...
## $ Opp3 : num 18 4 25 2 12 11 13 14 59 55 ...
## $ Game4 : chr "" "" "" "" ...
## $ Opp4 : num 14 17 21 26 13 35 11 9 8 31 ...
## $ Game5 : chr "" "" "" "" ...
## $ Opp5 : num 7 16 11 5 4 10 1 47 26 6 ...
## $ Game6 : chr "" "" "" "" ...
## $ Opp6 : num 12 20 13 19 14 27 9 28 7 25 ...
## $ Game7 : chr "" "" "" "" ...
## $ Opp7 : num 4 7 12 1 17 21 2 19 20 18 ...
## $ V1.1 : chr " ON " " MI " " MI " " MI " ...
## $ PID : num 15445895 14598900 14959604 12616049 14601533 ...
## $ PreRating: num 1794 1553 1384 1716 1655 ...
Create lookup table of all players and their pre-game rating, view structure.
tblookup <- subset(chess[, c(1,20)])
str(tblookup)
## 'data.frame': 64 obs. of 2 variables:
## $ V1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ PreRating: num 1794 1553 1384 1716 1655 ...
## V1 PreRating
## 1 1 1794
## 4 2 1553
## 7 3 1384
## 10 4 1716
## 13 5 1655
## 16 6 1686
Perform lookup for each opponent by their opponent number.
combo<- merge(chess, tblookup, by.x="Opp1",by.y="V1", sort=FALSE, all.x=TRUE)
combo<- merge(combo, tblookup, by.x="Opp2",by.y="V1", sort=FALSE, all.x=TRUE)
combo<- merge(combo, tblookup, by.x="Opp3",by.y="V1", sort=FALSE, all.x=TRUE)
## Warning in merge.data.frame(combo, tblookup, by.x = "Opp3", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y' are duplicated in the result
combo<- merge(combo, tblookup, by.x="Opp4",by.y="V1", sort=FALSE, all.x=TRUE)
## Warning in merge.data.frame(combo, tblookup, by.x = "Opp4", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y' are duplicated in the result
combo<- merge(combo, tblookup, by.x="Opp5",by.y="V1", sort=FALSE, all.x=TRUE)
## Warning in merge.data.frame(combo, tblookup, by.x = "Opp5", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y', 'PreRating.x', 'PreRating.y' are
## duplicated in the result
combo<- merge(combo, tblookup, by.x="Opp6",by.y="V1", sort=FALSE, all.x=TRUE)
## Warning in merge.data.frame(combo, tblookup, by.x = "Opp6", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y', 'PreRating.x', 'PreRating.y' are
## duplicated in the result
combo<- merge(combo, tblookup, by.x="Opp7",by.y="V1", sort=FALSE, all.x=TRUE)
## Warning in merge.data.frame(combo, tblookup, by.x = "Opp7", by.y = "V1", :
## column names 'PreRating.x', 'PreRating.y', 'PreRating.x', 'PreRating.y',
## 'PreRating.x', 'PreRating.y' are duplicated in the result
Discard unneeded columns, rename columns, view result.
[REVISED to sort columns with ‘order’.]
combo <- subset(combo[, c(8:10,18:27)])
colnames(combo) <- c("Order","Player'S Name","Total Number of Points","Player's State","Player ID","Player's PreRating","Opp1PR","Opp2PR","Opp3PR","Opp4PR","Opp5PR","Opp6PR","Opp7PR")
combo<-combo[ order(combo[,1]), ]
knitr::kable((head(combo)))
| 1 |
GARY HUA |
6.0 |
ON |
15445895 |
1794 |
1436 |
1563 |
1600 |
1610 |
1649 |
1663 |
1716 |
| 2 |
DAKSHESH DARURI |
6.0 |
MI |
14598900 |
1553 |
1175 |
917 |
1716 |
1629 |
1604 |
1595 |
1649 |
| 3 |
ADITYA BAJAJ |
6.0 |
MI |
14959604 |
1384 |
1641 |
955 |
1745 |
1563 |
1712 |
1666 |
1663 |
| 4 |
PATRICK H SCHILLING |
5.5 |
MI |
12616049 |
1716 |
1363 |
1507 |
1553 |
1579 |
1655 |
1564 |
1794 |
| 5 |
HANSHI ZUO |
5.5 |
MI |
14601533 |
1655 |
1242 |
980 |
1663 |
1666 |
1716 |
1610 |
1629 |
| 6 |
HANSEN SONG |
5.0 |
OH |
15055204 |
1686 |
1399 |
1602 |
1712 |
1438 |
1365 |
1552 |
1563 |
knitr::kable((tail(combo)))
| 43 |
59 |
SEAN M MC CORMICK |
2.0 |
MI |
12841036 |
853 |
1403 |
NA |
1411 |
1348 |
1283 |
1270 |
1199 |
| 63 |
60 |
JULIA SHEN |
1.5 |
MI |
14579262 |
967 |
1449 |
1399 |
1242 |
1332 |
1229 |
NA |
NA |
| 40 |
61 |
JEZZEL FARKAS |
1.5 |
ON |
15771592 |
955 |
1441 |
1384 |
1270 |
1362 |
1332 |
1522 |
980 |
| 55 |
62 |
ASHWIN BALAJI |
1.0 |
MI |
15219542 |
1530 |
1186 |
NA |
NA |
NA |
NA |
NA |
NA |
| 60 |
63 |
THOMAS JOSEPH HOSMER |
1.0 |
MI |
15057092 |
1175 |
1553 |
1382 |
1291 |
1283 |
1242 |
NA |
NA |
| 41 |
64 |
BEN LI |
1.0 |
MI |
15006561 |
1163 |
1555 |
1522 |
1494 |
1291 |
377 |
1332 |
1270 |
Add column for average opponent pre-game rating, view result.
[REVISED to add ‘na.rm=TRUE’ to remove NA values from mean.]
combo <- mutate(combo, "Average Pre-Chess Rating of Opponents" = round(rowMeans(combo[,7:13],na.rm = TRUE),0))
knitr::kable((head(combo)))
| 1 |
GARY HUA |
6.0 |
ON |
15445895 |
1794 |
1436 |
1563 |
1600 |
1610 |
1649 |
1663 |
1716 |
1605 |
| 2 |
DAKSHESH DARURI |
6.0 |
MI |
14598900 |
1553 |
1175 |
917 |
1716 |
1629 |
1604 |
1595 |
1649 |
1469 |
| 3 |
ADITYA BAJAJ |
6.0 |
MI |
14959604 |
1384 |
1641 |
955 |
1745 |
1563 |
1712 |
1666 |
1663 |
1564 |
| 4 |
PATRICK H SCHILLING |
5.5 |
MI |
12616049 |
1716 |
1363 |
1507 |
1553 |
1579 |
1655 |
1564 |
1794 |
1574 |
| 5 |
HANSHI ZUO |
5.5 |
MI |
14601533 |
1655 |
1242 |
980 |
1663 |
1666 |
1716 |
1610 |
1629 |
1501 |
| 6 |
HANSEN SONG |
5.0 |
OH |
15055204 |
1686 |
1399 |
1602 |
1712 |
1438 |
1365 |
1552 |
1563 |
1519 |
knitr::kable((tail(combo)))
| 59 |
59 |
SEAN M MC CORMICK |
2.0 |
MI |
12841036 |
853 |
1403 |
NA |
1411 |
1348 |
1283 |
1270 |
1199 |
1319 |
| 60 |
60 |
JULIA SHEN |
1.5 |
MI |
14579262 |
967 |
1449 |
1399 |
1242 |
1332 |
1229 |
NA |
NA |
1330 |
| 61 |
61 |
JEZZEL FARKAS |
1.5 |
ON |
15771592 |
955 |
1441 |
1384 |
1270 |
1362 |
1332 |
1522 |
980 |
1327 |
| 62 |
62 |
ASHWIN BALAJI |
1.0 |
MI |
15219542 |
1530 |
1186 |
NA |
NA |
NA |
NA |
NA |
NA |
1186 |
| 63 |
63 |
THOMAS JOSEPH HOSMER |
1.0 |
MI |
15057092 |
1175 |
1553 |
1382 |
1291 |
1283 |
1242 |
NA |
NA |
1350 |
| 64 |
64 |
BEN LI |
1.0 |
MI |
15006561 |
1163 |
1555 |
1522 |
1494 |
1291 |
377 |
1332 |
1270 |
1263 |
Discard unneeded columns, view result.
results <- subset(combo[,c(1:4,6,14)])
knitr::kable(results)
| 1 |
GARY HUA |
6.0 |
ON |
1794 |
1605 |
| 2 |
DAKSHESH DARURI |
6.0 |
MI |
1553 |
1469 |
| 3 |
ADITYA BAJAJ |
6.0 |
MI |
1384 |
1564 |
| 4 |
PATRICK H SCHILLING |
5.5 |
MI |
1716 |
1574 |
| 5 |
HANSHI ZUO |
5.5 |
MI |
1655 |
1501 |
| 6 |
HANSEN SONG |
5.0 |
OH |
1686 |
1519 |
| 7 |
GARY DEE SWATHELL |
5.0 |
MI |
1649 |
1372 |
| 8 |
EZEKIEL HOUGHTON |
5.0 |
MI |
1641 |
1468 |
| 9 |
STEFANO LEE |
5.0 |
ON |
1411 |
1523 |
| 10 |
ANVIT RAO |
5.0 |
MI |
1365 |
1554 |
| 11 |
CAMERON WILLIAM MC LEMAN |
4.5 |
MI |
1712 |
1468 |
| 12 |
KENNETH J TACK |
4.5 |
MI |
1663 |
1506 |
| 13 |
TORRANCE HENRY JR |
4.5 |
MI |
1666 |
1498 |
| 14 |
BRADLEY SHAW |
4.5 |
MI |
1610 |
1515 |
| 15 |
ZACHARY JAMES HOUGHTON |
4.5 |
MI |
1220 |
1484 |
| 16 |
MIKE NIKITIN |
4.0 |
MI |
1604 |
1386 |
| 17 |
RONALD GRZEGORCZYK |
4.0 |
MI |
1629 |
1499 |
| 18 |
DAVID SUNDEEN |
4.0 |
MI |
1600 |
1480 |
| 19 |
DIPANKAR ROY |
4.0 |
MI |
1564 |
1426 |
| 20 |
JASON ZHENG |
4.0 |
MI |
1595 |
1411 |
| 21 |
DINH DANG BUI |
4.0 |
ON |
1563 |
1470 |
| 22 |
EUGENE L MCCLURE |
4.0 |
MI |
1555 |
1300 |
| 23 |
ALAN BUI |
4.0 |
ON |
1363 |
1214 |
| 24 |
MICHAEL R ALDRICH |
4.0 |
MI |
1229 |
1357 |
| 25 |
LOREN SCHWIEBERT |
3.5 |
MI |
1745 |
1363 |
| 26 |
MAX ZHU |
3.5 |
ON |
1579 |
1507 |
| 27 |
GAURAV GIDWANI |
3.5 |
MI |
1552 |
1222 |
| 28 |
SOFIA ADINA STANESCU-BELLU |
3.5 |
MI |
1507 |
1522 |
| 29 |
CHIEDOZIE OKORIE |
3.5 |
MI |
1602 |
1314 |
| 30 |
GEORGE AVERY JONES |
3.5 |
ON |
1522 |
1144 |
| 31 |
RISHI SHETTY |
3.5 |
MI |
1494 |
1260 |
| 32 |
JOSHUA PHILIP MATHEWS |
3.5 |
ON |
1441 |
1379 |
| 33 |
JADE GE |
3.5 |
MI |
1449 |
1277 |
| 34 |
MICHAEL JEFFERY THOMAS |
3.5 |
MI |
1399 |
1375 |
| 35 |
JOSHUA DAVID LEE |
3.5 |
MI |
1438 |
1150 |
| 36 |
SIDDHARTH JHA |
3.5 |
MI |
1355 |
1388 |
| 37 |
AMIYATOSH PWNANANDAM |
3.5 |
MI |
980 |
1385 |
| 38 |
BRIAN LIU |
3.0 |
MI |
1423 |
1539 |
| 39 |
JOEL R HENDON |
3.0 |
MI |
1436 |
1430 |
| 40 |
FOREST ZHANG |
3.0 |
MI |
1348 |
1391 |
| 41 |
KYLE WILLIAM MURPHY |
3.0 |
MI |
1403 |
1248 |
| 42 |
JARED GE |
3.0 |
MI |
1332 |
1150 |
| 43 |
ROBERT GLEN VASEY |
3.0 |
MI |
1283 |
1107 |
| 44 |
JUSTIN D SCHILLING |
3.0 |
MI |
1199 |
1327 |
| 45 |
DEREK YAN |
3.0 |
MI |
1242 |
1152 |
| 46 |
JACOB ALEXANDER LAVALLEY |
3.0 |
MI |
377 |
1358 |
| 47 |
ERIC WRIGHT |
2.5 |
MI |
1362 |
1392 |
| 48 |
DANIEL KHAIN |
2.5 |
MI |
1382 |
1356 |
| 49 |
MICHAEL J MARTIN |
2.5 |
MI |
1291 |
1286 |
| 50 |
SHIVAM JHA |
2.5 |
MI |
1056 |
1296 |
| 51 |
TEJAS AYYAGARI |
2.5 |
MI |
1011 |
1356 |
| 52 |
ETHAN GUO |
2.5 |
MI |
935 |
1495 |
| 53 |
JOSE C YBARRA |
2.0 |
MI |
1393 |
1345 |
| 54 |
LARRY HODGE |
2.0 |
MI |
1270 |
1206 |
| 55 |
ALEX KONG |
2.0 |
MI |
1186 |
1406 |
| 56 |
MARISA RICCI |
2.0 |
MI |
1153 |
1414 |
| 57 |
MICHAEL LU |
2.0 |
MI |
1092 |
1363 |
| 58 |
VIRAJ MOHILE |
2.0 |
MI |
917 |
1391 |
| 59 |
SEAN M MC CORMICK |
2.0 |
MI |
853 |
1319 |
| 60 |
JULIA SHEN |
1.5 |
MI |
967 |
1330 |
| 61 |
JEZZEL FARKAS |
1.5 |
ON |
955 |
1327 |
| 62 |
ASHWIN BALAJI |
1.0 |
MI |
1530 |
1186 |
| 63 |
THOMAS JOSEPH HOSMER |
1.0 |
MI |
1175 |
1350 |
| 64 |
BEN LI |
1.0 |
MI |
1163 |
1263 |
Export as CSV.
write.table(results, "chessresults.txt")
This can be done more easily.