The purpose
The goal of this project is to create a .CSV file from a given text file with text tournament result, the .CSV file will include : Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. The text file can be found at: “https://raw.githubusercontent.com/choudhury1023/Data-607/gh-pages/tournamentinfo.txt”
Step 1: Install packages
##install.packages("stringr")
##install.packages("sqldf")
Step 2: Load packages
library(stringr)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
Step 3: Load file
tourdata <- read.csv("https://raw.githubusercontent.com/choudhury1023/Data-607/gh-pages/tournamentinfo.txt")
head(tourdata)
## X.........................................................................................
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 -----------------------------------------------------------------------------------------
## 4 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 5 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 6 -----------------------------------------------------------------------------------------
Step 6: Remove row separators
tourdata <- str_replace_all(tourdata, "-{2,}", "")
head(tourdata)
## [1] ""
## [2] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [3] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [4] ""
## [5] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [6] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
Step 7: Remove blank rows that was generated after removing separators
tourdata <- tourdata[tourdata != ""]
head(tourdata)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
Step 8: The data contains two lines for each row, break the rows at line break and create two tables
oddRows<-seq(1,127,2)
evenRows<-seq(2,128,2)
tourdata1 <- tourdata[c(oddRows)]
head(tourdata1)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
tourdata2 <- tourdata[c(evenRows)]
head(tourdata2)
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
Step 9: Get “x” numbers for table1
tourdata11 <- data.frame( do.call( rbind, strsplit(tourdata1, "|" ) ) )
head(tourdata11,1)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
## 1 1 | G A R Y H U A
## X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 X32 X33 X34 X35 X36 X37 X38
## 1
## X39 X40 X41 X42 X43 X44 X45 X46 X47 X48 X49 X50 X51 X52 X53 X54 X55 X56
## 1 | 6 . 0 | W 3 9 | W
## X57 X58 X59 X60 X61 X62 X63 X64 X65 X66 X67 X68 X69 X70 X71 X72 X73 X74
## 1 2 1 | W 1 8 | W 1 4 | W
## X75 X76 X77 X78 X79 X80 X81 X82 X83 X84 X85 X86 X87 X88 X89
## 1 7 | D 1 2 | D 4 |
Step 10: Extract required data from table1 using “x” numbers and “substr” function
tourdata1 <- data.frame(playerNum = as.numeric(substr(tourdata1, 1, 6)),
playerName = str_trim(substr(tourdata1, 8, 40)),
total_pts = as.numeric(substr(tourdata1, 42, 46)),
r1OppID = substr(tourdata1, 50, 52),
r2OppID = substr(tourdata1, 56, 58),
r3OppID = substr(tourdata1, 62, 64),
r4OppID = substr(tourdata1, 68, 70),
r5OppID = substr(tourdata1, 74, 76),
r6OppID = substr(tourdata1, 80, 82),
r7OppID = substr(tourdata1, 86, 88), stringsAsFactors=FALSE)
head(tourdata1)
## playerNum playerName total_pts r1OppID r2OppID r3OppID r4OppID
## 1 1 GARY HUA 6.0 39 21 18 14
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26
## 5 5 HANSHI ZUO 5.5 45 37 12 13
## 6 6 HANSEN SONG 5.0 34 29 11 35
## r5OppID r6OppID r7OppID
## 1 7 12 4
## 2 16 20 7
## 3 11 13 12
## 4 5 19 1
## 5 4 14 17
## 6 10 27 21
Step 11: Get “x” numbers for table2
tourdata12 <- data.frame( do.call( rbind, strsplit(tourdata2, "|" ) ) )
head(tourdata12,1)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
## 1 O N | 1 5 4 4 5 8 9 5 / R
## X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 X32 X33 X34 X35 X36 X37 X38
## 1 : 1 7 9 4 - > 1 8 1 7
## X39 X40 X41 X42 X43 X44 X45 X46 X47 X48 X49 X50 X51 X52 X53 X54 X55 X56
## 1 | N : 2 | W | B
## X57 X58 X59 X60 X61 X62 X63 X64 X65 X66 X67 X68 X69 X70 X71 X72 X73 X74
## 1 | W | B | W
## X75 X76 X77 X78 X79 X80 X81 X82 X83 X84 X85 X86 X87 X88 X89
## 1 | B | W |
Step 12: Extract required data from table2 using “x” numbers and “substr” function
tourdata2 <- data.frame(state = substr(tourdata2, 1, 6),
uscfID = substr(tourdata2, 8, 17),
preTRating = as.numeric(substr(tourdata2, 23, 26)),
postTRating = as.numeric(substr(tourdata2, 32, 35)),
stringsAsFactors=FALSE)
head(tourdata2)
## state uscfID preTRating postTRating
## 1 ON 15445895 1794 1817
## 2 MI 14598900 1553 1663
## 3 MI 14959604 1384 1640
## 4 MI 12616049 1716 1744
## 5 MI 14601533 1655 1690
## 6 OH 15055204 1686 1687
Step 13: Combine table1 and table2 to create single table
ctable <- cbind(tourdata1,tourdata2)
head(ctable)
## playerNum playerName total_pts r1OppID r2OppID r3OppID r4OppID
## 1 1 GARY HUA 6.0 39 21 18 14
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26
## 5 5 HANSHI ZUO 5.5 45 37 12 13
## 6 6 HANSEN SONG 5.0 34 29 11 35
## r5OppID r6OppID r7OppID state uscfID preTRating postTRating
## 1 7 12 4 ON 15445895 1794 1817
## 2 16 20 7 MI 14598900 1553 1663
## 3 11 13 12 MI 14959604 1384 1640
## 4 5 19 1 MI 12616049 1716 1744
## 5 4 14 17 MI 14601533 1655 1690
## 6 10 27 21 OH 15055204 1686 1687
Step 14: Match opponent number with their respective pre rating
df1 <- sqldf("SELECT a.playerNum, b.preTRating AS 'r1OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r1OppID = b.playerNum;")
## Loading required package: tcltk
df2 <- sqldf("SELECT b.preTRating AS 'r2OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r2OppID = b.playerNum;")
df3 <- sqldf("SELECT b.preTRating AS 'r3OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r3OppID = b.playerNum;")
df4 <- sqldf("SELECT b.preTRating AS 'r4OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r4OppID = b.playerNum;")
df5 <- sqldf("SELECT b.preTRating AS 'r5OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r5OppID = b.playerNum;")
df6 <- sqldf("SELECT b.preTRating AS 'r6OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r6OppID = b.playerNum;")
df7 <- sqldf("SELECT b.preTRating AS 'r7OppRating'
FROM tourdata1 AS a
LEFT JOIN ctable AS b ON a.r7OppID = b.playerNum;")
Step 15: Collect all the opponent pre ratings on a single table
oppRating <- cbind(df1,df2,df3,df4,df5,df6,df7)
head(oppRating)
## playerNum r1OppRating r2OppRating r3OppRating r4OppRating r5OppRating
## 1 1 1436 1563 1600 1610 1649
## 2 2 1175 917 1716 1629 1604
## 3 3 1641 955 1745 1563 1712
## 4 4 1363 1507 1553 1579 1655
## 5 5 1242 980 1663 1666 1716
## 6 6 1399 1602 1712 1438 1365
## r6OppRating r7OppRating
## 1 1663 1716
## 2 1595 1649
## 3 1666 1663
## 4 1564 1794
## 5 1610 1629
## 6 1552 1563
Step 16: Calculate opponent’s average pre rating
aveOppRating <- round(rowMeans(oppRating[,2:8],na.rm=TRUE),0)
aveOppRating
## [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515
## [15] 1484 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522
## [29] 1314 1144 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150
## [43] 1107 1327 1152 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414
## [57] 1363 1391 1319 1330 1327 1186 1350 1263
Step 18: Write .CSV file
write.csv(finalData, "Chess_Tounament.csv", row.names=FALSE)
Step 19: Load .CSV file
csv <- read.csv("Chess_Tounament.csv")
csv
## player_number player_name state total_points pre_rating
## 1 1 GARY HUA ON 6.0 1794
## 2 2 DAKSHESH DARURI MI 6.0 1553
## 3 3 ADITYA BAJAJ MI 6.0 1384
## 4 4 PATRICK H SCHILLING MI 5.5 1716
## 5 5 HANSHI ZUO MI 5.5 1655
## 6 6 HANSEN SONG OH 5.0 1686
## 7 7 GARY DEE SWATHELL MI 5.0 1649
## 8 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 9 STEFANO LEE ON 5.0 1411
## 10 10 ANVIT RAO MI 5.0 1365
## 11 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 12 KENNETH J TACK MI 4.5 1663
## 13 13 TORRANCE HENRY JR MI 4.5 1666
## 14 14 BRADLEY SHAW MI 4.5 1610
## 15 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 16 MIKE NIKITIN MI 4.0 1604
## 17 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 18 DAVID SUNDEEN MI 4.0 1600
## 19 19 DIPANKAR ROY MI 4.0 1564
## 20 20 JASON ZHENG MI 4.0 1595
## 21 21 DINH DANG BUI ON 4.0 1563
## 22 22 EUGENE L MCCLURE MI 4.0 1555
## 23 23 ALAN BUI ON 4.0 1363
## 24 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 26 MAX ZHU ON 3.5 1579
## 27 27 GAURAV GIDWANI MI 3.5 1552
## 28 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 29 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 30 GEORGE AVERY JONES ON 3.5 1522
## 31 31 RISHI SHETTY MI 3.5 1494
## 32 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 33 JADE GE MI 3.5 1449
## 34 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 36 SIDDHARTH JHA MI 3.5 1355
## 37 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 38 BRIAN LIU MI 3.0 1423
## 39 39 JOEL R HENDON MI 3.0 1436
## 40 40 FOREST ZHANG MI 3.0 1348
## 41 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 42 JARED GE MI 3.0 1332
## 43 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 45 DEREK YAN MI 3.0 1242
## 46 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 47 ERIC WRIGHT MI 2.5 1362
## 48 48 DANIEL KHAIN MI 2.5 1382
## 49 49 MICHAEL J MARTIN MI 2.5 1291
## 50 50 SHIVAM JHA MI 2.5 1056
## 51 51 TEJAS AYYAGARI MI 2.5 1011
## 52 52 ETHAN GUO MI 2.5 935
## 53 53 JOSE C YBARRA MI 2.0 1393
## 54 54 LARRY HODGE MI 2.0 1270
## 55 55 ALEX KONG MI 2.0 1186
## 56 56 MARISA RICCI MI 2.0 1153
## 57 57 MICHAEL LU MI 2.0 1092
## 58 58 VIRAJ MOHILE MI 2.0 917
## 59 59 SEAN M MC CORMICK MI 2.0 853
## 60 60 JULIA SHEN MI 1.5 967
## 61 61 JEZZEL FARKAS ON 1.5 955
## 62 62 ASHWIN BALAJI MI 1.0 1530
## 63 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 64 BEN LI MI 1.0 1163
## ave_opponent_rating
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
## 7 1372
## 8 1468
## 9 1523
## 10 1554
## 11 1468
## 12 1506
## 13 1498
## 14 1515
## 15 1484
## 16 1386
## 17 1499
## 18 1480
## 19 1426
## 20 1411
## 21 1470
## 22 1300
## 23 1214
## 24 1357
## 25 1363
## 26 1507
## 27 1222
## 28 1522
## 29 1314
## 30 1144
## 31 1260
## 32 1379
## 33 1277
## 34 1375
## 35 1150
## 36 1388
## 37 1385
## 38 1539
## 39 1430
## 40 1391
## 41 1248
## 42 1150
## 43 1107
## 44 1327
## 45 1152
## 46 1358
## 47 1392
## 48 1356
## 49 1286
## 50 1296
## 51 1356
## 52 1495
## 53 1345
## 54 1206
## 55 1406
## 56 1414
## 57 1363
## 58 1391
## 59 1319
## 60 1330
## 61 1327
## 62 1186
## 63 1350
## 64 1263