09/16/2018
In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
For the first player, the information would be:Gary Hua, ON, 6.0, 1794, 1605
1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.
#setwd("/Users/elinaazrilyan/Documents/Fall 2018/Data 607/Week 4/")
ch.data <- read.delim("tournamentinfo.txt", header = FALSE, sep = "|", skip=4, stringsAsFactors = FALSE)
library("stringr")
head(ch.data,10)
## V1
## 1 1
## 2 ON
## 3 -----------------------------------------------------------------------------------------
## 4 2
## 5 MI
## 6 -----------------------------------------------------------------------------------------
## 7 3
## 8 MI
## 9 -----------------------------------------------------------------------------------------
## 10 4
## V2 V3 V4 V5 V6 V7 V8
## 1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7
## 2 15445895 / R: 1794 ->1817 N:2 W B W B W
## 3
## 4 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16
## 5 14598900 / R: 1553 ->1663 N:2 B W B W B
## 6
## 7 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11
## 8 14959604 / R: 1384 ->1640 N:2 W B W B W
## 9
## 10 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26 D 5
## V9 V10 V11
## 1 D 12 D 4 NA
## 2 B W NA
## 3 NA
## 4 W 20 W 7 NA
## 5 W B NA
## 6 NA
## 7 W 13 W 12 NA
## 8 B W NA
## 9 NA
## 10 W 19 D 1 NA
We can see from the data that the data for each player is stored in 2 rows - it will require some manupulation to get the results we need. We will need to combine player data into 1 row.
ch.data2 <- ch.data[seq(1 , 192, 3), ]
head(ch.data2)
## V1 V2 V3 V4 V5 V6 V7
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 4 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 7 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 10 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 13 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 16 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## V8 V9 V10 V11
## 1 W 7 D 12 D 4 NA
## 4 W 16 W 20 W 7 NA
## 7 W 11 W 13 W 12 NA
## 10 D 5 W 19 D 1 NA
## 13 D 4 W 14 W 17 NA
## 16 D 10 W 27 W 21 NA
ch.data2.row2 <- ch.data[seq(2 , 192, 3), ]
head(ch.data2.row2)
## V1 V2 V3 V4 V5 V6 V7
## 2 ON 15445895 / R: 1794 ->1817 N:2 W B W B
## 5 MI 14598900 / R: 1553 ->1663 N:2 B W B W
## 8 MI 14959604 / R: 1384 ->1640 N:2 W B W B
## 11 MI 12616049 / R: 1716 ->1744 N:2 W B W B
## 14 MI 14601533 / R: 1655 ->1690 N:2 B W B W
## 17 OH 15055204 / R: 1686 ->1687 N:3 W B W B
## V8 V9 V10 V11
## 2 W B W NA
## 5 B W B NA
## 8 W B W NA
## 11 W B B NA
## 14 B W B NA
## 17 B W B NA
new.ch.data<-data.frame(ch.data2$V1, ch.data2$V2, ch.data2$V3, ch.data2$V4, ch.data2$V5, ch.data2$V6, ch.data2$V7, ch.data2$V8, ch.data2$V9, ch.data2$V10, ch.data2.row2$V1, ch.data2.row2$V2)
names(new.ch.data) <- c("ID", "Name", "Score", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "State", "Stat")
head(new.ch.data)
## ID Name Score R1 R2 R3 R4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 5 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 6 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## R5 R6 R7 State Stat
## 1 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817
## 2 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663
## 3 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640
## 4 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744
## 5 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690
## 6 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687
PGRTG <- unlist(str_extract_all(new.ch.data$Stat, "R: +\\d{3,4}"))
PGRTG <- unlist(str_extract_all(PGRTG, "\\d{3,4}"))
new.ch.data$PGRTG <- PGRTG
new.ch.data$PGRTG<-as.integer(new.ch.data$PGRTG)
head (new.ch.data)
## ID Name Score R1 R2 R3 R4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 5 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 6 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## R5 R6 R7 State Stat PGRTG
## 1 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 1794
## 2 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663 1553
## 3 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640 1384
## 4 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 1716
## 5 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690 1655
## 6 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687 1686
new.ch.data$R1n<-unlist(str_replace(new.ch.data$R1,"\\w{1}\\s*(\\d*)","\\1"))
new.ch.data$R2n<-unlist(str_replace(new.ch.data$R2,"\\w{1}\\s*(\\d*)","\\1"))
new.ch.data$R3n<-unlist(str_replace(new.ch.data$R3,"\\w{1}\\s*(\\d*)","\\1"))
new.ch.data$R4n<-unlist(str_replace(new.ch.data$R4,"\\w{1}\\s*(\\d*)","\\1"))
new.ch.data$R5n<-unlist(str_replace(new.ch.data$R5,"\\w{1}\\s*(\\d*)","\\1"))
new.ch.data$R6n<-unlist(str_replace(new.ch.data$R6,"\\w{1}\\s*(\\d*)","\\1"))
new.ch.data$R7n<-unlist(str_replace(new.ch.data$R7,"\\w{1}\\s*(\\d*)","\\1"))
head(new.ch.data)
## ID Name Score R1 R2 R3 R4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 5 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 6 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## R5 R6 R7 State Stat PGRTG R1n R2n
## 1 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 1794 39 21
## 2 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663 1553 63 58
## 3 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640 1384 8 61
## 4 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 1716 23 28
## 5 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690 1655 45 37
## 6 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687 1686 34 29
## R3n R4n R5n R6n R7n
## 1 18 14 7 12 4
## 2 4 17 16 20 7
## 3 25 21 11 13 12
## 4 2 26 5 19 1
## 5 12 13 4 14 17
## 6 11 35 10 27 21
new.ch.data$R1nr<-new.ch.data[new.ch.data$R1n, "PGRTG"]
new.ch.data$R2nr<-new.ch.data[new.ch.data$R2n, "PGRTG"]
new.ch.data$R3nr<-new.ch.data[new.ch.data$R3n, "PGRTG"]
new.ch.data$R4nr<-new.ch.data[new.ch.data$R4n, "PGRTG"]
new.ch.data$R5nr<-new.ch.data[new.ch.data$R5n, "PGRTG"]
new.ch.data$R6nr<-new.ch.data[new.ch.data$R6n, "PGRTG"]
new.ch.data$R7nr<-new.ch.data[new.ch.data$R7n, "PGRTG"]
new.ch.data$avgopprtg <-round(rowMeans( new.ch.data[ , c("R1nr", "R2nr", "R3nr", "R4nr", "R5nr", "R6nr", "R7nr")], na.rm = TRUE ))
head(new.ch.data)
## ID Name Score R1 R2 R3 R4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 5 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 6 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## R5 R6 R7 State Stat PGRTG R1n R2n
## 1 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 1794 39 21
## 2 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663 1553 63 58
## 3 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640 1384 8 61
## 4 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 1716 23 28
## 5 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690 1655 45 37
## 6 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687 1686 34 29
## R3n R4n R5n R6n R7n R1nr R2nr R3nr R4nr R5nr R6nr R7nr avgopprtg
## 1 18 14 7 12 4 1436 1563 1600 1610 1649 1663 1716 1605
## 2 4 17 16 20 7 1175 917 1716 1629 1604 1595 1649 1469
## 3 25 21 11 13 12 1641 955 1745 1563 1712 1666 1663 1564
## 4 2 26 5 19 1 1363 1507 1553 1579 1655 1564 1794 1574
## 5 12 13 4 14 17 1242 980 1663 1666 1716 1610 1629 1501
## 6 11 35 10 27 21 1399 1602 1712 1438 1365 1552 1563 1519
final.ch.data<-data.frame(new.ch.data$Name, new.ch.data$State, new.ch.data$Score, new.ch.data$PGRTG, new.ch.data$avgopprtg)
names(final.ch.data) <- c("Player Name", "State", "Number of Points", "Pre-Rating", "Opponent Rating")
final.ch.data
## Player Name State Number of Points Pre-Rating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
## Opponent Rating
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
## 7 1372
## 8 1468
## 9 1523
## 10 1554
## 11 1468
## 12 1506
## 13 1498
## 14 1515
## 15 1484
## 16 1386
## 17 1499
## 18 1480
## 19 1426
## 20 1411
## 21 1470
## 22 1300
## 23 1214
## 24 1357
## 25 1363
## 26 1507
## 27 1222
## 28 1522
## 29 1314
## 30 1144
## 31 1260
## 32 1379
## 33 1277
## 34 1375
## 35 1150
## 36 1388
## 37 1385
## 38 1539
## 39 1430
## 40 1391
## 41 1248
## 42 1150
## 43 1107
## 44 1327
## 45 1152
## 46 1358
## 47 1392
## 48 1356
## 49 1286
## 50 1296
## 51 1356
## 52 1495
## 53 1345
## 54 1206
## 55 1406
## 56 1414
## 57 1363
## 58 1391
## 59 1319
## 60 1330
## 61 1327
## 62 1186
## 63 1350
## 64 1263
write.csv(final.ch.data, file = "ChessData.csv")