1
Data Import
First, we load the libraries: 'stringr' for pattern-based text extraction, plus 'knitr' and 'kableExtra' for rendering tables.
Next, we import the tournament text file.
library(stringr)
library(knitr)
library(kableExtra)
# Import the raw tournament cross-table. Each text line becomes one row of a
# single column (V1). header = FALSE because the file starts with a dashed
# rule, not with column names.
# NOTE(review): read.csv() splits fields on commas; the single-column result
# relies on the file containing no commas -- confirm for other inputs.
# Alternative source (presumably a hosted copy of the same file):
# tournamentinfo <- read.csv("https://raw.githubusercontent.com/kleberperez1/CUNY-SPS-Data607-Project1/master/tournamentinfo.txt", header = FALSE)
tournamentinfo <- read.csv("C:/Users/Kleber/Documents/MSDS2019/DATA607/Week4/Project1/tournamentinfo.txt", header = FALSE)
# Preview the first rows to confirm the layout.
head(tournamentinfo)
## V1
## 1 -----------------------------------------------------------------------------------------
## 2 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 3 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 4 -----------------------------------------------------------------------------------------
## 5 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 6 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
# Inspect the last rows as well, to see how the final records are laid out.
tail(tournamentinfo)
## V1
## 191 63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |
## 192 MI | 15057092 / R: 1175 ->1125 | |W |B |W |B |B | | |
## 193 -----------------------------------------------------------------------------------------
## 194 64 | BEN LI |1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|
## 195 MI | 15006561 / R: 1163 ->1112 | |B |W |W |B |W |B |B |
## 196 -----------------------------------------------------------------------------------------
2
Get the information
Each player's record spans two lines. The first line begins with a pair number of one or two digits, and it is the only line that has this pattern.
The second line begins with a pair of upper-case letters (the player's state or province), and it is the only line that has that pattern.
# Discard the four header rows (rule, two title rows, rule). Row-subsetting the
# single-column data frame collapses the result to a plain vector of lines.
tournamentinfo <- tournamentinfo[-seq_len(4), ]
# Preview the first two player records.
head(tournamentinfo)
## [1] 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## [2] ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## [3] -----------------------------------------------------------------------------------------
## [4] 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## [5] MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## [6] -----------------------------------------------------------------------------------------
## 131 Levels: ----------------------------------------------------------------------------------------- ...
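With the header removed, each participant occupies three consecutive lines (player line, rating line, dashed separator), so let's split the data into two vectors: one holding every participant's first line and one holding every second line.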
# Records repeat every three entries: name/results line, state/rating line,
# dashed separator. Take every third entry starting at 1 and at 2 respectively.
playerInfo <- tournamentinfo[seq.int(from = 1, to = length(tournamentinfo), by = 3)]
ratingInfo <- tournamentinfo[seq.int(from = 2, to = length(tournamentinfo), by = 3)]
3
Extracting & Transform
Player's Name: consists of upper-case words separated by spaces (and possibly hyphens), located between the first and second "|" of line 1.
# Pair number: the first run of digits on the player line.
pairNo <- as.integer(str_extract(playerInfo, "\\d+"))
# Name: the entire field between the first and second "|". Taking the whole
# field (instead of matching a fixed count of words) keeps names of any
# length intact, e.g. four-word names are no longer cut after the third word.
Name <- str_trim(str_extract(playerInfo, "(?<=\\|)[^|]+"))
# State / province: the first word on the rating line.
Region <- str_extract(ratingInfo, "\\w+")
# Total points: the first decimal number on the player line.
Points <- as.numeric(str_extract(playerInfo, "\\d+\\.\\d+"))
# Pre-tournament rating: the digits right after the "R:" label. Anchoring on
# the label avoids guessing by digit count and ignores any provisional
# suffix that follows the number.
Rating <- as.integer(str_match(ratingInfo, "R:\\s*(\\d+)")[, 2])
# Opponent pair numbers: digits immediately followed by "|" (only the round
# cells end that way). A lookahead returns the digits directly, so a single
# pass over the character vector suffices and no list is fed back into
# stringr (which previously triggered a coercion warning).
Opponents <- str_extract_all(playerInfo, "\\d+(?=\\|)")
# Result tallies: count the round cells starting with W, L and D.
Won <- str_count(playerInfo, fixed("|W "))
Loose <- str_count(playerInfo, fixed("|L "))
Draw <- str_count(playerInfo, fixed("|D "))
4
Rating
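Opponents are identified on line 1 by a unique pattern: digits immediately followed by a "|". For each player we look up the pre-ratings of those opponents and average them.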
# Average pre-rating of each player's opponents, rounded to a whole number.
# Opponents holds, per player (in row order), the pair numbers they faced.
# Each pair number is translated to a row position via match() against pairNo,
# so the lookup stays correct even if rows are not ordered 1..n by pair number.
# A player with no rated opponents yields NaN (mean of an empty vector).
mRating <- vapply(
  Opponents,
  function(opp) {
    opponent_rows <- match(as.integer(opp), pairNo)
    round(mean(Rating[opponent_rows]), digits = 0)
  },
  numeric(1)
)
# Assemble the per-player summary table.
opData <- data.frame(Name, Region, Points, Rating, mRating, Won, Loose, Draw)
5
Show Data
# Human-readable column labels for display and export.
colnames(opData) <- c("Player's Name", "Player's State", "Total Number of Points", "Player's Pre-Rating", "Average Pre Chess Rating of Opponents", "Won", "Lost", "Draw")
# Render opData directly: wrapping it in data.frame() again would re-run name
# checking and turn the labels into syntactic names (e.g. "Player.s.Name").
kable(opData)
Player.s.Name
Player.s.State
Total.Number.of.Points
Player.s.Pre.Rating
X.Average.Pre.Chess.Rating.of.Opponents
Won
Lost
Draw
GARY HUA
ON
6.0
1794
1605
5
0
2
DAKSHESH DARURI
MI
6.0
1553
1469
6
1
0
ADITYA BAJAJ
MI
6.0
1384
1564
6
1
0
PATRICK H SCHILLING
MI
5.5
1716
1574
4
0
3
HANSHI ZUO
MI
5.5
1655
1501
4
0
3
HANSEN SONG
OH
5.0
1686
1519
4
1
2
GARY DEE SWATHELL
MI
5.0
1649
1372
5
2
0
EZEKIEL HOUGHTON
MI
5.0
1641
1468
5
2
0
STEFANO LEE
ON
5.0
1411
1523
5
2
0
ANVIT RAO
MI
5.0
1365
1554
4
1
2
CAMERON WILLIAM MC
MI
4.5
1712
1468
4
2
1
KENNETH J TACK
MI
4.5
1663
1506
3
1
2
TORRANCE HENRY JR
MI
4.5
1666
1498
4
2
1
BRADLEY SHAW
MI
4.5
1610
1515
4
2
1
ZACHARY JAMES HOUGHTON
MI
4.5
1220
1484
4
2
1
MIKE NIKITIN
MI
4.0
1604
1386
3
1
1
RONALD GRZEGORCZYK
MI
4.0
1629
1499
4
3
0
DAVID SUNDEEN
MI
4.0
1600
1480
4
3
0
DIPANKAR ROY
MI
4.0
1564
1426
3
2
2
JASON ZHENG
MI
4.0
1595
1411
4
3
0
DINH DANG BUI
ON
4.0
1563
1470
4
3
0
EUGENE L MCCLURE
MI
4.0
1555
1300
3
2
1
ALAN BUI
ON
4.0
1363
1214
4
3
0
MICHAEL R ALDRICH
MI
4.0
1229
1357
4
3
0
LOREN SCHWIEBERT
MI
3.5
1745
1363
3
3
1
MAX ZHU
ON
3.5
1579
1507
3
3
1
GAURAV GIDWANI
MI
3.5
1552
1222
3
2
1
SOFIA ADINA
MI
3.5
1507
1522
2
2
3
CHIEDOZIE OKORIE
MI
3.5
1602
1314
3
2
1
GEORGE AVERY JONES
ON
3.5
1522
1144
3
3
1
RISHI SHETTY
MI
3.5
1494
1260
3
3
1
JOSHUA PHILIP MATHEWS
ON
3.5
1441
1379
3
3
1
JADE GE
MI
3.5
1449
1277
3
3
1
MICHAEL JEFFERY THOMAS
MI
3.5
1399
1375
3
3
1
JOSHUA DAVID LEE
MI
3.5
1438
1150
3
3
1
SIDDHARTH JHA
MI
3.5
1355
1388
2
2
2
AMIYATOSH PWNANANDAM
MI
3.5
980
1385
2
3
0
BRIAN LIU
MI
3.0
1423
1539
2
3
1
JOEL R HENDON
MI
3.0
1436
1430
3
4
0
FOREST ZHANG
MI
3.0
1348
1391
3
4
0
KYLE WILLIAM MURPHY
MI
3.0
1403
1248
2
2
0
JARED GE
MI
3.0
1332
1150
2
3
2
ROBERT GLEN VASEY
MI
3.0
1283
1107
3
4
0
JUSTIN D SCHILLING
MI
3.0
1199
1327
2
4
0
DEREK YAN
MI
3.0
1242
1152
2
3
2
JACOB ALEXANDER LAVALLEY
MI
3.0
377
1358
3
4
0
ERIC WRIGHT
MI
2.5
1362
1392
2
4
1
DANIEL KHAIN
MI
2.5
1382
1356
1
3
1
MICHAEL J MARTIN
MI
2.5
1291
1286
1
2
2
SHIVAM JHA
MI
2.5
1056
1296
2
4
0
TEJAS AYYAGARI
MI
2.5
1011
1356
2
4
1
ETHAN GUO
MI
2.5
935
1495
1
3
3
JOSE C YBARRA
MI
2.0
1393
1345
1
2
0
LARRY HODGE
MI
2.0
1270
1206
1
5
0
ALEX KONG
MI
2.0
1186
1406
0
4
2
MARISA RICCI
MI
2.0
1153
1414
1
4
0
MICHAEL LU
MI
2.0
1092
1363
1
5
0
VIRAJ MOHILE
MI
2.0
917
1391
1
5
0
SEAN M MC
MI
2.0
853
1319
1
5
0
JULIA SHEN
MI
1.5
967
1330
0
3
2
JEZZEL FARKAS
ON
1.5
955
1327
1
5
1
ASHWIN BALAJI
MI
1.0
1530
1186
1
0
0
THOMAS JOSEPH HOSMER
MI
1.0
1175
1350
0
4
1
BEN LI
MI
1.0
1163
1263
0
5
2
6
Create CSV
Create the CSV file chessCSV.csv in the working directory.
# Export the summary table. row.names = FALSE keeps the automatic 1..n row
# index out of the file so the CSV contains only the labelled columns.
write.csv(opData, file = "chessCSV.csv", row.names = FALSE)