`
# Read the tournament report as raw text lines, skipping the first line of the file
chessTable <- read_lines("tournamentinfo.txt", skip = 1)
# Confirm what kind of object was returned
class(chessTable)
## [1] "character"
# Preview the first 15 elements (chessTable is a character vector, not a data.frame)
head(chessTable, n = 15)
## [1] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [2] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [3] "-----------------------------------------------------------------------------------------"
## [4] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [5] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [6] "-----------------------------------------------------------------------------------------"
## [7] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [8] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [9] "-----------------------------------------------------------------------------------------"
## [10] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [11] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [12] "-----------------------------------------------------------------------------------------"
## [13] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [14] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [15] "-----------------------------------------------------------------------------------------"
# Getting each player's USCF ID.
# Despite the variable name, this is NOT the pair number: the first run of
# digits on a player's second ("detail") line is the USCF ID.
# Detail lines are picked by their shape (two capital letters, then "|")
# instead of by a hard-coded line range, so the step does not depend on the
# report being exactly 195 lines long.
detailRows <- chessTable[str_detect(chessTable, "^\\s*[A-Z]{2}\\s*\\|")]
playerNumber <- as.integer(str_extract(detailRows, "\\d+"))
head(playerNumber, 10)
## [1] 15445895 14598900 14959604 12616049 14601533 15055204 11146376 15142253
## [9] 14954524 14150362
# Getting the Player's Names
# A player's first line starts with the pair number followed by "|"; the name
# is the first pipe-delimited field after it. Rows are selected by that shape
# rather than by a hard-coded line range.
nameRows <- chessTable[str_detect(chessTable, "^\\s*\\d+\\s*\\|")]
# Capture the text between the first two pipes and strip the padding spaces of
# the fixed-width field so they do not leak into the final table / CSV.
playerName <- str_trim(str_match(nameRows, "\\|([^|]+)\\|")[, 2])
head(playerName, 10)
# First ten values, now without the surrounding spaces:
#   "GARY HUA", "DAKSHESH DARURI", "ADITYA BAJAJ", "PATRICK H SCHILLING",
#   "HANSHI ZUO", "HANSEN SONG", "GARY DEE SWATHELL", "EZEKIEL HOUGHTON",
#   "STEFANO LEE", "ANVIT RAO"
# Getting the Player's State/Province
# The state/province code is the first pair of capital letters on a player's
# second ("detail") line. Detail lines are selected by that shape rather than
# by a hard-coded line range.
detailRows <- chessTable[str_detect(chessTable, "^\\s*[A-Z]{2}\\s*\\|")]
playerLocation <- str_extract(detailRows, "[A-Z]{2}")
head(playerLocation, 10)
## [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI"
# Getting the Player's (pre-tournament) Rating
# The pre-rating is the number that follows "R:" on a player's second
# ("detail") line. Detail lines are selected by their shape rather than by a
# hard-coded line range.
detailRows <- chessTable[str_detect(chessTable, "^\\s*[A-Z]{2}\\s*\\|")]
# A single capture group yields one value per line; this avoids passing the
# list returned by str_extract_all() through str_replace_all().
playerRating <- as.integer(str_match(detailRows, "R:\\s*(\\d+)")[, 2])
head(playerRating, 10)
## [1] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365
# Getting the Player's Points
# Total points appear as "<digit>.<digit>" on a player's first line (the line
# that starts with the pair number). Rows are selected by that shape rather
# than by a hard-coded line range.
nameRows <- chessTable[str_detect(chessTable, "^\\s*\\d+\\s*\\|")]
# The dot is escaped so it only matches a literal decimal point; unescaped it
# matches any character. The result is kept as character (e.g. "6.0").
playerPoints <- str_extract(nameRows, "\\d\\.\\d")
head(playerPoints, 10)
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0"
# Average pre-tournament rating of each player's opponents.
# This replaces the former constant placeholder (preRatingOpp <- 1).
# A player's first line starts with the pair number; its round cells look like
# "|W 39" (result code, then the opponent's pair number).
nameRows <- chessTable[str_detect(chessTable, "^\\s*\\d+\\s*\\|")]
pairNumber <- as.integer(str_extract(nameRows, "\\d+"))
# Assumes playerRating holds one entry per player in the same order as the
# player lines of chessTable; fail loudly if the lengths disagree.
stopifnot(length(pairNumber) == length(playerRating))
# Only cells with result code W, L or D followed by an opponent number are
# used; cells without an opponent number are ignored.
# NOTE(review): confirm W/L/D is the intended set of result codes.
roundCells <- str_extract_all(nameRows, "\\|[WLD]\\s*\\d+")
preRatingOpp <- vapply(
  roundCells,
  function(cells) {
    # No recorded opponents: no average can be computed
    if (length(cells) == 0L) {
      return(NA_real_)
    }
    opponents <- as.integer(str_extract(cells, "\\d+"))
    # Look opponents up by pair number; the mean is rounded to a whole number
    round(mean(playerRating[match(opponents, pairNumber)]))
  },
  numeric(1)
)
# Assemble the final table (one row per player)
finalChessTable <- data.frame(playerName, playerLocation, playerPoints, playerRating, preRatingOpp)
head(finalChessTable)
## playerName playerLocation playerPoints playerRating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
# (The previously pasted preRatingOpp output showed the placeholder value 1 on
# every row; it is omitted because the column is now computed. Re-run
# head(finalChessTable) to see the current values.)
# Write the table to disk (row names are written as the first column)
write.csv(finalChessTable, file="newchesstable.csv")