Project on RPubs
Rmd on Github
This project reads the chess tournament text file hosted on GitHub at https://raw.githubusercontent.com/logicalschema/DATA607/master/Project%201/tournamentinfo.txt and generates a CSV file. We are only interested in each player’s wins, losses, and draws against other opponents.
The exported CSV file will have the following format:
| Player’s Name | Player’s State | Total Number of Points | Player’s Pre-Rating | Average Pre-Chess Rating of Opponents |
|---|---|---|---|---|
| Gary Hua | ON | 6.0 | 1794 | 1605 |
While building the data, I will also add three helper columns: the player’s ID, the player’s number of games, and the player’s opponents. These are for internal purposes only and are not exported.
library(stringr)
# Read the raw tournament listing, one element per line of the file.
# Warnings are silenced for this single call only (via suppressWarnings)
# instead of switching the global `warn` option off and back to 0, which
# would overwrite whatever warning level the session was using before.
textFile <- suppressWarnings(
  readLines("https://raw.githubusercontent.com/logicalschema/DATA607/master/Project%201/tournamentinfo.txt")
)

# Preview the header and the first three player records.
head(textFile, 13)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
# Parse the tournament listing into `playerData`, one row per player.
#
# Layout of `textFile` (as shown in the preview above): four header lines,
# then repeating groups of three lines per player:
#   line 1: pair number | name | total points | one field per round
#   line 2: state | USCF ID / R: pre-rating -> post-rating | ...
#   line 3: a dashed separator
#
# Resulting columns:
#   ID, Name, State, totalPoints, preRating  - taken from the record
#   NumberofGames - number of rounds marked W, L or D
#   Opponents     - comma-separated opponent pair numbers; rounds that carry
#                   no opponent number are left out

playerColumns <- c("ID", "Name", "State", "totalPoints", "preRating",
                   "NumberofGames", "Opponents")

# Turn the two data lines of one player record into a one-row data frame.
parsePlayerRecord <- function(line1, line2) {
  row1 <- unlist(strsplit(line1, split = "|", fixed = TRUE))
  row2 <- unlist(strsplit(line2, split = "|", fixed = TRUE))

  # Grab the text between "R:" and "->", then strip a trailing "P..." suffix
  # (presumably a provisional-rating marker) so only the number remains.
  preRating <- str_match(str_trim(row2[2]), "R:(.*?)->")[, 2]
  preRating <- as.numeric(str_remove(str_trim(preRating), "P(.*)$"))

  # Fields 4-10 of the first line hold the seven round results.
  rounds <- row1[4:10]
  rounds <- rounds[!is.na(rounds)]

  # Only wins, losses and draws count as games played.
  gamesPlayed <- sum(grepl("[WLD]", rounds))

  # Keep only rounds that actually name an opponent, so the list never
  # contains the text "NA".
  opponentIDs <- str_extract(rounds, "\\d+")
  opponentIDs <- opponentIDs[!is.na(opponentIDs)]

  data.frame(
    ID = as.numeric(str_trim(row1[1])),
    Name = str_trim(row1[2]),
    State = str_trim(row2[1]),
    totalPoints = as.numeric(str_trim(row1[3])),
    preRating = preRating,
    NumberofGames = gamesPlayed,
    Opponents = paste(opponentIDs, collapse = ","),
    stringsAsFactors = FALSE
  )
}

# Skipping the header rows and lines that have --------------------
firstRecordLine <- 5
linesPerRecord <- 3

# A record needs both of its data lines, so the last usable start index is
# length(textFile) - 1; this avoids reading past the end of the file.
if (length(textFile) > firstRecordLine) {
  recordStarts <- seq(firstRecordLine, length(textFile) - 1, by = linesPerRecord)
} else {
  recordStarts <- integer(0)
}

# Build every row first and bind once, instead of growing the data frame
# with rbind() on each iteration.
playerRows <- lapply(recordStarts, function(i) {
  parsePlayerRecord(textFile[i], textFile[i + 1])
})

if (length(playerRows) > 0) {
  playerData <- do.call(rbind, playerRows)
} else {
  # No records found: keep an empty data frame with the expected columns.
  playerData <- data.frame(matrix(ncol = 7, nrow = 0))
  names(playerData) <- playerColumns
}

head(playerData, 10)
With the player information dataframe created, I will next create the column holding the average pre-chess rating of each player’s opponents.
# Compute each player's average opponent pre-rating and append it to
# `playerData` as the column `averageOpponents`.
#
# For every player: split the comma-separated `Opponents` string, look up
# each opponent's `preRating` by `ID`, add the ratings up, divide by the
# player's `NumberofGames`, and round to a whole number.

# Opponent IDs per player, as integer vectors. Any token that is not purely
# digits - such as a literal "NA" or an empty string - is skipped. Testing
# such tokens with is.na() does not work because they are ordinary strings,
# and one unmatched lookup would turn the whole sum into NA.
opponentIDs <- lapply(
  strsplit(as.character(playerData$Opponents), ",", fixed = TRUE),
  function(tokens) {
    as.integer(tokens[grepl("^[0-9]+$", tokens)])
  }
)

# Sum of the opponents' pre-ratings, matched on the ID column rather than on
# row names (character row-name indexing partially matches).
opponentRatingTotals <- vapply(
  opponentIDs,
  function(ids) {
    sum(playerData$preRating[match(ids, playerData$ID)])
  },
  numeric(1)
)

# Players with no games get NA instead of a division by zero.
gamesPlayed <- playerData$NumberofGames
avgOpponent <- data.frame(
  averageOpponents = ifelse(
    gamesPlayed > 0,
    round(opponentRatingTotals / gamesPlayed, digits = 0),
    NA_real_
  )
)

#Add the new column to playerData
playerData <- cbind(playerData, avgOpponent)
head(playerData)
# Build the data frame that will be exported.
# playerData columns at this point:
#   "ID", "Name", "State", "totalPoints", "preRating", "NumberofGames",
#   "Opponents", "averageOpponents"
# Only the five published columns are kept (positions 2, 3, 4, 5 and 8);
# the helper columns ID, NumberofGames and Opponents are dropped.
keptColumns <- c("Name", "State", "totalPoints", "preRating", "averageOpponents")

# Human-readable headers for the exported file, in the same order.
x <- c(
  "Player’s Name",
  "Player’s State",
  "Total Number of Points",
  "Player’s Pre-Rating",
  "Average Pre-Chess Rating of Opponents"
)

exportCSV <- setNames(playerData[keptColumns], x)
head(exportCSV, 13)
The following code will export the exportCSV dataframe as a CSV file named Chess_Tournament.csv in the present working directory.
#Export the dataframe as Chess_Tournament.csv
# row.names = FALSE keeps R's automatic row numbers out of the file; without
# it write.csv() adds an extra unnamed leading column, so the CSV would not
# match the five-column format described at the top of this document.
write.csv(exportCSV, 'Chess_Tournament.csv', row.names = FALSE)