This is a project in which one must parse a tournament’s results and output a CSV file containing the data in structured form, including each player’s average opponent ELO.
First, we must read the file.
# Download the raw tournament report as a character vector, one element per
# line. Passing the URL string directly lets readLines() open the connection
# and close it again itself, so no connection is left dangling.
chessTxt <- readLines("https://raw.githubusercontent.com/Kadaeux/DATA607Project1/master/data/tournamentinfo.txt")
chessTxt[4]
## [1] "-----------------------------------------------------------------------------------------"
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [1] "-----------------------------------------------------------------------------------------"
So, we need to extract each player’s name, total points, state, and pre-tournament ELO, along with their opponents’ ELOs. For each player, this information is spread across two consecutive lines, always in the same relative positions. Lines beginning with dashes can be skipped; lines beginning with a number contain the name, opponents, and total points; and the line that follows contains that same player’s state and pre-tournament ELO.
The first time I did this, I stored Total Points as a double and the ELOs as integers, which did not take into account the P that appears in some of these values. I therefore simply switched all of the columns to strings to handle it better.
# Empty results table; the parsing step appends one row per player.
# Every column is declared as character, so values are stored exactly as they
# appear in the report (identifiers, names, point totals, ratings and the
# seven per-round opponent numbers en1..en7).
chessPlayers <- do.call(
  data.frame,
  c(
    setNames(
      rep(list(character()), 13),
      c("Id", "Name", "State", "TotalPoints", "PreElo", "AvgPreEnElo",
        paste0("en", 1:7))
    ),
    list(stringsAsFactors = FALSE)
  )
)
# Parse the tournament report and append one row per player to `chessPlayers`.
#
# From line 4 onward each player occupies two consecutive lines between dashed
# separators:
#   line 1 (starts with a digit):  id | name | total points | round 1 | ... | round 7
#   line 2 (starts with a letter): state | "<id> / R: <pre-rating> -> <post-rating>" | ...
# Values read from line 1 are kept in variables until line 2 completes the row.
for (i in seq_along(chessTxt)[-(1:3)]) {  # empty when there are fewer than 4 lines
  line <- trimws(chessTxt[i])

  # Dashed separator lines carry no data.
  if (grepl("^-", line)) {
    next
  }

  if (grepl("^[0-9]", line)) {
    fields <- strsplit(line, "|", fixed = TRUE)[[1]]
    playerId <- trimws(fields[1])
    playerName <- trimws(fields[2])
    tp <- trimws(fields[3])
    # Round cells look like "W  39": drop the leading result code and keep the
    # opponent number. Cells without an opponent collapse to "".
    opponents <- trimws(sub("^[^[:space:]]*", "", fields[4:10]))
  } else if (grepl("^[A-Z]", line)) {
    fields <- strsplit(line, "|", fixed = TRUE)[[1]]
    state <- trimws(fields[1])

    # The pre-tournament rating sits between the ":" and the "->".
    ratingField <- fields[2]
    ratingPos <- regexpr(":.*->", ratingField)
    if (is.na(ratingPos) || ratingPos < 0) {
      elo <- NA_character_
    } else {
      elo <- regmatches(ratingField, ratingPos)
      elo <- gsub(":[[:space:]]*", "", elo)
      elo <- sub("->", "", elo, fixed = TRUE)
      # Trim only after the arrow is removed; otherwise the padding between
      # the rating and the arrow would be stored with the value.
      elo <- trimws(elo)
    }

    # 0 is a placeholder for AvgPreEnElo, which is computed in a later step.
    chessPlayers[nrow(chessPlayers) + 1, ] <- c(
      list(playerId, playerName, state, tp, elo, 0),
      as.list(opponents)
    )
  }
}
head(chessPlayers)
## Id Name State TotalPoints PreElo AvgPreEnElo en1 en2 en3 en4
## 1 1 GARY HUA ON 6.0 1794 0 39 21 18 14
## 2 2 DAKSHESH DARURI MI 6.0 1553 0 63 58 4 17
## 3 3 ADITYA BAJAJ MI 6.0 1384 0 8 61 25 21
## 4 4 PATRICK H SCHILLING MI 5.5 1716 0 23 28 2 26
## 5 5 HANSHI ZUO MI 5.5 1655 0 45 37 12 13
## 6 6 HANSEN SONG OH 5.0 1686 0 34 29 11 35
## en5 en6 en7
## 1 7 12 4
## 2 16 20 7
## 3 11 13 12
## 4 5 19 1
## 5 4 14 17
## 6 10 27 21
Now that we have our data in a structured tabular format, we can calculate the average opponents’ elo for each player. There’s very likely a better way to do this - maybe add them all up in one var ahead of time and keep a separate counter with the number of opponents.
Here, for each of a player’s 7 possible games, I snag the ELO of the person whose ID matches the opponent, counting only games where there actually was an opponent and that opponent didn’t have a provisional ELO.
# Compute each player's average opponent pre-tournament rating and store it in
# chessPlayers$AvgPreEnElo as a numeric column.
#
# AvgPreEnElo must be numeric: when it is kept as character, an is.numeric()
# guard on it is never TRUE, so the accumulated total is never divided by the
# number of opponents and the column ends up holding sums instead of averages.
#
# Rules:
#   * round cells with no opponent ("" or NA) are skipped;
#   * opponents whose rating is not purely numeric (e.g. carrying a provisional
#     "P" marker) are skipped;
#   * a player with no rated opponents gets NA rather than a division by zero.
opponentCols <- paste0("en", 1:7)

# Ratings usable in the average; everything else becomes NA.
preElo <- trimws(chessPlayers$PreElo)
ratedElo <- rep(NA_real_, length(preElo))
isRated <- !is.na(preElo) & grepl("^[0-9]+$", preElo)
ratedElo[isRated] <- as.numeric(preElo[isRated])

# Look opponents up by their Id rather than by row position, so the result
# does not depend on the rows being stored in Id order.
opponentIds <- trimws(as.matrix(chessPlayers[, opponentCols, drop = FALSE]))
opponentRow <- match(opponentIds, chessPlayers$Id)
opponentElo <- matrix(
  ratedElo[opponentRow],
  nrow = nrow(chessPlayers),
  ncol = length(opponentCols)
)

avgOpponentElo <- rowMeans(opponentElo, na.rm = TRUE)
avgOpponentElo[is.nan(avgOpponentElo)] <- NA_real_  # no rated opponents at all
chessPlayers$AvgPreEnElo <- avgOpponentElo
head(chessPlayers)
## Id Name State TotalPoints PreElo AvgPreEnElo en1 en2 en3 en4
## 1 1 GARY HUA ON 6.0 1794 8238 39 21 18 14
## 2 2 DAKSHESH DARURI MI 6.0 1553 10285 63 58 4 17
## 3 3 ADITYA BAJAJ MI 6.0 1384 6786 8 61 25 21
## 4 4 PATRICK H SCHILLING MI 5.5 1716 11015 23 28 2 26
## 5 5 HANSHI ZUO MI 5.5 1655 9526 45 37 12 13
## 6 6 HANSEN SONG OH 5.0 1686 7466 34 29 11 35
## en5 en6 en7
## 1 7 12 4
## 2 16 20 7
## 3 11 13 12
## 4 5 19 1
## 5 4 14 17
## 6 10 27 21
Now we can save the file.
If we don’t want to include the header…