Overview

This project parses a chess tournament’s results from a text file and writes a CSV file containing the same data in structured form, including each player’s average opponent Elo.

First, we must read the file.

# Read the raw tournament table, one character string per line of the file.
# readLines() accepts the URL itself and then opens and closes its own
# connection, so no url() object is left behind for the garbage collector;
# the result is already a character vector, so no paste() is needed.
chessTxt <- readLines("https://raw.githubusercontent.com/Kadaeux/DATA607Project1/master/data/tournamentinfo.txt")
# Lines 4-7: a dashed rule, the two records of the first player, a dashed rule.
chessTxt[4]
## [1] "-----------------------------------------------------------------------------------------"
chessTxt[5]
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
chessTxt[6]
## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
chessTxt[7]
## [1] "-----------------------------------------------------------------------------------------"

So, for each player we need to extract their name, total points, state, pre-tournament Elo, and the IDs of their opponents (used later to look up the opponents’ Elo). This information is spread across two consecutive lines per player, and each field sits in the same relative position on its line. Lines beginning with dashes can be skipped; lines beginning with a number contain the name, opponents, and total points; and the line after that contains the same player’s state and pre-tournament Elo.

The first time I did this, I stored Total Points as a double and the Elos as integers. That did not take into account the P that appears in some of these values, so I simply switched all the columns to strings to handle it better.

# Build one row per player from the raw tournament lines in chessTxt.
#
# After the three header lines, every player is described by two records
# separated by dashed rules:
#   1. a line starting with the pair number:
#        id | name | total points | one cell per round ("W  39" = result
#        code followed by the opponent's id);
#   2. a line starting with the state code:
#        state | USCF id / R: pre-rating -> post-rating | ...
# Every column is kept as character because some ratings carry a "P" and
# would not survive numeric conversion. AvgPreEnElo is only a "0"
# placeholder here; it is filled in by a later step.
columnNames <- c("Id", "Name", "State", "TotalPoints", "PreElo", "AvgPreEnElo",
                 paste0("en", 1:7))

# Opponent id from a round cell such as "W  39": everything from the first
# digit on. Cells without an opponent (e.g. "H" or "U") give "".
opponentId <- function(cell) {
  trimws(sub("^[^0-9]*", "", cell))
}

# Pre-tournament rating from a cell such as "15445895 / R: 1794   ->1817":
# the text between the first ":" and the "->". Blanks are trimmed AFTER the
# arrow is removed so that no padding is left on the stored value. Returns NA
# when the cell does not have the expected shape.
preRating <- function(cell) {
  if (is.na(cell) || !grepl(":.*->", cell)) {
    return(NA_character_)
  }
  trimws(sub("^[^:]*:(.*)->.*$", "\\1", cell))
}

# Collect the rows in a list and bind them once at the end instead of growing
# the data frame on every iteration.
playerRows <- list()
resultCells <- NULL  # cells of the most recent pair-number line
for (line in trimws(chessTxt[-seq_len(3)])) {
  cells <- trimws(strsplit(line, "|", fixed = TRUE)[[1]])
  if (grepl("^[0-9]", line)) {
    resultCells <- cells
  } else if (grepl("^[A-Z]", line) && !is.null(resultCells)) {
    # State line: combine it with the pair-number line that preceded it.
    playerRows[[length(playerRows) + 1]] <- c(
      resultCells[1:2],              # Id, Name
      cells[1],                      # State
      resultCells[3],                # TotalPoints
      preRating(cells[2]),           # PreElo
      "0",                           # AvgPreEnElo placeholder
      opponentId(resultCells[4:10])  # en1 .. en7
    )
  }
  # Anything else (dashed rules, blank lines) carries no data and is skipped.
}

chessPlayers <- as.data.frame(
  matrix(as.character(unlist(playerRows)),
         ncol = length(columnNames), byrow = TRUE),
  stringsAsFactors = FALSE
)
names(chessPlayers) <- columnNames
head(chessPlayers)
##   Id                Name State TotalPoints  PreElo AvgPreEnElo en1 en2 en3 en4
## 1  1            GARY HUA    ON         6.0 1794              0  39  21  18  14
## 2  2     DAKSHESH DARURI    MI         6.0 1553              0  63  58   4  17
## 3  3        ADITYA BAJAJ    MI         6.0 1384              0   8  61  25  21
## 4  4 PATRICK H SCHILLING    MI         5.5 1716              0  23  28   2  26
## 5  5          HANSHI ZUO    MI         5.5 1655              0  45  37  12  13
## 6  6         HANSEN SONG    OH         5.0 1686              0  34  29  11  35
##   en5 en6 en7
## 1   7  12   4
## 2  16  20   7
## 3  11  13  12
## 4   5  19   1
## 5   4  14  17
## 6  10  27  21

Now that we have our data in a structured tabular format, we can calculate the average opponents’ elo for each player. There’s very likely a better way to do this - maybe add them all up in one var ahead of time and keep a separate counter with the number of opponents.

Here, for each of the 7 possible games, I look up the Elo of the player whose ID matches the opponent, skipping games where there was no opponent and opponents whose Elo is provisional.

# Average pre-tournament Elo of each player's opponents.
#
# For every round column (en1 .. en7) the opponent's PreElo is looked up by
# player Id. A round is left out of the average when there was no opponent
# (empty cell) or when the opponent's rating is provisional (contains "P").
# The result replaces AvgPreEnElo as a numeric column; players with no
# countable opponent get NA rather than a division by zero.
opponentCols <- paste0("en", 1:7)

# Numeric rating per player, NA for provisional ratings so that they drop out
# of both the sum and the count below.
preElo <- trimws(chessPlayers$PreElo)
isProvisional <- grepl("P", preElo, fixed = TRUE)
establishedElo <- rep(NA_real_, length(preElo))
establishedElo[!isProvisional] <- as.numeric(preElo[!isProvisional])

eloSum <- numeric(nrow(chessPlayers))
gamesCounted <- integer(nrow(chessPlayers))
for (col in opponentCols) {
  # match() gives NA for empty cells, and indexing with NA gives NA, so
  # rounds without an opponent are skipped by the same test as provisional
  # opponents.
  opponentElo <- establishedElo[match(chessPlayers[[col]], chessPlayers$Id)]
  counted <- !is.na(opponentElo)
  eloSum[counted] <- eloSum[counted] + opponentElo[counted]
  gamesCounted[counted] <- gamesCounted[counted] + 1L
}

# Divide the sum by the number of rounds that actually contributed to it.
chessPlayers$AvgPreEnElo <- ifelse(gamesCounted > 0,
                                   eloSum / gamesCounted,
                                   NA_real_)
head(chessPlayers)
##   Id                Name State TotalPoints  PreElo AvgPreEnElo en1 en2 en3 en4
## 1  1            GARY HUA    ON         6.0 1794           8238  39  21  18  14
## 2  2     DAKSHESH DARURI    MI         6.0 1553          10285  63  58   4  17
## 3  3        ADITYA BAJAJ    MI         6.0 1384           6786   8  61  25  21
## 4  4 PATRICK H SCHILLING    MI         5.5 1716          11015  23  28   2  26
## 5  5          HANSHI ZUO    MI         5.5 1655           9526  45  37  12  13
## 6  6         HANSEN SONG    OH         5.0 1686           7466  34  29  11  35
##   en5 en6 en7
## 1   7  12   4
## 2  16  20   7
## 3  11  13  12
## 4   5  19   1
## 5   4  14  17
## 6  10  27  21

Now we can save the file.

# Export the player table as CSV: header row included, no row-name column.
write.csv(x = chessPlayers, file = "tournamentinfo.csv", row.names = FALSE)

If we don’t want to include the header…

# Same export without the header row. write.table() separates fields with a
# space unless told otherwise, so the comma separator (and CSV-style doubling
# of embedded quotes, as write.csv() does) is requested explicitly to make the
# file a real CSV.
write.table(chessPlayers, "tournamentinfo_noheader.csv", sep = ",", qmethod = "double", row.names = FALSE, col.names = FALSE)