Chess Tournament Results

Given a text file with chess tournament results where the information has some structure. We need to create an R Markdown file that generates a .CSV file that could be imported into a SQL database.

Overview

The structure of the first 5 entries in the text file is shown below

knitr::include_graphics("https://raw.githubusercontent.com/henryvalentine/MSDS2019/master/Classes/DATA%20607/Projects/Project1/tournament.png")

From the image above, each player has:

Player’s specific inromation which include:

  • Player’s Name
  • Player’s State
  • Total Number of Points
  • Player’s Pre-Rating
  • Post-Chess Rating

For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605 1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.

Each player has a record of seven matches

Player’s match inromation include:

  • Round (1 - 7)
  • Outcome (Win, Loss, Draw)
  • Opponent’s Player Number

The players’ information when the Opponents’ average pre-chess ratings are computed, will be extracted and exported to a .csv file with the data ready for export/upload to a database

Load raw data

# Read the text file directly from the host
tournaments <- read.table('https://raw.githubusercontent.com/henryvalentine/MSDS2019/master/Classes/DATA%20607/Projects/Project1/tournamentinfo.txt',sep="\n",stringsAsFactors = F,skip=4)
w <- str_extract_all(tournaments[[1]], '(\\w.+)')
df <- data.frame(matrix(unlist(w), byrow=T))[1]

Extract required values

# matches <- data.frame()
extractValues <- function()
{
  # FOR PLAYERS
  ids <- vector()
  names <- vector()
  uscfIds <- vector()
  preRatings <- vector()
  postRatings <- vector()
  states <- vector() 
  points <- vector()
  
  # FOR MATCHES
  opponents <- vector()
  matches <- vector() 
  playerIds <- vector()
  
  # PROCESS THE RAW DATA
   i <- 1
   len <- nrow(df)
   while(i < len)
   {
      v <- df[i,] #FOCUSES ON LINE i (ODD ROW) TO EXTRACT PLAYER INFO
      x <- df[i+1,] # FOCUSES ON LINE i+1 (EVEN ROW) TO EXTRACT GAMES INFO
      
      
      playerNum <- as.numeric(str_extract(v, '\\d{1,}'))
      name <- str_trim(str_extract(v, '[[:alpha:] ]{2,}'), side='both')
      uscfId <- as.numeric(str_extract(x, '\\d+(?=)'))
      preRating <- as.numeric(str_extract_all(x, '(?<=R: ).\\d+(?=)'))
      postRating <- as.numeric(str_extract(x, '(?<=->).\\d+(?=)'))
      state <-   str_extract(x, '[A-Z]{2,}')
      point <- as.numeric(str_extract(v, '\\d+(\\.\\d+)'))
      games <- str_extract_all(v, '([A-Z] + \\d+)')
      game1 <- games[[1]][1]
      game2 <- games[[1]][2]
      game3 <- games[[1]][3]
      game4 <- games[[1]][4]
      game5 <- games[[1]][5]
      game6 <- games[[1]][6]
      game7 <- games[[1]][7]
      
      game1Outcome <- str_extract(game1, '\\w')
      match1 <- sprintf('1 : %s', game1Outcome)
      game1Opponent <- str_extract(game1, '\\d+')
      
      game2Outcome <- str_extract(game2, '\\w')
      match2 <- sprintf('2 : %s', game2Outcome)
      game2Opponent <- str_extract(game2, '\\d+')
      
      game3Outcome <- str_extract(game3, '\\w')
      match3 <- sprintf('3 : %s', game3Outcome)
      game3Opponent <- str_extract(game3, '\\d+')
      
      game4Outcome <- str_extract(game4, '\\w')
      match4 <- sprintf('4 : %s', game4Outcome)
      game4Opponent <- str_extract(game4, '\\d+')
      
      game5Outcome <- str_extract(game5, '\\w')
      match5 <- sprintf('5 : %s', game5Outcome)
      game5Opponent <- str_extract(game5, '\\d+')
      
      game6Outcome <- str_extract(game6, '\\w')
      match6 <- sprintf('6 : %s', game6Outcome)
      game6Opponent <- str_extract(game6, '\\d+')
      
      game7Outcome <- str_extract(game7, '\\w')
      match7 <- sprintf('7 : %s', game7Outcome)
      game7Opponent <- str_extract(game7, '\\d+')
      
      ids <- c(ids, playerNum)
      names <- c(names, name)
      uscfIds <- c(uscfIds, uscfId)
      preRatings <- c(preRatings, preRating)
      postRatings <- c(postRatings, postRating)
      states <- c(states, state) 
      points <- c(points, point)
      
      
      opponents <- append(opponents, c(as.numeric(game1Opponent), as.numeric(game2Opponent),
                   as.numeric(game3Opponent), as.numeric(game4Opponent), as.numeric(game5Opponent), 
                   as.numeric(game6Opponent), as.numeric(game7Opponent)), after = length(opponents))
      
      # REPLICATE EACH PLAYER ID ACCORDING TO THE NUMBER OF GAMES PLAYED (7 GAMES)
      playerIds <- append(playerIds, c(replicate(7, playerNum)), after = length(playerIds))
      
      matches <- append(matches, c(match1, match2, match3, match4, match5, match6, match7), after = length(matches))
      
      i <- i + 2 # INCREMENTING i BY 2 HELPS TO MOVE THE FOCUS TO THE NEXT ODD ROW
   }
   
   # PLAYERS
   players <- data.frame(id = ids, name = names, uscfId = uscfIds, preRating = preRatings, postRating = postRatings, 
                         state = states, point = points)
   
   #GAMES
   matchesPlayed <- data.frame(id = playerIds, rounds = matches, opponents = opponents)
   
   return(list(players = players, matches = matchesPlayed))
}
dfAll <- extractValues()

Players

playersInfo <- na.omit(dfAll$players)

datatable(playersInfo, colnames= c("Player Id", "Name", "UscfId", "Pre-Rating", "Post-Rating", "State", "Total Points"), class = 'cell-border stripe', options = list(
  initComplete = JS(
    "function(settings, json) {",
    "$(this.api().table().header()).css({'background-color': 'steelblue', 'color': '#fff', 'text-align': 'center !important'});",
    "$(this.api().table().body()).css({'color': '#000', 'text-align': 'center !important'});",
    "}")
))

Games played

gamesInfo <- na.omit(dfAll$matches)

Retrieve each player’s Opponents’ pre-chess rating from the Player Info dataset

t <- vector()
for(i in na.omit(gamesInfo$opponents))
{
  f <- playersInfo$preRating[playersInfo$id == i]
  t <- c(t, f)
}

Add the Opponents’ pre-chess rating column to the Games Info dataset

gamesInfo['opponentPreRating'] <- t
datatable(gamesInfo, colnames= c("Player Id", "Round : Outcome", "Opponents", "Opponent's Pre-rating"), class = 'cell-border stripe', options = list(
  initComplete = JS(
    "function(settings, json) {",
    "$(this.api().table().header()).css({'background-color': 'steelblue', 'color': '#fff', 'text-align': 'center !important'});",
    "$(this.api().table().body()).css({'color': '#000', 'text-align': 'center !important'});",
    "}")
))

Final result

Calculate each Player’s opponents’ average Pre-chess ratings, add the values to the Players’ Info dataset to get the final result

finalResult <-  
    gamesInfo %>%
    group_by(id) %>% summarise(opponents_avg_pre_rating = round(mean(opponentPreRating))) %>%
    inner_join(playersInfo, by="id") %>%
    select(id, name, uscfId, preRating, postRating, opponents_avg_pre_rating, state, point)

View Final result

datatable(finalResult, colnames= c("Player Id", "Name", "UscfId", "Pre-Rating", "Post-Rating", "Opp. Ave. Pre-rating", "State", "Total Points"), class = 'cell-border stripe', options = list(
  initComplete = JS(
    "function(settings, json) {",
    "$(this.api().table().header()).css({'background-color': '#337ab7', 'color': '#fff', 'text-align': 'center !important'});",
    "$(this.api().table().body()).css({'color': '#000', 'text-align': 'center !important'});",
    "}")
))

Export Final result to a .csv file with the data ready for upload to a Database

chessPlayers.csv

write.csv(finalResult, "ChessPlayers.csv", row.names=FALSE)

Session Info

sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] bindrcpp_0.2.2 DT_0.5         stringr_1.3.1  dplyr_0.7.8   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.0       knitr_1.21       bindr_0.1.1      magrittr_1.5    
##  [5] tidyselect_0.2.5 xtable_1.8-3     R6_2.3.0         rlang_0.3.1     
##  [9] tools_3.5.2      xfun_0.4         htmltools_0.3.6  crosstalk_1.0.0 
## [13] yaml_2.2.0       assertthat_0.2.0 digest_0.6.18    tibble_1.4.2    
## [17] crayon_1.3.4     shiny_1.2.0      later_0.7.5      purrr_0.2.5     
## [21] promises_1.0.1   htmlwidgets_1.3  mime_0.6         glue_1.3.0      
## [25] evaluate_0.12    rmarkdown_1.11   stringi_1.2.4    compiler_3.5.2  
## [29] pillar_1.3.1     jsonlite_1.6     httpuv_1.4.5.1   pkgconfig_2.0.2