Loading and parsing data

The data file is hosted on GitHub.
Tournament Info

require(stringr)
## Loading required package: stringr
# Parse a chess tournament cross table into one row per player.
#
# Every player occupies two "|"-delimited lines in the report:
#   1. a line starting with the pair number, holding the player's name, total
#      points and one result per round (e.g. "W  39");
#   2. a line containing "-> <digits>", holding the state and "R: <rating>".
# All other lines (headers, separators) are ignored.
#
# Args:
#   filepath: URL of the report. It is handed to url(), so any scheme url()
#     accepts works (for example https:// or file://).
#
# Returns:
#   A data.frame with one row per player and these character columns:
#   pair_num, name, total_pts, opponents_as_csv (comma-separated pair numbers
#   of the opponents actually played), state, pre_rating and
#   avg_opponents_rating (always "0"; a placeholder for the caller to fill).
#   A field that cannot be found is NA instead of shifting the other columns.
#   Input without any player yields a zero-row data.frame with the same columns.
processFile <- function(filepath) {
  column_names <- c(
    "pair_num",
    "name",
    "total_pts",
    "opponents_as_csv",
    "state",
    "pre_rating",
    "avg_opponents_rating"
  )

  # Upper-case names of two to four words, or ending in a hyphenated surname.
  name_pattern <- paste0(
    "^[[:upper:]]+[[:space:]]+[[:upper:]]*[[:space:]]*[[:upper:]]*[[:space:]]*[[:upper:]]+$",
    "|^[[:upper:]]+[[:space:]]+[[:upper:]]*[[:space:]]*[[:upper:]]*-[[:upper:]]+$"
  )
  round_pattern <- "^[[:upper:]][[:space:]]+[[:digit:]]+$"

  # Split one report line on "|" and strip the padding around every field.
  split_fields <- function(line) {
    trimws(strsplit(line, "|", fixed = TRUE)[[1]])
  }

  # First match of `pattern` across `fields`, or NA when nothing matches, so
  # every record always contributes exactly one value per column.
  first_match <- function(fields, pattern) {
    hits <- regmatches(fields, regexpr(pattern, fields))
    if (length(hits) > 0) hits[[1]] else NA_character_
  }

  con <- url(filepath, "r")
  # Close the connection even when reading or parsing fails.
  on.exit(close(con), add = TRUE)
  lines <- readLines(con)

  # At most one record per line, so this is a safe upper bound.
  records <- vector("list", length(lines))
  n_records <- 0L
  player <- NULL

  for (line in lines) {
    if (grepl("^[[:space:]]*[[:digit:]]", line)) {
      fields <- split_fields(line)
      # Only rounds played against an opponent carry a pair number; byes and
      # unplayed rounds have a bare letter and are skipped.
      rounds <- regmatches(fields, regexpr(round_pattern, fields))
      opponents <- sub("^[[:upper:]][[:space:]]+", "", rounds)
      player <- c(
        first_match(fields, "^[[:digit:]]+$"),
        first_match(fields, name_pattern),
        # The dot is escaped so that only a literal decimal point matches.
        first_match(fields, "^[[:digit:]]+\\.[[:digit:]]$"),
        paste(opponents, collapse = ",")
      )
    } else if (grepl("->[[:space:]]*[[:digit:]]+", line)) {
      if (is.null(player)) {
        # A rating line without a preceding player line cannot form a record.
        next
      }
      fields <- split_fields(line)
      rating_field <- first_match(fields, "R:[[:space:]]+[[:digit:]]+")
      n_records <- n_records + 1L
      records[[n_records]] <- c(
        player,
        first_match(fields, "[[:upper:]]{2}"),
        sub("^R:[[:space:]]+", "", rating_field),
        "0"
      )
      # Do not let this player's data leak into a later rating line.
      player <- NULL
    }
  }

  values <- matrix(
    as.character(unlist(records[seq_len(n_records)])),
    nrow = n_records,
    ncol = length(column_names),
    byrow = TRUE
  )
  # stringsAsFactors is passed explicitly instead of changing the global option.
  players_list <- as.data.frame(values, stringsAsFactors = FALSE)
  colnames(players_list) <- column_names
  players_list
}
# Download the tournament report and parse it into one row per player.
df <- processFile("https://raw.githubusercontent.com/binishkurian/DATA-607/master/project-01/tournamentinfo.txt")
# pre_rating is needed as a number for the rating arithmetic that follows.
df[["pre_rating"]] <- as.numeric(as.character(df[["pre_rating"]]))
head(df)
##   pair_num                name total_pts     opponents_as_csv state
## 1        1            GARY HUA       6.0   39,21,18,14,7,12,4    ON
## 2        2     DAKSHESH DARURI       6.0   63,58,4,17,16,20,7    MI
## 3        3        ADITYA BAJAJ       6.0  8,61,25,21,11,13,12    MI
## 4        4 PATRICK H SCHILLING       5.5    23,28,2,26,5,19,1    MI
## 5        5          HANSHI ZUO       5.5  45,37,12,13,4,14,17    MI
## 6        6         HANSEN SONG       5.0 34,29,11,35,10,27,21    OH
##   pre_rating avg_opponents_rating
## 1       1794                    0
## 2       1553                    0
## 3       1384                    0
## 4       1716                    0
## 5       1655                    0
## 6       1686                    0

Calculate the average pre-tournament chess rating of each player's opponents

# Average pre-tournament rating of each player's opponents, rounded down.
#
# Opponents are resolved by matching their pair number against df$pair_num
# rather than by row name, so the result stays correct even if the rows of df
# are reordered or filtered. A player without any recorded opponent, or with
# an opponent whose rating is unavailable, gets NA.
pair_numbers <- as.integer(as.character(df$pair_num))
df$avg_opponents_rating <- vapply(
  strsplit(as.character(df$opponents_as_csv), ",", fixed = TRUE),
  function(opponent_ids) {
    if (length(opponent_ids) == 0) {
      return(NA_real_)
    }
    # incomparables = NA keeps a missing id from matching a missing pair number.
    rows <- match(as.integer(opponent_ids), pair_numbers, incomparables = NA)
    ratings <- df$pre_rating[rows]
    floor(sum(ratings) / length(ratings))
  },
  numeric(1)
)

# Keep only the columns that go into the final report; the average opponent
# rating is converted to a number while the new data frame is assembled.
new_df <- with(
  df,
  data.frame(
    name = name,
    state = state,
    total_pts = total_pts,
    pre_rating = pre_rating,
    avg_opponents_rating = as.numeric(as.character(avg_opponents_rating))
  )
)

head(new_df)
##                  name state total_pts pre_rating avg_opponents_rating
## 1            GARY HUA    ON       6.0       1794                 1605
## 2     DAKSHESH DARURI    MI       6.0       1553                 1469
## 3        ADITYA BAJAJ    MI       6.0       1384                 1563
## 4 PATRICK H SCHILLING    MI       5.5       1716                 1573
## 5          HANSHI ZUO    MI       5.5       1655                 1500
## 6         HANSEN SONG    OH       5.0       1686                 1518

CSV file

# Save the report to players.csv in the working directory, without row names.
write.csv(x = new_df, file = "players.csv", row.names = FALSE)

The CSV file created by this project is also hosted on GitHub.
Players.csv