knitr::opts_chunk$set(echo = TRUE)
library(RCurl)
library(stringr)
library(DT)

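Download and store the raw data from the GitHub repository. `read.csv` produced an error until I added `text=` in front of the downloaded value: `getURL` returns the file's contents as a single string, whereas the first argument of `read.csv` (`file`) expects a path or connection, so the contents have to be passed through the `text` argument instead.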

# Fetch the tournament file's contents as one string, then parse that string.
# `text =` is required because `url` holds the downloaded text itself, not a
# path that read.csv could open.
url <- getURL(
  "https://raw.githubusercontent.com/haobruce/CUNY/master/DATA607/Project1/tournamentinfo.txt"
)
chess <- read.csv(text = url, stringsAsFactors = FALSE)

# Zero-row results table: one column per reported field plus the seven
# per-round opponent pair numbers used later to compute the average.
df <- data.frame(
  PlayerName = character(),
  PlayerState = character(),
  TotalPoints = numeric(),
  PreRating = integer(),
  OpponentAvgPreRating = integer(),
  Opp1 = integer(),
  Opp2 = integer(),
  Opp3 = integer(),
  Opp4 = integer(),
  Opp5 = integer(),
  Opp6 = integer(),
  Opp7 = integer(),
  stringsAsFactors = FALSE
)


# Parse the fixed-width tournament text into one data-frame row per player.
# Each player spans two consecutive lines: the first holds the pair number,
# name, total points and per-round opponent numbers; the line after it holds
# the state and the pre-tournament rating.
# Assumes `chess` has a single column of raw text lines (the same assumption
# the row indexing `chess[i, ]` relied on).
raw_lines <- chess[[1]]

# A player line has a pair number in columns 4-5. Detecting it with a regex
# instead of as.numeric() avoids an "NAs introduced by coercion" warning for
# every separator, header and state line.
player_idx <- which(grepl("^ *[0-9]+ *$", str_sub(raw_lines, 4, 5)))

player_lines <- raw_lines[player_idx]
# Indexing past the end yields NA, so a trailing player line with no detail
# line simply produces NA state and rating.
detail_lines <- raw_lines[player_idx + 1L]

# Opponent pair numbers sit in 2-character fields starting at columns
# 51, 57, ..., 87 (one per round); blank fields become NA.
opp_starts <- seq(51L, by = 6L, length.out = 7L)
opponents <- lapply(opp_starts, function(start) {
  as.numeric(str_sub(player_lines, start, start + 1L))
})
names(opponents) <- paste0("Opp", seq_along(opp_starts))

players <- data.frame(
  PlayerName = str_trim(str_sub(player_lines, 9, 40)),
  PlayerState = str_sub(detail_lines, 4, 5),
  TotalPoints = as.numeric(str_sub(player_lines, 42, 44)),
  PreRating = as.numeric(str_sub(detail_lines, 23, 26)),
  OpponentAvgPreRating = rep(0, length(player_idx)),  # filled in later
  opponents,
  stringsAsFactors = FALSE
)

# Append every parsed row with a single rbind instead of growing df one row
# at a time inside a loop.
df <- rbind(df, players)


# Fill OpponentAvgPreRating: for each player, look up the PreRating of every
# opponent (the Opp1..Opp7 values are row numbers into df) and store the
# rounded mean, ignoring rounds with no opponent.
opp_cols <- paste0("Opp", 1:7)
opp_idx <- as.matrix(df[, opp_cols, drop = FALSE])

# Treat non-positive indices as "no opponent" so they cannot silently drop or
# exclude elements during vector indexing and misalign the matrix below.
opp_idx[!is.na(opp_idx) & opp_idx < 1] <- NA

# NA or out-of-range indices yield NA ratings, which rowMeans then skips.
opp_ratings <- matrix(
  df[["PreRating"]][as.vector(opp_idx)],
  nrow = nrow(df),
  ncol = length(opp_cols)
)

# Vectorized over all rows, so an empty df is handled without the
# 1:nrow(df) pitfall (which would iterate over rows 1 and 0).
df[["OpponentAvgPreRating"]] <- round(rowMeans(opp_ratings, na.rm = TRUE), 0)


# write df to csv file
#write.csv(df[, c('PlayerName', 'PlayerState', 'TotalPoints', 'PreRating', 'OpponentAvgPreRating')],
#          '/Users/brucehao/Google Drive/CUNY/git/DATA607/Project1/chess.csv')


# Display the five summary columns with DT's datatable().
# Static alternative limited to the first 10 rows:
#knitr::kable(head(df[, c('PlayerName', 'PlayerState', 'TotalPoints', 'PreRating', 'OpponentAvgPreRating')], 10))
datatable(
  df[, c(
    "PlayerName",
    "PlayerState",
    "TotalPoints",
    "PreRating",
    "OpponentAvgPreRating"
  )]
)