Sung Lee Project 1

Introduction

This project reads the chess tournament text file hosted on GitHub at https://raw.githubusercontent.com/logicalschema/DATA607/master/Project%201/tournamentinfo.txt and generates a CSV file from it. Only each player’s wins, losses, and draws against other opponents are considered.

The exported CSV file will have the following format:

Player’s Name	Player’s State	Total Number of Points	Player’s Pre-Rating	Average Pre-Chess Rating of Opponents
Gary Hua	ON	6.0	1794	1605

I will also add columns for the player’s ID, the player’s number of games, and the player’s opponents. These are for internal purposes only and are not part of the exported CSV.

library(stringr)

# Read the tournament table as a character vector, one element per line.
# warn = FALSE silences readLines()'s own warnings (for example a missing
# final end-of-line) without changing the session-wide `warn` option, so any
# value the user had set is left untouched.
textFile <- readLines(
  "https://raw.githubusercontent.com/logicalschema/DATA607/master/Project%201/tournamentinfo.txt",
  warn = FALSE
)

# Preview the header plus the first three player records.
head(textFile, 13)

##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------" 
## [11] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
## [12] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [13] "-----------------------------------------------------------------------------------------"

# Build playerData: one row per player, parsed from the fixed-layout
# tournament table held in textFile.
#
# After the four header lines each player occupies three lines:
#   line 1: pair number | name | total points | one cell per round ("W  39")
#   line 2: state | USCF ID / R: pre-rating -> post-rating | ...
#   line 3: dashed separator
x <- c("ID", "Name", "State", "totalPoints", "preRating",
       "NumberofGames", "Opponents")

# Turn the two data lines of one player into a one-row data frame.
parsePlayer <- function(line1, line2) {
  row1 <- unlist(strsplit(line1, split = "|", fixed = TRUE))
  row2 <- unlist(strsplit(line2, split = "|", fixed = TRUE))

  # The pre-rating is the text between "R:" and "->". Anything from a "P"
  # onwards is stripped so that the remainder converts cleanly to a number.
  preRating <- str_match(str_trim(row2[2]), "R:(.*?)->")[, 2]
  preRating <- as.numeric(str_remove(str_trim(preRating), "P(.*)$"))

  # Round cells are everything after pair number, name and total points.
  # Only cells marked W, L or D count as games played; all other cells are
  # skipped so that no "NA" placeholder ends up in the opponent list.
  rounds <- row1[-(1:3)]
  played <- grepl("[WLD]", rounds)
  opponents <- str_extract(rounds[played], "\\d+")
  opponents <- opponents[!is.na(opponents)]

  data.frame(
    ID = as.numeric(str_trim(row1[1])),
    Name = str_trim(row1[2]),
    State = str_trim(row2[1]),
    totalPoints = as.numeric(str_trim(row1[3])),
    preRating = preRating,
    NumberofGames = sum(played),
    Opponents = paste(opponents, collapse = ","),  # comma-separated pair numbers
    stringsAsFactors = FALSE
  )
}

# Records start on line 5 (after the header) and repeat every 3 lines. A record
# needs both of its data lines, so the last start index considered is
# length(textFile) - 1; this avoids reading past the end of the file.
firstLines <- if (length(textFile) >= 6) {
  seq(from = 5, to = length(textFile) - 1, by = 3)
} else {
  integer(0)
}

# Collect the rows in a list and bind them once instead of growing the data
# frame inside a loop.
players <- lapply(
  firstLines,
  function(k) parsePlayer(textFile[k], textFile[k + 1])
)

if (length(players) > 0) {
  playerData <- do.call(rbind, players)
  rownames(playerData) <- NULL  # keep plain 1..n row names
} else {
  playerData <- data.frame(matrix(ncol = length(x), nrow = 0))
}
names(playerData) <- x

head(playerData, 10)

Average Pre-chess Rating of Opponents

With the player information data frame created, I will next create the column holding the average pre-chess rating of each player’s opponents.

# Compute, for every player, the average pre-rating of the opponents faced:
# the sum of the opponents' preRating divided by the player's NumberofGames,
# rounded to a whole number.
averageOpponents <- vapply(
  seq_len(nrow(playerData)),
  function(k) {
    # Opponents is a comma-separated list of pair numbers. Entries that are
    # not plain integers (such as the text "NA" recorded for a round without
    # an opponent, or an empty string) are discarded; otherwise they would
    # turn the whole sum into NA.
    ids <- unlist(str_split(as.character(playerData$Opponents[k]), ","))
    ids <- as.integer(ids[grepl("^[0-9]+$", ids)])

    # Look opponents up through the ID column rather than by row position.
    ratings <- playerData$preRating[match(ids, playerData$ID)]

    round(sum(ratings) / playerData$NumberofGames[k], digits = 0)
  },
  numeric(1)
)

avgOpponent <- data.frame(averageOpponents = averageOpponents)

# Add the new column to playerData
playerData <- cbind(playerData, avgOpponent)


head(playerData)

# Assemble the data frame that will be exported.
# playerData columns at this point, in order:
# "ID", "Name", "State", "totalPoints", "preRating", "NumberofGames", "Opponents", "averageOpponents"

# Human-readable headers for the exported columns.
x <- c("Player’s Name", "Player’s State", "Total Number of Points", "Player’s Pre-Rating",
       "Average Pre-Chess Rating of Opponents")

# Keep name, state, total points, pre-rating and average opponent rating
# (positions 2, 3, 4, 5 and 8) and relabel them in one step.
exportCSV <- setNames(playerData[, c(2, 3, 4, 5, 8)], x)

head(exportCSV, 13)

Sung Lee Project 1

Sung Lee

2/13/2020

Introduction

Average Pre-chess Rating of Opponents

Export the CSV