Chess Tournament Ratings Study And Conclusion


Summary

This is an R Markdown document for providing documentation for Studying The Chess Tournament Data, Data Manipulation, Analysis & Conclusion


R Code :

Loading Packages Used

knitr::opts_chunk$set(message = FALSE, echo = TRUE)

# Library for string manipulation/regex operations
library(stringr)
# Library for data display in tabular format
library(DT)
# Library to read text file
library(RCurl)
# Library to melt data from row to columns and vice versa
library(reshape2)
# Library to plot
library(ggplot2)

Function parsePlayer()

Function to parse each player record and arrange all data in one row. Transpose record data into additional columns

@param1 : chessdata , type: data frame

return data frame

parsePlayer <- function(chessdata)
{
     for(i in seq(1,nrow(chessdata),2))  # for every two rows
     {
          chessdata$State[i] <- chessdata[i+1,1]
          chessdata$IdRtgPrePost[i] <- chessdata[i+1,2]
          chessdata$Level[i] <- chessdata[i+1,3]
          chessdata$R1Col[i] <- chessdata[i+1,4]
          chessdata$R2Col[i] <- chessdata[i+1,5]
          chessdata$R3Col[i] <- chessdata[i+1,6]
          chessdata$R4Col[i] <- chessdata[i+1,7]
          chessdata$R5Col[i] <- chessdata[i+1,8]
          chessdata$R6Col[i] <- chessdata[i+1,9]
          chessdata$R7Col[i] <- chessdata[i+1,10]
          
     }

     # Removing the second line of record data for each player, 
     # after transposing data into columns 
     
     chessdata <- subset(chessdata, grepl("[[:digit:]]", chessdata$Pair) )
     
     return(chessdata)
}

Function getPreRating()

Function to extract pre rating of opponent ( or any player ‘pair’ number passed)

@param1: oppo , type: numeric

@param2: cleanchessdata , type: data frame

return numeric

getPreRating <- function(opponum, opponentchessdata) {
    preratingstring <- subset(opponentchessdata, as.numeric(Pair) == opponum, select = c(IdRtgPrePost))
    preratingstring <- str_trim(preratingstring)
    
    prerating <- str_replace(preratingstring, pattern = "[[:digit:]]{8}.+R: +", replacement = "")
    prerating <- as.numeric(str_extract(prerating, "[[:digit:]]{2,}"))
    
    return(prerating)
}

Function calAvgOppPreRating()

Function to parse each player record and calculate average opponent points all data in one row. Transpose into columns

@param1: playerchessdata, type: data frame (single record)

@param2: oppochessdata, type: data frame

return list( Avg opponent rating, Won game count, Losr game count, Draw game count)

Calls getPreRating()

calAvgOppPreRating <- function(playerchessdata, oppochessdata) {
    totalopporating <- 0
    gamecount <- 0
    wincount <- 0
    losecount <- 0
    drawcount <- 0
    oppovec <- c()
    
    
    iteratecols <- c("R1", "R2", "R3", "R4", "R5", "R6", "R7")
    
    # Rather than coding the same lines with hardcoding for finding opponent pair
    # number for each round # played , the columns names have been generated through
    # string and iterated trhough to get the # opponent pair number for each round
    # played. This is added to get a vector of opponents
    
    # Also extracted is if the game was won/lost/draw for the # player
    
    for (eachround in iteratecols) {
        
        playerchessdatacol <- playerchessdata[, sprintf("%s", eachround)]
        playerchessdatacol <- str_trim(playerchessdatacol)
        
        
        newoppo <- str_extract(playerchessdatacol, "\\s?([[:digit:]]{1,})")
        newoppo <- str_trim(newoppo)
        winlosedraw <- str_extract(playerchessdatacol, "\\s?[[:alpha:]]{1}")
        winlosedraw <- str_trim(winlosedraw)
        
        
        # Incrementing the respective counters based on Win Lose/Draw status
        
        ifelse(!(is.na(str_trim(newoppo))), oppovec <- c(oppovec, newoppo), oppovec <- c(oppovec, 
            NA))
        
        if (winlosedraw == "W") {
            wincount <- wincount + 1
        } else if (winlosedraw == "L") {
            losecount <- losecount + 1
        } else if (winlosedraw == "D") {
            drawcount <- drawcount + 1
        }
        
    }
    
    
    # Some values in opponent vectos can be 'NA' as the player may have a bye,
    # Removing such 'NA' values from opponent vector 'oppovec'
    
    oppovec <- na.omit(oppovec)
    
    
    # Iterate the oppovec to extract the pre rating for each opponent /every game
    # played increment game # counter by calling getPreRating() function Sum the pre
    # ratings for all the opponents of the player
    
    if (length(oppovec) > 0) {
        for (oppo in oppovec) {
            oppoprerating <- getPreRating(as.numeric(oppo), oppochessdata)
            
            totalopporating <- totalopporating + oppoprerating
            gamecount <- gamecount + 1
            
            
        }
    }
    # cat('\n','totalopporating ',totalopporating) cat('\n', ' gamecount ',
    # gamecount) cat('\n', ' wincount ', wincount) cat('\n', ' losecount ',
    # losecount) cat('\n', ' drawcount ', drawcount)
    
    
    # Calculate the Average opponent rating
    
    averageopporating <- round(totalopporating/gamecount)
    
    # Return Avg opponent rating, Games Won, Games Lost, Games Drawn
    
    return(list(AvgOppoRating = averageopporating, WinCount = wincount, LoseCount = losecount, 
        DrawCount = drawcount))
}

Function checkpostrating()

Function to parse each final player record.Gets the Prerating, avg opponent rating, and calculates the post rating based on formula

@param1: pf1 (final player 1), type: numeric

@param2: pf2 (final player 2), type: numeric

@param3: winner (assumed winner pair number), type : numeric

@param4: cleanchessdata , type: data frame

return list( pf1postrating (Final Player 1 post rating), pf2postrating (Final Player 2 post rating), Lead Player(Leading in Rating) Pair Number)

Calls get PreRating()

Performance Rating Formula

Performance Rating Formula

checkpostrating <- function(pf1, pf2, winner, cleanchessdata) {
    pf1prerating <- getPreRating(pf1, cleanchessdata)
    
    pf2prerating <- getPreRating(pf2, cleanchessdata)
    
    pf1gamestats <- calAvgOppPreRating(subset(cleanchessdata, as.numeric(Pair) == 
        pf1), cleanchessdata)
    
    pf1AvgOpponentPreRating <- pf1gamestats$AvgOppoRating
    pf1Won <- pf1gamestats$WinCount
    pf1Lost <- pf1gamestats$LoseCount
    pf1Draw <- pf1gamestats$DrawCount
    pf1gamecount <- pf1Won + pf1Lost + pf1Draw
    
    
    
    pf2gamestats <- calAvgOppPreRating(subset(cleanchessdata, as.numeric(Pair) == 
        pf2), cleanchessdata)
    
    pf2AvgOpponentPreRating <- pf2gamestats$AvgOppoRating
    pf2Won <- pf2gamestats$WinCount
    pf2Lost <- pf2gamestats$LoseCount
    pf2Draw <- pf2gamestats$DrawCount
    pf2gamecount <- pf2Won + pf2Lost + pf2Draw
    
    postratingpf1 <- 0
    postratingpf2 <- 0
    lead <- 0
    
    if (winner == pf1) {
        postratingpf1 <- ((pf1AvgOpponentPreRating * pf1gamecount) + (400 * (pf1Won - 
            pf1Lost)) + pf2prerating + 400)/(pf1gamecount + 1)
        
        postratingpf2 <- ((pf2AvgOpponentPreRating * pf2gamecount) + (400 * (pf2Won - 
            pf2Lost)) + pf1prerating - 400)/(pf2gamecount + 1)
        
        if (postratingpf1 > postratingpf2) 
            lead <- pf1 else lead <- pf2
    } else if (winner == pf2) {
        
        postratingpf1 <- ((pf1AvgOpponentPreRating * pf1gamecount) + (400 * (pf1Won - 
            pf1Lost)) + pf2prerating - 400)/(pf1gamecount + 1)
        
        postratingpf2 <- ((pf2AvgOpponentPreRating * pf2gamecount) + (400 * (pf2Won - 
            pf2Lost)) + pf1prerating + 400)/(pf2gamecount + 1)
        
        if (postratingpf1 > postratingpf2) 
            lead <- pf1 else lead <- pf2
    }
    
    
    return(list(pf1postrating = round(postratingpf1), pf2postrating = round(postratingpf2), 
        lead = lead))
    
    
}

Function checkleadpostrating()

Function to parse each player record and calculate average opponent points all data in one row. Transpose into columns

@param1: final (final two players) , type: numeric vector

@param2: cleanchessdata , type: data frame

return string

Calls checkpostrating()

checkleadpostrating <- function(final, cleanchessdata) {
    
    View(cleanchessdata)
    iffinal1wins <- checkpostrating(final[1], final[2], final[1], cleanchessdata)
    iffinal1wins
    
    f1w.final1post <- iffinal1wins$pf1postrating
    f1w.final2post <- iffinal1wins$pf2postrating
    f1w.lead <- iffinal1wins$lead
    f1w.final1post
    f1w.final2post
    
    iffinal2wins <- checkpostrating(final[1], final[2], final[2], cleanchessdata)
    f2w.final1post <- iffinal2wins$pf1postrating
    f2w.final2post <- iffinal2wins$pf2postrating
    f2w.lead <- iffinal2wins$lead
    
    POST <- str_c("If Player ", final[1], "( PostRating :", f1w.final1post, ")", 
        " Wins To Player ", final[2], "(  PostRating :", f1w.final2post, ")", "  Lead in Rating is Player", 
        f1w.lead, " Else If Player ", final[2], " ( PostRating :", f2w.final2post, 
        ") ", " Wins To Player  ", final[1], "  (  PostRating :", f2w.final1post, 
        ")", " Lead in Rating Is Player ", f2w.lead)
    
    return(POST)
    
}

Main Data Manipulation

Loading The Chess Tournament Data

# GitHub Location of tournament data file

chessdata.giturl <- "https://raw.githubusercontent.com/DataDriven-MSDA/DATA607/master/Week4P1/tournamentinfo.txt"

chessdata.gitdata <- getURL(chessdata.giturl)

# Reading text file from the GitHub location making header as false and skipping
# the first 4 lines (that represent ---- followed by two lines of headers
# followed by -----)

crudechessdata <- read.table(text = chessdata.gitdata, header = FALSE, fill = TRUE, 
    sep = "|", skip = 4, stringsAsFactors = FALSE)

# View(crudechessdata)

# Removing last NA column
crudechessdata <- subset(crudechessdata, select = -c(V11))

# Renaming Columns
colnames(crudechessdata) <- c("Pair", "PlayerName", "TotalPoints", "R1", "R2", "R3", 
    "R4", "R5", "R6", "R7")

# Filter out rows where first column match '----'
crudechessdata <- crudechessdata[-grep("-----", crudechessdata[, 1]), ]

# Form clean data frame
cleanchessdata <- parsePlayer(crudechessdata)
# View(cleanchessdata)

Finding Average Opponent Rating And Other Stats

Loop through each record of of player to find the avg opponent pre rating

for (i in seq(1, nrow(cleanchessdata), 1)) {
    
    
    cleanchessdata$PlayerName[i] <- str_trim(cleanchessdata$PlayerName[i])
    
    cleanchessdata$State[i] <- str_trim(cleanchessdata$State[i])
    
    cleanchessdata$TotalPoints[i] <- str_trim(cleanchessdata$TotalPoints[i])
    
    
    playerPreRating <- getPreRating(i, cleanchessdata)
    cleanchessdata$PlayerPreRating[i] <- playerPreRating
    
    
    gamestats <- calAvgOppPreRating(subset(cleanchessdata, as.numeric(Pair) == i), 
        cleanchessdata)
    
    # Also finding other stats like Games Won, Games Lost, Games Drawn
    
    cleanchessdata$AvgOpponentPreRating[i] <- gamestats$AvgOppoRating
    cleanchessdata$Won[i] <- gamestats$WinCount
    cleanchessdata$Lost[i] <- gamestats$LoseCount
    cleanchessdata$Draw[i] <- gamestats$DrawCount
    
    
}

Exporting Required Data To CSV

# Aggregating relevant data to form CSV file, Writing and Showing the CSV file

exportChessDF <- subset(cleanchessdata, select = c(PlayerName, State, TotalPoints, 
    PlayerPreRating, AvgOpponentPreRating, Won, Lost, Draw))
write.csv(exportChessDF, file = "ChessTournamentStats.csv", row.names = FALSE)

Displaying DataSet

datatable(exportChessDF, options = list(searching = FALSE, pageLength = 5, lengthMenu = c(5, 
    10, 15, 20)), rownames = FALSE)

file.show(file="ChessTournamentStats.csv",title="Chess Tournament Results And Ratings")
cat(readLines('ChessTournamentStats.csv'), sep = '\n')
"PlayerName","State","TotalPoints","PlayerPreRating","AvgOpponentPreRating","Won","Lost","Draw"
"GARY HUA","ON","6.0",1794,1605,5,0,2
"DAKSHESH DARURI","MI","6.0",1553,1469,6,1,0
"ADITYA BAJAJ","MI","6.0",1384,1564,6,1,0
"PATRICK H SCHILLING","MI","5.5",1716,1574,4,0,3
"HANSHI ZUO","MI","5.5",1655,1501,4,0,3
"HANSEN SONG","OH","5.0",1686,1519,4,1,2
"GARY DEE SWATHELL","MI","5.0",1649,1372,5,2,0
"EZEKIEL HOUGHTON","MI","5.0",1641,1468,5,2,0
"STEFANO LEE","ON","5.0",1411,1523,5,2,0
"ANVIT RAO","MI","5.0",1365,1554,4,1,2
"CAMERON WILLIAM MC LEMAN","MI","4.5",1712,1468,4,2,1
"KENNETH J TACK","MI","4.5",1663,1506,3,1,2
"TORRANCE HENRY JR","MI","4.5",1666,1498,4,2,1
"BRADLEY SHAW","MI","4.5",1610,1515,4,2,1
"ZACHARY JAMES HOUGHTON","MI","4.5",1220,1484,4,2,1
"MIKE NIKITIN","MI","4.0",1604,1386,3,1,1
"RONALD GRZEGORCZYK","MI","4.0",1629,1499,4,3,0
"DAVID SUNDEEN","MI","4.0",1600,1480,4,3,0
"DIPANKAR ROY","MI","4.0",1564,1426,3,2,2
"JASON ZHENG","MI","4.0",1595,1411,4,3,0
"DINH DANG BUI","ON","4.0",1563,1470,4,3,0
"EUGENE L MCCLURE","MI","4.0",1555,1300,3,2,1
"ALAN BUI","ON","4.0",1363,1214,4,3,0
"MICHAEL R ALDRICH","MI","4.0",1229,1357,4,3,0
"LOREN SCHWIEBERT","MI","3.5",1745,1363,3,3,1
"MAX ZHU","ON","3.5",1579,1507,3,3,1
"GAURAV GIDWANI","MI","3.5",1552,1222,3,2,1
"SOFIA ADINA STANESCU-BELLU","MI","3.5",1507,1522,2,2,3
"CHIEDOZIE OKORIE","MI","3.5",1602,1314,3,2,1
"GEORGE AVERY JONES","ON","3.5",1522,1144,3,3,1
"RISHI SHETTY","MI","3.5",1494,1260,3,3,1
"JOSHUA PHILIP MATHEWS","ON","3.5",1441,1379,3,3,1
"JADE GE","MI","3.5",1449,1277,3,3,1
"MICHAEL JEFFERY THOMAS","MI","3.5",1399,1375,3,3,1
"JOSHUA DAVID LEE","MI","3.5",1438,1150,3,3,1
"SIDDHARTH JHA","MI","3.5",1355,1388,2,2,2
"AMIYATOSH PWNANANDAM","MI","3.5",980,1385,2,3,0
"BRIAN LIU","MI","3.0",1423,1539,2,3,1
"JOEL R HENDON","MI","3.0",1436,1430,3,4,0
"FOREST ZHANG","MI","3.0",1348,1391,3,4,0
"KYLE WILLIAM MURPHY","MI","3.0",1403,1248,2,2,0
"JARED GE","MI","3.0",1332,1150,2,3,2
"ROBERT GLEN VASEY","MI","3.0",1283,1107,3,4,0
"JUSTIN D SCHILLING","MI","3.0",1199,1327,2,4,0
"DEREK YAN","MI","3.0",1242,1152,2,3,2
"JACOB ALEXANDER LAVALLEY","MI","3.0",377,1358,3,4,0
"ERIC WRIGHT","MI","2.5",1362,1392,2,4,1
"DANIEL KHAIN","MI","2.5",1382,1356,1,3,1
"MICHAEL J MARTIN","MI","2.5",1291,1286,1,2,2
"SHIVAM JHA","MI","2.5",1056,1296,2,4,0
"TEJAS AYYAGARI","MI","2.5",1011,1356,2,4,1
"ETHAN GUO","MI","2.5",935,1495,1,3,3
"JOSE C YBARRA","MI","2.0",1393,1345,1,2,0
"LARRY HODGE","MI","2.0",1270,1206,1,5,0
"ALEX KONG","MI","2.0",1186,1406,0,4,2
"MARISA RICCI","MI","2.0",1153,1414,1,4,0
"MICHAEL LU","MI","2.0",1092,1363,1,5,0
"VIRAJ MOHILE","MI","2.0",917,1391,1,5,0
"SEAN M MC CORMICK","MI","2.0",853,1319,1,5,0
"JULIA SHEN","MI","1.5",967,1330,0,3,2
"JEZZEL FARKAS","ON","1.5",955,1327,1,5,1
"ASHWIN BALAJI","MI","1.0",1530,1186,1,0,0
"THOMAS JOSEPH HOSMER","MI","1.0",1175,1350,0,4,1
"BEN LI","MI","1.0",1163,1263,0,5,2

Plot: Players Vs Game Results

To plot the games stats for each player, melt the dataset and reshape it.

# Using melt() from reshape package


plotwinstatsdf <- subset(exportChessDF, select = c(PlayerName, Won, Lost, Draw))
melted.plotwinstatsdf <- melt(plotwinstatsdf, id = c("PlayerName"))
## Warning in melt_dataframe(data, as.integer(id.ind - 1),
## as.integer(measure.ind - : '.Random.seed' is not an integer vector but of
## type 'NULL', so ignored
colnames(melted.plotwinstatsdf) <- c("PlayerName", "GameStats", "NoOfGames")

# Assign Color to bars per Won/Lost/Draw

colorbar <- c(Won = "magenta", Lost = "yellow", Draw = "cyan")
# Redirecting the plot to PDF file

toplot <- ggplot(melted.plotwinstatsdf, aes(x = PlayerName, y = NoOfGames, fill = GameStats)) + 
    ggtitle("Players Vs Game Results") + geom_bar(colour = "black", stat = "identity") + 
    coord_flip() + scale_fill_manual(values = colorbar)

pdf("PlayerGames.pdf", width = 5, height = 15)
toplot
dev.off()
## png 
##   2

Attempt To Find Post Rating Lead Between Top Two Players

An attempt to find the Post Ratings of the Winner , if a final game is played between the top two players. The top two players are considered to be the ones Who won the max number of games. i.e. for current data set it is Pair 2, Pair 3

fin <- subset(cleanchessdata, Won == max(exportChessDF$Won), select = c(Pair))
finals <- c(as.numeric(fin[1, 1]), as.numeric(fin[2, 1]))
finals
## [1] 2 3
# Posting who would lead in ratings 

newpost<-checkleadpostrating(finals, cleanchessdata)

cat("Today's Big Game : ", newpost)
## Today's Big Game :  If Player 2( PostRating :1758) Wins To Player 3(  PostRating :1763)  Lead in Rating is Player3 Else If Player 3 ( PostRating :1863)  Wins To Player  2  (  PostRating :1658) Lead in Rating Is Player 3
#####################################################################################