Project 1

Chess Cross Table Example

Idea is to combine the relevant lines/row into one, transform the dataset into a structured table/frame and work from there to calculate the opponents’ average.

Step 1

Read file, do some manipulation to clean the data and save the clean dataset into a file, in order to easily convert it to a structured table. I don’t delete the separator “|”, as it will help later in converting the table into a structured data frame.

# open connection and read lines
library(tidyverse)

## ── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

con <- file('https://raw.githubusercontent.com/bsvmelo/CUNY/master/tournamentinfo.txt', open='r')
w<-readLines(con)

## Warning in readLines(con): incomplete final line found on 'https://
## raw.githubusercontent.com/bsvmelo/CUNY/master/tournamentinfo.txt'

head(w,8)

## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [7] "-----------------------------------------------------------------------------------------" 
## [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"

leng<-length(w)

#for loop in the lines collection and do some cleaning

for (i in 1:leng)
{
  if( i%%3 == 0 )
     {
         a<-w[i-1]
         b<-w[i]

# cleaning 
        c<-paste(a,b)
        c<-str_replace_all(c,"/","|")
        c<-str_replace_all(c,"->","|")
        c<-str_replace_all(c,"/ R:","|")
        c<-str_replace_all(c,"R:","")
        c<-str_trim(c, side="both")

# writing the cleaning data set into a temp file
        write(c,file="tempFile.txt", append=TRUE)
  }
}        

# close connection
        close(con)

# assign structured data back to a table within R
        
        out<-read.table("tempFile.txt",header = TRUE, sep="|")
       
        head(out,5)

##   Pair                       Player.Name Total Round Round.1 Round.2 Round.3
## 1   1   GARY HUA                         6.0   W  39   W  21   W  18   W  14
## 2   2   DAKSHESH DARURI                  6.0   W  63   W  58   L   4   W  17
## 3   3   ADITYA BAJAJ                     6.0   L   8   W  61   W  25   W  21
## 4   4   PATRICK H SCHILLING              5.5   W  23   D  28   W   2   W  26
## 5   5   HANSHI ZUO                       5.5   W  45   W  37   D  12   D  13
##   Round.4 Round.5 Round.6     Num    USCF.ID  Rtg..Pre     Post.   Pts    X1
## 1   W   7   D  12   D   4     ON   15445895    1794    1817      N:2   W    
## 2   W  16   W  20   W   7     MI   14598900    1553    1663      N:2   B    
## 3   W  11   W  13   W  12     MI   14959604    1384    1640      N:2   W    
## 4   D   5   W  19   D   1     MI   12616049    1716    1744      N:2   W    
## 5   D   4   W  14   W  17     MI   14601533    1655    1690      N:2   B    
##      X2    X3    X4    X5    X6    X7  X
## 1 B     W     B     W     B     W     NA
## 2 W     B     W     B     W     B     NA
## 3 B     W     B     W     B     W     NA
## 4 B     W     B     W     B     B     NA
## 5 W     B     W     B     W     B     NA

Step 2

Here I create 2 temporary tables containing the main data that will populate the final output and also data containing the Rounds data, indexed by the player’s number. Since both tables have the same number of rows, it will be easier to make a reference when calculating the average.

# temporary tables

        table1<-out[,c(1,2,11,3,13)]
        table1$Average<-0
        table2<-out[,c(1,4:10)] 
# extracting non relevant data from the pre rating column
        
        table1$Rtg..Pre<-str_extract_all(table1$Rtg..Pre,"[[:digit:]]{3,4}","")

Step 3

This is the main for loop that checks in the second table, whether the player had a win, loss or draw, and looks for the respective opponent rating, and calculate the average

Logic is to go element by element, check for W,D,L, and look for the corresponding opponent number, and store his/her rating in a variable

        for (i in 1:nrow(table2))
        {
          nogames<-0
          oppRtg<-0
          result<-0
          for (j in 2:ncol(table2)) 
          {
            if(substr(table2[i,j],1,1) == "W" | substr(table2[i,j],1,1) == "L" | substr(table2[i,j],1,1) == "D")
            {
              nogames <- nogames +1
              # here I'm extracting the numeric value from the string "W  39" while taking advantage of the fixed length
              noempty<- str_count(table2[i,j]," ")
              opp<-substr(table2[i,j],noempty+2,5)
              #here I store the opponent rating
              oppRtg<-oppRtg+as.numeric(table1[opp,5])
            }
          }
          # here the average is calculated and stored in table 1
          result<-oppRtg/nogames
          table1[i,6]<-round(result,0)
        }

Step 4

Final formatting to have the table ready to be exported to csv

        final_table<-table1[,c(2:6)] 
        names(final_table)<-c("Player’s Name","Player’s State","Total Number of Points","Player’s Pre-Rating","Average Pre Chess Rating of Opponents") 
        write.csv(final_table, file="Project1-Bruno de Melo.csv")
        head(final_table)

##                       Player’s Name Player’s State Total Number of Points
## 1  GARY HUA                                    ON                   6.0  
## 2  DAKSHESH DARURI                             MI                   6.0  
## 3  ADITYA BAJAJ                                MI                   6.0  
## 4  PATRICK H SCHILLING                         MI                   5.5  
## 5  HANSHI ZUO                                  MI                   5.5  
## 6  HANSEN SONG                                 OH                   5.0  
##   Player’s Pre-Rating Average Pre Chess Rating of Opponents
## 1                1794                                  1605
## 2                1553                                  1469
## 3                1384                                  1564
## 4                1716                                  1574
## 5                1655                                  1501
## 6                1686                                  1519