Task

The purpose of the project is to process a text file of chess tournament results and generate a csv file (that could for example be imported into a SQL database) with the following information for all of the players:

Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

Solution

“stringr” package is required for the project

Reading table using read.table function. One extra column (empty) was created because “|” is treated as the beginning of the next column. We can delete the empty column.

library(stringr)
column_names <- c("ID", "Player_Name", "Total_Ptc", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "Empty")
chess_results <- read.table("https://raw.githubusercontent.com/olgashiligin/Chess-Tournament/master/Raw%20Data", header = FALSE, sep = "|", col.names = column_names, skip = 4, fill = TRUE, stringsAsFactors = FALSE)
chess_results <- subset(chess_results,select = c(1:10))
head(chess_results,5)
##                                                                                          ID
## 1                                                                                        1 
## 2                                                                                       ON 
## 3 -----------------------------------------------------------------------------------------
## 4                                                                                        2 
## 5                                                                                       MI 
##                         Player_Name Total_Ptc    R1    R2    R3    R4
## 1  GARY HUA                             6.0   W  39 W  21 W  18 W  14
## 2  15445895 / R: 1794   ->1817          N:2   W     B     W     B    
## 3                                                                    
## 4  DAKSHESH DARURI                      6.0   W  63 W  58 L   4 W  17
## 5  14598900 / R: 1553   ->1663          N:2   B     W     B     W    
##      R5    R6    R7
## 1 W   7 D  12 D   4
## 2 W     B     W    
## 3                  
## 4 W  16 W  20 W   7
## 5 B     W     B
is.data.frame(chess_results)
## [1] TRUE

Now we have to remove empty spaces across all the table.

for (row in 1:nrow(chess_results)) {
  for(col in 1:10) {
    chess_results[row,col] <- str_trim(chess_results[row,col])
  }
}

Creating State and Pre_Score columns

for (row in 1:nrow(chess_results)) {
  chess_results$State[row] <- chess_results$ID[row+1]
  chess_results$Pre_Score[row] <- str_extract(chess_results$Player_Name[row+1], "[:space:]{1}\\d{3,4}")
}

chess_results <- subset(chess_results, !is.na(chess_results$Pre_Score))

head(chess_results,5)
##    ID         Player_Name Total_Ptc    R1    R2    R3    R4    R5    R6
## 1   1            GARY HUA       6.0 W  39 W  21 W  18 W  14 W   7 D  12
## 4   2     DAKSHESH DARURI       6.0 W  63 W  58 L   4 W  17 W  16 W  20
## 7   3        ADITYA BAJAJ       6.0 L   8 W  61 W  25 W  21 W  11 W  13
## 10  4 PATRICK H SCHILLING       5.5 W  23 D  28 W   2 W  26 D   5 W  19
## 13  5          HANSHI ZUO       5.5 W  45 W  37 D  12 D  13 D   4 W  14
##       R7 State Pre_Score
## 1  D   4    ON      1794
## 4  W   7    MI      1553
## 7  W  12    MI      1384
## 10 D   1    MI      1716
## 13 W  17    MI      1655

The following steps to be taken in order to calculate opponent’s average pre rating.

  1. Removing letters (W, L, D) from R1 to R7 columns (we do not need these letters in our project)
  2. Replacing opponent’s ID in Rs columns with opponent’s pre rating.
  3. Calculating opponent’s average pre rating.

Removing letters:

for (col in 4:10) {
  chess_results[, col] <- str_extract(chess_results[, col], "\\d+")
}

head(chess_results,5)
##    ID         Player_Name Total_Ptc R1 R2 R3 R4 R5 R6 R7 State Pre_Score
## 1   1            GARY HUA       6.0 39 21 18 14  7 12  4    ON      1794
## 4   2     DAKSHESH DARURI       6.0 63 58  4 17 16 20  7    MI      1553
## 7   3        ADITYA BAJAJ       6.0  8 61 25 21 11 13 12    MI      1384
## 10  4 PATRICK H SCHILLING       5.5 23 28  2 26  5 19  1    MI      1716
## 13  5          HANSHI ZUO       5.5 45 37 12 13  4 14 17    MI      1655

Replacing opponent’s ID in Rs columns with opponent’s pre rating.

for (row in 1:nrow(chess_results)) {
  for(col in 4:10) {
    filter<-chess_results$ID == chess_results[row,col]
    chess_results[row,col] <- chess_results[filter,12][1]
  }
}

head(chess_results,5)
##    ID         Player_Name Total_Ptc    R1    R2    R3    R4    R5    R6
## 1   1            GARY HUA       6.0  1436  1563  1600  1610  1649  1663
## 4   2     DAKSHESH DARURI       6.0  1175   917  1716  1629  1604  1595
## 7   3        ADITYA BAJAJ       6.0  1641   955  1745  1563  1712  1666
## 10  4 PATRICK H SCHILLING       5.5  1363  1507  1553  1579  1655  1564
## 13  5          HANSHI ZUO       5.5  1242   980  1663  1666  1716  1610
##       R7 State Pre_Score
## 1   1716    ON      1794
## 4   1649    MI      1553
## 7   1663    MI      1384
## 10  1794    MI      1716
## 13  1629    MI      1655

Calculating opponent’s average pre rating. Checking data frame structure first.

str(chess_results)
## 'data.frame':    64 obs. of  12 variables:
##  $ ID         : chr  "1" "2" "3" "4" ...
##  $ Player_Name: chr  "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" "PATRICK H SCHILLING" ...
##  $ Total_Ptc  : chr  "6.0" "6.0" "6.0" "5.5" ...
##  $ R1         : chr  " 1436" " 1175" " 1641" " 1363" ...
##  $ R2         : chr  " 1563" " 917" " 955" " 1507" ...
##  $ R3         : chr  " 1600" " 1716" " 1745" " 1553" ...
##  $ R4         : chr  " 1610" " 1629" " 1563" " 1579" ...
##  $ R5         : chr  " 1649" " 1604" " 1712" " 1655" ...
##  $ R6         : chr  " 1663" " 1595" " 1666" " 1564" ...
##  $ R7         : chr  " 1716" " 1649" " 1663" " 1794" ...
##  $ State      : chr  "ON" "MI" "MI" "MI" ...
##  $ Pre_Score  : chr  " 1794" " 1553" " 1384" " 1716" ...

Changing data type for Rounds in order to calculate Average pre rating for opponets.

for (col in 4:10) {
  chess_results[, col] <- as.numeric(chess_results[, col])
}

Ceating new column Opponents_Rating https://stackoverflow.com/questions/33981527/compute-the-mean-of-two-columns-in-a-dataframe

chess_results$Opponent <- round(rowMeans(chess_results[, c(4:10)], na.rm = TRUE), 0)

Final Data Frame

chess_results <- subset(chess_results,select = c(1,2,3,11,12,13))
head(chess_results,5)
##    ID         Player_Name Total_Ptc State Pre_Score Opponent
## 1   1            GARY HUA       6.0    ON      1794     1605
## 4   2     DAKSHESH DARURI       6.0    MI      1553     1469
## 7   3        ADITYA BAJAJ       6.0    MI      1384     1564
## 10  4 PATRICK H SCHILLING       5.5    MI      1716     1574
## 13  5          HANSHI ZUO       5.5    MI      1655     1501

Saving Results to CSV file

write.csv(chess_results, "chess_results.csv", row.names=FALSE)