The purpose of the project is to process a text file of chess tournament results and generate a csv file (that could for example be imported into a SQL database) with the following information for all of the players:
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
“stringr” package is required for the project
Reading table using read.table function. One extra column (empty) was created because “|” is treated as the beginning of the next column. We can delete the empty column.
library(stringr)
column_names <- c("ID", "Player_Name", "Total_Ptc", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "Empty")
chess_results <- read.table("https://raw.githubusercontent.com/olgashiligin/Chess-Tournament/master/Raw%20Data", header = FALSE, sep = "|", col.names = column_names, skip = 4, fill = TRUE, stringsAsFactors = FALSE)
chess_results <- subset(chess_results,select = c(1:10))
head(chess_results,5)
## ID
## 1 1
## 2 ON
## 3 -----------------------------------------------------------------------------------------
## 4 2
## 5 MI
## Player_Name Total_Ptc R1 R2 R3 R4
## 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 15445895 / R: 1794 ->1817 N:2 W B W B
## 3
## 4 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 5 14598900 / R: 1553 ->1663 N:2 B W B W
## R5 R6 R7
## 1 W 7 D 12 D 4
## 2 W B W
## 3
## 4 W 16 W 20 W 7
## 5 B W B
is.data.frame(chess_results)
## [1] TRUE
Now we have to remove empty spaces across all the table.
for (row in 1:nrow(chess_results)) {
for(col in 1:10) {
chess_results[row,col] <- str_trim(chess_results[row,col])
}
}
Creating State and Pre_Score columns
for (row in 1:nrow(chess_results)) {
chess_results$State[row] <- chess_results$ID[row+1]
chess_results$Pre_Score[row] <- str_extract(chess_results$Player_Name[row+1], "[:space:]{1}\\d{3,4}")
}
chess_results <- subset(chess_results, !is.na(chess_results$Pre_Score))
head(chess_results,5)
## ID Player_Name Total_Ptc R1 R2 R3 R4 R5 R6
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7 D 12
## 4 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16 W 20
## 7 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11 W 13
## 10 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26 D 5 W 19
## 13 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13 D 4 W 14
## R7 State Pre_Score
## 1 D 4 ON 1794
## 4 W 7 MI 1553
## 7 W 12 MI 1384
## 10 D 1 MI 1716
## 13 W 17 MI 1655
The following steps to be taken in order to calculate opponent’s average pre rating.
Removing letters:
for (col in 4:10) {
chess_results[, col] <- str_extract(chess_results[, col], "\\d+")
}
head(chess_results,5)
## ID Player_Name Total_Ptc R1 R2 R3 R4 R5 R6 R7 State Pre_Score
## 1 1 GARY HUA 6.0 39 21 18 14 7 12 4 ON 1794
## 4 2 DAKSHESH DARURI 6.0 63 58 4 17 16 20 7 MI 1553
## 7 3 ADITYA BAJAJ 6.0 8 61 25 21 11 13 12 MI 1384
## 10 4 PATRICK H SCHILLING 5.5 23 28 2 26 5 19 1 MI 1716
## 13 5 HANSHI ZUO 5.5 45 37 12 13 4 14 17 MI 1655
Replacing opponent’s ID in Rs columns with opponent’s pre rating.
for (row in 1:nrow(chess_results)) {
for(col in 4:10) {
filter<-chess_results$ID == chess_results[row,col]
chess_results[row,col] <- chess_results[filter,12][1]
}
}
head(chess_results,5)
## ID Player_Name Total_Ptc R1 R2 R3 R4 R5 R6
## 1 1 GARY HUA 6.0 1436 1563 1600 1610 1649 1663
## 4 2 DAKSHESH DARURI 6.0 1175 917 1716 1629 1604 1595
## 7 3 ADITYA BAJAJ 6.0 1641 955 1745 1563 1712 1666
## 10 4 PATRICK H SCHILLING 5.5 1363 1507 1553 1579 1655 1564
## 13 5 HANSHI ZUO 5.5 1242 980 1663 1666 1716 1610
## R7 State Pre_Score
## 1 1716 ON 1794
## 4 1649 MI 1553
## 7 1663 MI 1384
## 10 1794 MI 1716
## 13 1629 MI 1655
Calculating opponent’s average pre rating. Checking data frame structure first.
str(chess_results)
## 'data.frame': 64 obs. of 12 variables:
## $ ID : chr "1" "2" "3" "4" ...
## $ Player_Name: chr "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" "PATRICK H SCHILLING" ...
## $ Total_Ptc : chr "6.0" "6.0" "6.0" "5.5" ...
## $ R1 : chr " 1436" " 1175" " 1641" " 1363" ...
## $ R2 : chr " 1563" " 917" " 955" " 1507" ...
## $ R3 : chr " 1600" " 1716" " 1745" " 1553" ...
## $ R4 : chr " 1610" " 1629" " 1563" " 1579" ...
## $ R5 : chr " 1649" " 1604" " 1712" " 1655" ...
## $ R6 : chr " 1663" " 1595" " 1666" " 1564" ...
## $ R7 : chr " 1716" " 1649" " 1663" " 1794" ...
## $ State : chr "ON" "MI" "MI" "MI" ...
## $ Pre_Score : chr " 1794" " 1553" " 1384" " 1716" ...
Changing data type for Rounds in order to calculate Average pre rating for opponets.
for (col in 4:10) {
chess_results[, col] <- as.numeric(chess_results[, col])
}
Ceating new column Opponents_Rating https://stackoverflow.com/questions/33981527/compute-the-mean-of-two-columns-in-a-dataframe
chess_results$Opponent <- round(rowMeans(chess_results[, c(4:10)], na.rm = TRUE), 0)
Final Data Frame
chess_results <- subset(chess_results,select = c(1,2,3,11,12,13))
head(chess_results,5)
## ID Player_Name Total_Ptc State Pre_Score Opponent
## 1 1 GARY HUA 6.0 ON 1794 1605
## 4 2 DAKSHESH DARURI 6.0 MI 1553 1469
## 7 3 ADITYA BAJAJ 6.0 MI 1384 1564
## 10 4 PATRICK H SCHILLING 5.5 MI 1716 1574
## 13 5 HANSHI ZUO 5.5 MI 1655 1501
Saving Results to CSV file
write.csv(chess_results, "chess_results.csv", row.names=FALSE)