Assignment:-
In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605
library(stringr)
library(DT)
# Read tournamentinfo file
# The file is read with read.csv() and no header row; the echoed output below
# shows every text line landing in a single column named V1.
# NOTE(review): the path is machine-specific; adjust it before running on
# another machine.
tournamentinfo <- read.csv(
  "/Users/ashishsm1986/git/Cuny-Assignments/week4-assignment/tournamentinfo.txt",
  header = FALSE
)
# Making sure complete file is read
head(tournamentinfo)
## V1
## 1 -----------------------------------------------------------------------------------------
## 2 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 3 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 4 -----------------------------------------------------------------------------------------
## 5 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 6 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
# Show the last rows as well, to confirm the file was read through to its end
tail(tournamentinfo)
## V1
## 191 63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |
## 192 MI | 15057092 / R: 1175 ->1125 | |W |B |W |B |B | | |
## 193 -----------------------------------------------------------------------------------------
## 194 64 | BEN LI |1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|
## 195 MI | 15006561 / R: 1163 ->1112 | |B |W |W |B |W |B |B |
## 196 -----------------------------------------------------------------------------------------
# Drop the four header lines (dashed rule, two column-title lines, dashed rule)
# so that only the player records remain. With a single column the row subset
# collapses to a plain vector of lines.
tournamentinfo <- tournamentinfo[-seq_len(4), ]
head(tournamentinfo)
## [1] 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## [2] ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## [3] -----------------------------------------------------------------------------------------
## [4] 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## [5] MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## [6] -----------------------------------------------------------------------------------------
## 131 Levels: 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ...
# Extract player name and rating
# After the header is removed the records repeat every three lines:
# a player line, a rating line and a dashed separator line.
player <- tournamentinfo[seq(1, length(tournamentinfo), 3)]
rating <- tournamentinfo[seq(2, length(tournamentinfo), 3)]
# Extract specific data fields using Regex
# Pair number: the first run of digits on the player line.
pair <- as.integer(str_extract(player, "\\d+"))
# Name: the whole field between the first and second "|", trimmed. Taking the
# full field keeps names with more than three words or with hyphens intact.
PlName <- str_trim(str_match(player, "^[^|]*\\|([^|]*)\\|")[, 2])
# State: the first word on the rating line.
PlState <- str_extract(rating, "\\w+")
# Total points: the first decimal number on the player line.
PlPoints <- as.numeric(str_extract(player, "\\d+\\.\\d+"))
# Pre-rating: the digits directly after the "R:" marker; anchoring on the
# marker avoids picking up part of the USCF ID or the post-rating.
PlRating <- as.integer(str_match(rating, "R:\\s*(\\d+)")[, 2])
# Opponents: one character vector per player holding the pair numbers that sit
# immediately before a "|" (the round cells). Rounds without an opponent
# number contribute nothing.
Opponent <- str_extract_all(player, "\\d+(?=\\|)")
# Count wins, losses and draws from the result letter that opens each round cell.
Win <- str_count(player, fixed("|W "))
Loss <- str_count(player, fixed("|L "))
Draw <- str_count(player, fixed("|D "))
# Calculate mean rating
# For each player, look up the pre-rating of every opponent by pair number and
# average them, rounded to a whole number. match() resolves pair numbers to
# row positions, so the result does not depend on players being listed in
# pair-number order. A player with no recorded opponents gets NaN.
mRating <- vapply(
  Opponent,
  function(opp) {
    round(mean(PlRating[match(as.integer(opp), pair)]), digits = 0)
  },
  numeric(1)
)
# Creating columns for the data and combining
# Showing wins, losses and draws as well although not asked in the assignment
opData <- data.frame(PlName, PlState, PlPoints, PlRating, mRating, Win, Loss, Draw)
# Human-readable column headers (no stray leading/trailing whitespace, so the
# CSV header can be matched exactly on import).
colnames(opData) <- c(
  "Player's Name", "Player's State", "Total Number of Points",
  "Player's Pre-Rating", "Average Pre Chess Rating of Opponent",
  "Win", "Loss", "Draw"
)
datatable(opData)
# Creating csv file with the data
# row.names = FALSE keeps R's automatic row index out of the file, so the CSV
# holds only the player columns.
write.csv(opData, file = "chess.csv", row.names = FALSE)