In this project, we’re given a text file with chess tournament results where the information has some structure.Here is how the structure looks like in the below table
# Load packages
library(stringr)
library(data.table)## Warning: package 'data.table' was built under R version 3.4.2
# Reading the data file and skipping header block
chessTournament <- read.csv("https://raw.githubusercontent.com/Harpreet1984/DATA607/master/tournamentinfo.txt", sep = ",", skip = 3)
# Sample raw input data
head(chessTournament)## X.........................................................................................
## 1 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 2 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 3 -----------------------------------------------------------------------------------------
## 4 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## 5 MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## 6 -----------------------------------------------------------------------------------------
chessTournamentFiltered <- chessTournament[!str_detect (chessTournament[,1], "^-+$"),]customerCombined <- str_c(chessTournamentFiltered[seq(1, length(chessTournamentFiltered), 2)], chessTournamentFiltered[seq(2, length(chessTournamentFiltered), 2)], sep = "")
customerCombined <- data.frame(str_split_fixed(customerCombined, "\\||/|->", n = Inf))colnames(customerCombined) <- c("PlayerId",
"PlayerName",
"PlayerPoints",
"Game1",
"Game2",
"Game3",
"Game4",
"Game5",
"Game6",
"Game7",
"PlayerState",
"UscfId" ,
"PlayerRatingPre",
"PlayerRatingPost",
"nValue",
"value1",
"value2",
"value3",
"value4",
"value5",
"value6",
"value7"
)
customerCombined$PlayerRatingPre <- as.numeric(str_replace(customerCombined$PlayerRatingPre, "R: *([:digit:]+)(P.*)?", "\\1"))
customerCombined$PlayerRatingPost <- as.numeric(str_replace(customerCombined$PlayerRatingPost, "([:digit:]+)(P.*)?", "\\1"))
customerCombined$`AvePreRating` <- as.numeric(0)opp1 <- data.frame(as.numeric(str_extract_all(customerCombined$Game1,"[[:digit:]]{1,}")))
opp2 <- data.frame(as.numeric(str_extract_all(customerCombined$Game2,"[[:digit:]]{1,}")))
opp3 <- data.frame(as.numeric(str_extract_all(customerCombined$Game3,"[[:digit:]]{1,}")))
opp4 <- data.frame(as.numeric(str_extract_all(customerCombined$Game4,"[[:digit:]]{1,}")))
opp5 <- data.frame(as.numeric(str_extract_all(customerCombined$Game5,"[[:digit:]]{1,}")))
opp6 <- data.frame(as.numeric(str_extract_all(customerCombined$Game6,"[[:digit:]]{1,}")))
opp7 <- data.frame(as.numeric(str_extract_all(customerCombined$Game7,"[[:digit:]]{1,}")))
opps <- cbind(customerCombined$PlayerId, customerCombined$PlayerName, opp1, opp2, opp3, opp4, opp5, opp6, opp7)
names(opps) <- c("Player Id","Player Name","Opp 1","Opp 2","Opp 3","Opp 4","Opp 5","Opp 6","Opp 7")
opps[is.na(opps)] <- as.numeric(0)for (k in 3:9){
for (j in 1:dim(customerCombined)[1]){
for (i in 1:dim(customerCombined)[1]){
if (as.numeric(opps[j,k]) == as.numeric(customerCombined$PlayerId[i])){
customerCombined$AvePreRating[j] <- as.numeric(customerCombined$AvePreRating[j]) + as.numeric(customerCombined$PlayerRatingPre[i])
}
}
}
}
customerCombined$AvePreRating <- round(as.numeric(customerCombined$AvePreRating) / 7,0)
head(customerCombined)## PlayerId PlayerName PlayerPoints Game1 Game2
## 1 1 GARY HUA 6.0 W 39 W 21
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28
## 5 5 HANSHI ZUO 5.5 W 45 W 37
## 6 6 HANSEN SONG 5.0 W 34 D 29
## Game3 Game4 Game5 Game6 Game7 PlayerState UscfId PlayerRatingPre
## 1 W 18 W 14 W 7 D 12 D 4 ON 15445895 1794
## 2 L 4 W 17 W 16 W 20 W 7 MI 14598900 1553
## 3 W 25 W 21 W 11 W 13 W 12 MI 14959604 1384
## 4 W 2 W 26 D 5 W 19 D 1 MI 12616049 1716
## 5 D 12 D 13 D 4 W 14 W 17 MI 14601533 1655
## 6 L 11 W 35 D 10 W 27 W 21 OH 15055204 1686
## PlayerRatingPost nValue value1 value2 value3 value4 value5 value6 value7
## 1 1817 N:2 W B W B W B W
## 2 1663 N:2 B W B W B W B
## 3 1640 N:2 W B W B W B W
## 4 1744 N:2 W B W B W B B
## 5 1690 N:2 B W B W B W B
## 6 1687 N:3 W B W B B W B
## NA AvePreRating
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
Our aim here is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the player 1. Player Name 2. Player State 3. Total points 4. Player Pre-Rating, 5. Average Opponents Raing
write.table(customerCombined[, c(2, 3, 11, 13, 24)], file = "Harpreet-chesstournamentInfo.csv",row.names=FALSE, na="",col.names = c("Player Name", "Points", "Player State", "Pre Rating", "Opponent Avg Rating"), sep=",")