R Markdown
# Read the raw tournament text file; fields are separated by "|" and the
# file has no header row, so columns are auto-named V1, V2, ...
Prac_chess_datasets <- read.delim(
  file = "https://raw.githubusercontent.com/maliat-hossain/chess-data/main/tournamentinfo.txt",
  header = FALSE,
  sep = "|"
)
In this project, a text file is given with chess tournament results where the information has some structure. The objective is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be:
Gary Hua, ON, 6.0, 1794, 1605
The opponents from each round are identified first, and the pre-rating of the opponent in each round is then captured. Once the pre-ratings of all opponents have been extracted, their mean is calculated and stored in a new data frame. String manipulation is used for this purpose.
# For each player, look up the pre-rating of every opponent faced in
# rounds 1-7 and average them.
# NOTE(review): relies on str_extract() (stringr) and on
# refined_chess_datasets, both set up outside this chunk.
round_columns <- paste0("V", 4:10) # raw columns read for rounds 1-7
rating_names <- paste0("Round", 1:7, "OppPreRating")

# Collect one single-row data frame per player and bind them once at the
# end; calling rbind() on every iteration re-copies the growing result.
round_rows <- vector("list", nrow(Prac_chess_datasets))

for (i in seq_len(nrow(Prac_chess_datasets))) {
  # A row is a player row only when its first field parses as a number;
  # every other row gives NA here and is skipped. as.character() guards
  # against factor columns, whose as.numeric() would be the level codes.
  PlayerNumber <- suppressWarnings(
    as.numeric(as.character(Prac_chess_datasets$V1[i]))
  )
  if (!is.finite(PlayerNumber)) {
    next
  }
  PlayerName <- Prac_chess_datasets$V2[i]

  # Getting the opponent's player number from each round. Cells without
  # any digits give NA.
  round_cells <- vapply(
    Prac_chess_datasets[i, round_columns],
    as.character,
    character(1)
  )
  opponents <- as.numeric(str_extract(round_cells, "[[:digit:]]+"))

  # Getting the pre-rating of each opponent. Everything from the first "P"
  # onward is dropped before converting to a number.
  # Opponent numbers are used as row positions in refined_chess_datasets --
  # assumes row k holds player k; TODO confirm.
  opp_ratings <- as.numeric(
    sub("P.*", "", refined_chess_datasets$PlayerPreRating[opponents])
  )
  names(opp_ratings) <- rating_names

  # Average rating of the opponents (discarding NA values).
  MeanOppRating <- mean(opp_ratings, na.rm = TRUE)

  # The named list expands into the seven RoundNOppPreRating columns.
  round_rows[[i]] <- data.frame(
    PlayerNumber,
    PlayerName,
    as.list(opp_ratings),
    MeanOppRating
  )
}

# Creating a new data frame for the calculated details. Skipped rows left
# NULL slots; with no player rows at all the result is NULL.
refined_chess_datasets_round <- do.call(
  rbind,
  Filter(Negate(is.null), round_rows)
)
head(refined_chess_datasets_round)
## PlayerNumber PlayerName Round1OppPreRating
## 1 1 GARY HUA 1436
## 2 2 DAKSHESH DARURI 1175
## 3 3 ADITYA BAJAJ 1641
## 4 4 PATRICK H SCHILLING 1363
## 5 5 HANSHI ZUO 1242
## 6 6 HANSEN SONG 1399
## Round2OppPreRating Round3OppPreRating Round4OppPreRating Round5OppPreRating
## 1 1563 1600 1610 1649
## 2 917 1716 1629 1604
## 3 955 1745 1563 1712
## 4 1507 1553 1579 1655
## 5 980 1663 1666 1716
## 6 1602 1712 1438 1365
## Round6OppPreRating Round7OppPreRating MeanOppRating
## 1 1663 1716 1605.286
## 2 1595 1649 1469.286
## 3 1666 1663 1563.571
## 4 1564 1794 1573.571
## 5 1610 1629 1500.857
## 6 1552 1563 1518.714
The final data frame is created with all the required columns, and the data is exported to a CSV file.
# Create a new Dataframe with required values.
# Player details come from refined_chess_datasets and the opponent average
# from refined_chess_datasets_round. The columns are combined by position,
# so both frames must list the players in the same order.
# data.frame() rewrites the name "Pre-Rating" to Pre.Rating.
FinalChessData <- data.frame(
  "PlayerNumber" = refined_chess_datasets$PlayerNumber,
  "PlayerName" = refined_chess_datasets$PlayerName,
  "State" = refined_chess_datasets$PlayerState,
  "Points" = refined_chess_datasets$PlayerPoints,
  "Pre-Rating" = refined_chess_datasets$PlayerPreRating,
  "OpponentsAvgRating" = refined_chess_datasets_round$MeanOppRating
)

# Extract to CSV file
# The original output folder is used when it exists; on any other machine
# the file is written to the current working directory instead of failing.
output_dir <- "C:/Users/malia/OneDrive/Desktop/MSDS DATA 607"
if (!dir.exists(output_dir)) {
  output_dir <- getwd()
}
write.csv(
  FinalChessData,
  file.path(output_dir, "FinalChessData.csv"),
  row.names = FALSE
)
head(FinalChessData)
## PlayerNumber PlayerName State Points Pre.Rating
## 1 1 GARY HUA ON 6.0 1794
## 2 2 DAKSHESH DARURI MI 6.0 1553
## 3 3 ADITYA BAJAJ MI 6.0 1384
## 4 4 PATRICK H SCHILLING MI 5.5 1716
## 5 5 HANSHI ZUO MI 5.5 1655
## 6 6 HANSEN SONG OH 5.0 1686
## OpponentsAvgRating
## 1 1605.286
## 2 1469.286
## 3 1563.571
## 4 1573.571
## 5 1500.857
## 6 1518.714