# Use the author's project folder as the working directory when it exists.
# The path is machine-specific, so on any other computer the script keeps the
# current working directory (and says so) instead of stopping with an error.
project_dir <- "C:/Users/malia/OneDrive/Desktop/MSDS DATA 607"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Project directory not found; keeping ", getwd())
}

R Markdown

# Read the raw tournament report. Fields are separated by "|" and the file has
# no header row, so the columns get the default names V1, V2, ...
# stringsAsFactors = FALSE keeps every field as plain text on R versions
# before 4.0 as well; the parsing later in this file passes these columns to
# as.numeric() and strsplit(), which need character vectors, not factors.
Prac_chess_datasets <- read.delim(
  "https://raw.githubusercontent.com/maliat-hossain/chess-data/main/tournamentinfo.txt",
  header = FALSE,
  sep = "|",
  stringsAsFactors = FALSE
)

In this project, a text file of chess tournament results is given; the information in it has some structure. The objective is to create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be:

Gary Hua, ON, 6.0, 1794, 1605

A new data frame is created. The player number, player name, player state, player points, and player pre-rating are extracted from the given dataset using string manipulation.

# Build one row per player from the raw tournament rows.
# Each player occupies two consecutive raw rows:
#   row i     : V1 = player number, V2 = player name, V3 = total points
#   row i + 1 : V1 = state, V2 = details text in which the pre-rating sits
#               between the first "/" and the "->"
# A raw row starts a player record when its first field parses as a number.

# as.numeric() gives NA (with a coercion warning) for first fields that are
# not numbers, so the warning is expected and silenced here.
first_field_number <- suppressWarnings(as.numeric(Prac_chess_datasets$V1))
player_rows <- which(is.finite(first_field_number))
detail_rows <- player_rows + 1

# Rating text is the second "/"-separated piece of the details field.
# A record without a "/" yields NA instead of stopping the script.
detail_parts <- strsplit(
  as.character(Prac_chess_datasets$V2[detail_rows]),
  split = "/",
  fixed = TRUE
)
rating_text <- vapply(detail_parts, function(parts) parts[2], character(1))

# Remove every "R" and ":" character, then keep only what precedes "->".
# Surrounding blanks and any other characters are left exactly as read.
pre_rating_text <- sub("->.*$", "", gsub("[R:]", "", rating_text))

# New data frame holding the values extracted from the raw dataset.
refined_chess_datasets <- data.frame(
  PlayerNumber = first_field_number[player_rows],
  PlayerName = Prac_chess_datasets$V2[player_rows],
  PlayerState = Prac_chess_datasets$V1[detail_rows],
  PlayerPoints = Prac_chess_datasets$V3[player_rows],
  PlayerPreRating = pre_rating_text,
  stringsAsFactors = FALSE
)
head(refined_chess_datasets)
##   PlayerNumber                        PlayerName PlayerState PlayerPoints
## 1            1  GARY HUA                                 ON         6.0  
## 2            2  DAKSHESH DARURI                          MI         6.0  
## 3            3  ADITYA BAJAJ                             MI         6.0  
## 4            4  PATRICK H SCHILLING                      MI         5.5  
## 5            5  HANSHI ZUO                               MI         5.5  
## 6            6  HANSEN SONG                              OH         5.0  
##   PlayerPreRating
## 1         1794   
## 2         1553   
## 3         1384   
## 4         1716   
## 5         1655   
## 6         1686

The opponents from each round are identified, and the pre-rating of each opponent is captured. Once the pre-ratings of all of a player's opponents have been extracted, their mean is calculated and stored in a new data frame. String manipulation is used for this purpose.

# For every player, look up the pre-rating of the opponent faced in each of
# the seven rounds (columns V4 to V10) and average the ratings that are
# available.

# Player rows are those whose first field parses as a number; other rows give
# NA (with an expected coercion warning) and are skipped.
first_field_number <- suppressWarnings(as.numeric(Prac_chess_datasets$V1))
player_rows <- which(is.finite(first_field_number))

# Pre-ratings as numbers; everything from the first "P" onwards is dropped
# before conversion.
known_pre_rating <- as.numeric(
  sub("P.*", "", refined_chess_datasets$PlayerPreRating)
)

# Pre-rating of the opponent named in each cell of one round column.
# The opponent's player number is the first run of digits in the cell; cells
# without digits give NA.
opponent_pre_rating <- function(round_cells) {
  round_cells <- as.character(round_cells)
  digit_match <- regexpr("[[:digit:]]+", round_cells)
  has_opponent <- !is.na(digit_match) & digit_match > 0
  opponent_number <- rep(NA_real_, length(round_cells))
  opponent_number[has_opponent] <- as.numeric(
    regmatches(round_cells, digit_match)
  )
  # Match on PlayerNumber rather than on row position, so the lookup does not
  # depend on the player table being ordered 1, 2, 3, ...
  known_pre_rating[match(opponent_number, refined_chess_datasets$PlayerNumber)]
}

round_columns <- paste0("V", 4:10)
opponent_ratings <- lapply(
  round_columns,
  function(column) {
    opponent_pre_rating(Prac_chess_datasets[[column]][player_rows])
  }
)
names(opponent_ratings) <- paste0("Round", 1:7, "OppPreRating")
opponent_ratings <- as.data.frame(opponent_ratings)

# New data frame with the per-round opponent ratings and their mean.
# NA ratings (rounds with no opponent number) are left out of the mean.
refined_chess_datasets_round <- data.frame(
  PlayerNumber = first_field_number[player_rows],
  PlayerName = Prac_chess_datasets$V2[player_rows],
  opponent_ratings,
  MeanOppRating = unname(rowMeans(opponent_ratings, na.rm = TRUE)),
  stringsAsFactors = FALSE
)
head(refined_chess_datasets_round)
##   PlayerNumber                        PlayerName Round1OppPreRating
## 1            1  GARY HUA                                       1436
## 2            2  DAKSHESH DARURI                                1175
## 3            3  ADITYA BAJAJ                                   1641
## 4            4  PATRICK H SCHILLING                            1363
## 5            5  HANSHI ZUO                                     1242
## 6            6  HANSEN SONG                                    1399
##   Round2OppPreRating Round3OppPreRating Round4OppPreRating Round5OppPreRating
## 1               1563               1600               1610               1649
## 2                917               1716               1629               1604
## 3                955               1745               1563               1712
## 4               1507               1553               1579               1655
## 5                980               1663               1666               1716
## 6               1602               1712               1438               1365
##   Round6OppPreRating Round7OppPreRating MeanOppRating
## 1               1663               1716      1605.286
## 2               1595               1649      1469.286
## 3               1666               1663      1563.571
## 4               1564               1794      1573.571
## 5               1610               1629      1500.857
## 6               1552               1563      1518.714

A final data frame is created with all the required columns, and the data is exported to a CSV file.

# Assemble the final table with the required columns. The two intermediate
# tables are combined by row position, so they must list the players in the
# same order.
# Text fields are trimmed because the raw values keep the blank padding of the
# source report, and the pre-rating is converted to a number after dropping
# everything from the first "P" onwards, so the CSV holds clean values.
# The pre-rating column is named Pre.Rating, which is the name data.frame()
# produces for "Pre-Rating".
FinalChessData <- data.frame(
  PlayerNumber = refined_chess_datasets$PlayerNumber,
  PlayerName = trimws(refined_chess_datasets$PlayerName),
  State = trimws(refined_chess_datasets$PlayerState),
  Points = trimws(refined_chess_datasets$PlayerPoints),
  Pre.Rating = as.numeric(
    sub("P.*", "", refined_chess_datasets$PlayerPreRating)
  ),
  OpponentsAvgRating = refined_chess_datasets_round$MeanOppRating,
  stringsAsFactors = FALSE
)

# Export to a CSV file. The author's project folder is used when it exists;
# on other machines the file goes to the current working directory instead of
# the script failing on a missing path.
output_dir <- "C:/Users/malia/OneDrive/Desktop/MSDS DATA 607"
if (!dir.exists(output_dir)) {
  output_dir <- getwd()
}
write.csv(
  FinalChessData,
  file.path(output_dir, "FinalChessData.csv"),
  row.names = FALSE
)
head(FinalChessData)
##   PlayerNumber                        PlayerName  State Points Pre.Rating
## 1            1  GARY HUA                            ON   6.0      1794   
## 2            2  DAKSHESH DARURI                     MI   6.0      1553   
## 3            3  ADITYA BAJAJ                        MI   6.0      1384   
## 4            4  PATRICK H SCHILLING                 MI   5.5      1716   
## 5            5  HANSHI ZUO                          MI   5.5      1655   
## 6            6  HANSEN SONG                         OH   5.0      1686   
##   OpponentsAvgRating
## 1           1605.286
## 2           1469.286
## 3           1563.571
## 4           1573.571
## 5           1500.857
## 6           1518.714