In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:

Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

For the first player, the information would be:

Gary Hua, ON, 6.0, 1794, 1605

Importing the text file from GitHub

#I posted the data to GitHub; this loads it (without spaces or the first row of dots)
df <- read.csv("https://raw.githubusercontent.com/davidblumenstiel/data/master/tournamentinfo.txt", header = F, skip = 1, strip.white = T)
head(df)
##                                                                                          V1
## 1  Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round|
## 2  Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  |
## 3 -----------------------------------------------------------------------------------------
## 4     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 5    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 6 -----------------------------------------------------------------------------------------

Removing the junk we don’t need and concatenating every other row

#Removing the dotted lines (which occur every third line)
df1 <- data.frame(df[-seq(0,nrow(df),3), ], "")



#Concatenating every other row
df2 <- data.frame(cbind(df1[ (seq(nrow(df1)) %% 2)==1,], df1[ (seq(nrow(df1)) %% 2)==0,]))
df2$X...1 <- df2$X.. <- NULL
df2 <- df2$all <- paste(df2$df..seq.0..nrow.df...3...., df2$df..seq.0..nrow.df...3.....1)


#Delimiting by '|' into the data frame: players
players <- data.frame(do.call('rbind', strsplit(as.character(df2),'|',fixed=TRUE)))


head(players)
##      X1                                X2    X3    X4    X5    X6    X7    X8
## 1 Pair   Player Name                      Total Round Round Round Round Round
## 2    1   GARY HUA                         6.0   W  39 W  21 W  18 W  14 W   7
## 3    2   DAKSHESH DARURI                  6.0   W  63 W  58 L   4 W  17 W  16
## 4    3   ADITYA BAJAJ                     6.0   L   8 W  61 W  25 W  21 W  11
## 5    4   PATRICK H SCHILLING              5.5   W  23 D  28 W   2 W  26 D   5
## 6    5   HANSHI ZUO                       5.5   W  45 W  37 D  12 D  13 D   4
##      X9   X10    X11                               X12   X13   X14   X15   X16
## 1 Round Round  Num    USCF ID / Rtg (Pre->Post)         Pts    1     2     3  
## 2 D  12 D   4    ON   15445895 / R: 1794   ->1817      N:2   W     B     W    
## 3 W  20 W   7    MI   14598900 / R: 1553   ->1663      N:2   B     W     B    
## 4 W  13 W  12    MI   14959604 / R: 1384   ->1640      N:2   W     B     W    
## 5 W  19 D   1    MI   12616049 / R: 1716   ->1744      N:2   W     B     W    
## 6 W  14 W  17    MI   14601533 / R: 1655   ->1690      N:2   B     W     B    
##     X17   X18   X19   X20
## 1   4     5     6     7  
## 2 B     W     B     W    
## 3 W     B     W     B    
## 4 B     W     B     W    
## 5 B     W     B     B    
## 6 W     B     W     B

Subsetting the data and renaming the columns

#Getting rid of columns we aren't interested in
players <- players[ ,-c(13:20)]

#Renaming the columns and getting rid of the first row
colnames(players) <- c("Player_ID","Player_Name", "Total_Score",  "Round_1_Opponent","Round_2_Opponent","Round_3_Opponent","Round_4_Opponent","Round_5_Opponent","Round_6_Opponent","Round_7_Opponent","State","PreRating")
players <- players[-1,]
head(players)
##   Player_ID                       Player_Name Total_Score Round_1_Opponent
## 2        1   GARY HUA                               6.0              W  39
## 3        2   DAKSHESH DARURI                        6.0              W  63
## 4        3   ADITYA BAJAJ                           6.0              L   8
## 5        4   PATRICK H SCHILLING                    5.5              W  23
## 6        5   HANSHI ZUO                             5.5              W  45
## 7        6   HANSEN SONG                            5.0              W  34
##   Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2            W  21            W  18            W  14            W   7
## 3            W  58            L   4            W  17            W  16
## 4            W  61            W  25            W  21            W  11
## 5            D  28            W   2            W  26            D   5
## 6            W  37            D  12            D  13            D   4
## 7            D  29            L  11            W  35            D  10
##   Round_6_Opponent Round_7_Opponent State                         PreRating
## 2            D  12            D   4   ON   15445895 / R: 1794   ->1817     
## 3            W  20            W   7   MI   14598900 / R: 1553   ->1663     
## 4            W  13            W  12   MI   14959604 / R: 1384   ->1640     
## 5            W  19            D   1   MI   12616049 / R: 1716   ->1744     
## 6            W  14            W  17   MI   14601533 / R: 1655   ->1690     
## 7            W  27            W  21   OH   15055204 / R: 1686   ->1687

Changing opponent columns to opponent IDs

#Removes the Win/Loss info and coerces remainder to an int
players$Round_1_Opponent <- as.integer(gsub(". ","",players$Round_1_Opponent))
players$Round_2_Opponent <- as.integer(gsub(". ","",players$Round_2_Opponent))
players$Round_3_Opponent <- as.integer(gsub(". ","",players$Round_3_Opponent))
players$Round_4_Opponent <- as.integer(gsub(". ","",players$Round_4_Opponent))
players$Round_5_Opponent <- as.integer(gsub(". ","",players$Round_5_Opponent))
players$Round_6_Opponent <- as.integer(gsub(". ","",players$Round_6_Opponent))
players$Round_7_Opponent <- as.integer(gsub(". ","",players$Round_7_Opponent))
head(players)
##   Player_ID                       Player_Name Total_Score Round_1_Opponent
## 2        1   GARY HUA                               6.0                 39
## 3        2   DAKSHESH DARURI                        6.0                 63
## 4        3   ADITYA BAJAJ                           6.0                  8
## 5        4   PATRICK H SCHILLING                    5.5                 23
## 6        5   HANSHI ZUO                             5.5                 45
## 7        6   HANSEN SONG                            5.0                 34
##   Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2               21               18               14                7
## 3               58                4               17               16
## 4               61               25               21               11
## 5               28                2               26                5
## 6               37               12               13                4
## 7               29               11               35               10
##   Round_6_Opponent Round_7_Opponent State                         PreRating
## 2               12                4   ON   15445895 / R: 1794   ->1817     
## 3               20                7   MI   14598900 / R: 1553   ->1663     
## 4               13               12   MI   14959604 / R: 1384   ->1640     
## 5               19                1   MI   12616049 / R: 1716   ->1744     
## 6               14               17   MI   14601533 / R: 1655   ->1690     
## 7               27               21   OH   15055204 / R: 1686   ->1687

Extracting the Pre-Ratings from the ‘PreRating’ column, and changing the data type to numeric

library(stringr)
#Filtering out everything but the pre-rating in it's respective column
players$PreRating <- gsub(" ","",players$PreRating)
players$PreRating <- gsub("-.*","",players$PreRating)
players$PreRating <- gsub(".*:","",players$PreRating)
players$PreRating <- gsub("P.*","",players$PreRating)

#Changing the type to numeric
players$PreRating <- as.numeric(players$PreRating)

head(players)
##   Player_ID                       Player_Name Total_Score Round_1_Opponent
## 2        1   GARY HUA                               6.0                 39
## 3        2   DAKSHESH DARURI                        6.0                 63
## 4        3   ADITYA BAJAJ                           6.0                  8
## 5        4   PATRICK H SCHILLING                    5.5                 23
## 6        5   HANSHI ZUO                             5.5                 45
## 7        6   HANSEN SONG                            5.0                 34
##   Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2               21               18               14                7
## 3               58                4               17               16
## 4               61               25               21               11
## 5               28                2               26                5
## 6               37               12               13                4
## 7               29               11               35               10
##   Round_6_Opponent Round_7_Opponent State PreRating
## 2               12                4   ON       1794
## 3               20                7   MI       1553
## 4               13               12   MI       1384
## 5               19                1   MI       1716
## 6               14               17   MI       1655
## 7               27               21   OH       1686

Making a new column of average opponents’ pre-ratings (Mean_Opponent_PreRatings)

library(gbutils)
#Changing Player_ID to integer
players$Player_ID <- as.integer(as.character(players$Player_ID))

#Initializing an empty column
players$Mean_Opponent_PreRatings <- NA

#Loops through the dataset, calculate's average pre-ratings for all players except the one in question, and appends it to the new column
for (i in 1:nrow(players)) {
  #Initializes an empty vector
  opts = c()
  #Loops through columns within a player's row to build a vector of their opponent's mean ratings
  for (k in c(4:10)) {
    if (isNA(players[i,k])== FALSE) {
      opts = c(opts, players$PreRating[players$Player_ID==players[i,k]])
    }
  }
  #Appends the mean of a player's opponent's pre-ratings to the respective column
  players[i,"Mean_Opponent_PreRatings"] <- round(mean(opts), digits = 0)

}

head(players)
##   Player_ID                       Player_Name Total_Score Round_1_Opponent
## 2         1  GARY HUA                               6.0                 39
## 3         2  DAKSHESH DARURI                        6.0                 63
## 4         3  ADITYA BAJAJ                           6.0                  8
## 5         4  PATRICK H SCHILLING                    5.5                 23
## 6         5  HANSHI ZUO                             5.5                 45
## 7         6  HANSEN SONG                            5.0                 34
##   Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2               21               18               14                7
## 3               58                4               17               16
## 4               61               25               21               11
## 5               28                2               26                5
## 6               37               12               13                4
## 7               29               11               35               10
##   Round_6_Opponent Round_7_Opponent State PreRating Mean_Opponent_PreRatings
## 2               12                4   ON       1794                     1605
## 3               20                7   MI       1553                     1469
## 4               13               12   MI       1384                     1564
## 5               19                1   MI       1716                     1574
## 6               14               17   MI       1655                     1501
## 7               27               21   OH       1686                     1519

Cleaning up: removing the opponent columns and rearranging

#Removing unnecessary columns
players[,c(1,4:10)] <- NULL

#Reordering columns
players <- players[,c(1,3,2,4,5)]

row.names(players) <- NULL
head(players)
##                         Player_Name State Total_Score PreRating
## 1  GARY HUA                           ON        6.0        1794
## 2  DAKSHESH DARURI                    MI        6.0        1553
## 3  ADITYA BAJAJ                       MI        6.0        1384
## 4  PATRICK H SCHILLING                MI        5.5        1716
## 5  HANSHI ZUO                         MI        5.5        1655
## 6  HANSEN SONG                        OH        5.0        1686
##   Mean_Opponent_PreRatings
## 1                     1605
## 2                     1469
## 3                     1564
## 4                     1574
## 5                     1501
## 6                     1519

Writing to a csv

#Creates a csv file with the desired info in the workfolder
write.csv(players, file = "Chess_Tournament_Player_Info.csv")