In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:
Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
For the first player, the information would be:
Gary Hua, ON, 6.0, 1794, 1605
Importing the text file from GitHub
#I posted the data to GitHub; this loads it (without spaces or the first row of dots)
df <- read.csv("https://raw.githubusercontent.com/davidblumenstiel/data/master/tournamentinfo.txt", header = F, skip = 1, strip.white = T)
head(df)
## V1
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 -----------------------------------------------------------------------------------------
## 4 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 5 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 6 -----------------------------------------------------------------------------------------
Removing the junk we don’t need and concatenating every other row
#Removing the dotted lines (which occur every third line)
df1 <- data.frame(df[-seq(0,nrow(df),3), ], "")
#Concatenating every other row
df2 <- data.frame(cbind(df1[ (seq(nrow(df1)) %% 2)==1,], df1[ (seq(nrow(df1)) %% 2)==0,]))
df2$X...1 <- df2$X.. <- NULL
df2 <- df2$all <- paste(df2$df..seq.0..nrow.df...3...., df2$df..seq.0..nrow.df...3.....1)
#Delimiting by '|' into the data frame: players
players <- data.frame(do.call('rbind', strsplit(as.character(df2),'|',fixed=TRUE)))
head(players)
## X1 X2 X3 X4 X5 X6 X7 X8
## 1 Pair Player Name Total Round Round Round Round Round
## 2 1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7
## 3 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16
## 4 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11
## 5 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26 D 5
## 6 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13 D 4
## X9 X10 X11 X12 X13 X14 X15 X16
## 1 Round Round Num USCF ID / Rtg (Pre->Post) Pts 1 2 3
## 2 D 12 D 4 ON 15445895 / R: 1794 ->1817 N:2 W B W
## 3 W 20 W 7 MI 14598900 / R: 1553 ->1663 N:2 B W B
## 4 W 13 W 12 MI 14959604 / R: 1384 ->1640 N:2 W B W
## 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 N:2 W B W
## 6 W 14 W 17 MI 14601533 / R: 1655 ->1690 N:2 B W B
## X17 X18 X19 X20
## 1 4 5 6 7
## 2 B W B W
## 3 W B W B
## 4 B W B W
## 5 B W B B
## 6 W B W B
Subsetting the data and renaming the columns
#Getting rid of columns we aren't interested in
players <- players[ ,-c(13:20)]
#Renaming the columns and getting rid of the first row
colnames(players) <- c("Player_ID","Player_Name", "Total_Score", "Round_1_Opponent","Round_2_Opponent","Round_3_Opponent","Round_4_Opponent","Round_5_Opponent","Round_6_Opponent","Round_7_Opponent","State","PreRating")
players <- players[-1,]
head(players)
## Player_ID Player_Name Total_Score Round_1_Opponent
## 2 1 GARY HUA 6.0 W 39
## 3 2 DAKSHESH DARURI 6.0 W 63
## 4 3 ADITYA BAJAJ 6.0 L 8
## 5 4 PATRICK H SCHILLING 5.5 W 23
## 6 5 HANSHI ZUO 5.5 W 45
## 7 6 HANSEN SONG 5.0 W 34
## Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2 W 21 W 18 W 14 W 7
## 3 W 58 L 4 W 17 W 16
## 4 W 61 W 25 W 21 W 11
## 5 D 28 W 2 W 26 D 5
## 6 W 37 D 12 D 13 D 4
## 7 D 29 L 11 W 35 D 10
## Round_6_Opponent Round_7_Opponent State PreRating
## 2 D 12 D 4 ON 15445895 / R: 1794 ->1817
## 3 W 20 W 7 MI 14598900 / R: 1553 ->1663
## 4 W 13 W 12 MI 14959604 / R: 1384 ->1640
## 5 W 19 D 1 MI 12616049 / R: 1716 ->1744
## 6 W 14 W 17 MI 14601533 / R: 1655 ->1690
## 7 W 27 W 21 OH 15055204 / R: 1686 ->1687
Changing opponent columns to opponent IDs
#Removes the Win/Loss info and coerces remainder to an int
players$Round_1_Opponent <- as.integer(gsub(". ","",players$Round_1_Opponent))
players$Round_2_Opponent <- as.integer(gsub(". ","",players$Round_2_Opponent))
players$Round_3_Opponent <- as.integer(gsub(". ","",players$Round_3_Opponent))
players$Round_4_Opponent <- as.integer(gsub(". ","",players$Round_4_Opponent))
players$Round_5_Opponent <- as.integer(gsub(". ","",players$Round_5_Opponent))
players$Round_6_Opponent <- as.integer(gsub(". ","",players$Round_6_Opponent))
players$Round_7_Opponent <- as.integer(gsub(". ","",players$Round_7_Opponent))
head(players)
## Player_ID Player_Name Total_Score Round_1_Opponent
## 2 1 GARY HUA 6.0 39
## 3 2 DAKSHESH DARURI 6.0 63
## 4 3 ADITYA BAJAJ 6.0 8
## 5 4 PATRICK H SCHILLING 5.5 23
## 6 5 HANSHI ZUO 5.5 45
## 7 6 HANSEN SONG 5.0 34
## Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2 21 18 14 7
## 3 58 4 17 16
## 4 61 25 21 11
## 5 28 2 26 5
## 6 37 12 13 4
## 7 29 11 35 10
## Round_6_Opponent Round_7_Opponent State PreRating
## 2 12 4 ON 15445895 / R: 1794 ->1817
## 3 20 7 MI 14598900 / R: 1553 ->1663
## 4 13 12 MI 14959604 / R: 1384 ->1640
## 5 19 1 MI 12616049 / R: 1716 ->1744
## 6 14 17 MI 14601533 / R: 1655 ->1690
## 7 27 21 OH 15055204 / R: 1686 ->1687
Extracting the Pre-Ratings from the ‘PreRating’ column, and changing the data type to numeric
library(stringr)
#Filtering out everything but the pre-rating in it's respective column
players$PreRating <- gsub(" ","",players$PreRating)
players$PreRating <- gsub("-.*","",players$PreRating)
players$PreRating <- gsub(".*:","",players$PreRating)
players$PreRating <- gsub("P.*","",players$PreRating)
#Changing the type to numeric
players$PreRating <- as.numeric(players$PreRating)
head(players)
## Player_ID Player_Name Total_Score Round_1_Opponent
## 2 1 GARY HUA 6.0 39
## 3 2 DAKSHESH DARURI 6.0 63
## 4 3 ADITYA BAJAJ 6.0 8
## 5 4 PATRICK H SCHILLING 5.5 23
## 6 5 HANSHI ZUO 5.5 45
## 7 6 HANSEN SONG 5.0 34
## Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2 21 18 14 7
## 3 58 4 17 16
## 4 61 25 21 11
## 5 28 2 26 5
## 6 37 12 13 4
## 7 29 11 35 10
## Round_6_Opponent Round_7_Opponent State PreRating
## 2 12 4 ON 1794
## 3 20 7 MI 1553
## 4 13 12 MI 1384
## 5 19 1 MI 1716
## 6 14 17 MI 1655
## 7 27 21 OH 1686
Making a new column of average opponents’ pre-ratings (Mean_Opponent_PreRatings)
library(gbutils)
#Changing Player_ID to integer
players$Player_ID <- as.integer(as.character(players$Player_ID))
#Initializing an empty column
players$Mean_Opponent_PreRatings <- NA
#Loops through the dataset, calculate's average pre-ratings for all players except the one in question, and appends it to the new column
for (i in 1:nrow(players)) {
#Initializes an empty vector
opts = c()
#Loops through columns within a player's row to build a vector of their opponent's mean ratings
for (k in c(4:10)) {
if (isNA(players[i,k])== FALSE) {
opts = c(opts, players$PreRating[players$Player_ID==players[i,k]])
}
}
#Appends the mean of a player's opponent's pre-ratings to the respective column
players[i,"Mean_Opponent_PreRatings"] <- round(mean(opts), digits = 0)
}
head(players)
## Player_ID Player_Name Total_Score Round_1_Opponent
## 2 1 GARY HUA 6.0 39
## 3 2 DAKSHESH DARURI 6.0 63
## 4 3 ADITYA BAJAJ 6.0 8
## 5 4 PATRICK H SCHILLING 5.5 23
## 6 5 HANSHI ZUO 5.5 45
## 7 6 HANSEN SONG 5.0 34
## Round_2_Opponent Round_3_Opponent Round_4_Opponent Round_5_Opponent
## 2 21 18 14 7
## 3 58 4 17 16
## 4 61 25 21 11
## 5 28 2 26 5
## 6 37 12 13 4
## 7 29 11 35 10
## Round_6_Opponent Round_7_Opponent State PreRating Mean_Opponent_PreRatings
## 2 12 4 ON 1794 1605
## 3 20 7 MI 1553 1469
## 4 13 12 MI 1384 1564
## 5 19 1 MI 1716 1574
## 6 14 17 MI 1655 1501
## 7 27 21 OH 1686 1519
Cleaning up: removing the opponent columns and rearranging
#Removing unnecessary columns
players[,c(1,4:10)] <- NULL
#Reordering columns
players <- players[,c(1,3,2,4,5)]
row.names(players) <- NULL
head(players)
## Player_Name State Total_Score PreRating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## Mean_Opponent_PreRatings
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
Writing to a csv
#Creates a csv file with the desired info in the workfolder
write.csv(players, file = "Chess_Tournament_Player_Info.csv")