Input: a text file with chess tournament results, in which the information follows a regular structure.
Output: an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605. The value 1605 was calculated by adding up the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663 and 1716, and dividing by the total number of games played.
library(stringr)
# Load the raw tournament report: one character string per line of the file
all_data <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
head(all_data) # preview the first six raw lines to see how the report is laid out
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
summary(all_data) # report the length, class and mode of the vector of raw lines
## Length Class Mode
## 196 character character
We first spend some time on pre-processing: removing the extra characters and spaces.
# The report opens with a dashed separator line; keep a copy of it so the
# other lines that look the same can be located
first <- all_data[1L]
print(first)
## [1] "-----------------------------------------------------------------------------------------"
# Drop every line that contains the dashed separator stored in `first`.
# - fixed = TRUE compares the separator literally instead of as a regex.
# - A logical mask is used rather than negative indices, because
#   all_data[-integer(0)] would return an empty vector if nothing matched.
# - The match result is no longer stored in a variable named `c`, which
#   masked base::c().
is_separator <- grepl(first, all_data, fixed = TRUE)
all_data1 <- all_data[!is_separator]
print(all_data1[c(1, 2)])
## [1] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [2] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
# Discard the two header rows so that only the player records remain
all_data2 <- all_data1[-(1:2)]
# Squeeze every run of whitespace down to a single space, then strip the
# blank that is left at the start of a line
all_data3 <- sub("^\\s+", "", gsub("\\s+", " ", all_data2))
head(all_data3)
## [1] "1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] "ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] "2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] "MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] "3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] "MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
In the input file, the information about each player is spread over two lines. In the section below, we combine them into a single line per player.
# Each player occupies two consecutive lines: `i` holds the odd positions
# (first line of a record) and `j` the even position that follows each one
i <- seq(from = 1, to = length(all_data3) - 1, by = 2)
j <- i + 1
# Glue the two halves of every record together, separated by one space
newdf <- as.data.frame(paste(all_data3[i], all_data3[j]))
head(newdf)
## paste(all_data3[i], all_data3[j])
## 1 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 2 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## 3 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12| MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |
## 4 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1| MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |
## 5 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17| MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |
## 6 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21| OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |
names(newdf) <- "y"
asvector <- as.vector(newdf$y)
# Split every combined player record on the "|" delimiter and lay the
# pieces out as one row per player with 20 fields each.
i <- seq_along(asvector)
fields <- strsplit(asvector[i], "\\|")
# The row-wise matrix fill below is only correct when every record splits
# into exactly 20 fields; stop instead of silently misaligning the columns.
stopifnot(all(lengths(fields) == 20L))
# The number of rows follows the number of records instead of being
# hard-coded to 64, so a report with a different number of players works.
df <- as.data.frame(
  matrix(unlist(fields), byrow = TRUE, ncol = 20, nrow = length(asvector)),
  stringsAsFactors = FALSE
)
In the following portion of code, we extract each player's pre-tournament rating.
# Rating column as it looks before any clean-up
head(df[["V12"]])
## [1] " 15445895 / R: 1794 ->1817 " " 14598900 / R: 1553 ->1663 "
## [3] " 14959604 / R: 1384 ->1640 " " 12616049 / R: 1716 ->1744 "
## [5] " 14601533 / R: 1655 ->1690 " " 15055204 / R: 1686 ->1687 "
# seq_along() is empty for empty input, whereas 1:length(x) would give c(1, 0)
i <- seq_along(asvector)
# Remove all whitespace so the field reads like "15445895/R:1794->1817",
# take the first ":" followed by 3-4 digits (and any "P" characters directly
# after them), then strip the ":" and "P" so only the digits remain.
compact <- gsub("\\s+", "", df[i, "V12"])
df[i, "V12"] <- str_replace_all(str_extract(compact, ":\\d{3,4}P*"), ":|P", "")
# column afterwards
head(df$V12)
## [1] "1794" "1553" "1384" "1716" "1655" "1686"
In the following section of code, we compute the average pre-tournament rating of each player's opponents.
col <- c("V4", "V5", "V6", "V7", "V8", "V9", "V10") # the seven round columns
rounds <- as.matrix(df[, col])

# Pre-ratings (column 12 of df) of the opponents named in one player's
# seven round cells.
opponent_pre_ratings <- function(x) {
  cells <- gsub("\\s+", "", x[1:7])
  # Cells holding U, H, B or X become "0" -- presumably rounds without an
  # opponent; confirm against the tournament report format.
  cells <- gsub("U|H|B|X", "0", cells)
  # Drop the leading W/D/L result letter, leaving the opponent's row number
  opponent <- as.numeric(gsub("^W|^D|^L", "", cells))
  # Row index 0 selects nothing, so those rounds drop out of the result
  as.numeric(df[opponent, 12])
}

# lapply() always returns one list element per player. apply() did so only
# when the rows gave results of different lengths; with seven opponents for
# every player it returns a matrix, and the means below would then be taken
# over single ratings instead of per player.
df1 <- lapply(
  seq_len(nrow(rounds)),
  function(r) opponent_pre_ratings(rounds[r, ])
)
df2 <- vapply(df1, mean, numeric(1))
print(df2) # we need to round our results to comply with the given output format
## [1] 1605.286 1469.286 1563.571 1573.571 1500.857 1518.714 1372.143
## [8] 1468.429 1523.143 1554.143 1467.571 1506.167 1497.857 1515.000
## [15] 1483.857 1385.800 1498.571 1480.000 1426.286 1410.857 1470.429
## [22] 1300.333 1213.857 1357.000 1363.286 1506.857 1221.667 1522.143
## [29] 1313.500 1144.143 1259.857 1378.714 1276.857 1375.286 1149.714
## [36] 1388.167 1384.800 1539.167 1429.571 1390.571 1248.500 1149.857
## [43] 1106.571 1327.000 1152.000 1357.714 1392.000 1355.800 1285.800
## [50] 1296.000 1356.143 1494.571 1345.333 1206.167 1406.000 1414.400
## [57] 1363.000 1391.000 1319.000 1330.200 1327.286 1186.000 1350.200
## [64] 1263.000
# Round the averages to whole rating points
df3 <- round(df2)
print(df3)
## [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515
## [15] 1484 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522
## [29] 1314 1144 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150
## [43] 1107 1327 1152 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414
## [57] 1363 1391 1319 1330 1327 1186 1350 1263
df4 <- as.data.frame(df3)
# Assemble the result table column by column. Binding the vectors with
# cbind() coerced every value to character, and the name, state and points
# fields still carried the blank padding left over from splitting on "|".
# Text fields are trimmed and numeric fields are stored as numbers; the
# default V1..V5 column names are kept.
results <- data.frame(
  V1 = trimws(df$V2), # player name
  V2 = trimws(df$V11), # state
  V3 = as.numeric(trimws(df$V3)), # total points
  V4 = as.numeric(df$V12), # pre-rating
  V5 = df4$df3, # rounded average pre-rating of opponents
  stringsAsFactors = FALSE
)
head(results)
## V1 V2 V3 V4 V5
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
# Give the result columns descriptive headers
thecolnames <- c(
  "Name",
  "State",
  "Number-Points",
  "Pre-Rating",
  "AVG-Opponent- Pre-rating"
)
colnames(results) <- thecolnames
head(results)
## Name State Number-Points Pre-Rating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## AVG-Opponent- Pre-rating
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
# Write the result table to a CSV file. row.names = FALSE keeps R's row
# index out of the file, so each record holds only the five result fields.
write.csv(results, file = "results_tournament.csv", row.names = FALSE)