The data is sourced from my github account.
fileName <-"https://raw.githubusercontent.com/RobertSellers/R/master/data/tournamentinfo.txt"
conn <- file(fileName,open="r")
pData <-readLines(conn)
This involves removing separation rows, the header (first two lines), and eliminating a final blank row of data.
pData<-pData[- grep ("------",pData)]
pData<- pData[-1:-2]
pData<-pData[-129]
head(pData)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
We want to combine the duplicate rows into a single record. We can accomplish this with the following loop. The resulting table contains half the rows but increases the number of columns.
output = list()
i<-1
for (i in seq(1,length(pData), by=2)){
if (pData[i]==FALSE) break
row1<-strsplit(as.character(pData[i]), "|", fixed="TRUE")
row2<-strsplit(as.character(pData[i+1]), "|", fixed="TRUE")
rowMerge<-c(row1[[1]],row2[[1]])
output[[i]]<-rowMerge
}
results <- do.call("rbind",output)
Using another loop, we can extract the index with the player’s individual ranking. This will be transplanted back into the main data set later in the process. The str_trim eliminates white space and is part of the stringi library. The stringr library allows for regular expressions.
require(stringr)
## Loading required package: stringr
require(stringi)
## Loading required package: stringi
luScores<-list()
i<-1
for (i in seq(i:nrow(results))){
preScore<-unlist(str_split(results[,12][i], "[R:-]"))[[3]]
preScore<-stri_trim(preScore)
luScores[[i]]<-unlist(str_split(preScore,"P"))[[1]]
}
lu<-do.call("rbind",luScores)
luScores[1:2]
## [[1]]
## [1] "1794"
##
## [[2]]
## [1] "1553"
This removes non-numeric characters found in columns 4 to 10. These are removed so that we can make a complete join on the luScores table later.
results[,4:10]<-gsub("[^0-9]","",results[,4:10])
We can begin tabulating our results data. We now only need to process the mean opponent ranking.
players<-list()
i<-1
for (i in seq(i:nrow(results))){
players[[i]]<-c(str_trim(results[,2][i]),str_trim(results[,11][i]),str_trim(results[,3][i]), lu[i], "")
}
players[1:2]
## [[1]]
## [1] "GARY HUA" "ON" "6.0" "1794" ""
##
## [[2]]
## [1] "DAKSHESH DARURI" "MI" "6.0" "1553"
## [5] ""
We replace the columns with player indexes with the ranking values found in the luScores lookup table.
i<-1
for (i in seq(i:nrow(results))){
results[i,4]<-lu[as.integer(results[i,4])]
results[i,5]<-lu[as.integer(results[i,5])]
results[i,6]<-lu[as.integer(results[i,6])]
results[i,7]<-lu[as.integer(results[i,7])]
results[i,8]<-lu[as.integer(results[i,8])]
results[i,9]<-lu[as.integer(results[i,9])]
results[i,10]<-lu[as.integer(results[i,10])]
}
We replace the columns with player indexes with the ranking values found in the luScores lookup table.
i<-1
for (i in seq(i:nrow(results))){
players[[i]][[5]]<-mean(as.numeric(results[i,][4:10]),na.rm=TRUE)
}
players[1:2]
## [[1]]
## [1] "GARY HUA" "ON" "6.0"
## [4] "1794" "1605.28571428571"
##
## [[2]]
## [1] "DAKSHESH DARURI" "MI" "6.0"
## [4] "1553" "1469.28571428571"
We recombine the data and create a .csv file. We then run a transpose to reorient the data structure. The .csv will be created inside of your working directory. Run getwd() and/or setwd() to find your file. We also run one final regular expression to truncate our mean values.
playersDF<-as.data.frame((players), stringsAsFactors = FALSE)
playersDF<-t(playersDF)
rownames(playersDF)<-NULL
colnames(playersDF)<-c("name","loc","score","rank","opp_rank")
playersDF<-as.data.frame((playersDF),stringsAsFactors = FALSE)
playersDF<-droplevels(playersDF)
playersDF$opp_rank<-gsub("\\..*","",playersDF$opp_rank)
write.csv(playersDF, file = "players.csv", quote = FALSE, row.names=FALSE)
head(playersDF)
## name loc score rank opp_rank
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1563
## 4 PATRICK H SCHILLING MI 5.5 1716 1573
## 5 HANSHI ZUO MI 5.5 1655 1500
## 6 HANSEN SONG OH 5.0 1686 1518