—————————————————————————
1] Library Initialization
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.3
2] Read Data
# Names for the eleven "|"-separated fields of each record. "LastIndex" is
# discarded by the [, 1:10] selection below.
# NOTE: the "PalyerName" spelling is kept on purpose, because the later steps
# of this script refer to the column by exactly this name.
col.names <- c(
  "PairNum", "PalyerName", "TotPts",
  "Round1", "Round2", "Round3", "Round4", "Round5", "Round6", "Round7",
  "LastIndex"
)

# Read the file from GitHub's raw-content host. The ".../blob/..." address is
# the HTML viewer page, not the text file itself, so parsing it yields markup
# instead of data (and is the likely source of an "EOF within quoted string"
# warning). Quote and comment processing are switched off so that characters
# such as ' " or # inside a record are always treated as plain data.
player.data <- read.table(
  paste0(
    "https://raw.githubusercontent.com/mlforsachid/MSDSQ1/master/",
    "Data607/Week4/tournamentinfo.txt"
  ),
  header = FALSE,
  skip = 4, # first four lines are skipped (presumably the header block)
  sep = "|",
  quote = "",
  comment.char = "",
  fill = TRUE, # pad short lines with blank fields instead of failing
  stringsAsFactors = FALSE,
  col.names = col.names
)[, 1:10]
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string
3] Remove rows without a player name and trim white space
# Keep only the rows whose PalyerName field is non-empty (rows where it is NA
# are dropped as well, which is what subset() did).
has.name <- !is.na(player.data$PalyerName) & player.data$PalyerName != ""
player.data <- player.data[has.name, , drop = FALSE]

# Trim leading/trailing white space in every column. Assigning through
# `player.data[]` keeps the object a data frame of character columns;
# going through sapply() + as.data.frame() would build a character matrix
# first, which turns every column into a factor on R < 4.0 and loses the
# column layout when only one row is present.
player.data[] <- lapply(player.data, function(x) str_trim(as.character(x)))

# Renumber the rows 1..n, as the matrix round-trip used to do.
rownames(player.data) <- NULL
4] Add player state and pre-rating columns
# Each player occupies two consecutive rows: odd rows are kept as the player
# record, even rows only contribute the state and the pre-rating.
n.rows <- nrow(player.data)
if (n.rows %% 2 != 0) {
  # Without this check an odd row count fails later with an obscure
  # "replacement has ... rows" error.
  stop(
    "Expected two rows per player, but found ", n.rows, " rows.",
    call. = FALSE
  )
}

# Logical masks work for an empty data frame too, unlike seq(2, 0, 2).
is.first.row <- seq_len(n.rows) %% 2 == 1
player.tmp <- player.data[!is.first.row, , drop = FALSE]
player.data <- player.data[is.first.row, , drop = FALSE]

# The state is taken from the PairNum field of the second row.
player.data$PlayerState <- player.tmp$PairNum

# The pre-rating is the first run of 3-4 digits that follows a blank in the
# PalyerName field of the second row. as.character() guards against the
# column being a factor.
player.data$PlayerPreRanking <- str_trim(
  str_extract(
    as.character(player.tmp$PalyerName),
    "[[:blank:]]{1}[[:digit:]]{3,4}"
  )
)
6] Look up each opponent's pre-rating
# Replace every round cell with the pre-rating of the opponent it refers to.
#
# The opponent's pair number is taken as the digits contained in the cell, so
# a cell that still carries extra text around the number (for example a result
# code -- TODO confirm against the input file) is matched as well; a cell that
# is already a bare number behaves exactly as before. Cells without digits, or
# with a number that matches no PairNum, become NA.
pair.ids <- str_trim(as.character(player.data$PairNum))
pre.ratings <- as.character(player.data$PlayerPreRanking)

for (round.col in paste0("Round", 1:7)) {
  opponent.ids <- str_extract(
    as.character(player.data[[round.col]]),
    "[[:digit:]]+"
  )
  # match() returns the first matching row, like `[...][1]` did in the
  # element-wise version; incomparables = NA keeps missing ids unmatched.
  # Replacing the whole column at once also avoids "invalid factor level"
  # failures when the column is a factor.
  player.data[[round.col]] <- pre.ratings[
    match(opponent.ids, pair.ids, incomparables = NA)
  ]
}
7] Calculate the average pre-rating of each player's opponents
# Convert the seven round columns (now holding opponent pre-ratings) to
# numbers. Going through as.character() matters when a column is a factor:
# as.numeric() on a factor returns the internal level codes, not the values.
# lapply() always returns one element per column, whereas sapply() changes
# its return shape with the number of rows.
round.cols <- paste0("Round", 1:7)
player.data[round.cols] <- lapply(
  player.data[round.cols],
  function(x) as.numeric(as.character(x))
)

# Mean over the rounds with a known opponent rating, rounded to one decimal.
# A player with no known opponent rating at all gets NaN.
player.data$AvgRating <- round(
  rowMeans(player.data[, round.cols, drop = FALSE], na.rm = TRUE),
  1
)
8] Create final output
# Columns of the final report, in output order.
out.cols <- c(
  "PalyerName", "PlayerState", "TotPts", "PlayerPreRanking", "AvgRating"
)

# All rows, selected columns only; always a data frame.
player.data.out <- player.data[out.cols]

# Write the report to the working directory without the row-name column.
write.csv(player.data.out, file = "output.csv", row.names = FALSE)