The data file is hosted on GitHub.
Tournament Info
require(stringr)
## Loading required package: stringr
# Parse a chess-tournament cross table into one row per player.
#
# The text is expected to hold two pipe-delimited lines per player:
#   * a "player row" that starts with the pair number and carries the player
#     name, the total points and one "<result> <opponent>" field per round
#     that was actually played against an opponent;
#   * a "rating row" that carries the two-letter state and a
#     "R: <pre-rating> -> <post-rating>" field.
# A record is emitted for every rating row, using the most recent player row.
#
# Args:
#   filepath: URL of the text file (anything accepted by url()).
#
# Returns:
#   A data.frame whose columns are all character: pair_num, name, total_pts,
#   opponents_as_csv, state, pre_rating and avg_opponents_rating (always the
#   placeholder "0"). A field that cannot be recognised is NA instead of
#   shifting the remaining columns. Zero rows are returned when the input
#   contains no players.
processFile <- function(filepath) {
  con <- url(filepath, "r")
  # Close the connection even when reading or parsing fails.
  on.exit(close(con), add = TRUE)
  lines <- readLines(con, warn = FALSE)

  columns <- c(
    "pair_num",
    "name",
    "total_pts",
    "opponents_as_csv",
    "state",
    "pre_rating",
    "avg_opponents_rating"
  )
  # Two to four upper-case words, or a name ending in a hyphenated word.
  name_pattern <- paste0(
    "^[[:upper:]]+[[:space:]]+[[:upper:]]*[[:space:]]*[[:upper:]]*[[:space:]]*[[:upper:]]+$",
    "|^[[:upper:]]+[[:space:]]+[[:upper:]]*[[:space:]]*[[:upper:]]*-[[:upper:]]+$"
  )

  # Base regex helpers are used so the parser works even if stringr could not
  # be attached (require() only returns FALSE in that case).
  split_fields <- function(line) {
    trimws(strsplit(line, "|", fixed = TRUE)[[1]])
  }
  # Matched substrings of every field that matches `pattern`.
  all_matches <- function(fields, pattern) {
    regmatches(fields, regexpr(pattern, fields))
  }
  # First matched substring, or NA when no field matches.
  first_match <- function(fields, pattern) {
    hits <- all_matches(fields, pattern)
    if (length(hits) == 0) NA_character_ else hits[[1]]
  }

  is_player_row <- grepl("^[[:space:]]*[[:digit:]]", lines)
  is_rating_row <- !is_player_row &
    grepl("->[[:space:]]*[[:digit:]]+", lines)

  # One record per rating row at most, so the list can be preallocated.
  records <- vector("list", sum(is_rating_row))
  n_records <- 0L
  current_player <- NULL

  for (i in which(is_player_row | is_rating_row)) {
    fields <- split_fields(lines[[i]])
    if (is_player_row[[i]]) {
      # Only rounds played against an opponent look like "W  39".
      played <- all_matches(fields, "^[[:upper:]][[:space:]]+[[:digit:]]+$")
      opponents <- all_matches(played, "[[:digit:]]+")
      current_player <- c(
        first_match(fields, "^[[:digit:]]+$"),
        first_match(fields, name_pattern),
        # The dot is escaped so an all-digit field is never taken as points.
        first_match(fields, "^[[:digit:]]+\\.[[:digit:]]$"),
        paste(opponents, collapse = ",")
      )
    } else if (!is.null(current_player)) {
      # A rating row with no preceding player row has nothing to attach to.
      rating_field <- first_match(fields, "R:[[:space:]]+[[:digit:]]+")
      n_records <- n_records + 1L
      records[[n_records]] <- c(
        current_player,
        first_match(fields, "[[:upper:]]{2}"),
        sub("^R:[[:space:]]+", "", rating_field),
        "0"
      )
    }
  }

  # Every record has exactly length(columns) cells, so fill a matrix by row.
  cells <- as.character(unlist(records[seq_len(n_records)], use.names = FALSE))
  players <- matrix(
    cells,
    ncol = length(columns),
    byrow = TRUE,
    dimnames = list(NULL, columns)
  )
  # Keep strings as character without touching the global option.
  as.data.frame(players, stringsAsFactors = FALSE)
}
# Download and parse the tournament cross table into one row per player.
tournament_url <-
  "https://raw.githubusercontent.com/binishkurian/DATA-607/master/project-01/tournamentinfo.txt"
df <- processFile(tournament_url)

# Ratings come back as text; make them numeric so they can be averaged later.
df[["pre_rating"]] <- as.numeric(as.character(df[["pre_rating"]]))
head(df)
## pair_num name total_pts opponents_as_csv state
## 1 1 GARY HUA 6.0 39,21,18,14,7,12,4 ON
## 2 2 DAKSHESH DARURI 6.0 63,58,4,17,16,20,7 MI
## 3 3 ADITYA BAJAJ 6.0 8,61,25,21,11,13,12 MI
## 4 4 PATRICK H SCHILLING 5.5 23,28,2,26,5,19,1 MI
## 5 5 HANSHI ZUO 5.5 45,37,12,13,4,14,17 MI
## 6 6 HANSEN SONG 5.0 34,29,11,35,10,27,21 OH
## pre_rating avg_opponents_rating
## 1 1794 0
## 2 1553 0
## 3 1384 0
## 4 1716 0
## 5 1655 0
## 6 1686 0
# Fill avg_opponents_rating with the floored mean pre-rating of the opponents
# each player faced.
# opponents_as_csv holds pair numbers, so ratings are looked up by matching
# against pair_num instead of relying on row names happening to equal them.
# seq_len() keeps the loop from running on an empty data frame.
for (i in seq_len(nrow(df))) {
  opponent_ids <- unlist(str_split(df$opponents_as_csv[i], ","))
  opponent_ratings <- df$pre_rating[match(opponent_ids, df$pair_num)]
  # An opponent that cannot be found yields NA for this player's average.
  df[i, "avg_opponents_rating"] <-
    floor(sum(opponent_ratings) / length(opponent_ids))
}
# Assemble the report table: only the columns that go into the CSV, in the
# order they should appear.
new_df <- with(
  df,
  data.frame(
    name = name,
    state = state,
    total_pts = total_pts,
    pre_rating = pre_rating,
    avg_opponents_rating = avg_opponents_rating
  )
)

# The averages were stored in a text column; convert them back to numbers.
new_df[["avg_opponents_rating"]] <-
  as.numeric(as.character(new_df[["avg_opponents_rating"]]))
head(new_df)
## name state total_pts pre_rating avg_opponents_rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1563
## 4 PATRICK H SCHILLING MI 5.5 1716 1573
## 5 HANSHI ZUO MI 5.5 1655 1500
## 6 HANSEN SONG OH 5.0 1686 1518
# Save the report to players.csv in the working directory, without the
# row-number column.
write.csv(x = new_df, file = "players.csv", row.names = FALSE)
The CSV file created by this project is also hosted on GitHub.
Players.csv