# Install the packages this script loads, but only when they are not already
# available, so re-running the script does not re-download them every time.
required_pkgs <- c("stringr", "dplyr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# Location of the raw tournament cross-table text file.
url <- "https://raw.githubusercontent.com/bb2955/Data-607/main/tournamentinfo.txt"
# The file does not end with a newline, which makes readLines() warn about an
# "incomplete final line" even though every line is still read. warn = FALSE
# silences that benign warning.
lines <- readLines(url, warn = FALSE)
# Preview the header rows and the first player record.
head(lines)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
# Keep the two kinds of data rows: result lines open with a pair number
# followed by a pipe, while rating lines are the ones carrying an "R:" marker.
player_lines <- grep("^\\s*[0-9]+\\s+\\|", lines, value = TRUE)
rating_lines <- grep("R:", lines, value = TRUE)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build one row per player by pairing each result line with its rating line.
# The two vectors are matched by position, so they must have the same length;
# otherwise a column could be recycled silently or rows misaligned.
stopifnot(length(player_lines) == length(rating_lines))
# Leading pipe-delimited fields of a result line:
# pair number | player name | total points | (rounds ...)
player_fields <- str_split_fixed(player_lines, "\\|", n = 4)
players <- data.frame(
  PairNum = as.numeric(str_trim(player_fields[, 1])),
  Name = str_trim(player_fields[, 2]),
  TotalPts = as.numeric(str_trim(player_fields[, 3])),
  stringsAsFactors = FALSE
)
# The state code is the first pipe-delimited field of the rating line and is
# preceded by padding, so a fixed substr(x, 1, 2) only ever captured blanks.
players$State <- str_trim(str_split_fixed(rating_lines, "\\|", n = 2)[, 1])
# Pre-tournament rating: the run of digits right after "R:"; anything that
# follows those digits on the line is ignored. Lines without a match give NA.
players$PreRating <- as.numeric(
  str_match(rating_lines, "R:\\s*([0-9]+)")[, 2]
)
#' Extract the opponent pair numbers from one player's result line.
#'
#' Only rounds recorded as a win, loss or draw ("W", "L" or "D" followed by
#' whitespace and a number) contribute an opponent.
#'
#' @param line A single result line from the tournament table.
#' @return A numeric vector of opponent pair numbers; empty when the line
#'   contains no W/L/D round entries.
get_opponents <- function(line) {
  # Round entries such as "W 39": result code, whitespace, opponent number.
  round_entries <- str_extract_all(line, "[WLD]\\s+[0-9]+")
  # Drop the result code and padding, leaving only the opponent number.
  opponent_ids <- str_remove(round_entries[[1]], "[WLD]\\s+")
  as.numeric(opponent_ids)
}
# Opponent pair numbers for every player, in the same order as `players`.
opponent_list <- lapply(player_lines, get_opponents)
# Mean pre-tournament rating of the opponents each player faced.
# Iterating over the list itself (instead of 1:length(...)) stays correct when
# the list is empty, and vapply() guarantees one numeric value per player.
avg_opp_rating <- vapply(opponent_list, function(opp_nums) {
  # Look opponents up by pair number rather than by row position.
  opp_ratings <- players$PreRating[match(opp_nums, players$PairNum)]
  # Yields NaN when a player has no opponents with a known rating.
  mean(opp_ratings, na.rm = TRUE)
}, numeric(1))
players$AvgOppPreRating <- round(avg_opp_rating)
# Final report table: keep only the requested columns, in output order.
final_df <- select(
  players,
  Name, State, TotalPts, PreRating, AvgOppPreRating
)
# Show the first six rows.
head(final_df, n = 6L)
## Name State TotalPts PreRating AvgOppPreRating
## 1 GARY HUA 6.0 1794 1605
## 2 DAKSHESH DARURI 6.0 1553 1469
## 3 ADITYA BAJAJ 6.0 1384 1564
## 4 PATRICK H SCHILLING 5.5 1716 1574
## 5 HANSHI ZUO 5.5 1655 1501
## 6 HANSEN SONG 5.0 1686 1519