# install.packages("tidyverse")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Import the entire text file as a single string so that it can be parsed in the next step.
# Read the whole tournament file into one string. The file's size in bytes is
# passed as the character count so that readChar() consumes the entire file.
raw_tourn_info <- readChar(
  con = "tournamentinfo.txt",
  nchars = file.size("tournamentinfo.txt")
)
Split the string into one chunk per player using regular expressions, then extract each player's fields.
# Parse the raw tournament text into one row per player.
#
# Reads:  `raw_tourn_info`, the whole input file as a single string.
# Writes: `tourn_info`, one text chunk per player entry, and `tourn_data`, a
#         data frame with the columns "Name", "State", "Total Points",
#         "Pre-Rating" and "Avg Opponent Pre-Rating". All columns are kept as
#         character so the exported CSV looks the same as before.

# Split player rows. Entries are separated by a line of dashes; `\\r?\\n`
# accepts both CRLF and LF line endings.
tourn_info <- str_split(raw_tourn_info, "-+\\r?\\n")[[1]]
# Drop irrelevant first indices (presumably the text before the first
# separator and the column-header chunk -- confirm against the input file).
tourn_info <- tourn_info[c(-1, -2)]
# Drop blank chunks (e.g. the empty string left after a final separator line)
# so they do not turn into all-NA rows.
tourn_info <- tourn_info[nzchar(str_trim(tourn_info))]

# Pre-rating: the digits after "R:", ignoring an optional provisional suffix
# such as "P17", up to the "->" arrow.
rating_pattern <- "R:\\s*(\\d+)(P\\d+)?\\s*->"

# Each str_match() call returns a matrix with one row per entry; column 2 is
# the first capture group. Entries that do not match yield NA.
player_name <- str_match(tourn_info, "\\|\\s*([A-Za-z\\- ]+?)\\s*\\|")[, 2]
player_state <- str_match(tourn_info, "\\|\\r?\\n\\s*([A-Z]+?)\\s*\\|")[, 2]
total_points <- str_match(tourn_info, "\\|(\\d+\\.\\d+)\\s*\\|")[, 2]
pre_rating <- str_match(tourn_info, rating_pattern)[, 2]

# Find all faced opponents: a result letter followed by an opponent number.
# The numbers are used as positions into `tourn_info`, so entry k must belong
# to player number k.
opponents <- str_extract_all(tourn_info, "[WDLH]\\s+\\d+") %>%
  lapply(function(rounds) as.numeric(str_extract(rounds, "\\d+")))

# Avg Opp Pre-Rating calculation. Ratings are parsed once up front instead of
# being re-extracted for every opponent lookup. An entry with no opponents
# gives "NaN" (0 / 0); an opponent with an unknown rating gives NA.
pre_rating_num <- as.numeric(pre_rating)
avg_opp_rating <- vapply(
  opponents,
  function(opp_ids) {
    as.character(round(sum(pre_rating_num[opp_ids]) / length(opp_ids)))
  },
  character(1)
)

# Assemble the table in one step; check.names = FALSE keeps the column names
# that contain spaces and hyphens.
tourn_data <- data.frame(
  "Name" = player_name,
  "State" = player_state,
  "Total Points" = total_points,
  "Pre-Rating" = pre_rating,
  "Avg Opponent Pre-Rating" = avg_opp_rating,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Export the parsed table. Row names are written too (the write.csv default),
# so the CSV starts with an unnamed index column.
write.csv(x = tourn_data, file = "Chess Tournament Data.csv")