Install Packages

# install.packages("tidyverse")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import Tournament Information

Importing the entire txt file as a string to be parsed later on.

# Read the whole file into a single string. The file's size in bytes is
# passed as the number of characters to read, so the entire file is consumed.
raw_tourn_info <- readChar(
  con = "tournamentinfo.txt",
  nchars = file.size("tournamentinfo.txt")
)

Processing String

Splitting the string using regular expressions.

  1. Split each player row. The player rows are separated by a line of dashes followed by the escaped characters `\r\n` (carriage return + line feed).
# Split player rows: cut the raw text at every separator line, i.e. a run of
# dashes immediately followed by CRLF ("\r\n").
tourn_info <- str_split(raw_tourn_info, "-+\r\n")[[1]]

# Keep only the chunks that look like player entries: optional whitespace, a
# number (presumably the player's pair number), then "|". Selecting by content
# instead of dropping the first two chunks by position still removes the
# leading non-player chunks, and additionally discards the empty chunk that
# appears when the file ends with a separator line plus CRLF (it would
# otherwise turn into an all-NA row later on).
tourn_info <- tourn_info[str_detect(tourn_info, "^\\s*\\d+\\s*\\|")]
  1. Initialize the dataframe.
# Pre-allocate the results table: one all-NA row per element of tourn_info
# and five columns, which are then given their final names.
tourn_data <- as.data.frame(matrix(NA, nrow = length(tourn_info), ncol = 5))
names(tourn_data) <- c(
  "Name", "State", "Total Points", "Pre-Rating", "Avg Opponent Pre-Rating"
)
  1. Loop through the vector of player entries and parse the required information for each player. The average opponent pre-rating is also calculated inside the loop.
# Parse every element of tourn_info into one row of tourn_data:
# Name, State, Total Points, Pre-Rating, Avg Opponent Pre-Rating.
# All five values are written as character strings.

# Pre-rating pattern: the digits after "R:" and before "->". An optional
# "P<digits>" suffix directly after the rating is matched but not included in
# capture group 1.
rating_pattern <- "R:\\s*(\\d+)(P\\d+)?\\s*->"

# Pre-rating of every entry, extracted once up front. The same vector serves
# both as each player's own pre-rating and as the lookup table for opponents,
# so the regex is no longer re-run for every opponent of every player.
pre_rating_chr <- str_match(tourn_info, rating_pattern)[, 2]
pre_rating_num <- as.numeric(pre_rating_chr)

# seq_along() iterates zero times on an empty vector, whereas 1:length(x)
# would yield c(1, 0) and process two bogus indices.
for (i in seq_along(tourn_info)) {
  entry <- tourn_info[i]

  # Name: first run of letters, hyphens and spaces enclosed by pipes
  player_name <- str_match(entry, "\\|\\s*([A-Za-z\\- ]+?)\\s*\\|")[, 2]

  # State: capital letters that open the line following a "|" + CRLF
  player_state <- str_match(entry, "\\|\\r\\n\\s*([A-Z]+?)\\s*\\|")[, 2]

  # Total points: a decimal number placed directly after a pipe
  total_points <- str_match(entry, "\\|(\\d+\\.\\d+)\\s*\\|")[, 2]

  # Opponents faced: the number following each W/D/L/H result code
  round_opps <- str_match_all(entry, "[WDLH]\\s+(\\d+)")[[1]][, 2]

  # Opponent numbers are used as positions in tourn_info, which assumes the
  # entries are stored in ascending opponent-number order starting at 1.
  opp_ratings <- pre_rating_num[as.numeric(round_opps)]

  # Average opponent pre-rating, rounded to a whole number. With no numbered
  # opponents the average is undefined: store NA rather than the string "NaN"
  # that 0 / 0 would produce.
  if (length(opp_ratings) > 0) {
    avg_opp_rating <- as.character(
      round(sum(opp_ratings) / length(opp_ratings))
    )
  } else {
    avg_opp_rating <- NA_character_
  }

  # Fill table row
  tourn_data[i, ] <- c(
    player_name, player_state, total_points, pre_rating_chr[i], avg_opp_rating
  )
}

Export CSV File

# Export the parsed table. row.names = FALSE stops write.csv() from adding an
# extra, unnamed first column of row indices, so the file contains exactly the
# five data columns of tourn_data.
write.csv(tourn_data, "Chess Tournament Data.csv", row.names = FALSE)