Load Library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(dplyr)
library(readr)

Accessing Data

file_path <- "tournamentinfo.txt"

raw_data <- readLines(file_path, warn = FALSE )

head(raw_data, 20)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------" 
## [11] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
## [12] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [13] "-----------------------------------------------------------------------------------------" 
## [14] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
## [15] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |" 
## [16] "-----------------------------------------------------------------------------------------" 
## [17] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|" 
## [18] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [19] "-----------------------------------------------------------------------------------------" 
## [20] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"

Extracting Player Info.

player_data <- raw_data[str_detect(raw_data, "Player Name")]

Extracting names, states, and pre-rating from the text

player_info <- data.frame(
  player_name = str_extract(player_data, "^.+?(?=\\s{2,})"),
  state = str_extract(player_data, "(?<=\\s{2})([A-Z]{2})"),
  rating = str_extract(player_data, "(?<=R: )\\d+"),  
  stringsAsFactors = FALSE
)

Verifying Player Info Extracted

head(player_info)
##           player_name state rating
## 1  Pair | Player Name  <NA>   <NA>

Average Pre-tournament Rating of Opponents

Extract ratings of opponents from round results

extract_opponent_ratings <- function(player_line) {
  opponent_ratings <- str_extract_all(player_line, "(?<=R:)\\d+")
  if (length(opponent_ratings[[1]]) > 0) {
    return(mean(as.numeric(opponent_ratings[[1]])))
  }
  return(NA)
}
player_data_with_opponent_ratings <- sapply(player_info$player_name, function(name) {
  player_line <- raw_data[str_detect(raw_data, name)][1] 
  avg_opponent_rating <- extract_opponent_ratings(player_line)
  return(avg_opponent_rating)
})

player_info$avg_opponent_rating <- player_data_with_opponent_ratings


head(player_info)
##           player_name state rating avg_opponent_rating
## 1  Pair | Player Name  <NA>   <NA>                  NA

Computing Outcomes of Total Points and Pre-rating

#extracting points scored by each player

extract_points <- function(player_line) {
  points <- str_extract(player_line, "(?<=\\|)(\\d+\\.\\d+)(?=\\|)")
  return(as.numeric(points))
}

player_info$total_points <- sapply(player_info$player_name, function(name) {
  player_line <- raw_data[str_detect(raw_data, name)][1]
  points <- extract_points(player_line)
  return(points)
})
head(player_info)
##           player_name state rating avg_opponent_rating total_points
## 1  Pair | Player Name  <NA>   <NA>                  NA           NA

Generating and Exporting Data to CSV File

write.csv(player_info, "chess_tournament_result.csv", row.names = FALSE)

Verifying Output of CSV File

file.exists("chest_tournament_result.csv")
## [1] FALSE
print("chest_tournament_result.csv")
## [1] "chest_tournament_result.csv"
str(raw_data)
##  chr [1:196] "-----------------------------------------------------------------------------------------" ...
summary(raw_data)
##    Length     Class      Mode 
##       196 character character

Loading Data in SQL Database

library(DBI)
library(RSQLite)

Connecting to SQL

db_connection <- dbConnect(RSQLite::SQLite(), "chess_tournament_db.sqlite")

Inputing data frame (player_info), SQL table name “players”

dbWriteTable(db_connection, "players", player_info, overwrite = TRUE, row.names = FALSE)

dbListTables(db_connection)
## [1] "players"
dbGetQuery(db_connection, "SELECT * FROM players LIMIT 5")
##           player_name state rating avg_opponent_rating total_points
## 1  Pair | Player Name  <NA>   <NA>                  NA           NA
dbDisconnect(db_connection)