library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(dplyr)
library(readr)
file_path <- "tournamentinfo.txt"
raw_data <- readLines(file_path, warn = FALSE )
head(raw_data, 20)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [16] "-----------------------------------------------------------------------------------------"
## [17] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [18] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [19] "-----------------------------------------------------------------------------------------"
## [20] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
player_data <- raw_data[str_detect(raw_data, "Player Name")]
player_info <- data.frame(
player_name = str_extract(player_data, "^.+?(?=\\s{2,})"),
state = str_extract(player_data, "(?<=\\s{2})([A-Z]{2})"),
rating = str_extract(player_data, "(?<=R: )\\d+"),
stringsAsFactors = FALSE
)
head(player_info)
## player_name state rating
## 1 Pair | Player Name <NA> <NA>
extract_opponent_ratings <- function(player_line) {
opponent_ratings <- str_extract_all(player_line, "(?<=R:)\\d+")
if (length(opponent_ratings[[1]]) > 0) {
return(mean(as.numeric(opponent_ratings[[1]])))
}
return(NA)
}
player_data_with_opponent_ratings <- sapply(player_info$player_name, function(name) {
player_line <- raw_data[str_detect(raw_data, name)][1]
avg_opponent_rating <- extract_opponent_ratings(player_line)
return(avg_opponent_rating)
})
player_info$avg_opponent_rating <- player_data_with_opponent_ratings
head(player_info)
## player_name state rating avg_opponent_rating
## 1 Pair | Player Name <NA> <NA> NA
#extracting points scored by each player
extract_points <- function(player_line) {
points <- str_extract(player_line, "(?<=\\|)(\\d+\\.\\d+)(?=\\|)")
return(as.numeric(points))
}
player_info$total_points <- sapply(player_info$player_name, function(name) {
player_line <- raw_data[str_detect(raw_data, name)][1]
points <- extract_points(player_line)
return(points)
})
head(player_info)
## player_name state rating avg_opponent_rating total_points
## 1 Pair | Player Name <NA> <NA> NA NA
write.csv(player_info, "chess_tournament_result.csv", row.names = FALSE)
file.exists("chest_tournament_result.csv")
## [1] FALSE
print("chest_tournament_result.csv")
## [1] "chest_tournament_result.csv"
str(raw_data)
## chr [1:196] "-----------------------------------------------------------------------------------------" ...
summary(raw_data)
## Length Class Mode
## 196 character character
library(DBI)
library(RSQLite)
db_connection <- dbConnect(RSQLite::SQLite(), "chess_tournament_db.sqlite")
dbWriteTable(db_connection, "players", player_info, overwrite = TRUE, row.names = FALSE)
dbListTables(db_connection)
## [1] "players"
dbGetQuery(db_connection, "SELECT * FROM players LIMIT 5")
## player_name state rating avg_opponent_rating total_points
## 1 Pair | Player Name <NA> <NA> NA NA
dbDisconnect(db_connection)