This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(DT)
library(ggplot2)
library(readr)
# path to the raw tournament cross table, relative to the working directory
# (note: despite its name, `con` holds a file path string, not a connection)
con <- "./tournamentinfo.txt"
# read the file into a character vector, one element per line; the file lacks
# a trailing newline, so warn = FALSE keeps readLines() from warning about the
# incomplete final line on every run (that last line is still returned)
tourinfo <- readLines(con, warn = FALSE)
# preview the first ten raw lines
head(tourinfo, 10)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
# drop the four header lines (rule, two column-title lines, rule) that come
# before the first player record
info <- tourinfo[-seq_len(4)]
# discard empty lines; nzchar() is vectorised, so no sapply() is needed
info <- info[nzchar(info)]
# every player record spans three consecutive lines:
#   1st line - pair number, name, total points, per-round results
#   2nd line - state, USCF id, ratings
#   3rd line - dashed separator (ignored)
# pick the positions by their remainder modulo 3 so that an empty `info`
# yields empty index vectors instead of an error from seq()
# `odd` / `odd_a`: positions and text of the first line of each record
odd <- which(seq_along(info) %% 3 == 1)
odd_a <- info[odd]
# `even` / `even_a`: positions and text of the second line of each record
even <- which(seq_along(info) %% 3 == 2)
even_a <- info[even]
# name: on the first line of each record, the run of letters, hyphens and
# spaces that is terminated by a "|" (padding and delimiter included)
name <- str_extract(odd_a, "\\s+([[:alpha:]- ]+)\\b\\s*\\|")
# remove the field delimiter, then the surrounding padding
name <- gsub("|", "", name, fixed = TRUE)
name <- trimws(name)
# state: the first two consecutive letters on the second line of each record
state <- str_extract(even_a, "[[:alpha:]]{2}")
# total_points: the first "<digits>.<digit>" token on the first line
# (e.g. "6.0"); str_extract() already returns character, so convert directly
total_points <- as.numeric(str_extract(odd_a, "[[:digit:]]+\\.[[:digit:]]"))
# pre_rating: the 3-4 digits that follow "R: " on the second line (one extra
# space is tolerated for right-aligned 3-digit ratings); the prefix is matched
# literally so that only the rating field can be picked up
pre_rating <- str_extract(even_a, "R\\: \\s?[[:digit:]]{3,4}")
pre_rating <- gsub("R: ", "", pre_rating, fixed = TRUE)
# as.numeric() ignores the optional leading space left after the prefix
pre_rating <- as.numeric(pre_rating)
# opponent_number: for every player, the pair numbers of the opponents faced.
# on the first line of a record a played round looks like "W  39|", i.e. the
# opponent's pair number sits directly in front of a "|" (the pair number and
# total points fields are followed by a space, so they are not matched).
# the lookahead keeps the delimiter out of the match, so one pass over the
# character vector is enough and the result stays a list of character vectors
# (one per player); rounds without an opponent contribute nothing
opponent_number <- str_extract_all(odd_a, "[[:digit:]]+(?=\\|)")
opponent_number <- lapply(opponent_number, as.numeric)
# calculate Average Pre Chess Rating of Opponents: pair numbers are used as
# positions into pre_rating (assumes players are listed in pair-number order
# starting at 1); a player without any opponent gets NaN
avg_rating <- vapply(
  opponent_number,
  function(opponents) round(mean(pre_rating[opponents]), 2),
  numeric(1)
)
# keep the result as a one-column data frame, one row per player
avg_rating <- data.frame(avg_rating)
# bind the per-player columns side by side and label them in one step
df <- setNames(
  cbind.data.frame(name, state, total_points, pre_rating, avg_rating),
  c(
    "Name", "State", "Total_points", "Pre_rating",
    "Avg_pre_chess_rating_of_opponents"
  )
)
# show the first rows of the assembled table
head(df)
# export the table as CSV into the working directory
write.csv(df, file = "tournament.csv")