Week5

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(stringr)
library(DT)
library(ggplot2)

library(readr)
con <- ("./tournamentinfo.txt")
tourinfo <- readLines(con)

## Warning in readLines(con): incomplete final line found on
## './tournamentinfo.txt'

head(tourinfo, 10)

##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

# remove first 4 rows 
info <- tourinfo[-c(0:4)]

# remove unnecessary spaces
info <- info[sapply(info, nchar) > 0]

# divide odd / even rows into separate set of lines
odd <- c(seq(1, length(info), 3))
odd_a <- info[odd]

even <- c(seq(2, length(info), 3))
even_a <- info[even]

# name
name <- str_extract(odd_a, "\\s+([[:alpha:]- ]+)\\b\\s*\\|")
name <- gsub(name, pattern = "|", replacement = "", fixed = T)
# strip the space
name <- trimws(name)

# state
state <- str_extract(even_a, "[[:alpha:]]{2}")

# total_points
total_points <- str_extract(odd_a, "[[:digit:]]+\\.[[:digit:]]")
total_points <- as.numeric(as.character(total_points))

# pre_rating
pre_rating <- str_extract(even_a, ".\\: \\s?[[:digit:]]{3,4}")
pre_rating <- gsub(pre_rating, pattern = "R: ", replacement = "", fixed = T)
pre_rating <- as.numeric(as.character(pre_rating))

# opponent_number to extract opponents pair number per player
opponent_number <- str_extract_all(odd_a, "[[:digit:]]{1,2}\\|")
opponent_number <- str_extract_all(opponent_number, "[[:digit:]]{1,2}")

## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing

opponent_number <- lapply(opponent_number, as.numeric)

# calculate Average Pre Chess Rating of Opponents and store in a list
avg_rating <- list()
for (i in 1:length(opponent_number)){
  avg_rating[i] <- round(mean(pre_rating[unlist(opponent_number[i])]),2)
}
avg_rating <- lapply(avg_rating, as.numeric)
avg_rating <- data.frame(unlist(avg_rating))

# create initial data frame
df <- cbind.data.frame(name, state, total_points, pre_rating, avg_rating)
colnames(df) <- c("Name", "State", "Total_points", "Pre_rating", "Avg_pre_chess_rating_of_opponents")
head(df)

write.csv(df, "tournament.csv")

Week5

Musrat Jahan

2025-03-04

R Markdown