Project1_revised

Used text file with chess tournament results where the information has some structure tournamentinfo.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(readr)
library(RCurl)
library(ggplot2)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::complete() masks RCurl::complete()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ dplyr::lag()      masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

file_path <- "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/tournamentinfo.txt"
chess_data <- readLines(file_path, warn = FALSE)
# Remove separator lines and empty lines
chess_data <- chess_data[!grepl("^[-\\s]+$", chess_data)]

#Assigning Regex
rxname <- "[[:digit:]]+[[:space:]]+[|]([:space:]+[:alpha:]+){2,}"
rxstate <- "(?<=[:space:])[:alpha:]{2}(?=[:space:][|])"
rxtotalpoints <-"[:digit:][:punct:][:digit:]"
rxplayerstartingscore <- "(?<=R:[:space:]{1,2})[:digit:]+"
rxplayersfinalscore <- "(?<=->[:space:]?)[:digit:]+"
rxrnds <- "(?<=[:digit:][:punct:][:digit:][:space:]{1,2}[|]{1}).{40,}"

#Creating Vectors
player_name <- unlist(str_extract_all(chess_data,rxname))
player_state <- unlist(str_extract_all(chess_data,rxstate))
total_points <- unlist(str_extract_all(chess_data,rxtotalpoints))
player_starting_score <- unlist(str_extract_all(chess_data,rxplayerstartingscore))
player_final_score <- unlist(str_extract_all(chess_data,rxplayersfinalscore))
rounds <- unlist(str_extract_all(chess_data,rxrnds))

#Data frame
chess_table <- data.frame(
  player_name,
  player_state,
  player_starting_score,
  player_final_score,
  total_points,
  rounds)

#Rounds
col_names <- c("round_1","round_2","round_3","round_4","round_5","round_6","round_7")
chess_table <- chess_table%>%
  separate(rounds,col_names,sep = "[|]")

## Warning: Expected 7 pieces. Additional pieces discarded in 64 rows [1, 2, 3, 4, 5, 6, 7,
## 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

#New Data frame Transposed
chess_pivot <- chess_table%>%
  pivot_longer(col_names,names_to = "round",values_to = "result")

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_names)
## 
##   # Now:
##   data %>% select(all_of(col_names))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

result_col_names <- c("result","opp_id")
id_col_names <- c("id","player_name")
chess_pivot <- chess_pivot%>%
  separate(result,result_col_names)%>%
  separate(player_name,id_col_names, sep = " [|]")

#Opponent rating
chess_opp_table <- chess_pivot%>%
  select(id, player_state, player_starting_score,player_final_score)%>%
  distinct()%>%
  rename(opp_id=id, 
         opp_player_state = player_state ,
         opp_player_starting_score = player_starting_score,
         opp_player_final_score = player_final_score)

chess_table_comp<- merge(chess_pivot,chess_opp_table,all = TRUE)%>%
  mutate(starting_rating_diff = as.numeric(player_starting_score) - as.numeric(opp_player_starting_score))
str(chess_table_comp)

## 'data.frame':    448 obs. of  13 variables:
##  $ opp_id                   : chr  "" "" "" "" ...
##  $ id                       : chr  "56" "53" "41" "53" ...
##  $ player_name              : chr  " MARISA RICCI" " JOSE C YBARRA" " KYLE WILLIAM MURPHY" " JOSE C YBARRA" ...
##  $ player_state             : chr  "MI" "MI" "MI" "MI" ...
##  $ player_starting_score    : chr  "1153" "1393" "1403" "1393" ...
##  $ player_final_score       : chr  "1140" "1359" "1341" "1359" ...
##  $ total_points             : chr  "2.0" "2.0" "3.0" "2.0" ...
##  $ round                    : chr  "round_5" "round_1" "round_5" "round_3" ...
##  $ result                   : chr  "H" "H" "X" "H" ...
##  $ opp_player_state         : chr  NA NA NA NA ...
##  $ opp_player_starting_score: chr  NA NA NA NA ...
##  $ opp_player_final_score   : chr  NA NA NA NA ...
##  $ starting_rating_diff     : num  NA NA NA NA NA NA NA NA NA NA ...

#Calculation
chess_table_smry <- chess_table_comp%>%
  mutate(opp_player_starting_score = as.numeric(opp_player_starting_score), opp_player_final_score = as.numeric(opp_player_final_score), player_starting_score = as.numeric(player_starting_score), player_final_score = as.numeric(player_final_score), id = as.integer(id), total_points = as.numeric(total_points))%>%
  #Filter out the records with no match played
  filter(!is.na(opp_player_state))%>%
  #Group by player
  group_by(id,player_name, player_state, player_starting_score,player_final_score, total_points)%>%
  #Add average opponent rating and number of matches
  summarise(avg_opp_starting_score = round(mean(opp_player_starting_score),2), avg_opp_final_score = round(mean(opp_player_final_score),2), num_of_matches = length(round))

## `summarise()` has grouped output by 'id', 'player_name', 'player_state',
## 'player_starting_score', 'player_final_score'. You can override using the
## `.groups` argument.

write.csv(chess_table_comp,"chess-table.csv", row.names = FALSE)
write.csv(chess_table_smry,"chess-table-summary.csv", row.names = FALSE)

Generated csv file chess-table-summary.

Project1_revised

Nwe Oo Mon (Nina)

2024-09-22