In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:

Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents. For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605

Loading the data

#load the data
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(readr, quietly = TRUE)
library(stringr, quietly = TRUE)

# Location of the raw tournament report (plain text) on GitHub
chess_raw <- "https://raw.githubusercontent.com/nk014914/Data-607/main/project_1_tournamentinfo.txt"

# Read the report into a character vector, one element per line of text
df <- read_lines(file = chess_raw)

head(df)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

Configuring the Data

# Locate the "|" column separators in the first player row (line 5 of the
# report). The same offsets are reused to slice every other record, so the
# search is run once and its start positions are indexed.
pipe_pos <- unname(str_locate_all(df[5], "\\|")[[1]][, "start"])
x0 <- 0
x1 <- pipe_pos[1]
x2 <- pipe_pos[2]
x3 <- pipe_pos[3]
x4 <- max(nchar(df))

# Player records start on line 5 and repeat every 3 lines: the first line of
# a record is collected in group1, the second in group2, and the third line
# of each triple is skipped. The upper bound follows the actual number of
# lines read, so tournaments of any size are handled instead of a file with
# exactly 196 lines.
seq1 <- seq(5, length(df), by = 3)
seq2 <- seq(6, length(df), by = 3)
group1 <- df[seq1]
group2 <- df[seq2]

Grouping the Data

# Slice each fixed-width field out of the two lines of every player record

# Player names: text between the first and second separators of line one
name <- substr(group1, x1 + 1, x2 - 2)
PlayerName <- str_trim(name)

# Player state: text before the first separator of line two
state <- substr(group2, x0, x1 - 1)
State <- str_trim(state)

# Total points: text between the second and third separators of line one
totalpts <- substr(group1, x2 + 1, x3 - 1)

# Pre-ratings: the first run of two or more digits after a colon in the
# second field of line two (still carrying the colon at this point)
pre <- str_extract(substr(group2, x1 + 1, x2 - 1), ": *\\d{2,}")

# Assemble the result table; points are formatted with one decimal place
# and the pre-rating digits are converted to integers
chess_scores <- data.frame(PlayerName, State)
chess_scores[["TotalPts"]] <- sprintf("%.1f", as.numeric(totalpts))
chess_scores[["PreRating"]] <- as.integer(str_extract(pre, "\\d{2,}"))

Calculating Opponent’s Avg Pre Rating

# Pull every number out of the round-result columns (everything right of the
# third separator on line one of each record), then wrap the resulting list
# in a one-column matrix with one row per player.
opp <- as.matrix(
  str_extract_all(substr(group1, x3 + 1, x4), "\\b\\d{1,}")
)

# Average pre-tournament rating of one player's opponents, rounded to the
# nearest whole number.
#
# Arguments:
#   y        Opponent pair numbers for one player: a character/numeric
#            vector, or a list holding such vectors (this is what a row of
#            the one-column list matrix `opp` looks like when passed in by
#            apply()).
#   z        Optional index selecting elements of `y`. When omitted, all of
#            `y` is used.
#   ratings  Pre-ratings indexed by pair number. Defaults to
#            chess_scores$PreRating, looked up when the function is called.
#
# Returns a single numeric value. NA is returned when the player has no
# opponents (instead of NaN from 0/0), and an opponent whose rating is NA
# makes the result NA.
avgoppprerate <- function(y, z, ratings = chess_scores$PreRating) {
  # Handle the "no index supplied" case explicitly rather than relying on a
  # missing argument being passed through to `[`.
  selected <- if (missing(z)) y else y[z]
  opp_ids <- as.integer(unlist(selected, use.names = FALSE))

  if (length(opp_ids) == 0L) {
    return(NA_real_)
  }

  round(sum(ratings[opp_ids]) / length(opp_ids))
}

# One average per player: apply the helper to each row of the opponent matrix
chess_scores[["AvgOppPreRating"]] <- apply(
  X = opp,
  MARGIN = 1,
  FUN = avgoppprerate
)

Converting txt to CSV

head(chess_scores)
##            PlayerName State TotalPts PreRating AvgOppPreRating
## 1            GARY HUA    ON      6.0      1794            1605
## 2     DAKSHESH DARURI    MI      6.0      1553            1469
## 3        ADITYA BAJAJ    MI      6.0      1384            1564
## 4 PATRICK H SCHILLING    MI      5.5      1716            1574
## 5          HANSHI ZUO    MI      5.5      1655            1501
## 6         HANSEN SONG    OH      5.0      1686            1519
# Write the result table to CSV in the working directory.
# row.names = FALSE keeps the file to exactly the five requested columns;
# by default write.csv would prepend an unnamed column of row numbers.
write.csv(chess_scores, "chesstournamentinfo.csv", row.names = FALSE)