The project description can be found on github.com.
#Loading packages
library(stringr)
library(knitr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Reading the dataset from GitHub.
# read.csv() is used only to pull the text in line by line: the result is a
# one-column data frame with one row per line (assumes the file contains no
# commas -- TODO confirm). With the default header = TRUE the first line of
# the file is consumed as the column name, so row 1 is the "Pair | Player
# Name" header line.
# stringsAsFactors is set explicitly so the lines are plain character strings
# on every R version (the default was TRUE before R 4.0.0).
chessText <- read.csv(
  "https://raw.githubusercontent.com/ahmshahparan/DATA607_PROJECT01/master/tournamentinfo.txt",
  stringsAsFactors = FALSE
)
head(chessText)
## X.........................................................................................
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 -----------------------------------------------------------------------------------------
## 4 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 5 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 6 -----------------------------------------------------------------------------------------
# Drop the two column-header rows ("Pair ..." and "Num ..."). Because the
# data frame has a single column, the row subset comes back as a vector.
cleanData <- chessText[-(1:2), ]
# Player-info lines: every third entry, starting from the second
name <- cleanData[seq(from = 2, to = length(cleanData), by = 3)]
head(name, n = 3)
## [1] 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## [2] 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## [3] 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|
## 131 Levels: 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ...
# Rating-info lines: every third entry of cleanData, starting from the third
# (the line directly below each player-info line)
rating <- cleanData[seq(from = 3, to = length(cleanData), by = 3)]
head(rating, n = 3)
## [1] ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## [2] MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## [3] MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |
## 131 Levels: 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ...
# Applying data transformation
# Work on plain character vectors so the string functions behave the same
# whether the lines arrived as factors or as strings.
nameLines <- as.character(name)
ratingLines <- as.character(rating)
# Every player-info line must have a matching rating-info line
stopifnot(length(nameLines) == length(ratingLines))

# One id per player, derived from the data instead of a hard-coded 1:64
playerId <- seq_along(nameLines)
# The name is the whole second pipe-delimited field, so hyphenated names and
# names longer than three words are kept intact
playerName <- str_trim(str_extract(nameLines, "(?<=\\|)[^|]+"))
# Total points: digits, a literal decimal point, then one decimal digit
totalPoint <- as.numeric(str_extract(nameLines, "\\d+\\.\\d"))
# Pre-tournament rating: the digit run that follows the "R:" marker; any
# suffix after the digits (e.g. a provisional-rating tag) is ignored
preRating <- as.integer(str_match(ratingLines, "R:\\s*(\\d+)")[, 2])
# State code: the first two word characters on the rating line
playerState <- str_extract(ratingLines, "\\w\\w")
# Opponent pair numbers: digit runs immediately followed by a pipe, one
# character vector per player (empty for a player with no numbered opponents)
opponentId <- str_extract_all(nameLines, "\\d+(?=\\|)")

# Average of every player's opponents' pre-tournament ratings, rounded to a
# whole number. Opponent pair numbers index directly into preRating.
oppoPreRatingAvg <- vapply(
  opponentId,
  function(ids) round(mean(preRating[as.integer(ids)]), digits = 0),
  numeric(1)
)

# Assemble the summary table with its presentation column names
df <- tibble(
  `Player's Name` = playerName,
  `Player's State` = playerState,
  `Total Number of Points` = totalPoint,
  `Player's Pre-Rating` = preRating,
  `Opponents' Average Ratings` = oppoPreRatingAvg
)
kable(head(df))
| Player’s Name | Player’s State | Total Number of Points | Player’s Pre-Rating | Opponents’ Average Ratings |
|---|---|---|---|---|
| GARY HUA | ON | 6.0 | 1794 | 1605 |
| DAKSHESH DARURI | MI | 6.0 | 1553 | 1469 |
| ADITYA BAJAJ | MI | 6.0 | 1384 | 1564 |
| PATRICK H SCHILLING | MI | 5.5 | 1716 | 1574 |
| HANSHI ZUO | MI | 5.5 | 1655 | 1501 |
| HANSEN SONG | OH | 5.0 | 1686 | 1519 |
# Export the summary table to the working directory as comma-separated text:
# header row included, no row names, missing values written as empty fields
write.table(
  df,
  file = "tournamentInfo_cleaned.csv",
  sep = ",",
  na = "",
  row.names = FALSE,
  col.names = TRUE
)
A copy of this cleaned-up .csv file is available on github.com.