Project 1 - Chess Tournament Data

Instructions

Create an R Markdown file that generates a .CSV file (that could, for example, be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents.

Load Data

Reading Into R

library(stringr)

# Read the raw tournament cross-table as a character vector, one element per
# text line. warn = FALSE silences readLines() warnings (e.g. a missing final
# end-of-line in the source file).
tournament <- readLines(
  con = "https://raw.githubusercontent.com/IsARam/DATA607/master/tournamentinfo.txt",
  warn = FALSE
)

Display Table

# Preview the first six raw lines: a separator, two header rows, another
# separator, then the first player's two data rows.
head(tournament, n = 6L)
## [1] "-----------------------------------------------------------------------------------------" 
## [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
## [4] "-----------------------------------------------------------------------------------------" 
## [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
## [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

Tidy Data

Row Identification

# Row indices of the two data lines that describe each player.
# The first player's rows are lines 5 and 6 of the file (after the header),
# and the pattern repeats every 3 lines.
#   line1: pair number | player name | total points | round results
#   line2: state | USCF ID / pre- and post-rating | ...
line1 <- seq(from = 5, to = length(tournament), by = 3)
line2 <- seq(from = 6, to = length(tournament), by = 3)

Extract & Scrubbing

# Pull the four per-player fields out of the raw text rows.
#
# Every field is matched per row with a regular expression. The earlier version
# took `str_locate(...)[1]`, i.e. only the first cell of the location matrix,
# so all rows were sliced at the first player's character offsets.

# Player name: text between the first and second "|" of line1, with the
# fixed-width column padding trimmed so it does not end up in the CSV.
name <- str_trim(str_match(tournament[line1], "[|]([^|]+)[|]")[, 2])

# State: first word on line2 (preceded by blank padding).
state <- str_trim(str_extract(tournament[line2], "[[:blank:]]+\\w+"))

# Total points: number at the start of the third "|"-delimited field of line1,
# stored as numeric (e.g. 6.0, 5.5) like `prerating`.
points <- as.numeric(
  str_match(
    tournament[line1],
    "^[^|]*[|][^|]*[|]\\s*([[:digit:]]+(?:[.][[:digit:]]+)?)"
  )[, 2]
)

# Pre-tournament rating: first run of digits after "R:" on line2. Any suffix
# that follows the digits (anything non-numeric before "->") is ignored.
prerating <- as.numeric(
  str_match(tournament[line2], "R:\\s*([[:digit:]]+)")[, 2]
)

Data Frame Creation

# Assemble one row per player from the extracted vectors. Text columns are
# kept as character (not converted to factors).
tournamentrows <- data.frame(
  name = name,
  state = state,
  points = points,
  prerating = prerating,
  stringsAsFactors = FALSE
)
head(tournamentrows)
##                                name state points prerating
## 1  GARY HUA                            ON    6.0      1794
## 2  DAKSHESH DARURI                     MI    6.0      1553
## 3  ADITYA BAJAJ                        MI    6.0      1384
## 4  PATRICK H SCHILLING                 MI    5.5      1716
## 5  HANSHI ZUO                          MI    5.5      1655
## 6  HANSEN SONG                         OH    5.0      1686

Data Manipulation

Average Opponent Pre-Rating Per Player

# Average pre-tournament rating of each player's opponents.
#
# For every player row (`line1`), the text after the third "|" holds the
# per-round results; the digits found there are the opponents' pair numbers.
# Those numbers are used as positional indices into `tournamentrows$prerating`,
# so this relies on `tournamentrows` being in pair-number order.
#
# vapply() builds the result in one pass (no vector grown inside a loop) and
# always returns an integer vector, even when there are no player rows.
oppavgrate <- vapply(
  tournament[line1],
  function(player_row) {
    # Drop the pair-number, name and points fields instead of relying on a
    # fixed character offset, so wider pair numbers are still captured whole.
    round_fields <- str_replace(player_row, "^(?:[^|]*[|]){3}", "")
    opponent_ids <- as.integer(
      unlist(str_extract_all(round_fields, "[[:digit:]]+"))
    )
    # as.integer() truncates the mean toward zero; a player with no listed
    # opponents yields NA.
    as.integer(mean(tournamentrows$prerating[opponent_ids]))
  },
  integer(1),
  USE.NAMES = FALSE
)

Append To Dataframe

# Attach the opponents' average pre-rating as a fifth column, then preview.
tournmanettable <- cbind(tournamentrows, oppavgrate = oppavgrate)
head(tournmanettable)
##                                name state points prerating oppavgrate
## 1  GARY HUA                            ON    6.0      1794       1605
## 2  DAKSHESH DARURI                     MI    6.0      1553       1469
## 3  ADITYA BAJAJ                        MI    6.0      1384       1563
## 4  PATRICK H SCHILLING                 MI    5.5      1716       1573
## 5  HANSHI ZUO                          MI    5.5      1655       1500
## 6  HANSEN SONG                         OH    5.0      1686       1518

CSV

Generate CSV File

# Export the final table as a comma-separated file in the working directory:
# header row included, no row-name column, missing values written as empty.
write.table(
  x = tournmanettable,
  file = "tournamentinfo.csv",
  sep = ",",
  na = "",
  row.names = FALSE,
  col.names = TRUE
)

Visualization

Group and Summarize

library(dplyr)
# One row per state: mean player pre-rating (meanpre) and mean of the
# players' opponent-average ratings (meanoppavg).
tournamentbystate <- tournmanettable %>%
  group_by(state) %>%
  summarize(
    meanpre = mean(prerating),
    meanoppavg = mean(oppavgrate)
  )

Plot

library(ggplot2)

# Bar chart of the mean player pre-rating (meanpre) for each state.
ggplot(data = tournamentbystate, mapping = aes(x = state, y = meanpre)) +
  geom_col(fill = "dark green", color = "dark green") +
  labs(title = "Rating by State", x = "State", y = "Average Rating")

Isabel Ramesar

February 19, 2019