Project 1 - Chess Tournament Data
Instructions
Create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents.
Load Data
Reading Into R
library(stringr)
# Read the raw tournament cross-table from GitHub as a character vector,
# one element per line of the text file. warn = FALSE silences readLines()
# warnings (e.g. about a missing final newline).
tournament <- readLines(
  con = "https://raw.githubusercontent.com/IsARam/DATA607/master/tournamentinfo.txt",
  warn = FALSE
)
Display Table
head(tournament)## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
Tidy Data
Row Identification
# Each player record repeats every three lines of the file. The first record
# starts on line 5 (name / total points / round results) and continues on
# line 6 (state / USCF ID / ratings).
# line1: indices of the first line of every record.
# line2: indices of the second line of every record.
line1 <- seq(from = 5, to = length(tournament), by = 3)
line2 <- seq(from = 6, to = length(tournament), by = 3)
Extract & Scrubbing
# Pull the four per-player fields out of the two record lines. Every pattern
# is matched row by row, so the result does not depend on all rows being
# aligned to exactly the same character columns.

# Player name: the text between the first two "|" of the first record line.
# The pipes are stripped and the fixed-width padding is trimmed so the CSV
# does not carry leading/trailing blanks.
name <- str_trim(
  str_replace_all(str_extract(tournament[line1], "([|]).+?\\1"), "[|]", "")
)

# State: the first word on the second record line.
state <- str_trim(str_extract(tournament[line2], "[[:blank:]]+\\w+"))

# Total points: the first "digits.digit" token on the first record line
# (e.g. "6.0"). Kept as character, as before.
points <- str_extract(tournament[line1], "[[:digit:]]+[.][[:digit:]]")

# Pre-rating: the first run of digits after "R:" on the second record line.
# Anything after those digits (e.g. a provisional-rating suffix) is ignored.
prerating <- as.numeric(
  str_match(tournament[line2], "R:\\s*([[:digit:]]+)")[, 2]
)
Data Frame Creation
# Assemble the extracted vectors into one row per player. Columns are named
# after the vectors; strings stay character rather than becoming factors.
tournamentrows <- data.frame(
  name = name,
  state = state,
  points = points,
  prerating = prerating,
  stringsAsFactors = FALSE
)
head(tournamentrows)## name state points prerating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
Data Manipulation
Average Pre-Rating Per Player
# Average pre-rating of each player's opponents.
# For every player's first record line (5, 8, 11, ...):
#   1. keep the text from character 51 onward, i.e. the round-result columns
#      that follow the name and total-points fields;
#   2. extract every number in it - these are the opponents' pair numbers;
#   3. use the pair numbers as row indices into tournamentrows$prerating
#      (this relies on row k holding the player with pair number k);
#   4. average those ratings and truncate to an integer.
# vapply() builds the integer result in one pass instead of growing a vector
# with c() on every iteration.
oppavgrate <- vapply(
  seq(5, length(tournament), by = 3),
  function(i) {
    rounds <- str_sub(tournament[i], start = 51)
    opponents <- as.numeric(unlist(str_extract_all(rounds, "[[:digit:]]+")))
    as.integer(mean(tournamentrows$prerating[opponents]))
  },
  integer(1)
)
Append To Dataframe
# Attach the opponents' average pre-rating as a final column named
# "oppavgrate" to produce the table that is exported below.
tournmanettable <- cbind(tournamentrows, oppavgrate = oppavgrate)
head(tournmanettable)## name state points prerating oppavgrate
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1563
## 4 PATRICK H SCHILLING MI 5.5 1716 1573
## 5 HANSHI ZUO MI 5.5 1655 1500
## 6 HANSEN SONG OH 5.0 1686 1518
CSV
Generate CSV File
# Export the final table as a comma-separated file in the working directory:
# header row included, no row names, missing values written as empty fields.
write.table(
  x = tournmanettable,
  file = "tournamentinfo.csv",
  sep = ",",
  na = "",
  row.names = FALSE,
  col.names = TRUE
)
Visualization
Group and Summarize
library(dplyr)
# One row per state: the mean player pre-rating (meanpre) and the mean of
# the players' opponent-average ratings (meanoppavg).
tournamentbystate <- summarize(
  group_by(tournmanettable, state),
  meanpre = mean(prerating),
  meanoppavg = mean(oppavgrate)
)
Plot
library(ggplot2)
# Bar chart of the mean pre-rating per state, one dark-green bar per state.
ggplot(data = tournamentbystate, mapping = aes(x = state, y = meanpre)) +
  geom_col(fill = "dark green", color = "dark green") +
  labs(
    x = "State",
    y = "Average Rating",
    title = "Rating by State"
  )