607 Project 1 KLS

Chess Tournament

#install.packages("rio")
#install.packages("RCurl")
#install.packages("bitops")
#install.packages("stringr")
#install.packages("ggplot2")
library(rio)
library(RCurl)

## Loading required package: bitops

library(stringr)
library(ggplot2)

# Load the data in: download the raw tournament cross-table as one string.
x <- getURL("https://raw.githubusercontent.com/excelsiordata/DATA607/master/tournamentinfo.txt")

# Parse the pipe-delimited text into a data frame.
# - `skip = 1` discards the first line of the file (presumably a dashed rule;
#   confirm against the raw file).
# - The next line is consumed as the header, but its labels are replaced by
#   the explicit `col.names` below.
# - `Empty` is an 11th column that comes back all NA (see the peek below);
#   presumably it is produced by the trailing "|" on each record.
# - `strip.white = TRUE` trims the padding around every field.
TourneyOutput <- read.csv(
  text = x,
  header = TRUE,
  sep = "|",
  stringsAsFactors = FALSE,
  col.names = c(
    "Pair", "Player.Name", "Total.Pts",
    "Round.1", "Round.2", "Round.3", "Round.4",
    "Round.5", "Round.6", "Round.7", "Empty"
  ),
  skip = 1,
  strip.white = TRUE
)

# Take a peek
head(TourneyOutput)

##                                                                                        Pair
## 1                                                                                       Num
## 2 -----------------------------------------------------------------------------------------
## 3                                                                                         1
## 4                                                                                        ON
## 5 -----------------------------------------------------------------------------------------
## 6                                                                                         2
##                   Player.Name Total.Pts Round.1 Round.2 Round.3 Round.4
## 1   USCF ID / Rtg (Pre->Post)       Pts       1       2       3       4
## 2                                                                      
## 3                    GARY HUA       6.0   W  39   W  21   W  18   W  14
## 4 15445895 / R: 1794   ->1817       N:2       W       B       W       B
## 5                                                                      
## 6             DAKSHESH DARURI       6.0   W  63   W  58   L   4   W  17
##   Round.5 Round.6 Round.7 Empty
## 1       5       6       7    NA
## 2                            NA
## 3   W   7   D  12   D   4    NA
## 4       W       B       W    NA
## 5                            NA
## 6   W  16   W  20   W   7    NA

# Get rid of the junk lines: every third row is a dashed separator that was
# read into the `Pair` column as a single run of "-" characters. Filter on
# that content instead of hard-coding row positions (2, 5, ..., 194) so the
# step keeps working if the tournament has a different number of players.
# Logical subsetting keeps the original row names, as the positional version did.
TourneyOutput <- TourneyOutput[!grepl("^-+$", TourneyOutput$Pair), ]

# Take a peek
head(TourneyOutput)

##   Pair                 Player.Name Total.Pts Round.1 Round.2 Round.3
## 1  Num   USCF ID / Rtg (Pre->Post)       Pts       1       2       3
## 3    1                    GARY HUA       6.0   W  39   W  21   W  18
## 4   ON 15445895 / R: 1794   ->1817       N:2       W       B       W
## 6    2             DAKSHESH DARURI       6.0   W  63   W  58   L   4
## 7   MI 14598900 / R: 1553   ->1663       N:2       B       W       B
## 9    3                ADITYA BAJAJ       6.0   L   8   W  61   W  25
##   Round.4 Round.5 Round.6 Round.7 Empty
## 1       4       5       6       7    NA
## 3   W  14   W   7   D  12   D   4    NA
## 4       B       W       B       W    NA
## 6   W  17   W  16   W  20   W   7    NA
## 7       W       B       W       B    NA
## 9   W  21   W  11   W  13   W  12    NA

# Create group 1: the first line of each player's record (pair number, name,
# total points, per-round results). After the separators are removed, row 1 is
# the leftover second header line and player records alternate from row 2 on,
# so group 1 is every even row. The upper bound comes from the data rather
# than a hard-coded constant.
TourneyDataGroup1 <- TourneyOutput[seq(2, nrow(TourneyOutput), by = 2), ]
head(TourneyDataGroup1)

##    Pair         Player.Name Total.Pts Round.1 Round.2 Round.3 Round.4
## 3     1            GARY HUA       6.0   W  39   W  21   W  18   W  14
## 6     2     DAKSHESH DARURI       6.0   W  63   W  58   L   4   W  17
## 9     3        ADITYA BAJAJ       6.0   L   8   W  61   W  25   W  21
## 12    4 PATRICK H SCHILLING       5.5   W  23   D  28   W   2   W  26
## 15    5          HANSHI ZUO       5.5   W  45   W  37   D  12   D  13
## 18    6         HANSEN SONG       5.0   W  34   D  29   L  11   W  35
##    Round.5 Round.6 Round.7 Empty
## 3    W   7   D  12   D   4    NA
## 6    W  16   W  20   W   7    NA
## 9    W  11   W  13   W  12    NA
## 12   D   5   W  19   D   1    NA
## 15   D   4   W  14   W  17    NA
## 18   D  10   W  27   W  21    NA

# Create group 2: the second line of each player's record (state in `Pair`,
# USCF ID and pre/post rating in `Player.Name`). These are the odd rows from
# row 3 onward; the upper bound comes from the data rather than a hard-coded
# constant.
TourneyDataGroup2 <- TourneyOutput[seq(3, nrow(TourneyOutput), by = 2), ]
head(TourneyDataGroup2)

##    Pair                 Player.Name Total.Pts Round.1 Round.2 Round.3
## 4    ON 15445895 / R: 1794   ->1817       N:2       W       B       W
## 7    MI 14598900 / R: 1553   ->1663       N:2       B       W       B
## 10   MI 14959604 / R: 1384   ->1640       N:2       W       B       W
## 13   MI 12616049 / R: 1716   ->1744       N:2       W       B       W
## 16   MI 14601533 / R: 1655   ->1690       N:2       B       W       B
## 19   OH 15055204 / R: 1686   ->1687       N:3       W       B       W
##    Round.4 Round.5 Round.6 Round.7 Empty
## 4        B       W       B       W    NA
## 7        W       B       W       B    NA
## 10       B       W       B       W    NA
## 13       B       W       B       B    NA
## 16       W       B       W       B    NA
## 19       B       B       W       B    NA

# Side-by-side master table: one row per player, with the columns of the
# first record line followed by the columns of the second record line.
MasterDataGroup <- data.frame(TourneyDataGroup1, TourneyDataGroup2)

# Start the final output table. The column names are spelled out here; they
# are exactly the names data.frame() would derive from the expressions, and
# the later renaming step looks them up by these names.
MDGFinal <- data.frame(
  TourneyDataGroup1.Player.Name = TourneyDataGroup1$Player.Name,
  TourneyDataGroup2.Pair = TourneyDataGroup2$Pair,
  TourneyDataGroup1.Total.Pts = TourneyDataGroup1$Total.Pts
)

# Create prerating column.
# The pre-tournament rating is the run of digits that follows "R:" in the
# `Player.Name` field of each player's second record line, e.g.
# "15445895 / R: 1794   ->1817" -> 1794.
#
# str_match() is applied to that one column only (not the whole data frame)
# and returns exactly one row per player, with NA when nothing matches, so the
# result always lines up with the other per-player columns. Capturing `\\d+`
# instead of a fixed four characters avoids stray leading spaces for ratings
# with fewer than four digits. The rating is stored as a number so it can be
# plotted and summarised as one.
Pre <- str_match(TourneyDataGroup2$Player.Name, "R:\\s*(\\d+)")[, 2]
Pre <- data.frame(Pre.Rating = as.numeric(Pre))
head(Pre)

##   Pre.Rating
## 1       1794
## 2       1553
## 3       1384
## 4       1716
## 5       1655
## 6       1686

# Attach the pre-rating column to the other output columns, then replace the
# auto-generated column names with the headings used in the final report.
MDGFinalOutput <- cbind(MDGFinal, Pre)

# Old name -> report heading, applied one at a time in this order.
report_names <- c(
  TourneyDataGroup1.Player.Name = "Player's Name",
  TourneyDataGroup2.Pair = "Player's State",
  TourneyDataGroup1.Total.Pts = "Total Number of Points",
  Pre.Rating = "Player's Pre-Rating"
)
for (old_name in names(report_names)) {
  names(MDGFinalOutput)[names(MDGFinalOutput) == old_name] <- report_names[[old_name]]
}
head(MDGFinalOutput)

##         Player's Name Player's State Total Number of Points
## 1            GARY HUA             ON                    6.0
## 2     DAKSHESH DARURI             MI                    6.0
## 3        ADITYA BAJAJ             MI                    6.0
## 4 PATRICK H SCHILLING             MI                    5.5
## 5          HANSHI ZUO             MI                    5.5
## 6         HANSEN SONG             OH                    5.0
##   Player's Pre-Rating
## 1                1794
## 2                1553
## 3                1384
## 4                1716
## 5                1655
## 6                1686

# Take a look at the distribution of scores: one bar per distinct point total,
# with bar height equal to the number of players on that total.
# table() + barplot() is what plot() draws for a factor, but it also works when
# the column is character or numeric (plot() on a character vector fails).
# The chart is drawn once; the original drew the identical plot twice.
barplot(
  table(MDGFinalOutput$`Total Number of Points`),
  main = "Distribution of Total Points",
  xlab = "Total Points",
  ylab = "Count"
)

# Let's look at total number of points vs. player's pre-rating.
# Columns are referenced by name inside aes() (back-ticked because the names
# contain spaces and apostrophes) instead of via `MDGFinalOutput$...`, so the
# mapping is evaluated against the data passed to ggplot().
# as.numeric(as.character(...)) turns factor, character or already-numeric
# columns into numbers, giving continuous axes rather than one tick per
# distinct value.
MDG <- ggplot(
  MDGFinalOutput,
  aes(
    x = as.numeric(as.character(`Total Number of Points`)),
    y = as.numeric(as.character(`Player's Pre-Rating`))
  )
) +
  geom_point()

# Add a title and edit the axes. labs() takes the labels as named arguments;
# wrapping them in list() is not the documented call form.
MDG <- MDG + labs(
  title = "Player's Total Points vs. Their Pre-Rating",
  x = "Player's Total Points",
  y = "Player's Pre-Rating"
)
print(MDG)

# Export the summary table as CSV, without the row-name column.
# The file name is relative, so the file is created in the current working
# directory (C:\Users\Kelly\Documents\607 on the author's local PC).
write.csv(
  MDGFinalOutput,
  file = "Chess_Player_Summary.csv",
  row.names = FALSE
)

The scatter plot shows a positive correlation between a player’s pre-rating and their total points: players who entered the tournament with a higher rating tended to score more points. This is intuitive and what we would expect.