#install.packages("rio")
#install.packages("RCurl")
#install.packages("bitops")
#install.packages("stringr")
#install.packages("ggplot2")
library(rio)
library(RCurl)
## Loading required package: bitops
library(stringr)
library(ggplot2)
# Fetch the raw tournament cross-table from GitHub as one character string.
x <- getURL(
  "https://raw.githubusercontent.com/excelsiordata/DATA607/master/tournamentinfo.txt"
)

# Parse the pipe-delimited text. The first physical line is skipped, the next
# line is consumed as the header (its names are overridden by col.names), and
# surrounding whitespace is stripped from every field. The trailing "|" on
# each line produces the extra "Empty" column.
TourneyOutput <- read.csv(
  text = x,
  header = TRUE,
  sep = "|",
  skip = 1,
  strip.white = TRUE,
  stringsAsFactors = FALSE,
  col.names = c(
    "Pair", "Player.Name", "Total.Pts",
    "Round.1", "Round.2", "Round.3", "Round.4",
    "Round.5", "Round.6", "Round.7",
    "Empty"
  )
)

# Inspect the first few parsed rows
head(TourneyOutput)
## Pair
## 1 Num
## 2 -----------------------------------------------------------------------------------------
## 3 1
## 4 ON
## 5 -----------------------------------------------------------------------------------------
## 6 2
## Player.Name Total.Pts Round.1 Round.2 Round.3 Round.4
## 1 USCF ID / Rtg (Pre->Post) Pts 1 2 3 4
## 2
## 3 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 4 15445895 / R: 1794 ->1817 N:2 W B W B
## 5
## 6 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## Round.5 Round.6 Round.7 Empty
## 1 5 6 7 NA
## 2 NA
## 3 W 7 D 12 D 4 NA
## 4 W B W NA
## 5 NA
## 6 W 16 W 20 W 7 NA
# Get rid of the junk lines.
# A dashed separator line contains no "|" delimiter, so the entire line is
# read into the first column (Pair) as a run of hyphens. Filtering on that
# content, rather than on fixed row positions, keeps this step correct when
# the number of players in the file changes.
is_separator <- grepl("^-+$", TourneyOutput$Pair)
TourneyOutput <- TourneyOutput[!is_separator, ]
# Take a peek
head(TourneyOutput)
## Pair Player.Name Total.Pts Round.1 Round.2 Round.3
## 1 Num USCF ID / Rtg (Pre->Post) Pts 1 2 3
## 3 1 GARY HUA 6.0 W 39 W 21 W 18
## 4 ON 15445895 / R: 1794 ->1817 N:2 W B W
## 6 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4
## 7 MI 14598900 / R: 1553 ->1663 N:2 B W B
## 9 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25
## Round.4 Round.5 Round.6 Round.7 Empty
## 1 4 5 6 7 NA
## 3 W 14 W 7 D 12 D 4 NA
## 4 B W B W NA
## 6 W 17 W 16 W 20 W 7 NA
## 7 W B W B NA
## 9 W 21 W 11 W 13 W 12 NA
# Create group 1: the first line of each player's record (pair number, name,
# total points and per-round results).
# Position 1 holds the leftover header row; after it the rows alternate
# between a player's first and second line, so first lines sit at the even
# positions. The upper bound comes from nrow() so it tracks the data size
# instead of relying on a hard-coded constant.
TourneyDataGroup1 <- TourneyOutput[seq(2, nrow(TourneyOutput), by = 2), ]
head(TourneyDataGroup1)
## Pair Player.Name Total.Pts Round.1 Round.2 Round.3 Round.4
## 3 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 6 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 9 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 12 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 15 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 18 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## Round.5 Round.6 Round.7 Empty
## 3 W 7 D 12 D 4 NA
## 6 W 16 W 20 W 7 NA
## 9 W 11 W 13 W 12 NA
## 12 D 5 W 19 D 1 NA
## 15 D 4 W 14 W 17 NA
## 18 D 10 W 27 W 21 NA
# Create group 2: the second line of each player's record (state, USCF ID,
# pre/post rating and per-round colours).
# Position 1 holds the leftover header row; after it the rows alternate
# between a player's first and second line, so second lines sit at the odd
# positions starting from 3. The upper bound comes from nrow() so it tracks
# the data size instead of relying on a hard-coded constant.
TourneyDataGroup2 <- TourneyOutput[seq(3, nrow(TourneyOutput), by = 2), ]
head(TourneyDataGroup2)
## Pair Player.Name Total.Pts Round.1 Round.2 Round.3
## 4 ON 15445895 / R: 1794 ->1817 N:2 W B W
## 7 MI 14598900 / R: 1553 ->1663 N:2 B W B
## 10 MI 14959604 / R: 1384 ->1640 N:2 W B W
## 13 MI 12616049 / R: 1716 ->1744 N:2 W B W
## 16 MI 14601533 / R: 1655 ->1690 N:2 B W B
## 19 OH 15055204 / R: 1686 ->1687 N:3 W B W
## Round.4 Round.5 Round.6 Round.7 Empty
## 4 B W B W NA
## 7 W B W B NA
## 10 B W B W NA
## 13 B W B B NA
## 16 W B W B NA
## 19 B B W B NA
# Combine both record lines side by side so each player occupies one row.
MasterDataGroup <- data.frame(TourneyDataGroup1, TourneyDataGroup2)

# Start the final output table: name and total points come from the first
# record line, the state from the second. The column names are spelled out
# here exactly as data.frame() would derive them from the `$` expressions.
MDGFinal <- data.frame(
  TourneyDataGroup1.Player.Name = TourneyDataGroup1$Player.Name,
  TourneyDataGroup2.Pair = TourneyDataGroup2$Pair,
  TourneyDataGroup1.Total.Pts = TourneyDataGroup1$Total.Pts
)
# Create prerating column.
# On the second line of each player's record the Player.Name field holds
# "<USCF ID> / R: <pre-rating> -><post-rating>". The pre-rating is the run of
# digits that follows "R:" (any provisional suffix such as "P22" is ignored).
# str_match() returns one row per input element, with NA where nothing
# matches, so the ratings stay aligned with the players. The value is stored
# as an integer so that it sorts and plots numerically.
rating_match <- str_match(TourneyDataGroup2$Player.Name, "R:\\s*(\\d+)")
Pre <- data.frame(Pre.Rating = as.integer(rating_match[, 2]))
head(Pre)
## Pre.Rating
## 1 1794
## 2 1553
## 3 1384
## 4 1716
## 5 1655
## 6 1686
# Attach the pre-rating column to the name / state / points columns.
MDGFinalOutput <- cbind(MDGFinal, Pre)

# Swap the machine-generated column names for reader-friendly headings.
# Only columns whose current name appears in the lookup are renamed.
display_names <- c(
  "TourneyDataGroup1.Player.Name" = "Player's Name",
  "TourneyDataGroup2.Pair" = "Player's State",
  "TourneyDataGroup1.Total.Pts" = "Total Number of Points",
  "Pre.Rating" = "Player's Pre-Rating"
)
current_names <- names(MDGFinalOutput)
to_rename <- current_names %in% names(display_names)
current_names[to_rename] <- unname(display_names[current_names[to_rename]])
names(MDGFinalOutput) <- current_names

head(MDGFinalOutput)
## Player's Name Player's State Total Number of Points
## 1 GARY HUA ON 6.0
## 2 DAKSHESH DARURI MI 6.0
## 3 ADITYA BAJAJ MI 6.0
## 4 PATRICK H SCHILLING MI 5.5
## 5 HANSHI ZUO MI 5.5
## 6 HANSEN SONG OH 5.0
## Player's Pre-Rating
## 1 1794
## 2 1553
## 3 1384
## 4 1716
## 5 1655
## 6 1686
# Take a look at the distribution of scores.
# The points column may be stored as character, factor or numeric depending on
# the R version and upstream steps, so go through as.character() before
# converting to numeric. table() then counts the players at each score, which
# is what the "Count" axis describes, and barplot() draws those counts in
# numeric score order. The chart is drawn once.
total_points <- as.numeric(as.character(MDGFinalOutput$`Total Number of Points`))
barplot(
  table(total_points),
  main = "Distribution of Total Points",
  xlab = "Total Points",
  ylab = "Count"
)
# Let's look at total number of points vs. player's pre-rating.
# Build a small plotting frame with numeric columns and syntactic names:
# - as.character() first, so the conversion is correct whether the source
#   column is character, factor or already numeric;
# - numeric axes give a true scatter plot instead of categorical axes;
# - bare column names inside aes() avoid the `df$col` pattern, which ggplot2
#   warns against.
plot_data <- data.frame(
  total_points = as.numeric(as.character(MDGFinalOutput$`Total Number of Points`)),
  pre_rating = as.numeric(as.character(MDGFinalOutput$`Player's Pre-Rating`))
)
MDG <- ggplot(plot_data, aes(x = total_points, y = pre_rating)) +
  geom_point()
# Add a title and edit the axes. labs() takes the labels as named arguments;
# wrapping them in list() is not applied by current ggplot2 releases.
MDG <- MDG + labs(
  title = "Player's Total Points vs. Their Pre-Rating",
  x = "Player's Total Points",
  y = "Player's Pre-Rating"
)
print(MDG)
# Export the summary table as a CSV file, without row names.
# The relative file name means the file is created in R's current working
# directory.
write.csv(
  x = MDGFinalOutput,
  file = "Chess_Player_Summary.csv",
  row.names = FALSE
)
# The scatter plot shows a positive correlation between a player's pre-rating
# and their total points. This is intuitive and what we would expect.