Main Data Manipulation
Loading The Chess Tournament Data
# GitHub Location of tournament data file
chessdata.giturl <- "https://raw.githubusercontent.com/DataDriven-MSDA/DATA607/master/Week4P1/tournamentinfo.txt"
chessdata.gitdata <- getURL(chessdata.giturl)
# Reading text file from the GitHub location making header as false and skipping
# the first 4 lines (that represent ---- followed by two lines of headers
# followed by -----)
crudechessdata <- read.table(text = chessdata.gitdata, header = FALSE, fill = TRUE,
sep = "|", skip = 4, stringsAsFactors = FALSE)
# View(crudechessdata)
# Removing last NA column
crudechessdata <- subset(crudechessdata, select = -c(V11))
# Renaming Columns
colnames(crudechessdata) <- c("Pair", "PlayerName", "TotalPoints", "R1", "R2", "R3",
"R4", "R5", "R6", "R7")
# Filter out rows where first column match '----'
crudechessdata <- crudechessdata[-grep("-----", crudechessdata[, 1]), ]
# Form clean data frame
cleanchessdata <- parsePlayer(crudechessdata)
# View(cleanchessdata)Finding Average Opponent Rating And Other Stats
Loop through each record of of player to find the avg opponent pre rating
for (i in seq(1, nrow(cleanchessdata), 1)) {
cleanchessdata$PlayerName[i] <- str_trim(cleanchessdata$PlayerName[i])
cleanchessdata$State[i] <- str_trim(cleanchessdata$State[i])
cleanchessdata$TotalPoints[i] <- str_trim(cleanchessdata$TotalPoints[i])
playerPreRating <- getPreRating(i, cleanchessdata)
cleanchessdata$PlayerPreRating[i] <- playerPreRating
gamestats <- calAvgOppPreRating(subset(cleanchessdata, as.numeric(Pair) == i),
cleanchessdata)
# Also finding other stats like Games Won, Games Lost, Games Drawn
cleanchessdata$AvgOpponentPreRating[i] <- gamestats$AvgOppoRating
cleanchessdata$Won[i] <- gamestats$WinCount
cleanchessdata$Lost[i] <- gamestats$LoseCount
cleanchessdata$Draw[i] <- gamestats$DrawCount
}Exporting Required Data To CSV
# Aggregating relevant data to form CSV file, Writing and Showing the CSV file
exportChessDF <- subset(cleanchessdata, select = c(PlayerName, State, TotalPoints,
PlayerPreRating, AvgOpponentPreRating, Won, Lost, Draw))
write.csv(exportChessDF, file = "ChessTournamentStats.csv", row.names = FALSE)Displaying DataSet
datatable(exportChessDF, options = list(searching = FALSE, pageLength = 5, lengthMenu = c(5,
10, 15, 20)), rownames = FALSE)file.show(file="ChessTournamentStats.csv",title="Chess Tournament Results And Ratings")cat(readLines('ChessTournamentStats.csv'), sep = '\n')"PlayerName","State","TotalPoints","PlayerPreRating","AvgOpponentPreRating","Won","Lost","Draw"
"GARY HUA","ON","6.0",1794,1605,5,0,2
"DAKSHESH DARURI","MI","6.0",1553,1469,6,1,0
"ADITYA BAJAJ","MI","6.0",1384,1564,6,1,0
"PATRICK H SCHILLING","MI","5.5",1716,1574,4,0,3
"HANSHI ZUO","MI","5.5",1655,1501,4,0,3
"HANSEN SONG","OH","5.0",1686,1519,4,1,2
"GARY DEE SWATHELL","MI","5.0",1649,1372,5,2,0
"EZEKIEL HOUGHTON","MI","5.0",1641,1468,5,2,0
"STEFANO LEE","ON","5.0",1411,1523,5,2,0
"ANVIT RAO","MI","5.0",1365,1554,4,1,2
"CAMERON WILLIAM MC LEMAN","MI","4.5",1712,1468,4,2,1
"KENNETH J TACK","MI","4.5",1663,1506,3,1,2
"TORRANCE HENRY JR","MI","4.5",1666,1498,4,2,1
"BRADLEY SHAW","MI","4.5",1610,1515,4,2,1
"ZACHARY JAMES HOUGHTON","MI","4.5",1220,1484,4,2,1
"MIKE NIKITIN","MI","4.0",1604,1386,3,1,1
"RONALD GRZEGORCZYK","MI","4.0",1629,1499,4,3,0
"DAVID SUNDEEN","MI","4.0",1600,1480,4,3,0
"DIPANKAR ROY","MI","4.0",1564,1426,3,2,2
"JASON ZHENG","MI","4.0",1595,1411,4,3,0
"DINH DANG BUI","ON","4.0",1563,1470,4,3,0
"EUGENE L MCCLURE","MI","4.0",1555,1300,3,2,1
"ALAN BUI","ON","4.0",1363,1214,4,3,0
"MICHAEL R ALDRICH","MI","4.0",1229,1357,4,3,0
"LOREN SCHWIEBERT","MI","3.5",1745,1363,3,3,1
"MAX ZHU","ON","3.5",1579,1507,3,3,1
"GAURAV GIDWANI","MI","3.5",1552,1222,3,2,1
"SOFIA ADINA STANESCU-BELLU","MI","3.5",1507,1522,2,2,3
"CHIEDOZIE OKORIE","MI","3.5",1602,1314,3,2,1
"GEORGE AVERY JONES","ON","3.5",1522,1144,3,3,1
"RISHI SHETTY","MI","3.5",1494,1260,3,3,1
"JOSHUA PHILIP MATHEWS","ON","3.5",1441,1379,3,3,1
"JADE GE","MI","3.5",1449,1277,3,3,1
"MICHAEL JEFFERY THOMAS","MI","3.5",1399,1375,3,3,1
"JOSHUA DAVID LEE","MI","3.5",1438,1150,3,3,1
"SIDDHARTH JHA","MI","3.5",1355,1388,2,2,2
"AMIYATOSH PWNANANDAM","MI","3.5",980,1385,2,3,0
"BRIAN LIU","MI","3.0",1423,1539,2,3,1
"JOEL R HENDON","MI","3.0",1436,1430,3,4,0
"FOREST ZHANG","MI","3.0",1348,1391,3,4,0
"KYLE WILLIAM MURPHY","MI","3.0",1403,1248,2,2,0
"JARED GE","MI","3.0",1332,1150,2,3,2
"ROBERT GLEN VASEY","MI","3.0",1283,1107,3,4,0
"JUSTIN D SCHILLING","MI","3.0",1199,1327,2,4,0
"DEREK YAN","MI","3.0",1242,1152,2,3,2
"JACOB ALEXANDER LAVALLEY","MI","3.0",377,1358,3,4,0
"ERIC WRIGHT","MI","2.5",1362,1392,2,4,1
"DANIEL KHAIN","MI","2.5",1382,1356,1,3,1
"MICHAEL J MARTIN","MI","2.5",1291,1286,1,2,2
"SHIVAM JHA","MI","2.5",1056,1296,2,4,0
"TEJAS AYYAGARI","MI","2.5",1011,1356,2,4,1
"ETHAN GUO","MI","2.5",935,1495,1,3,3
"JOSE C YBARRA","MI","2.0",1393,1345,1,2,0
"LARRY HODGE","MI","2.0",1270,1206,1,5,0
"ALEX KONG","MI","2.0",1186,1406,0,4,2
"MARISA RICCI","MI","2.0",1153,1414,1,4,0
"MICHAEL LU","MI","2.0",1092,1363,1,5,0
"VIRAJ MOHILE","MI","2.0",917,1391,1,5,0
"SEAN M MC CORMICK","MI","2.0",853,1319,1,5,0
"JULIA SHEN","MI","1.5",967,1330,0,3,2
"JEZZEL FARKAS","ON","1.5",955,1327,1,5,1
"ASHWIN BALAJI","MI","1.0",1530,1186,1,0,0
"THOMAS JOSEPH HOSMER","MI","1.0",1175,1350,0,4,1
"BEN LI","MI","1.0",1163,1263,0,5,2
Plot: Players Vs Game Results
To plot the games stats for each player, melt the dataset and reshape it.
# Using melt() from reshape package
plotwinstatsdf <- subset(exportChessDF, select = c(PlayerName, Won, Lost, Draw))
melted.plotwinstatsdf <- melt(plotwinstatsdf, id = c("PlayerName"))## Warning in melt_dataframe(data, as.integer(id.ind - 1),
## as.integer(measure.ind - : '.Random.seed' is not an integer vector but of
## type 'NULL', so ignored
colnames(melted.plotwinstatsdf) <- c("PlayerName", "GameStats", "NoOfGames")
# Assign Color to bars per Won/Lost/Draw
colorbar <- c(Won = "magenta", Lost = "yellow", Draw = "cyan")# Redirecting the plot to PDF file
toplot <- ggplot(melted.plotwinstatsdf, aes(x = PlayerName, y = NoOfGames, fill = GameStats)) +
ggtitle("Players Vs Game Results") + geom_bar(colour = "black", stat = "identity") +
coord_flip() + scale_fill_manual(values = colorbar)
pdf("PlayerGames.pdf", width = 5, height = 15)
toplot
dev.off()## png
## 2
Attempt To Find Post Rating Lead Between Top Two Players
An attempt to find the Post Ratings of the Winner , if a final game is played between the top two players. The top two players are considered to be the ones Who won the max number of games. i.e. for current data set it is Pair 2, Pair 3
fin <- subset(cleanchessdata, Won == max(exportChessDF$Won), select = c(Pair))
finals <- c(as.numeric(fin[1, 1]), as.numeric(fin[2, 1]))
finals## [1] 2 3
# Posting who would lead in ratings
newpost<-checkleadpostrating(finals, cleanchessdata)
cat("Today's Big Game : ", newpost)## Today's Big Game : If Player 2( PostRating :1758) Wins To Player 3( PostRating :1763) Lead in Rating is Player3 Else If Player 3 ( PostRating :1863) Wins To Player 2 ( PostRating :1658) Lead in Rating Is Player 3
#####################################################################################