Project 1

Player's Seven Round Match Results

Player number, Round Number (1-7), Outcome (W, L, D), Opponent's player number, Opponent's Pre Chess Rating.

library(stringr)
library(knitr)

## Warning: package 'knitr' was built under R version 3.2.5

library(ggplot2)
# Read the raw tournament report into a character vector, one element per
# line of the file.
chess <- readLines("tournamentinfo.txt")

## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'

# Inspect the type and length of what was read.
str(chess)

##  chr [1:196] "-----------------------------------------------------------------------------------------" ...

Extract information

We need each player's name, state, total number of points, and pre-rating, plus the average pre-tournament chess rating of their opponents.

# Collect every run of 15 to 31 letters/spaces (each optionally followed by a
# hyphen) found in the raw lines.
name_matches <- unlist(str_extract_all(chess, "([[:alpha:] ]-?){15,31}"))
# The first match is skipped; matches 2 through 65 are kept, with the
# surrounding padding trimmed off.
names <- str_trim(name_matches[2:65])
head(names)

## [1] "GARY HUA"            "DAKSHESH DARURI"     "ADITYA BAJAJ"       
## [4] "PATRICK H SCHILLING" "HANSHI ZUO"          "HANSEN SONG"

# Extract each player's two-letter state/province code.
# Rather than listing specific codes, match any two uppercase letters that
# open a line and are followed by the "|" field separator. This finds codes
# other than ON/OH/MI and cannot fire on a name fragment such as " ON..."
# in the middle of a player line.
# NOTE(review): assumes the code is the first field on its line in
# tournamentinfo.txt - confirm against the input file.
states <- str_trim(unlist(str_extract_all(chess, "^\\s*[A-Z]{2}(?=\\s*\\|)")))
str(states)

##  chr [1:64] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" ...

# Extract each player's total points, written as digit, decimal point, digit
# (e.g. "6.0"). The dot is escaped so that only a literal decimal point is
# accepted; a character class such as "[//.]" would also accept "/" between
# the two digits. The values are kept as character strings.
number_points <- unlist(str_extract_all(chess, "[[:digit:]]\\.[[:digit:]]"))
str(number_points)

##  chr [1:64] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" ...

# Find every "R: " marker together with the four digit/space characters that
# follow it, strip the marker, and parse the remainder as an integer rating.
rating_fields <- unlist(str_extract_all(chess, "R: [[:digit:] ]{4}"))
prerating <- as.integer(sub(pattern = "R: ", replacement = "", x = rating_fields))
str(prerating)

##  int [1:64] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 ...

# Match the total-points field (digit, literal ".", digit) plus the 44
# characters after it, which hold the per-round result cells. The dot is
# escaped so a "/" can no longer stand in for the decimal point.
round_fields <- unlist(str_extract_all(
  chess,
  "[[:digit:]]\\.[[:digit:]][|DLWUXBH[:digit:] ]{44}"
))
# Characters 10-47 of each match start at the first round's opponent number
# and end at the last round's; the "|" cell separators become spaces.
opponent <- gsub("\\|", " ", str_sub(round_fields, start = 10, end = 47))
str(opponent)

##  chr [1:64] "39 W  21 W  18 W  14 W   7 D  12 D   4" ...

# Split each player's round string on the result letter (W, L, D, U, H, B or
# X) together with the single space on either side of it, giving one element
# per round. All letters are delimited the same way, so a loss no longer
# leaves an extra leading space on the following element. Elements are the
# opponent numbers as (space-padded) strings.
list_opponent <- strsplit(opponent, " [WDUHBXL] ")
head(list_opponent)

## [[1]]
## [1] "39"  " 21" " 18" " 14" "  7" " 12" "  4"
## 
## [[2]]
## [1] "63"   " 58"  "   4" " 17"  " 16"  " 20"  "  7" 
## 
## [[3]]
## [1] " 8"  " 61" " 25" " 21" " 11" " 13" " 12"
## 
## [[4]]
## [1] "23"  " 28" "  2" " 26" "  5" " 19" "  1"
## 
## [[5]]
## [1] "45"  " 37" " 12" " 13" "  4" " 14" " 17"
## 
## [[6]]
## [1] "34"   " 29"  "  11" " 35"  " 10"  " 27"  " 21"

Convert each player's opponent numbers to numeric values, look up those opponents' pre-ratings as a matrix (one column per player), and calculate each column's mean as an integer.

# Number of rounds per player; every element of list_opponent is expected to
# have exactly this many entries.
rounds <- 7L

# Opponent numbers as a rounds x players numeric matrix. vapply() checks that
# each player really has `rounds` entries instead of silently returning a
# list when the input is ragged. Blank entries become NA.
matrix1 <- vapply(list_opponent, as.numeric, numeric(rounds))

# Look up each opponent's pre-rating by opponent number; an NA opponent
# yields an NA rating. One column per player.
matrix2 <- matrix(prerating[matrix1], nrow = rounds)

# Mean opponent pre-rating per player, ignoring NA entries, rounded to the
# nearest integer. Rounding is done numerically rather than through
# format(), whose number of decimals depends on the smallest value in the
# vector and would turn rounding into truncation for means below 1000.
average <- as.integer(round(colMeans(matrix2, na.rm = TRUE)))
str(average)

##  int [1:64] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 ...

Final outcomes

# Assemble the per-player summary table; each column carries the name of the
# vector it is built from.
final_outcomes <- data.frame(
  names = names,
  states = states,
  number_points = number_points,
  prerating = prerating,
  average = average
)
str(final_outcomes)

## 'data.frame':    64 obs. of  5 variables:
##  $ names        : Factor w/ 64 levels "ADITYA BAJAJ",..: 24 12 1 51 28 27 23 21 59 5 ...
##  $ states       : Factor w/ 3 levels "MI","OH","ON": 3 1 1 1 1 2 1 1 3 1 ...
##  $ number_points: Factor w/ 11 levels "1.0","1.5","2.0",..: 11 11 11 10 10 9 9 9 9 9 ...
##  $ prerating    : int  1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 ...
##  $ average      : int  1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 ...

# Render the first six rows of the summary as a table.
kable(head(final_outcomes, n = 6L))

names	states	number_points	prerating	average
GARY HUA	ON	6.0	1794	1605
DAKSHESH DARURI	MI	6.0	1553	1469
ADITYA BAJAJ	MI	6.0	1384	1564
PATRICK H SCHILLING	MI	5.5	1716	1574
HANSHI ZUO	MI	5.5	1655	1501
HANSEN SONG	OH	5.0	1686	1519

# Gap between each player's own pre-rating and the mean pre-rating of the
# opponents they faced (positive means the player was rated above them).
difference <- with(final_outcomes, prerating - average)
difference

##  [1]  189   84 -180  142  154  167  277  173 -112 -189  244  157  168   95
## [15] -264  218  130  120  138  184   93  255  149 -128  382   72  330  -15
## [29]  288  378  234   62  172   24  288  -33 -405 -116    6  -43  155  182
## [43]  176 -128   90 -981  -30   26    5 -240 -345 -560   48   64 -220 -261
## [57] -271 -474 -466 -363 -372  344 -175 -100

# Export the summary table as a comma-separated file without row names.
write.table(
  x = final_outcomes,
  file = "chessoutcome.csv",
  sep = ",",
  row.names = FALSE
)