library(RCurl)
## Loading required package: bitops
library(stringr)
# Address of the raw tournament cross-table text file.
tournament_url <- "https://raw.githubusercontent.com/gpsingh12/IS-607-MSDA/master/tournament.txt"
# skip = 2 discards the first two lines of the file and header = TRUE consumes
# the third one as the column name, so the "USCF ID" heading text never reaches
# the data and cannot leak into the substrings extracted later on.
chess_data <- read.csv(file = tournament_url, skip = 2, header = TRUE)
head(chess_data, n = 6)
## Num....USCF.ID...Rtg..Pre..Post..........Pts....1.....2.....3.....4.....5.....6.....7...
## 1 -----------------------------------------------------------------------------------------
## 2 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 3 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 4 -----------------------------------------------------------------------------------------
## 5 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## 6 MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
# Flatten the single-column data frame into one vector of text lines.
cd <- unlist(x = chess_data)
The data in its original format needs to be split into separate variables, which can later be combined to produce the output CSV file.
We will start by extracting the players' names.
Create a variable `player` containing the names of the players.
# A name is a run of at least seven letters, dots, commas or spaces, each of
# which may be followed by a hyphen. One list of matches is produced per line.
name_matches <- str_extract_all(cd, "([[:alpha:]., ]-?){7,}")
# Flatten the per-line matches and strip the padding around each name.
player <- str_trim(unlist(name_matches))
head(player)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
`state` holds the state of each player, extracted from the text file.
# The two-letter state code is the first field of the second row of each
# player's record, e.g. "ON | 15445895 / R: 1794 ->1817 |...".
# The match is anchored to the start of the line and must be followed by the
# "|" separator; a bare "space + two capitals" pattern would also pick up name
# fragments such as " GA" and " HU" from "| GARY HUA" and leave `state` with a
# different length than the other per-player vectors.
state <- str_trim(unlist(str_extract_all(cd, "^\\s*[[:upper:]]{2}(?=\\s*\\|)")))
head(state)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
Extract the player scores.
# Total points are written as "<digit>.<digit>" (for example "6.0"); the
# pattern also tolerates a trailing hyphen.
score_matches <- str_extract_all(cd, "[0-9]\\.[0-9]-?")
# Flatten the per-line matches and remove any surrounding whitespace.
scores <- str_trim(unlist(score_matches))
head(scores)
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0"
Extract each player's pre-tournament rating.
# Grab ": " together with the four characters (digits or padding spaces) that
# follow it, which is where the pre-tournament rating sits after "R".
rating_matches <- str_extract_all(cd, "\\: [[:digit:] ]{4}")
rating <- unlist(rating_matches)
head(rating)
## [1] ": 1794" ": 1553" ": 1384" ": 1716" ": 1655" ": 1686"
# Drop the leading ": " so that only the rating text is left ...
rating_upd <- sub(': ', '', rating)
# ... and turn that text into numbers.
prerating <- as.numeric(rating_upd)
head(prerating)
## [1] 1794 1553 1384 1716 1655 1686
Opponents' pre-tournament ratings
# Only the first row of each player's record (the one that starts with the
# pair number) lists opponents. The second row uses the same "|X    " layout
# but carries colours only, so it is excluded up front.
cd_lines <- as.character(cd)
is_player_row <- str_detect(cd_lines, "^\\s*[[:digit:]]+\\s*\\|")
oppavg <- unlist(
  str_extract_all(cd_lines[is_player_row], "\\|[[:upper:]] [[:digit:] ]{1,}")
)
head(oppavg)
## [1] "|W 39" "|W 21" "|W 18" "|W 14" "|W 7" "|D 12"
# str_extract() yields NA for a round field that has no opponent number, so
# every player keeps the same number of entries. Dropping those fields (as
# unlist(str_extract_all(...)) does) shifts all later players' opponents into
# the wrong rows of the matrix built below.
oavg <- as.numeric(str_extract(oppavg, "[[:digit:]]+"))
head(oavg)
## [1] 39 21 18 14 7 12
# Pair numbers double as positions in prerating, so indexing prerating with
# the opponent numbers looks up each opponent's pre-tournament rating.
# An NA opponent number gives an NA rating.
opponent <- prerating[oavg]
head(opponent)
## [1] 1436 1563 1600 1610 1649 1663
# Arrange the ratings as rounds x players; the dimensions come from the data
# rather than being hard-coded, and the checks stop the script instead of
# letting matrix() silently recycle values.
n_players <- sum(is_player_row)
n_rounds <- length(oavg) / n_players
stopifnot(n_rounds == round(n_rounds), length(prerating) == n_players)
avg_opp <- matrix(opponent, nrow = n_rounds, ncol = n_players)
# Transpose so that each row is a player and each column a round, matching the
# layout of the text file.
opp1 <- t(avg_opp)
head(opp1)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1436 1563 1600 1610 1649 1663 1716
## [2,] 1175 917 1716 1629 1604 1595 1649
## [3,] 1641 955 1745 1563 1712 1666 1663
## [4,] 1363 1507 1553 1579 1655 1564 1794
## [5,] 1242 980 1663 1666 1716 1610 1629
## [6,] 1399 1602 1712 1438 1365 1552 1563
# Row 1, column 1 is the pre-rating of player 39 (Gary Hua's first opponent),
# row 1, column 2 is the pre-rating of player 21 (1563), and so on.
# na.rm = TRUE averages over the rounds that were actually played; it is now
# spelled out instead of being reached through partial matching of `n = 7`.
opp_prerating <- rowMeans(opp1, na.rm = TRUE)
# as.integer() truncates toward zero (1563.57 becomes 1563). The former
# round(digits = 4) call ran after the truncation and changed nothing, so it
# has been removed.
# NOTE(review): if nearest-integer averages are wanted, apply round() before
# as.integer().
opp_prerating <- as.integer(opp_prerating)
head(opp_prerating)
## [1] 1605 1469 1563 1573 1500 1518
# Combine the per-player vectors into one table with one row per player.
chess <- data.frame(player, state, scores, prerating, opp_prerating)
head(chess)
## player state scores prerating opp_prerating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1563
## 4 PATRICK H SCHILLING MI 5.5 1716 1573
## 5 HANSHI ZUO MI 5.5 1655 1500
## 6 HANSEN SONG OH 5.0 1686 1518
# row.names = FALSE keeps the automatic 1..n row index out of the file, so the
# CSV contains exactly the five columns of `chess`.
# NOTE(review): the destination is a machine-specific absolute path; adjust it
# before running the script elsewhere.
write.csv(chess, "C:/Users/Gurpreet/Documents/IS607/chessdata.csv", row.names = FALSE)