Project 1

library(RCurl)

## Loading required package: bitops

library(stringr)


# Read the raw tournament table. skip = 2 drops the first two lines of the
# file and header = TRUE consumes the third one as the column name, so the
# "USCF ID" heading text never ends up in the substrings extracted later.
chess_data <- read.csv(
  file = "https://raw.githubusercontent.com/gpsingh12/IS-607-MSDA/master/tournament.txt",
  header = TRUE,
  skip = 2
)
head(chess_data)

##    Num....USCF.ID...Rtg..Pre..Post..........Pts....1.....2.....3.....4.....5.....6.....7...
## 1 -----------------------------------------------------------------------------------------
## 2     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 3    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 4 -----------------------------------------------------------------------------------------
## 5     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## 6    MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |

# Flatten the data frame into a single vector of text lines so the stringr
# calls below can scan every line of the table.
cd <- unlist(chess_data)

The data in its original format needs to be separated and stored in different variables, which can later be combined to create the output as a CSV file.

We will start by extracting the players' names.

Create a variable player containing names of the players.

# Player names: runs of at least seven letters, dots, commas or spaces (each
# character may be followed by a hyphen), with the padding trimmed off.
player <- str_trim(
  unlist(
    str_extract_all(string = cd, pattern = "([[:alpha:]., ]-?){7,}")
  )
)
head(player)

## [1] "GARY HUA"            "DAKSHESH DARURI"     "ADITYA BAJAJ"       
## [4] "PATRICK H SCHILLING" "HANSHI ZUO"          "HANSEN SONG"

The variable state holds the state of each player, extracted from the text file.

# State codes: two upper-case letters preceded by two spaces; the leading
# spaces are trimmed from each match.
state <- str_trim(
  unlist(
    str_extract_all(string = cd, pattern = "\\  [[:upper:]]{2}")
  )
)
head(state)

## [1] "ON" "MI" "MI" "MI" "MI" "OH"

Extract the player scores.

# Total points: a digit, a decimal point and a digit (optionally followed by
# a hyphen). The values are kept as character strings.
scores <- str_trim(
  unlist(
    str_extract_all(string = cd, pattern = "[0-9]\\.[0-9]-?")
  )
)
head(scores)

## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0"

Ratings of the players

# Pre-tournament ratings: take the four characters (digits or padding spaces)
# that follow ": " on each rating line.
rating <- unlist(
  str_extract_all(string = cd, pattern = "\\: [[:digit:] ]{4}")
)

head(rating)

## [1] ": 1794" ": 1553" ": 1384" ": 1716" ": 1655" ": 1686"

# Strip the leading ": " marker so only the number remains.
rating_upd <- sub(pattern = ": ", replacement = "", x = rating)

# Convert the cleaned strings to numbers.
prerating <- as.numeric(rating_upd)
head(prerating)

## [1] 1794 1553 1384 1716 1655 1686

Opponents' pre-ratings

# Result cells that carry an opponent number, e.g. "|W  39". Requiring at
# least one digit skips cells that hold only a letter and padding (such as
# the colour cells "|W    "), which the previous pattern also matched.
oppavg <- unlist(str_extract_all(cd, "\\|[[:upper:]] +[[:digit:]]+"))

head(oppavg)

## [1] "|W  39" "|W  21" "|W  18" "|W  14" "|W   7" "|D  12"

# Take the whole run of digits from each cell so that pair numbers of any
# width stay intact (a fixed {1,2} width would split a three-digit number
# into two separate matches).
oavg <- as.numeric(str_extract(oppavg, "[[:digit:]]+"))
head(oavg)

## [1] 39 21 18 14  7 12

# Positional lookup: prerating is ordered by pair number, so indexing it with
# the opponents' pair numbers returns each opponent's pre-rating.
opponent <- prerating[oavg]
head(opponent)

## [1] 1436 1563 1600 1610 1649 1663

 # Once we have the opponents' pre-ratings, we arrange them in a matrix so that the mean can be computed for each player.

# Collect the opponents separately for every player row. Not every player has
# an opponent in all rounds, so reshaping one flat vector into a fixed 7 x 64
# matrix shifts every later row and recycles values (R warned that the data
# length was not a multiple of the number of rows).
player_rows <- as.character(cd)
player_rows <- player_rows[str_detect(player_rows, "^\\s*[[:digit:]]+\\s*\\|")]
round_cells <- str_extract_all(player_rows, "\\|[[:upper:]] +[[:digit:]]+")
n_rounds <- max(lengths(round_cells), 1L)

# One column per player holding the opponents' pre-ratings, padded with NA
# where the player had fewer opponents than rounds.
avg_opp <- vapply(
  round_cells,
  function(cells) {
    ids <- as.integer(str_extract(cells, "[[:digit:]]+"))
    ratings <- prerating[ids]
    length(ratings) <- n_rounds
    ratings
  },
  numeric(n_rounds)
)
avg_opp <- matrix(avg_opp, nrow = n_rounds)

# Transpose so that every row is a player and every column one of that
# player's opponents, matching the layout of the text file.
opp1 <- t(avg_opp)
head(opp1)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1436 1563 1600 1610 1649 1663 1716
## [2,] 1175  917 1716 1629 1604 1595 1649
## [3,] 1641  955 1745 1563 1712 1666 1663
## [4,] 1363 1507 1553 1579 1655 1564 1794
## [5,] 1242  980 1663 1666 1716 1610 1629
## [6,] 1399 1602 1712 1438 1365 1552 1563

# Element [1, 1] is the pre-rating of player 39 (the first opponent of player
# 1), element [1, 2] is the pre-rating of player 21 (1563), and so on.

# Average over the opponents actually faced (na.rm = TRUE ignores the NA
# padding), then round to the nearest whole rating point. as.integer() alone
# would truncate the mean instead of rounding it.
opp_prerating <- rowMeans(opp1, na.rm = TRUE)
opp_prerating <- as.integer(round(opp_prerating))
head(opp_prerating)

## [1] 1605 1469 1564 1574 1501 1519

# Combine the extracted columns into the final table, one row per player.
chess <- data.frame(player, state, scores, prerating, opp_prerating)

head(chess)

##                player state scores prerating opp_prerating
## 1            GARY HUA    ON    6.0      1794          1605
## 2     DAKSHESH DARURI    MI    6.0      1553          1469
## 3        ADITYA BAJAJ    MI    6.0      1384          1564
## 4 PATRICK H SCHILLING    MI    5.5      1716          1574
## 5          HANSHI ZUO    MI    5.5      1655          1501
## 6         HANSEN SONG    OH    5.0      1686          1519

# Save the assembled table as a CSV file (the destination is an absolute path
# on the author's machine).
write.csv(
  x = chess,
  file = "C:/Users/Gurpreet/Documents/IS607/chessdata.csv"
)

Project 1

GP SINGH

February 23, 2016