PROJECT 1

In this project, we’re given a text file with chess tournament results where the information has some structure.Here is how the structure looks like in the below table

# Load packages
library(stringr)
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.2
# Reading the data file and skipping header block
chessTournament <- read.csv("https://raw.githubusercontent.com/Harpreet1984/DATA607/master/tournamentinfo.txt", sep = ",", skip = 3)

# Sample raw input data
head(chessTournament)
##   X.........................................................................................
## 1      1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 2     ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 3  -----------------------------------------------------------------------------------------
## 4      2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## 5     MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |
## 6  -----------------------------------------------------------------------------------------

Remove the dashed lines

chessTournamentFiltered <- chessTournament[!str_detect (chessTournament[,1], "^-+$"),]

combining multiple customer rows into one

customerCombined <- str_c(chessTournamentFiltered[seq(1, length(chessTournamentFiltered), 2)], chessTournamentFiltered[seq(2, length(chessTournamentFiltered), 2)], sep = "")
customerCombined <- data.frame(str_split_fixed(customerCombined, "\\||/|->", n = Inf))

update column headers, setup computational columns

colnames(customerCombined) <- c("PlayerId",
 "PlayerName",           
 "PlayerPoints",         
 "Game1",                 
 "Game2",
 "Game3",
 "Game4",
 "Game5",
 "Game6",
 "Game7",
 "PlayerState",          
 "UscfId" ,
 "PlayerRatingPre",
 "PlayerRatingPost",
 "nValue",
 "value1",
 "value2",
 "value3",
 "value4",
 "value5",
 "value6",
 "value7"
)
 
customerCombined$PlayerRatingPre <- as.numeric(str_replace(customerCombined$PlayerRatingPre, "R: *([:digit:]+)(P.*)?", "\\1"))

customerCombined$PlayerRatingPost <- as.numeric(str_replace(customerCombined$PlayerRatingPost, "([:digit:]+)(P.*)?", "\\1"))

customerCombined$`AvePreRating` <- as.numeric(0)

Create derived table, for getting average opponent.

opp1 <- data.frame(as.numeric(str_extract_all(customerCombined$Game1,"[[:digit:]]{1,}")))
opp2 <- data.frame(as.numeric(str_extract_all(customerCombined$Game2,"[[:digit:]]{1,}")))
opp3 <- data.frame(as.numeric(str_extract_all(customerCombined$Game3,"[[:digit:]]{1,}")))
opp4 <- data.frame(as.numeric(str_extract_all(customerCombined$Game4,"[[:digit:]]{1,}")))
opp5 <- data.frame(as.numeric(str_extract_all(customerCombined$Game5,"[[:digit:]]{1,}")))
opp6 <- data.frame(as.numeric(str_extract_all(customerCombined$Game6,"[[:digit:]]{1,}")))
opp7 <- data.frame(as.numeric(str_extract_all(customerCombined$Game7,"[[:digit:]]{1,}")))

opps <- cbind(customerCombined$PlayerId, customerCombined$PlayerName, opp1, opp2, opp3, opp4, opp5, opp6, opp7)
names(opps) <- c("Player Id","Player Name","Opp 1","Opp 2","Opp 3","Opp 4","Opp 5","Opp 6","Opp 7")

opps[is.na(opps)] <- as.numeric(0)

Extract opponent average points

for (k in 3:9){
  for (j in 1:dim(customerCombined)[1]){
    for (i in 1:dim(customerCombined)[1]){
      if (as.numeric(opps[j,k]) == as.numeric(customerCombined$PlayerId[i])){
         customerCombined$AvePreRating[j] <- as.numeric(customerCombined$AvePreRating[j]) + as.numeric(customerCombined$PlayerRatingPre[i])
      }
    }  
  }
}  

customerCombined$AvePreRating  <- round(as.numeric(customerCombined$AvePreRating) / 7,0)

head(customerCombined)
##   PlayerId                        PlayerName PlayerPoints Game1 Game2
## 1       1   GARY HUA                                6.0   W  39 W  21
## 2       2   DAKSHESH DARURI                         6.0   W  63 W  58
## 3       3   ADITYA BAJAJ                            6.0   L   8 W  61
## 4       4   PATRICK H SCHILLING                     5.5   W  23 D  28
## 5       5   HANSHI ZUO                              5.5   W  45 W  37
## 6       6   HANSEN SONG                             5.0   W  34 D  29
##   Game3 Game4 Game5 Game6 Game7 PlayerState     UscfId PlayerRatingPre
## 1 W  18 W  14 W   7 D  12 D   4         ON   15445895             1794
## 2 L   4 W  17 W  16 W  20 W   7         MI   14598900             1553
## 3 W  25 W  21 W  11 W  13 W  12         MI   14959604             1384
## 4 W   2 W  26 D   5 W  19 D   1         MI   12616049             1716
## 5 D  12 D  13 D   4 W  14 W  17         MI   14601533             1655
## 6 L  11 W  35 D  10 W  27 W  21         OH   15055204             1686
##   PlayerRatingPost nValue value1 value2 value3 value4 value5 value6 value7
## 1             1817  N:2    W      B      W      B      W      B      W    
## 2             1663  N:2    B      W      B      W      B      W      B    
## 3             1640  N:2    W      B      W      B      W      B      W    
## 4             1744  N:2    W      B      W      B      W      B      B    
## 5             1690  N:2    B      W      B      W      B      W      B    
## 6             1687  N:3    W      B      W      B      B      W      B    
##   NA AvePreRating
## 1            1605
## 2            1469
## 3            1564
## 4            1574
## 5            1501
## 6            1519

Our aim here is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the player 1. Player Name 2. Player State 3. Total points 4. Player Pre-Rating, 5. Average Opponents Raing

generate csv file

write.table(customerCombined[, c(2, 3, 11, 13, 24)], file = "Harpreet-chesstournamentInfo.csv",row.names=FALSE, na="",col.names = c("Player Name", "Points", "Player State", "Pre Rating", "Opponent Avg Rating"), sep=",")