Project 1

## Loading required package: bitops

Overview

The objective of this project is to import and transform the structure of a text file that contains the results of a chess tournament and produce a CSV output of the results. My game plan for this project was to import the results into a data frame. I would then massage the frame’s structure to match the layout required for the CSV output. To achieve the desired result, the following steps were taken:

Step 1: Import Chess Tournament Result Data into R

# Read the raw tournament report directly from GitHub.
# The file is pipe-delimited with no header row; the first three lines are
# skipped, and fill = TRUE pads rows that have fewer fields than the rest.
data_set <- read.table(
  file = "https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Projects/Week%204%20Project/tournamentinfo.txt",
  header = FALSE,
  sep = "|",
  skip = 3,
  fill = TRUE
)
head(data_set)

##                                                                                          V1
## 1 -----------------------------------------------------------------------------------------
## 2                                                                                        1 
## 3                                                                                       ON 
## 4 -----------------------------------------------------------------------------------------
## 5                                                                                        2 
## 6                                                                                       MI 
##                                  V2    V3    V4    V5    V6    V7    V8
## 1                                                                      
## 2  GARY HUA                         6.0   W  39 W  21 W  18 W  14 W   7
## 3  15445895 / R: 1794   ->1817      N:2   W     B     W     B     W    
## 4                                                                      
## 5  DAKSHESH DARURI                  6.0   W  63 W  58 L   4 W  17 W  16
## 6  14598900 / R: 1553   ->1663      N:2   B     W     B     W     B    
##      V9   V10 V11
## 1              NA
## 2 D  12 D   4  NA
## 3 B     W      NA
## 4              NA
## 5 W  20 W   7  NA
## 6 W     B      NA

Step 2: I removed the rows that were filled with the line separator “---” used in the text file. I then removed the extra column that was created during the import.

# Remove the separator rows: any row whose first field contains a dash is one
# of the "-----" rule lines from the text report rather than player data.
data_set <- data_set[!grepl("-{1,}", data_set$V1), , drop = FALSE]
# Remove the extra column V11, which is NA on every row of the import.
# (There is no "row.names" column to delete: row names are an attribute of
# the data frame, not a column, so no second assignment is needed here.)
data_set$V11 <- NULL
head(data_set)

##       V1                                V2    V3    V4    V5    V6    V7
## 2     1   GARY HUA                         6.0   W  39 W  21 W  18 W  14
## 3    ON   15445895 / R: 1794   ->1817      N:2   W     B     W     B    
## 5     2   DAKSHESH DARURI                  6.0   W  63 W  58 L   4 W  17
## 6    MI   14598900 / R: 1553   ->1663      N:2   B     W     B     W    
## 8     3   ADITYA BAJAJ                     6.0   L   8 W  61 W  25 W  21
## 9    MI   14959604 / R: 1384   ->1640      N:2   W     B     W     B    
##      V8    V9   V10
## 2 W   7 D  12 D   4
## 3 W     B     W    
## 5 W  16 W  20 W   7
## 6 B     W     B    
## 8 W  11 W  13 W  12
## 9 W     B     W

Step 3: I assigned user-friendly column names to the data frame.

# Label the ten remaining columns: pair number, player name, total points,
# then one column for each of the seven rounds.
names(data_set) <- c(
  "Pair_num", "Player_Name", "Total Number of Points",
  "Round_1", "Round_2", "Round_3", "Round_4",
  "Round_5", "Round_6", "Round_7"
)
head(data_set)

##   Pair_num                       Player_Name Total Number of Points
## 2       1   GARY HUA                                          6.0  
## 3      ON   15445895 / R: 1794   ->1817                       N:2  
## 5       2   DAKSHESH DARURI                                   6.0  
## 6      MI   14598900 / R: 1553   ->1663                       N:2  
## 8       3   ADITYA BAJAJ                                      6.0  
## 9      MI   14959604 / R: 1384   ->1640                       N:2  
##   Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7
## 2   W  39   W  21   W  18   W  14   W   7   D  12   D   4
## 3   W       B       W       B       W       B       W    
## 5   W  63   W  58   L   4   W  17   W  16   W  20   W   7
## 6   B       W       B       W       B       W       B    
## 8   L   8   W  61   W  25   W  21   W  11   W  13   W  12
## 9   W       B       W       B       W       B       W

Step 4: I extracted each player’s state and pre-rating from the second row of their record and merged them into the row holding the player’s information, under the column headings “state” and “pre_rating”. I then removed the extra rows.

# Extract each player's state and pre-rating from the second row of their
# record and merge them into the first row.
#
# Each player occupies two consecutive rows: the first holds the pair number,
# name and round results; the second holds the state (in Pair_num) and the
# "<id> / R: <pre-rating> -><post-rating>" text (in Player_Name).
rating_pattern <- "([R]\\:\\s*[[:digit:]]+)"

# Odd positions are player rows and the following even position is the
# matching info row. Integer division ignores an unpaired trailing row.
player_rows <- seq_len(nrow(data_set) %/% 2) * 2 - 1
info_rows <- player_rows + 1

# Create both columns as all-NA first. Assigning to a single element of a
# column that does not exist yet recycles that one value down the whole
# column, which would leave stale data in any row that is never overwritten.
data_set$state <- rep(NA_character_, nrow(data_set))
data_set$pre_rating <- rep(NA_character_, nrow(data_set))

data_set$state[player_rows] <- as.character(data_set$Pair_num[info_rows])
# Two-stage extraction: isolate "R: 1794" first, then keep only its digits,
# so the leading id and the post-rating are never picked up by mistake.
data_set$pre_rating[player_rows] <- str_extract(
  str_extract(as.character(data_set$Player_Name[info_rows]), rating_pattern),
  "[[:digit:]]+"
)

# remove extra rows: only player rows have a digit in Pair_num
data_set <- subset(data_set, grepl("[[:digit:]]", data_set$Pair_num))
head(data_set)

##    Pair_num                       Player_Name Total Number of Points
## 2        1   GARY HUA                                          6.0  
## 5        2   DAKSHESH DARURI                                   6.0  
## 8        3   ADITYA BAJAJ                                      6.0  
## 11       4   PATRICK H SCHILLING                               5.5  
## 14       5   HANSHI ZUO                                        5.5  
## 17       6   HANSEN SONG                                       5.0  
##    Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7  state
## 2    W  39   W  21   W  18   W  14   W   7   D  12   D   4    ON 
## 5    W  63   W  58   L   4   W  17   W  16   W  20   W   7    MI 
## 8    L   8   W  61   W  25   W  21   W  11   W  13   W  12    MI 
## 11   W  23   D  28   W   2   W  26   D   5   W  19   D   1    MI 
## 14   W  45   W  37   D  12   D  13   D   4   W  14   W  17    MI 
## 17   W  34   D  29   L  11   W  35   D  10   W  27   W  21    OH 
##    pre_rating
## 2        1794
## 5        1553
## 8        1384
## 11       1716
## 14       1655
## 17       1686

Step 5: I created a function that accepts each player’s round data and returns the average pre-chess rating of their opponents. The result was assigned to the opp_aver column of the data frame.

# calculate mean
#
# calMean: average pre-rating of one player's opponents.
#
# Args:
#   x1 ... x7: the player's seven round cells, e.g. "W  39" (a result code
#     followed by the opponent's pair number). Cells that contain no digits
#     contribute nothing to the average.
#
# Returns:
#   The mean of the opponents' pre-ratings, rounded to a whole number.
#   NaN when none of the seven cells contains a pair number.
#
# Note: reads data_set$pre_rating from the enclosing environment and indexes
# it by position, so it assumes the row at position i belongs to pair number
# i. That holds for the rows shown in this report; confirm it before reusing
# the function on data that is filtered or sorted differently.
calMean <- function(x1, x2, x3, x4, x5, x6, x7) {
  # as.character() so factor cells are matched on their labels, not codes.
  rounds <- vapply(
    list(x1, x2, x3, x4, x5, x6, x7),
    as.character,
    character(1)
  )

  # The first run of digits in each cell is the opponent's pair number.
  hit <- regexpr("[[:digit:]]+", rounds)
  found <- !is.na(hit) & hit > 0
  opponent <- rep(NA_integer_, length(rounds))
  opponent[found] <- as.integer(regmatches(rounds, hit))

  # Indexing with NA yields NA, which na.rm = TRUE then drops.
  ratings <- as.numeric(data_set$pre_rating[opponent])
  round(mean(ratings, na.rm = TRUE))
}

# Compute the opponent average for every player: mapply() walks the seven
# round columns in parallel and calls calMean() once per row.
data_set$opp_aver <- with(
  data_set,
  mapply(
    calMean,
    Round_1, Round_2, Round_3, Round_4, Round_5, Round_6, Round_7
  )
)
head(data_set)

##    Pair_num                       Player_Name Total Number of Points
## 2        1   GARY HUA                                          6.0  
## 5        2   DAKSHESH DARURI                                   6.0  
## 8        3   ADITYA BAJAJ                                      6.0  
## 11       4   PATRICK H SCHILLING                               5.5  
## 14       5   HANSHI ZUO                                        5.5  
## 17       6   HANSEN SONG                                       5.0  
##    Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7  state
## 2    W  39   W  21   W  18   W  14   W   7   D  12   D   4    ON 
## 5    W  63   W  58   L   4   W  17   W  16   W  20   W   7    MI 
## 8    L   8   W  61   W  25   W  21   W  11   W  13   W  12    MI 
## 11   W  23   D  28   W   2   W  26   D   5   W  19   D   1    MI 
## 14   W  45   W  37   D  12   D  13   D   4   W  14   W  17    MI 
## 17   W  34   D  29   L  11   W  35   D  10   W  27   W  21    OH 
##    pre_rating opp_aver
## 2        1794     1605
## 5        1553     1469
## 8        1384     1564
## 11       1716     1574
## 14       1655     1501
## 17       1686     1519

Step 6: All non-essential columns were removed. I then renamed each column to reflect those required in the CSV output, and reordered the columns to match the ordering provided for the CSV.

# Keep only the five columns the CSV needs. Selecting them by name, already
# in output order, drops Pair_num and the seven Round_* columns and reorders
# in one step, so nothing depends on the columns' current positions.
data_set <- data_set[c(
  "Player_Name", "state", "Total Number of Points", "pre_rating", "opp_aver"
)]
# Reset the row names to 1..n (they still carry the original line positions).
row.names(data_set) <- NULL

# rename columns to the headings used in the CSV
colnames(data_set) <- c(
  "Player's Name",
  "State",
  "Total Number of Points",
  "Player's Pre-Rating",
  "Average Pre Chess Rating of Opponents"
)
head(data_set)

##                       Player's Name  State Total Number of Points
## 1  GARY HUA                            ON                   6.0  
## 2  DAKSHESH DARURI                     MI                   6.0  
## 3  ADITYA BAJAJ                        MI                   6.0  
## 4  PATRICK H SCHILLING                 MI                   5.5  
## 5  HANSHI ZUO                          MI                   5.5  
## 6  HANSEN SONG                         OH                   5.0  
##   Player's Pre-Rating Average Pre Chess Rating of Opponents
## 1                1794                                  1605
## 2                1553                                  1469
## 3                1384                                  1564
## 4                1716                                  1574
## 5                1655                                  1501
## 6                1686                                  1519

Step 7: Now that the structure of the data frame matches the structure required for the CSV output, the “write.csv” function was used to output the data frame in CSV format.

# create csv
# row.names = FALSE keeps the file to the five data columns; the default
# would add an unnamed leading column holding the row numbers.
write.csv(data_set, "output.csv", row.names = FALSE)

Project 1

Jason Joseph

September 22, 2015