## Loading required package: bitops
Overview
The objective of this project is to import and transform the structure of a text file which contains the results of a chess tournament and produce a CSV output of the results. My game plan for this project was to import the results into a data frame. I would then massage the frame’s structure to match the layout required for the CSV output. To achieve the desired result the following steps were taken:
Step 1: Import Chess Tournament Result Data into R
# Load the chess tournament results into a data frame.
# The file is a "|"-delimited text table: the first 3 lines are skipped, and
# fill = TRUE pads short lines (such as the dashed separator lines) so every
# row has the same number of columns.
# strip.white = TRUE trims any blank padding around each field so values are
# stored without surrounding spaces; stringsAsFactors = FALSE keeps text
# columns as character vectors regardless of the R version's default.
data_set <- read.table(
  "https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Projects/Week%204%20Project/tournamentinfo.txt",
  skip = 3,
  sep = "|",
  fill = TRUE,
  header = FALSE,
  strip.white = TRUE,
  stringsAsFactors = FALSE
)
head(data_set)
## V1
## 1 -----------------------------------------------------------------------------------------
## 2 1
## 3 ON
## 4 -----------------------------------------------------------------------------------------
## 5 2
## 6 MI
## V2 V3 V4 V5 V6 V7 V8
## 1
## 2 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7
## 3 15445895 / R: 1794 ->1817 N:2 W B W B W
## 4
## 5 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16
## 6 14598900 / R: 1553 ->1663 N:2 B W B W B
## V9 V10 V11
## 1 NA
## 2 D 12 D 4 NA
## 3 B W NA
## 4 NA
## 5 W 20 W 7 NA
## 6 W B NA
Step 2: Remove rows that were filled with the line separator “—” that was used in the text file. I then removed the extra column that was created during the import.
# Drop the separator rows: every row whose first field contains a dash.
data_set <- data_set[!grepl("-{1,}", data_set$V1), , drop = FALSE]
# Discard the extra trailing column (V11); the row.names entry is cleared too.
data_set$row.names <- NULL
data_set$V11 <- NULL
head(data_set)
## V1 V2 V3 V4 V5 V6 V7
## 2 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 3 ON 15445895 / R: 1794 ->1817 N:2 W B W B
## 5 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 6 MI 14598900 / R: 1553 ->1663 N:2 B W B W
## 8 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 9 MI 14959604 / R: 1384 ->1640 N:2 W B W B
## V8 V9 V10
## 2 W 7 D 12 D 4
## 3 W B W
## 5 W 16 W 20 W 7
## 6 B W B
## 8 W 11 W 13 W 12
## 9 W B W
Step 3: I assigned user-friendly column names to the data frame.
# Give the ten remaining columns descriptive names: pair number, player name,
# total points, then one column per round.
names(data_set) <- c(
  "Pair_num",
  "Player_Name",
  "Total Number of Points",
  "Round_1",
  "Round_2",
  "Round_3",
  "Round_4",
  "Round_5",
  "Round_6",
  "Round_7"
)
head(data_set)
## Pair_num Player_Name Total Number of Points
## 2 1 GARY HUA 6.0
## 3 ON 15445895 / R: 1794 ->1817 N:2
## 5 2 DAKSHESH DARURI 6.0
## 6 MI 14598900 / R: 1553 ->1663 N:2
## 8 3 ADITYA BAJAJ 6.0
## 9 MI 14959604 / R: 1384 ->1640 N:2
## Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7
## 2 W 39 W 21 W 18 W 14 W 7 D 12 D 4
## 3 W B W B W B W
## 5 W 63 W 58 L 4 W 17 W 16 W 20 W 7
## 6 B W B W B W B
## 8 L 8 W 61 W 25 W 21 W 11 W 13 W 12
## 9 W B W B W B W
Step 4: I extracted each player’s pre-rating score and merged it into the row with the player’s information under the column heading “pre_rating”. I then removed the extra rows.
# Extract each player's state and pre-rating from the detail row that follows
# the player's summary row, and store both on the summary row.
# Rows alternate: a summary row (pair number, name, points, rounds) followed by
# a detail row (state in Pair_num, "id / R: pre ->post" text in Player_Name).
exp <- "([R]\\:\\s*[[:digit:]]+)"

# Start both columns as NA so a row that is not the first of a complete pair
# stays NA instead of inheriting a value recycled from another player.
data_set$state <- rep(NA_character_, nrow(data_set))
data_set$pre_rating <- rep(NA_character_, nrow(data_set))

# Positions of the summary rows (1, 3, 5, ...) and their detail rows (2, 4, ...).
summary_rows <- 2 * seq_len(nrow(data_set) %/% 2) - 1
detail_rows <- summary_rows + 1

data_set$state[summary_rows] <- as.character(data_set$Pair_num[detail_rows])
# First isolate the "R: <digits>" fragment, then keep only the digits.
data_set$pre_rating[summary_rows] <- str_extract(
  str_extract(data_set$Player_Name[detail_rows], exp),
  "[[:digit:]]+"
)

# remove extra rows: keep only rows whose Pair_num contains a digit
data_set <- subset(data_set, grepl("[[:digit:]]", data_set$Pair_num))
head(data_set)
## Pair_num Player_Name Total Number of Points
## 2 1 GARY HUA 6.0
## 5 2 DAKSHESH DARURI 6.0
## 8 3 ADITYA BAJAJ 6.0
## 11 4 PATRICK H SCHILLING 5.5
## 14 5 HANSHI ZUO 5.5
## 17 6 HANSEN SONG 5.0
## Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7 state
## 2 W 39 W 21 W 18 W 14 W 7 D 12 D 4 ON
## 5 W 63 W 58 L 4 W 17 W 16 W 20 W 7 MI
## 8 L 8 W 61 W 25 W 21 W 11 W 13 W 12 MI
## 11 W 23 D 28 W 2 W 26 D 5 W 19 D 1 MI
## 14 W 45 W 37 D 12 D 13 D 4 W 14 W 17 MI
## 17 W 34 D 29 L 11 W 35 D 10 W 27 W 21 OH
## pre_rating
## 2 1794
## 5 1553
## 8 1384
## 11 1716
## 14 1655
## 17 1686
Step 5: I created a function that accepts each player’s round data and returns the average pre-chess rating of their opponents that was assigned to the opp_aver column within the data frame.
# Calculate the mean pre-rating of a player's opponents.
# Pattern that picks the opponent number out of a round cell such as "W  39".
exp <- "[[:digit:]]+"

# calMean: average opponent pre-rating for one player.
#   x1..x7  the player's seven round cells.
#   Returns the mean of the looked-up pre-ratings, rounded to a whole number;
#   cells that yield no rating are dropped (na.rm = TRUE).
# The number extracted from each cell is used as a position into
# data_set$pre_rating; data_set and exp are looked up outside the function.
calMean <- function(x1, x2, x3, x4, x5, x6, x7) {
  round_cells <- list(x1, x2, x3, x4, x5, x6, x7)
  opponent_ratings <- unlist(lapply(round_cells, function(cell) {
    opponent <- as.numeric(str_extract(cell, exp))
    as.numeric(data_set$pre_rating[opponent])
  }))
  round(mean(opponent_ratings, na.rm = TRUE))
}
# Call calMean once per player, feeding it that player's seven round cells,
# and store the results in the opp_aver column.
data_set$opp_aver <- with(
  data_set,
  mapply(calMean, Round_1, Round_2, Round_3, Round_4, Round_5, Round_6, Round_7)
)
head(data_set)
## Pair_num Player_Name Total Number of Points
## 2 1 GARY HUA 6.0
## 5 2 DAKSHESH DARURI 6.0
## 8 3 ADITYA BAJAJ 6.0
## 11 4 PATRICK H SCHILLING 5.5
## 14 5 HANSHI ZUO 5.5
## 17 6 HANSEN SONG 5.0
## Round_1 Round_2 Round_3 Round_4 Round_5 Round_6 Round_7 state
## 2 W 39 W 21 W 18 W 14 W 7 D 12 D 4 ON
## 5 W 63 W 58 L 4 W 17 W 16 W 20 W 7 MI
## 8 L 8 W 61 W 25 W 21 W 11 W 13 W 12 MI
## 11 W 23 D 28 W 2 W 26 D 5 W 19 D 1 MI
## 14 W 45 W 37 D 12 D 13 D 4 W 14 W 17 MI
## 17 W 34 D 29 L 11 W 35 D 10 W 27 W 21 OH
## pre_rating opp_aver
## 2 1794 1605
## 5 1553 1469
## 8 1384 1564
## 11 1716 1574
## 14 1655 1501
## 17 1686 1519
Step 6: All non-essential columns were removed. I then renamed each column to reflect those required in the CSV output. I then reordered the columns to match the ordering provided for the CSV.
# Keep only the columns needed for the CSV, already in output order.
# Selecting by name (rather than deleting the others and reordering by index)
# means a missing or moved column raises an error instead of being mislabelled.
data_set <- data_set[c(
  "Player_Name",
  "state",
  "Total Number of Points",
  "pre_rating",
  "opp_aver"
)]
# Reset the row names to 1..n.
row.names(data_set) <- NULL
# rename columns to the headings required in the CSV (same order as above)
colnames(data_set) <- c(
  "Player's Name",
  "State",
  "Total Number of Points",
  "Player's Pre-Rating",
  "Average Pre Chess Rating of Opponents"
)
head(data_set)
## Player's Name State Total Number of Points
## 1 GARY HUA ON 6.0
## 2 DAKSHESH DARURI MI 6.0
## 3 ADITYA BAJAJ MI 6.0
## 4 PATRICK H SCHILLING MI 5.5
## 5 HANSHI ZUO MI 5.5
## 6 HANSEN SONG OH 5.0
## Player's Pre-Rating Average Pre Chess Rating of Opponents
## 1 1794 1605
## 2 1553 1469
## 3 1384 1564
## 4 1716 1574
## 5 1655 1501
## 6 1686 1519
Step 7: Now that the structure of the data frame matches the structure required for the CSV output, the “write.csv” function was used to output the data frame into the CSV format.
# Create the CSV. row.names = FALSE keeps R's automatic row numbers out of the
# file, so it contains only the data frame's own columns.
write.csv(data_set, "output.csv", row.names = FALSE)