Load the library & Import the CSV

Dplyer will be used to transform the csv file. The file is imported using the ‘|’ separator to clean up the file upon import. This creates a pseudo-tabular dataframe.

library (dplyr)

#Import the CSV
sourcecsv <- read.table("https://raw.githubusercontent.com/johnnydrodriguez/data_607_week4/main/tournamentinfo.txt", header = FALSE, sep = '|', na.strings="", fill = TRUE)

knitr::kable(head(sourcecsv))
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
—————————————————————————————– NA NA NA NA NA NA NA NA NA NA
Pair Player Name Total Round Round Round Round Round Round Round NA
Num USCF ID / Rtg (Pre->Post) Pts 1 2 3 4 5 6 7 NA
—————————————————————————————– NA NA NA NA NA NA NA NA NA NA
1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4 NA
ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W NA


Cleaning up the imported source CSV

To begin cleaning up the source file, a column of NA’s was removed, rows with NA values were removed, and the first two unneeded rows of the imported file were removed as well.

Once these NA rows and columns were eliminated, the remaining columns could be renamed.

And finally, leading and trailing white spaces in several columns were removed.

#Drops the unneeded NA column
sourcecsv <- sourcecsv[-c(11)]

#Drops the unneeded NA rows
sourcecsv <- na.omit(sourcecsv)

#Drops the unneeded first 2 rows
sourcecsv <- sourcecsv[-c(1:2),]

#Renames the remaining columns
colnames(sourcecsv) <- c('ID', 'Name', 'Points', 'Round1', 'Round2', 'Round3', 'Round4', 'Round5', 'Round6', 'Round7')

#Removes the leading and trailing white spaces
sourcecsv$ID <- trimws(sourcecsv$ID, which = c("both"))
sourcecsv$Name <- trimws(sourcecsv$Name, which = c("both"))
sourcecsv$Points <- trimws(sourcecsv$Points, which = c("both"))

knitr::kable(head(sourcecsv))
ID Name Points Round1 Round2 Round3 Round4 Round5 Round6 Round7
5 1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4
6 ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W
8 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16 W 20 W 7
9 MI 14598900 / R: 1553 ->1663 N:2 B W B W B W B
11 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11 W 13 W 12
12 MI 14959604 / R: 1384 ->1640 N:2 W B W B W B W


Splitting & Merging the Source CSV data frame

Because of the alternating pattern of the sourcecsv dataframe rows, the sourcecsv dataframe was split based on the ID column. This allowed for two dataframes: one based on the actual ID (numerical) values and the other based on the (non-numerical) State values.

The first table retained the column names of the original sourcecsv. The second table column names were updated to reflect their column data.

Once both tables were split, they were re-merged based on index to create a new flat, wide table. The merged table is where all additional transformation and calculation will occur.

#Creates table 1 based on numerical ID data values the column
chess1 <- sourcecsv %>% filter(!(is.na(as.numeric(sourcecsv$ID))))

#Creates table 2 based on non numerical State values in column
chess2 <- sourcecsv %>% filter((is.na(as.numeric(sourcecsv$ID))))

#Renames columns in second split table
colnames(chess2) <- c('State', 'Rank', 'Nvalue', 'R1Color', 'R2Color', 'R3Color', 'R4Color', 'R5Color', 'R6Color', 'R7Color')

#Merges the split tables into a single flat, wide table by index.
chess3 <- merge(chess1, chess2, by=0)

knitr::kable(head(chess3))
Row.names ID Name Points Round1 Round2 Round3 Round4 Round5 Round6 Round7 State Rank Nvalue R1Color R2Color R3Color R4Color R5Color R6Color R7Color
1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W
10 10 ANVIT RAO 5.0 D 16 L 19 W 55 W 31 D 6 W 25 W 18 MI 14150362 / R: 1365 ->1544 N:3 W W B B W B W
11 11 CAMERON WILLIAM MC LEMAN 4.5 D 38 W 56 W 6 L 7 L 3 W 34 W 26 MI 12581589 / R: 1712 ->1696 N:3 B W B W B W B
12 12 KENNETH J TACK 4.5 W 42 W 33 D 5 W 38 H D 1 L 3 MI 12681257 / R: 1663 ->1670 N:3 W B W B W B
13 13 TORRANCE HENRY JR 4.5 W 36 W 27 L 7 D 5 W 33 L 3 W 32 MI 15082995 / R: 1666 ->1662 N:3 B W B B W W B
14 14 BRADLEY SHAW 4.5 W 54 W 44 W 8 L 1 D 27 L 5 W 31 MI 10131499 / R: 1610 ->1618 N:3 W B W W B B W


Split Opponent Result and Opponent ID columns

Since the Opponent ID column for every round is needed for the calculation, the single Round columns containing this information were split into the two separate Round Result and Round Opponent columns.

chess3 <- tidyr::separate(data = chess3, col = Round1, sep = "  ", into = c('R1Result', 'R1Opponent'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3, col = Round2, sep = "  ", into = c('R2Result', 'R2Opponent'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3, col = Round3, sep = "  ", into = c('R3Result', 'R3Opponent'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3, col = Round4, sep = "  ", into = c('R4Result', 'R4Opponent'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3, col = Round5, sep = "  ", into = c('R5Result', 'R5Opponent'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3, col = Round6, sep = "  ", into = c('R6Result', 'R6Opponent'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3, col = Round7, sep = "  ", into = c('R7Result', 'R7Opponent'), remove = TRUE)

knitr::kable(head(chess3[c('Name', 'R1Opponent', 'R2Opponent', 'R3Opponent', 'R4Opponent', 'R5Opponent', 'R6Opponent', 'R7Opponent')]))
Name R1Opponent R2Opponent R3Opponent R4Opponent R5Opponent R6Opponent R7Opponent
GARY HUA 39 21 18 14 7 12 4
ANVIT RAO 16 19 55 31 6 25 18
CAMERON WILLIAM MC LEMAN 38 56 6 7 3 34 26
KENNETH J TACK 42 33 5 38 1 3
TORRANCE HENRY JR 36 27 7 5 33 3 32
BRADLEY SHAW 54 44 8 1 27 5 31


Split Column to Extract PreRating Value

In the initial split of the sourcecsv dataframe, the column containing the PreRating values named ‘Rank’. This column contained multiple separate values that were split from the PreRating value.

To perform this split correctly, the white spaces within the Rank column were removed. This permitted the column split to occur along the “:” and “P” characters as delimiters.

#Removes the internal white spaces between the various values
chess3$Rank <- gsub(" ","",chess3$Rank) 

#Splits the initial Rank column into its four distinct values
chess3 <- tidyr::separate(data = chess3,col = Rank, sep = ":", into = c('left', 'right'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3,col = right, sep = "-", into = c('left2', 'right2'), remove = TRUE)
chess3 <- tidyr::separate(data = chess3,col = left2, sep = "P", into = c('left3', 'right3'), remove = TRUE)

#Renames the resulting columns including the PreRating column used in the average opponent calculation
chess3 <- chess3 %>% rename(Rvalue = left, PreRating = left3, Provisional = right3, PostRating = right2)

knitr::kable(head(chess3[c('Name', 'PreRating')]))
Name PreRating
GARY HUA 1794
ANVIT RAO 1365
CAMERON WILLIAM MC LEMAN 1712
KENNETH J TACK 1663
TORRANCE HENRY JR 1666
BRADLEY SHAW 1610


Converting Columns to Numeric

To perform the remaining transformations, several columns were converted to numeric. This has a secondary effect of removing leading or trailing whitespace characters.

# ID, Points, PreRating, and all Opponent id columns were converted to numeric.
chess3$ID <- as.numeric(as.character(chess3$ID))
chess3$Points <- as.numeric(as.character(chess3$Points))
chess3$PreRating <- as.numeric(as.character(chess3$PreRating))
chess3$R1Opponent <- as.numeric(as.character(chess3$R1Opponent))
chess3$R2Opponent <- as.numeric(as.character(chess3$R2Opponent))
chess3$R3Opponent <- as.numeric(as.character(chess3$R3Opponent))
chess3$R4Opponent <- as.numeric(as.character(chess3$R4Opponent))
chess3$R5Opponent <- as.numeric(as.character(chess3$R5Opponent))
chess3$R6Opponent <- as.numeric(as.character(chess3$R6Opponent))
chess3$R7Opponent <- as.numeric(as.character(chess3$R7Opponent))


str(chess3)
## 'data.frame':    64 obs. of  31 variables:
##  $ Row.names  : 'AsIs' chr  "1" "10" "11" "12" ...
##  $ ID         : num  1 10 11 12 13 14 15 16 17 18 ...
##  $ Name       : chr  "GARY HUA" "ANVIT RAO" "CAMERON WILLIAM MC LEMAN" "KENNETH J TACK" ...
##  $ Points     : num  6 5 4.5 4.5 4.5 4.5 4.5 4 4 4 ...
##  $ R1Result   : chr  "W" "D" "D" "W" ...
##  $ R1Opponent : num  39 16 38 42 36 54 19 10 48 47 ...
##  $ R2Result   : chr  "W" "L" "W" "W" ...
##  $ R2Opponent : num  21 19 56 33 27 44 16 15 41 9 ...
##  $ R3Result   : chr  "W" "W" "W" "D" ...
##  $ R3Opponent : num  18 55 6 5 7 8 30 NA 26 1 ...
##  $ R4Result   : chr  "W" "W" "L" "W" ...
##  $ R4Opponent : num  14 31 7 38 5 1 22 39 2 32 ...
##  $ R5Result   : chr  "W" "D" "L" "H" ...
##  $ R5Opponent : num  7 6 3 NA 33 27 54 2 23 19 ...
##  $ R6Result   : chr  "D" "W" "W" "D" ...
##  $ R6Opponent : num  12 25 34 1 3 5 33 36 22 38 ...
##  $ R7Result   : chr  "D" "W" "W" "L" ...
##  $ R7Opponent : num  4 18 26 3 32 31 38 NA 5 10 ...
##  $ State      : chr  "ON" "MI" "MI" "MI" ...
##  $ Rvalue     : chr  "15445895/R" "14150362/R" "12581589/R" "12681257/R" ...
##  $ PreRating  : num  1794 1365 1712 1663 1666 ...
##  $ Provisional: chr  NA NA NA NA ...
##  $ PostRating : chr  ">1817" ">1544" ">1696" ">1670" ...
##  $ Nvalue     : chr  "N:2" "N:3" "N:3" "N:3" ...
##  $ R1Color    : chr  "W    " "W    " "B    " "W    " ...
##  $ R2Color    : chr  "B    " "W    " "W    " "B    " ...
##  $ R3Color    : chr  "W    " "B    " "B    " "W    " ...
##  $ R4Color    : chr  "B    " "B    " "W    " "B    " ...
##  $ R5Color    : chr  "W    " "W    " "B    " "     " ...
##  $ R6Color    : chr  "B    " "B    " "W    " "W    " ...
##  $ R7Color    : chr  "W    " "W    " "B    " "B    " ...


Self Merges to create Round Opponent PreRating columns for all 7 rounds

Through 7 table self merges, a new column was added for every round opponent’s PreRating value. This merge has the affect of matching opponent to their PreRating and inserting the PreRating value in the new column.

After each merge, the resulting columns are renamed to reflect the round data. All NA rows (created by Bi weeks where no opponent is matched) are also deleted after merge.

# Merges Round 1 Opponents and PreRatings and inserts value into Round 1 PreRating column
chess3 <- merge(chess3, chess3[c("ID", "PreRating") ], by.x = "R1Opponent", by.y = "ID", all = TRUE, na.rm = TRUE)
chess3 <- chess3 %>% rename(R1ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

# Merges Round 2 Opponents and PreRatings and inserts value into Round 2 PreRating column
chess3 <- merge(chess3, chess3[c("ID" , "PreRating") ], by.x = "R2Opponent", by.y = "ID", all = TRUE)
chess3 <- chess3 %>% rename(R2ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

# Merges Round 3 Opponents and PreRatings and inserts value into Round 3 PreRating column
chess3 <- merge(chess3, chess3[c("ID" , "PreRating") ], by.x = "R3Opponent", by.y = "ID", all = TRUE)
chess3 <- chess3 %>% rename(R3ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

# Merges Round 4 Opponents and PreRatings and inserts value into Round 4 PreRating column
chess3 <- merge(chess3, chess3[c("ID" , "PreRating") ], by.x = "R4Opponent", by.y = "ID", all = TRUE)
chess3 <- chess3 %>% rename(R4ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

# Merges Round 5 Opponents and PreRatings and inserts value into Round 5 PreRating column
chess3 <- merge(chess3, chess3[c("ID" , "PreRating") ], by.x = "R5Opponent", by.y = "ID", all = TRUE)
chess3 <- chess3 %>% rename(R5ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

# Merges Round 6 Opponents and PreRatings and inserts value into Round 6 PreRating column
chess3 <- merge(chess3, chess3[c("ID" , "PreRating") ], by.x = "R6Opponent", by.y = "ID", all = TRUE)
chess3 <- chess3 %>% rename(R6ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

# Merges Round 7 Opponents and PreRatings and inserts value into Round 7 PreRating column
chess3 <- merge(chess3, chess3[c("ID" , "PreRating") ], by.x = "R7Opponent", by.y = "ID", all = TRUE)
chess3 <- chess3 %>% rename(R7ORating = PreRating.y, PreRating = PreRating.x)
chess3 <- chess3 %>% filter(!is.na(ID))

knitr::kable(head(chess3[c('Name', 'R1ORating', 'R2ORating', 'R3ORating', 'R4ORating', 'R5ORating', 'R6ORating', 'R7ORating')]))
Name R1ORating R2ORating R3ORating R4ORating R5ORating R6ORating R7ORating
PATRICK H SCHILLING 1363 1507 1553 1579 1655 1564 1794
GARY DEE SWATHELL 1092 377 1666 1712 1794 1411 1553
KENNETH J TACK 1332 1449 1655 1423 NA 1794 1384
GARY HUA 1436 1563 1600 1610 1649 1663 1716
RONALD GRZEGORCZYK 1382 1403 1579 1553 1363 1555 1655
DINH DANG BUI 1283 1794 1362 1384 1348 1436 1686


Calculate the Average Opponent Rating

Once the dataframe has been completely transformed and the opponent PreRating columns have been added, the Average Opponent PreRating for all opponents for all rounds is calculated.

#Calculates the average opponent PreRating for all opponents, all rounds
chess3 <- mutate(chess3, AvgOpponentRating= round(rowMeans(select(chess3, "R1ORating","R2ORating","R3ORating","R4ORating","R5ORating","R6ORating","R7ORating"), na.rm = TRUE)))

chess3 <- chess3[order(chess3$ID),]
knitr::kable(head(chess3[c('Name', 'State', 'Points', 'PreRating', 'AvgOpponentRating')]))
Name State Points PreRating AvgOpponentRating
4 GARY HUA ON 6.0 1794 1605
7 DAKSHESH DARURI MI 6.0 1553 1469
12 ADITYA BAJAJ MI 6.0 1384 1564
1 PATRICK H SCHILLING MI 5.5 1716 1574
16 HANSHI ZUO MI 5.5 1655 1501
20 HANSEN SONG OH 5.0 1686 1519


Write the output CSV file with subset of columns: chess_outfile.csv

Writes the CSV file with Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

#Writes output CSV using subset of columns from the transformed dataframe

write.csv(chess3[,c("Name","State", "Points", "PreRating", "AvgOpponentRating")], file="chess_outfile.csv",row.names=FALSE)