NOTE: Choose Knit with Parameters…

Libraries

library(RCurl)
library(magrittr)
library(stringr)

Load lines from the source text file

# Read the raw tournament cross-table, one element per text line.
# warn = FALSE silences the "incomplete final line" warning that readLines()
# emits when the file does not end with a newline.
elo <- readLines(
  "https://raw.githubusercontent.com/simon63/Project1/master/tournamentinfo.txt",
  warn = FALSE
)
head(elo, 10)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

Remove 3 lines of the Header

# Discard the first three lines (top rule and the two column-title lines).
# The rule that follows them is removed later with the other separator rows.
elo <- elo[-seq_len(3)]
head(elo, n = 10)
##  [1] "-----------------------------------------------------------------------------------------"
##  [2] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [3] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [4] "-----------------------------------------------------------------------------------------"
##  [5] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [6] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [7] "-----------------------------------------------------------------------------------------"
##  [8] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [9] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [10] "-----------------------------------------------------------------------------------------"

Remove Row-Separator lines

# Keep only the data lines. Separator rows are made of dashes, so every line
# whose first character is "-" is dropped. Requiring one leading non-dash
# character also discards empty lines, as the previous pattern did.
elo <- elo[grepl("^[^-]", elo)]
head(elo)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [3] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [4] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [5] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [6] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

Divide lines into two sets of rows

separating odd and even rows into two sets
# Each player occupies two consecutive lines: odd positions hold the
# name/points/round-results line, even positions hold the state/rating line.
# seq_along() (rather than 1:length()) keeps both results empty when elo is
# empty instead of producing an NA element.
is_odd_line <- seq_along(elo) %% 2 == 1
row1 <- elo[is_odd_line]
row2 <- elo[!is_odd_line]
message('first set of rows')
## first set of rows
head(row1)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [4] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [5] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
## [6] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
message('second set of rows')
## second set of rows
head(row2)
## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [4] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
## [5] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [6] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"

Read both sets of rows into two data frame objects

and drop the last empty column from the data frames
# Parse the pipe-delimited lines into data frames with default column names
# V1, V2, ... (the text has no header row).
df1 <- read.csv(
  text = row1, sep = "|", header = FALSE, stringsAsFactors = FALSE
)
df2 <- read.csv(
  text = row2, sep = "|", header = FALSE, stringsAsFactors = FALSE
)
# Every line ends with a trailing "|", which read.csv() parses as an extra,
# empty 11th column; drop it from both data frames.
df1 <- df1[-11]
df2 <- df2[-11]
knitr::kable(
  head(df1),
  caption = "first data frame"
)
first data frame
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7 D 12 D 4
2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16 W 20 W 7
3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11 W 13 W 12
4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26 D 5 W 19 D 1
5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13 D 4 W 14 W 17
6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35 D 10 W 27 W 21
# Preview the first rows of the state/rating data frame as a table.
knitr::kable(head(df2), caption = "second data frame")
second data frame
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W
MI 14598900 / R: 1553 ->1663 N:2 B W B W B W B
MI 14959604 / R: 1384 ->1640 N:2 W B W B W B W
MI 12616049 / R: 1716 ->1744 N:2 W B W B W B B
MI 14601533 / R: 1655 ->1690 N:2 B W B W B W B
OH 15055204 / R: 1686 ->1687 N:3 W B W B B W B

Extract Opponents’ ID values {V4-V10 columns of Data.Frame#1} and convert to numeric

# Columns V4-V10 hold one round each as "<result letter> <opponent number>".
# For every round column: drop the leading result letter, trim the padding,
# and convert what is left to a number (blank entries, i.e. rounds without an
# opponent, become NA). as.is = TRUE is passed explicitly because
# type.convert() warns when the caller leaves it unspecified.
round_cols <- paste0("V", 4:10)
df1[round_cols] <- lapply(df1[round_cols], function(round_result) {
  round_result %>%
    str_sub(2, -1) %>%
    str_trim(side = "both") %>%
    type.convert(as.is = TRUE)
})
message("data.frame#1 with opponents's ID")
## data.frame#1 with opponents's ID
head(df1)
##   V1                                V2  V3 V4 V5 V6 V7 V8 V9 V10
## 1  1  GARY HUA                         6.0 39 21 18 14  7 12   4
## 2  2  DAKSHESH DARURI                  6.0 63 58  4 17 16 20   7
## 3  3  ADITYA BAJAJ                     6.0  8 61 25 21 11 13  12
## 4  4  PATRICK H SCHILLING              5.5 23 28  2 26  5 19   1
## 5  5  HANSHI ZUO                       5.5 45 37 12 13  4 14  17
## 6  6  HANSEN SONG                      5.0 34 29 11 35 10 27  21

Create a function to calculate “Average of Pre-Chess Rating of Opponents”

The function takes, as parameters, a vector of player IDs and the two data frames. For each ID it picks up the corresponding opponents’ IDs (from data.frame#1, omitting NAs) and looks up the Pre-Chess Rating column (V2) in the corresponding rows of data.frame#2. It then extracts the ratings, converts them to numbers, and returns their average rounded to the nearest integer.

# Average pre-tournament rating of each listed player's opponents.
#
# Arguments:
#   v   Vector of row indices into d1, one per player to evaluate.
#   d1  Data frame whose columns 4 to 10 hold, per round, the row index of
#       the opponent in d2 (NA when no opponent was faced in that round).
#   d2  Data frame whose V2 column is text containing "R: <digits>", where
#       the digits are the pre-tournament rating.
#
# Returns a double vector with one element per element of v: the mean of the
# opponents' ratings rounded to the nearest integer. The result is NaN for a
# player with no usable opponent rating, and a zero-length vector when v is
# empty.
f_avg <- function(v, d1, d2) {
  rating_field <- as.character(d2$V2)
  vapply(
    seq_along(v),
    function(i) {
      opponents <- unlist(d1[v[i], 4:10], use.names = FALSE)
      # Missing or out-of-range opponents index to NA; drop them here.
      fields <- rating_field[opponents]
      fields <- fields[!is.na(fields)]
      # Match "R: <digits>" as a whole so the USCF ID digits earlier in the
      # field are never mistaken for the rating. regmatches() returns only
      # the fields that actually matched.
      matched <- regmatches(fields, regexpr("R:\\s+\\d+", fields))
      ratings <- as.numeric(sub("^R:\\s+", "", matched))
      round(mean(ratings))
    },
    numeric(1)
  )
}

message("Function test result for Player #1:")
## Function test result for Player #1:
f_avg(c(1), df1, df2)
## [1] 1605
message("Pre-Chess Rating of Opponents for Player #1:")
## Pre-Chess Rating of Opponents for Player #1:
# Show the rating text matched for each of player #1's opponents.
# str_view() replaces str_view_all(), which is deprecated as of stringr 1.5.0.
na.omit(df2[unlist(df1[1, c(4:10)]), 2]) %>% str_view('R:\\s+\\d+')
message("Function test result for Player #37:")
## Function test result for Player #37:
f_avg(c(37), df1, df2)
## [1] 1385
message("Pre-Chess Rating of Opponents for Player #37:")
## Pre-Chess Rating of Opponents for Player #37:
# Show the rating text matched for each of player #37's opponents.
# str_view() replaces str_view_all(), which is deprecated as of stringr 1.5.0.
na.omit(df2[unlist(df1[37, c(4:10)]), 2]) %>% str_view('R:\\s+\\d+')

Build the final data.frame object which will be used to generate a .CSV file

The .CSV file (that could for example be imported into a SQL database) should have the following information for all of the players:
* Player’s Name
* Player’s State
* Total Number of Points
* Player’s Pre-Rating
* Average Pre Chess Rating of Opponents

# Assemble one row per player from the two parsed data frames.
dfResult <- data.frame(
  plyr_name = str_trim(df1$V2, side = "both"),
  plyr_state = str_trim(df2$V1, side = "both"),
  plyr_total_pts = df1$V3,
  # Match "R: <digits>" first so the USCF ID digits earlier in the field are
  # not mistaken for the rating, then keep only the digits.
  plyr_pre_rating = df2$V2 %>%
    str_extract("R:\\s+\\d+") %>%
    str_extract("\\d+") %>%
    as.integer(),
  # seq_len(nrow()) stays empty for an empty data frame, unlike 1:n.
  plyr_oppt_avg_pre_rating = f_avg(seq_len(nrow(df1)), df1, df2)
)
knitr::kable(
  head(dfResult),
  caption = "Resulting Data Frame"
)
Resulting Data Frame
plyr_name plyr_state plyr_total_pts plyr_pre_rating plyr_oppt_avg_pre_rating
GARY HUA ON 6.0 1794 1605
DAKSHESH DARURI MI 6.0 1553 1469
ADITYA BAJAJ MI 6.0 1384 1564
PATRICK H SCHILLING MI 5.5 1716 1574
HANSHI ZUO MI 5.5 1655 1501
HANSEN SONG OH 5.0 1686 1519

Write out the final required .CSV output file for this project

# Write the result to the path supplied through the csv_file knit parameter.
# row.names = FALSE keeps the automatic row-number column out of the file, so
# it contains exactly the five player fields.
write.csv(dfResult, file = params$csv_file, row.names = FALSE)