Week4-Project1

NOTE: Choose Knit with Parameters…

Libraries

library(RCurl)
library(magrittr)
library(stringr)

Load lines from the source text file

# Read the raw tournament table from GitHub, one vector element per line.
# warn = FALSE silences readLines()' warning about a missing final newline;
# spelled out in full because `F` is an ordinary variable that can be reassigned.
elo <- readLines(
  "https://raw.githubusercontent.com/simon63/Project1/master/tournamentinfo.txt",
  warn = FALSE
)
head(elo, 10)

##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

Remove 3 lines of the Header

# Discard the first three lines: the top rule and the two column-title lines.
elo <- elo[-seq_len(3)]
head(elo, 10)

##  [1] "-----------------------------------------------------------------------------------------"
##  [2] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [3] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [4] "-----------------------------------------------------------------------------------------"
##  [5] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [6] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [7] "-----------------------------------------------------------------------------------------"
##  [8] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [9] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [10] "-----------------------------------------------------------------------------------------"

Remove Row-Separator lines

# Keep only lines whose first character is not "-", "+" or "$"; this removes
# the dashed separator rows. which() drops FALSE and NA, matching subset().
elo <- elo[which(str_detect(elo, "^[^-+$]"))]
head(elo)

## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [3] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [4] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [5] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [6] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"

Divide lines into two sets of rows

Each player occupies two consecutive lines, so the odd-numbered lines go into one set and the even-numbered lines into the other.

# Split the alternating lines: odd positions go to row1, even positions to row2.
# seq_along() is used instead of 1:length() so that an empty `elo` produces
# empty results rather than indexing with c(1, 0).
row1 <- elo[seq_along(elo) %% 2 == 1]
row2 <- elo[seq_along(elo) %% 2 == 0]
message("first set of rows")

## first set of rows

head(row1)

## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [4] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [5] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
## [6] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"

message('second set of rows')

## second set of rows

head(row2)

## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [4] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
## [5] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [6] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"

Read both sets of rows into two data frame objects

Each line ends with a trailing `|`, so parsing produces an empty final column, which is dropped from both data frames.

# Parse the pipe-delimited lines into data frames. There is no header row, so
# the columns are auto-named V1, V2, ...; text columns stay character.
# TRUE/FALSE are spelled out because `T`/`F` are reassignable variables.
df1 <- read.csv(
  text = row1,
  sep = "|",
  header = FALSE,
  stringsAsFactors = FALSE
)
df2 <- read.csv(
  text = row2,
  sep = "|",
  header = FALSE,
  stringsAsFactors = FALSE
)
# drop the last empty column
df1 <- df1[-11]
df2 <- df2[-11]
knitr::kable(
  head(df1),
  caption = "first data frame"
)

first data frame
V1	V2	V3	V4	V5	V6	V7	V8	V9	V10
1	GARY HUA	6.0	W 39	W 21	W 18	W 14	W 7	D 12	D 4
2	DAKSHESH DARURI	6.0	W 63	W 58	L 4	W 17	W 16	W 20	W 7
3	ADITYA BAJAJ	6.0	L 8	W 61	W 25	W 21	W 11	W 13	W 12
4	PATRICK H SCHILLING	5.5	W 23	D 28	W 2	W 26	D 5	W 19	D 1
5	HANSHI ZUO	5.5	W 45	W 37	D 12	D 13	D 4	W 14	W 17
6	HANSEN SONG	5.0	W 34	D 29	L 11	W 35	D 10	W 27	W 21

knitr::kable(
  head(df2),
  caption = "second data frame"
)

second data frame
V1	V2	V3	V4	V5	V6	V7	V8	V9	V10
ON	15445895 / R: 1794 ->1817	N:2	W	B	W	B	W	B	W
MI	14598900 / R: 1553 ->1663	N:2	B	W	B	W	B	W	B
MI	14959604 / R: 1384 ->1640	N:2	W	B	W	B	W	B	W
MI	12616049 / R: 1716 ->1744	N:2	W	B	W	B	W	B	B
MI	14601533 / R: 1655 ->1690	N:2	B	W	B	W	B	W	B
OH	15055204 / R: 1686 ->1687	N:3	W	B	W	B	B	W	B

Extract the opponents’ ID values (columns V4–V10 of data frame #1) and convert them to numeric

# Each round column of df1 (V4..V10) holds text such as "W  39": a one-letter
# result code followed by the opponent's pair number. Drop the first
# character, trim the padding and keep the number as an integer. A cell with
# nothing after the result code becomes NA.
# as.integer() replaces type.convert(), which warns on R >= 4.0 when `as.is`
# is not supplied and may return a different type per column.
round_cols <- paste0("V", 4:10)
df1[round_cols] <- lapply(df1[round_cols], function(cell) {
  cell %>%
    str_sub(2, -1) %>%
    str_trim(side = "both") %>%
    as.integer()
})
message("data.frame#1 with opponents's ID")

## data.frame#1 with opponents's ID

head(df1)

##   V1                                V2  V3 V4 V5 V6 V7 V8 V9 V10
## 1  1  GARY HUA                         6.0 39 21 18 14  7 12   4
## 2  2  DAKSHESH DARURI                  6.0 63 58  4 17 16 20   7
## 3  3  ADITYA BAJAJ                     6.0  8 61 25 21 11 13  12
## 4  4  PATRICK H SCHILLING              5.5 23 28  2 26  5 19   1
## 5  5  HANSHI ZUO                       5.5 45 37 12 13  4 14  17
## 6  6  HANSEN SONG                      5.0 34 29 11 35 10 27  21

Create a function to calculate “Average of Pre-Chess Rating of Opponents”

The function takes a vector of player IDs and the two data frames as parameters. For each ID it collects that player’s opponents’ IDs from columns V4–V10 of data frame #1 (omitting NAs) and looks up the rating text in column V2 of the corresponding rows of data frame #2. It then extracts the numeric pre-tournament rating from each of those entries and returns their mean, rounded to the nearest whole number.

#' Average pre-tournament rating of each player's opponents
#'
#' For every ID in `v`, reads that player's opponent IDs from columns 4 to 10
#' of `d1`, looks up the rows of `d2` at those positions, extracts the number
#' that follows "R:" in `d2$V2`, and averages those numbers.
#'
#' @param v Vector of row indices into `d1`, one per player.
#' @param d1 Data frame whose columns 4:10 hold opponent row indices
#'   (`NA` where there is no opponent).
#' @param d2 Data frame with a text column `V2` containing "R: <digits>".
#' @return Numeric vector the same length as `v` with the rounded mean rating
#'   of each player's opponents; `NaN` when no opponent rating is found.
f_avg <- function(v, d1, d2) {
  rating_text <- as.character(d2$V2)
  # vapply() over `v` handles an empty `v` correctly (returns numeric(0)),
  # which a `1:length(v)` loop does not.
  vapply(
    v,
    function(id) {
      opponents <- as.integer(unlist(d1[id, 4:10], use.names = FALSE))
      # NA or out-of-range opponent IDs index to NA and are dropped here.
      txt <- rating_text[opponents]
      txt <- txt[!is.na(txt)]
      # regmatches() returns only the entries that matched, so text without
      # an "R: <digits>" part is skipped rather than turned into a number.
      hits <- regmatches(txt, regexpr("R:\\s+\\d+", txt, perl = TRUE))
      ratings <- as.numeric(sub("^R:\\s+", "", hits, perl = TRUE))
      round(mean(ratings))
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

message("Function test result for Player #1:")

## Function test result for Player #1:

f_avg(c(1), df1, df2)

## [1] 1605

message("Pre-Chess Rating of Opponents for Player #1:")

## Pre-Chess Rating of Opponents for Player #1:

na.omit(df2[unlist(df1[1, c(4:10)]), 2]) %>% str_view_all('R:\\s+\\d+')

message("Function test result for Player #37:")

## Function test result for Player #37:

f_avg(c(37), df1, df2)

## [1] 1385

message("Pre-Chess Rating of Opponents for Player #37:")

## Pre-Chess Rating of Opponents for Player #37:

na.omit(df2[unlist(df1[37, c(4:10)]), 2]) %>% str_view_all('R:\\s+\\d+')

Build the final data.frame object which will be used to generate a .CSV file

The .CSV file (that could for example be imported into a SQL database) should have the following information for all of the players:
* Player’s Name
* Player’s State
* Total Number of Points
* Player’s Pre-Rating
* Average Pre Chess Rating of Opponents

# Assemble the export table, one row per player:
#   plyr_name / plyr_total_pts come from the first line of each player (df1),
#   plyr_state / plyr_pre_rating from the second line (df2),
#   plyr_oppt_avg_pre_rating from f_avg() over every row of df1.
# The pre-rating is the run of digits following "R:" in df2$V2; str_extract()
# returns a plain character vector (first match per row), so the result can
# be converted with as.integer() directly.
dfResult <- data.frame(
  plyr_name = str_trim(df1$V2, side = "both"),
  plyr_state = str_trim(df2$V1, side = "both"),
  plyr_total_pts = df1$V3,
  plyr_pre_rating = df2$V2 %>%
    str_extract("R:\\s+\\d+") %>%
    str_extract("\\d+") %>%
    as.integer(),
  plyr_oppt_avg_pre_rating = f_avg(seq_len(nrow(df1)), df1, df2)
)
knitr::kable(
  head(dfResult),
  caption = "Resulting Data Frame"
)

Resulting Data Frame
plyr_name	plyr_state	plyr_total_pts	plyr_pre_rating	plyr_oppt_avg_pre_rating
GARY HUA	ON	6.0	1794	1605
DAKSHESH DARURI	MI	6.0	1553	1469
ADITYA BAJAJ	MI	6.0	1384	1564
PATRICK H SCHILLING	MI	5.5	1716	1574
HANSHI ZUO	MI	5.5	1655	1501
HANSEN SONG	OH	5.0	1686	1519

Write out the final required .CSV output file for this project

# Write the result table to the path supplied through the `csv_file` parameter.
# row.names = FALSE limits the file to the five data columns; by default
# write.csv() also writes the row names as an extra, unnamed first column.
write.csv(dfResult, file = params$csv_file, row.names = FALSE)