NOTE: Choose Knit with Parameters…
Libraries
library(RCurl)
library(magrittr)
library(stringr)
Load lines from the source text file
# Download the raw tournament cross-table as a character vector with one
# element per text line. warn = FALSE suppresses readLines' warning about a
# missing final end-of-line.
elo <- readLines(
  "https://raw.githubusercontent.com/simon63/Project1/master/tournamentinfo.txt",
  warn = FALSE
)
head(elo, 10)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
Remove Row-Separator lines
# Keep only the lines whose first character is something other than '-', '+'
# or '$' (inside [...] these three are literal characters). This discards the
# dashed row-separator lines; empty lines have no first character and are
# discarded as well.
is_data_line <- str_detect(elo, '^[^-+$]')
elo <- elo[which(is_data_line)]
head(elo)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
Divide lines into two sets of rows
separating odd and even rows into two sets
# Each player occupies two consecutive lines. Odd-numbered lines (pair number,
# name, total points, per-round results) go to row1; even-numbered lines
# (state, USCF ID / ratings, colours) go to row2.
# seq_along() keeps both results empty when `elo` is empty, whereas
# 1:length(elo) would count 1, 0 and produce a spurious NA element.
line_no <- seq_along(elo)
row1 <- elo[line_no %% 2 != 0]
row2 <- elo[line_no %% 2 == 0]
message('first set of rows')
## first set of rows
head(row1)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
message('second set of rows')
## second set of rows
head(row2)
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
Read both sets of rows into two data frame objects
and drop the last empty column from the data frames
# Parse the pipe-delimited lines into data frames. With header = FALSE the
# columns are auto-named V1, V2, ...
df1 <- read.csv(text = row1, sep = "|", header = FALSE, stringsAsFactors = FALSE)
df2 <- read.csv(text = row2, sep = "|", header = FALSE, stringsAsFactors = FALSE)
# Every line ends with a trailing "|", so read.csv produces one extra, empty
# column at the end. Drop it by position (last column) rather than by a
# hard-coded index.
df1 <- df1[-ncol(df1)]
df2 <- df2[-ncol(df2)]
knitr::kable(
  head(df1),
  caption = "first data frame"
)
first data frame
| 1 |
GARY HUA |
6.0 |
W 39 |
W 21 |
W 18 |
W 14 |
W 7 |
D 12 |
D 4 |
| 2 |
DAKSHESH DARURI |
6.0 |
W 63 |
W 58 |
L 4 |
W 17 |
W 16 |
W 20 |
W 7 |
| 3 |
ADITYA BAJAJ |
6.0 |
L 8 |
W 61 |
W 25 |
W 21 |
W 11 |
W 13 |
W 12 |
| 4 |
PATRICK H SCHILLING |
5.5 |
W 23 |
D 28 |
W 2 |
W 26 |
D 5 |
W 19 |
D 1 |
| 5 |
HANSHI ZUO |
5.5 |
W 45 |
W 37 |
D 12 |
D 13 |
D 4 |
W 14 |
W 17 |
| 6 |
HANSEN SONG |
5.0 |
W 34 |
D 29 |
L 11 |
W 35 |
D 10 |
W 27 |
W 21 |
# Render the first rows of the second data frame as a captioned table.
knitr::kable(x = head(df2), caption = "second data frame")
second data frame
| ON |
15445895 / R: 1794 ->1817 |
N:2 |
W |
B |
W |
B |
W |
B |
W |
| MI |
14598900 / R: 1553 ->1663 |
N:2 |
B |
W |
B |
W |
B |
W |
B |
| MI |
14959604 / R: 1384 ->1640 |
N:2 |
W |
B |
W |
B |
W |
B |
W |
| MI |
12616049 / R: 1716 ->1744 |
N:2 |
W |
B |
W |
B |
W |
B |
B |
| MI |
14601533 / R: 1655 ->1690 |
N:2 |
B |
W |
B |
W |
B |
W |
B |
| OH |
15055204 / R: 1686 ->1687 |
N:3 |
W |
B |
W |
B |
B |
W |
B |
Extract Opponents’ ID values {V4-V10 columns of Data.Frame#1} and convert to numeric
# Columns V4..V10 hold one cell per round, e.g. "W 39": a result letter
# followed by the opponent's pair number. For every round column, drop the
# first character, trim the padding and convert what is left to a number.
# A cell with nothing after the result letter converts to NA.
# as.is = TRUE is passed explicitly: R >= 4.0 warns when it is left
# unspecified.
round_cols <- paste0("V", 4:10)
df1[round_cols] <- lapply(df1[round_cols], function(cell) {
  cell %>%
    str_sub(2, -1) %>%
    str_trim(side = "both") %>%
    type.convert(as.is = TRUE)
})
message("data.frame#1 with opponents's ID")
## data.frame#1 with opponents's ID
head(df1)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 1 GARY HUA 6.0 39 21 18 14 7 12 4
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17 16 20 7
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21 11 13 12
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26 5 19 1
## 5 5 HANSHI ZUO 5.5 45 37 12 13 4 14 17
## 6 6 HANSEN SONG 5.0 34 29 11 35 10 27 21
Create a function to calculate “Average of Pre-Chess Rating of Opponents”
The function takes, as parameters, a vector of player IDs and the two data frames. For each ID it picks up the corresponding opponents’ IDs (from data.frame#1, omitting NAs) and looks up the rating column (V2) in the corresponding rows of data.frame#2. It then runs that text through a short series of steps that extract the pre-tournament ratings, convert them to numeric values, and compute their average, rounded to the nearest whole number.
#' Average pre-tournament rating of each player's opponents
#'
#' @param v Vector of player IDs; each ID is used as a row index into `d1`.
#' @param d1 Data frame whose columns 4 to 10 hold the opponents' IDs for the
#'   seven rounds (`NA` where no opponent is recorded).
#' @param d2 Data frame whose column `V2` holds text containing
#'   `"R: <rating>"`; row `i` is looked up for opponent ID `i`.
#' @return A double vector the same length as `v` with the mean opponent
#'   rating rounded to a whole number. A player with no usable opponent
#'   rating yields `NaN`.
f_avg <- function(v, d1, d2) {
  opponent_cols <- 4:10
  rating_pattern <- "R:\\s+\\d+"
  rating_text_all <- as.character(d2$V2)

  avg_for_player <- function(id) {
    opponent_ids <- unlist(d1[id, opponent_cols], use.names = FALSE)
    opponent_ids <- opponent_ids[!is.na(opponent_ids)]

    rating_text <- rating_text_all[opponent_ids]
    rating_text <- rating_text[!is.na(rating_text)]

    # regmatches() returns only the rows that actually contain a rating tag,
    # so a malformed row is skipped instead of being counted as rating 0.
    tagged <- regmatches(
      rating_text,
      regexpr(rating_pattern, rating_text, perl = TRUE)
    )
    ratings <- as.numeric(sub("^R:\\s+", "", tagged, perl = TRUE))

    round(mean(ratings))
  }

  # vapply() handles an empty `v` (returns numeric(0)); 1:length(v) would
  # iterate over 1, 0 and fail.
  vapply(v, avg_for_player, numeric(1), USE.NAMES = FALSE)
}
# Spot-check f_avg() for two players and display the rating text it averages.
# str_view() replaces str_view_all(), which is deprecated as of stringr 1.5.0.
message("Function test result for Player #1:")
## Function test result for Player #1:
f_avg(c(1), df1, df2)
## [1] 1605
message("Pre-Chess Rating of Opponents for Player #1:")
## Pre-Chess Rating of Opponents for Player #1:
na.omit(df2[unlist(df1[1, 4:10]), 2]) %>% str_view('R:\\s+\\d+')
message("Function test result for Player #37:")
## Function test result for Player #37:
f_avg(c(37), df1, df2)
## [1] 1385
message("Pre-Chess Rating of Opponents for Player #37:")
## Pre-Chess Rating of Opponents for Player #37:
na.omit(df2[unlist(df1[37, 4:10]), 2]) %>% str_view('R:\\s+\\d+')
Build the final data.frame object which will be used to generate a .CSV file
The .CSV file (that could for example be imported into a SQL database) should have the following information for all of the players:
* Player’s Name
* Player’s State
* Total Number of Points
* Player’s Pre-Rating
* Average Pre Chess Rating of Opponents
# Assemble one row per player: name, state, total points, the player's own
# pre-tournament rating and the rounded average pre-tournament rating of the
# player's opponents.
dfResult <- data.frame(
  plyr_name = str_trim(df1$V2, side = "both"),
  plyr_state = str_trim(df2$V1, side = "both"),
  plyr_total_pts = df1$V3,
  # Isolate the "R: <digits>" token first so the digits of the USCF ID that
  # precedes it are not picked up, then keep only the digits. str_extract()
  # yields NA (not 0) for a row without a rating tag.
  plyr_pre_rating = df2$V2 %>%
    str_extract('R:\\s+\\d+') %>%
    str_extract('\\d+') %>%
    as.integer(),
  plyr_oppt_avg_pre_rating = f_avg(seq_len(nrow(df1)), df1, df2),
  stringsAsFactors = FALSE
)
knitr::kable(
  head(dfResult),
  caption = "Resulting Data Frame"
)
Resulting Data Frame
| GARY HUA |
ON |
6.0 |
1794 |
1605 |
| DAKSHESH DARURI |
MI |
6.0 |
1553 |
1469 |
| ADITYA BAJAJ |
MI |
6.0 |
1384 |
1564 |
| PATRICK H SCHILLING |
MI |
5.5 |
1716 |
1574 |
| HANSHI ZUO |
MI |
5.5 |
1655 |
1501 |
| HANSEN SONG |
OH |
5.0 |
1686 |
1519 |
Write out the final required .CSV output file for this project
# Save the result table as CSV at the path supplied by the `csv_file` knit
# parameter. write.csv() also writes row names by default, so the file starts
# with an unnamed row-index column.
write.csv(x = dfResult, file = params$csv_file)