Load the project text file
library(stringr)
# Read the tournament cross table into a character vector, one element per
# line of the file. warn = FALSE silences readLines() warnings such as the one
# for a missing final end-of-line.
raw.data <- readLines(con = "./tournamentinfo.txt", warn = FALSE)
# Preview the first six lines.
head(raw.data, n = 6L)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
Create a regular expression to parse the text file and load the results into a dataframe
# Parse every line of the file with one two-branch pattern.
#   Branch 1 (player line): pair number, player name, total points and up to
#     seven opponent pair numbers -> capture groups 1-10.
#   Branch 2 (rating line): two-character state code and the digits that
#     follow "R:" (the pre-tournament rating) -> capture groups 11-12.
# str_match_all() returns one matrix per input line holding the full match
# plus the 12 groups; groups of the branch that did not match are NA.
# Every letter class is spelled [A-Za-z]; a bare A-z range would also admit
# the ASCII punctuation that sits between "Z" and "a".
# NOTE(review): the name group accepts letters only and at most three words,
# so names containing hyphens, apostrophes or more parts may be truncated or
# may stop the line from matching at all.
# NOTE(review): each "\\D+" skips any run of non-digits, so a round cell
# without an opponent number does not leave its group empty; later opponents
# shift left and the round groups need not line up with the actual rounds.
get_data <- str_match_all(
  raw.data,
  paste0(
    # pair number, then "|"
    "(\\d+)\\s\\|\\s",
    # player name: two words plus an optional third
    "([A-Za-z]+\\s[A-Za-z]+\\s[A-Za-z]*)",
    # total points, e.g. 6.0
    "\\D+?(\\d+\\.?\\d?)",
    # seven optional opponent pair numbers
    strrep("\\D+(\\d+)?", 7),
    # alternative branch: state code ... "R:" pre-rating
    "|\\s(\\w{2})\\s\\|.*?R\\:\\s+(\\d+)?"
  )
)
# Flattened view of the first player: 13 values per matched line, 26 in total.
head(unlist(get_data), n = 26)
## [1] "1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4"
## [2] "1"
## [3] "GARY HUA "
## [4] "6.0"
## [5] "39"
## [6] "21"
## [7] "18"
## [8] "14"
## [9] "7"
## [10] "12"
## [11] "4"
## [12] NA
## [13] NA
## [14] " ON | 15445895 / R: 1794"
## [15] NA
## [16] NA
## [17] NA
## [18] NA
## [19] NA
## [20] NA
## [21] NA
## [22] NA
## [23] NA
## [24] NA
## [25] "ON"
## [26] "1794"
# unlist(get_data) is one flat character vector: every matched line contributes
# 13 values (the full match plus 12 capture groups), and each player line is
# followed by its rating line, so 26 consecutive values describe one player.
# Filling by row with ncol = 26 therefore yields one row per player.
# NOTE(review): this relies on matched lines strictly alternating between
# player line and rating line; a line that fails to match would shift every
# later row.
m <- matrix(unlist(get_data), ncol = 26, byrow = TRUE)
head(m)
## [,1] [,2]
## [1,] "1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4" "1"
## [2,] "2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7" "2"
## [3,] "3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12" "3"
## [4,] "4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1" "4"
## [5,] "5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17" "5"
## [6,] "6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21" "6"
## [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] "GARY HUA " "6.0" "39" "21" "18" "14" "7" "12" "4" NA NA
## [2,] "DAKSHESH DARURI " "6.0" "63" "58" "4" "17" "16" "20" "7" NA NA
## [3,] "ADITYA BAJAJ " "6.0" "8" "61" "25" "21" "11" "13" "12" NA NA
## [4,] "PATRICK H SCHILLING" "5.5" "23" "28" "2" "26" "5" "19" "1" NA NA
## [5,] "HANSHI ZUO " "5.5" "45" "37" "12" "13" "4" "14" "17" NA NA
## [6,] "HANSEN SONG " "5.0" "34" "29" "11" "35" "10" "27" "21" NA NA
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## [1,] " ON | 15445895 / R: 1794" NA NA NA NA NA NA NA NA NA NA "ON"
## [2,] " MI | 14598900 / R: 1553" NA NA NA NA NA NA NA NA NA NA "MI"
## [3,] " MI | 14959604 / R: 1384" NA NA NA NA NA NA NA NA NA NA "MI"
## [4,] " MI | 12616049 / R: 1716" NA NA NA NA NA NA NA NA NA NA "MI"
## [5,] " MI | 14601533 / R: 1655" NA NA NA NA NA NA NA NA NA NA "MI"
## [6,] " OH | 15055204 / R: 1686" NA NA NA NA NA NA NA NA NA NA "OH"
## [,26]
## [1,] "1794"
## [2,] "1553"
## [3,] "1384"
## [4,] "1716"
## [5,] "1655"
## [6,] "1686"
# Keep only the parsed fields of m: columns 2-11 (pair number, name, total
# points and the seven opponent slots) and columns 25-26 (state, pre-rating).
# The skipped columns hold the two full-match strings and the NA groups of
# whichever pattern branch did not apply to that line.
player_df <- setNames(
  data.frame(m[, 2:11], m[, 25:26]),
  c(
    "player_index", "player_name", "total_number_points",
    "round_1", "round_2", "round_3", "round_4", "round_5", "round_6",
    "round_7", "state", "pre_rating"
  )
)
head(player_df)
## player_index player_name total_number_points round_1 round_2 round_3 round_4 round_5
## 1 1 GARY HUA 6.0 39 21 18 14 7
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17 16
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21 11
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26 5
## 5 5 HANSHI ZUO 5.5 45 37 12 13 4
## 6 6 HANSEN SONG 5.0 34 29 11 35 10
## round_6 round_7 state pre_rating
## 1 12 4 ON 1794
## 2 20 7 MI 1553
## 3 13 12 MI 1384
## 4 19 1 MI 1716
## 5 14 17 MI 1655
## 6 27 21 OH 1686
Calculate each player's average opponent pre-rating, create the requested table, and export it to a CSV file
#' Average pre-tournament rating of one player's opponents.
#'
#' Each of `r1`..`r7` is an opponent's pair number (character, factor or
#' numeric; `NA` when no opponent was recorded). Every opponent is looked up
#' by `player_index` in `players`, and the matching `pre_rating` values are
#' averaged.
#'
#' @param r1,r2,r3,r4,r5,r6,r7 Opponent pair numbers, one per round slot.
#' @param players Data frame with `player_index` and `pre_rating` columns.
#'   Defaults to the global `player_df`, the table this function has always
#'   read from, so existing seven-argument calls behave as before.
#' @return The mean opponent pre-rating rounded to a whole number, or `NaN`
#'   when none of the opponents can be found.
get_avg <- function(r1, r2, r3, r4, r5, r6, r7, players = player_df) {
  # Go through as.character() so factor inputs convert by label, not by
  # their internal level code.
  to_number <- function(x) as.numeric(as.character(x))

  opponents <- unlist(lapply(list(r1, r2, r3, r4, r5, r6, r7), to_number))
  ids <- to_number(players[["player_index"]])
  # Look the rating up by column name rather than by position 12, so adding
  # or reordering columns cannot silently change what is averaged.
  ratings <- to_number(players[["pre_rating"]])

  # which() drops NA comparisons, so a missing opponent contributes nothing.
  opponent_ratings <- unlist(
    lapply(opponents, function(id) ratings[which(ids == id)])
  )

  round(mean(opponent_ratings, na.rm = TRUE), digits = 0)
}
# Call get_avg() once per player, handing it that player's seven opponent
# pair numbers; the results become a new column of player_df.
player_df$avg_opp_pre_rating <- with(
  player_df,
  mapply(
    FUN = get_avg,
    round_1, round_2, round_3, round_4, round_5, round_6, round_7
  )
)
head(player_df)
## player_index player_name total_number_points round_1 round_2 round_3 round_4 round_5
## 1 1 GARY HUA 6.0 39 21 18 14 7
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17 16
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21 11
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26 5
## 5 5 HANSHI ZUO 5.5 45 37 12 13 4
## 6 6 HANSEN SONG 5.0 34 29 11 35 10
## round_6 round_7 state pre_rating avg_opp_pre_rating
## 1 12 4 ON 1794 1605
## 2 20 7 MI 1553 1469
## 3 13 12 MI 1384 1564
## 4 19 1 MI 1716 1574
## 5 14 17 MI 1655 1501
## 6 27 21 OH 1686 1519
# Assemble the report table: one row per player, restricted to the five
# columns that are exported, in output order.
final_player_df <- data.frame(
  player_name = player_df$player_name,
  state = player_df$state,
  total_number_points = player_df$total_number_points,
  pre_rating = player_df$pre_rating,
  avg_opp_pre_rating = player_df$avg_opp_pre_rating
)
head(final_player_df)
## player_name state total_number_points pre_rating avg_opp_pre_rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
# Export the report table; the path is relative to the working directory.
# NOTE(review): write.csv() defaults to row.names = TRUE, so the file also
# gets a leading row-number column - confirm that is wanted.
write.csv(x = final_player_df, file = "final_player.csv")