First, we import the text file into R with `readLines()`. We then drop the first four lines, which precede the first player record, to start cleaning up the data.
# Read the tournament report into a character vector, one element per line.
rawchessdata <- readLines(con = "tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
# Discard the four lines that come before the first player record.
rawchessdata <- tail(rawchessdata, n = -4L)
# Number of lines left after trimming.
lines <- length(rawchessdata)
# Preview the first six remaining lines.
head(rawchessdata, n = 6L)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] "-----------------------------------------------------------------------------------------"
## [4] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [5] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [6] "-----------------------------------------------------------------------------------------"
# Each player record spans three consecutive lines of `rawchessdata`:
# two data lines followed by a dashed separator line.
lines_per_player <- 3
# Integer division keeps the count whole even if the file ends with a
# partial record (e.g. a missing trailing separator line).
num_players <- length(rawchessdata) %/% lines_per_player

# Preallocate one slot per player for every field that will be parsed.
ID <- integer(num_players)
Name <- character(num_players)
State <- character(num_players)
Points <- numeric(num_players)
Rating <- numeric(num_players)
Average <- numeric(num_players)
# NOTE(review): `Rounds` is not used by the parsing code visible here;
# kept as-is in case later code relies on it.
Rounds <- numeric(8)
I can’t seem to loop back to calculate the first person’s average.
# Parse every player's record and compute the average pre-tournament rating
# of the opponents they actually played.
#
# This is done in two passes: an opponent may appear later in the file than
# the current player, so all ratings must be parsed before any average can be
# computed. Doing both in one loop reads ratings that are still 0.
opponent_ids_list <- vector("list", num_players)
opponent_ratings_list <- vector("list", num_players)

# Pass 1: extract the per-player fields and the opponents' pair numbers.
for (i in seq_len(num_players)) {
  start_idx <- (i - 1) * lines_per_player + 1
  player_data <- rawchessdata[start_idx:(start_idx + lines_per_player - 1)]

  # The pair number is the first number on line 1 of the record; line 2
  # starts with the state followed by the 8-digit USCF id.
  ID[i] <- as.integer(str_extract(player_data[1], "\\d+"))
  Name[i] <- str_squish(
    str_replace_all(str_extract(player_data[1], "\\|.*?\\|"), "\\|", "")
  )
  Points[i] <- as.numeric(str_extract(player_data[1], "\\d+\\.\\d+"))
  State[i] <- str_extract(player_data[2], "\\b[A-Z]{2}\\b")
  # Pre-rating: the digits after "R:", allowing any amount of padding and
  # ignoring a provisional suffix such as "P12".
  Rating[i] <- as.numeric(str_match(player_data[2], "R:\\s*(\\d+)")[, 2])

  # Round results are the pipe-delimited fields after pair number, name and
  # total points. Rounds without an opponent number yield NA and are dropped.
  round_fields <- str_split(player_data[1], "\\|")[[1]]
  round_fields <- round_fields[-(1:3)]
  round_opponents <- as.integer(str_extract(round_fields, "\\d+"))
  opponent_ids_list[[i]] <- round_opponents[!is.na(round_opponents)]
}

# Pass 2: every rating is now known, so opponents can be resolved.
for (i in seq_len(num_players)) {
  # Look opponents up by pair number rather than assuming row position.
  opponent_ratings <- Rating[match(opponent_ids_list[[i]], ID)]
  opponent_ratings_list[[i]] <- opponent_ratings

  if (all(is.na(opponent_ratings))) {
    # No games played, or no opponent rating could be resolved.
    Average[i] <- NA_real_
  } else {
    Average[i] <- round(mean(opponent_ratings, na.rm = TRUE), digits = 0)
  }
}
# Combine the parsed per-player vectors into one table, one row per player.
chess_data <- data.frame(
  ID = ID,
  Name = Name,
  State = State,
  Points = Points,
  Rating = Rating,
  Average = Average,
  stringsAsFactors = FALSE
)

# Keep only the reporting columns, then discard rows with any missing value.
report_columns <- select(chess_data, Name, State, Points, Rating, Average)
clean_chess_data <- drop_na(report_columns)

print(clean_chess_data)
## Name State Points Rating Average
## 1 GARY HUA ON 6.0 1794 0
## 2 DAKSHESH DARURI MI 6.0 1553 1553
## 3 ADITYA BAJAJ MI 6.0 1384 1553
## 4 PATRICK H SCHILLING MI 5.5 1716 1553
## 5 HANSHI ZUO MI 5.5 1655 1553
## 6 HANSEN SONG OH 5.0 1686 1384
## 7 GARY DEE SWATHELL MI 5.0 1649 1384
## 8 EZEKIEL HOUGHTON MI 5.0 1641 461
## 9 STEFANO LEE ON 5.0 1411 1553
## 10 ANVIT RAO MI 5.0 1365 1384
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1384
## 12 KENNETH J TACK MI 4.5 1663 1384
## 13 TORRANCE HENRY JR MI 4.5 1666 1384
## 14 BRADLEY SHAW MI 4.5 1610 1384
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1017
## 16 MIKE NIKITIN MI 4.0 1604 1384
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1384
## 18 DAVID SUNDEEN MI 4.0 1600 1384
## 19 DIPANKAR ROY MI 4.0 1564 1384
## 20 JASON ZHENG MI 4.0 1595 1716
## 21 DINH DANG BUI ON 4.0 1563 692
## 22 EUGENE L MCCLURE MI 4.0 1555 1716
## 23 MICHAEL R ALDRICH MI 4.0 1229 1716
## 24 LOREN SCHWIEBERT MI 3.5 1745 1716
## 25 MAX ZHU ON 3.5 1579 1716
## 26 GAURAV GIDWANI MI 3.5 1552 1716
## 27 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1384
## 28 CHIEDOZIE OKORIE MI 3.5 1602 1688
## 29 JOSHUA PHILIP MATHEWS ON 3.5 1441 1716
## 30 SIDDHARTH JHA MI 3.5 1355 1716
## 31 BRIAN LIU MI 3.0 1423 1716
## 32 JOEL R HENDON MI 3.0 1436 1540
## 33 KYLE WILLIAM MURPHY MI 3.0 1403 1533
## 34 MICHAEL J MARTIN MI 2.5 1291 1646