library(dplyr)
library(stringr)
Read the data from the text file.
# Load the raw tournament report; readLines() returns one string per line
file_path <- "C:\\Users\\HP\\Downloads\\tournamentinfo.txt"
chess_data <- readLines(con = file_path, warn = FALSE)
# Preview the first six lines of the report
head(chess_data, n = 6L)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
Processing the data
## Length Class Mode
## 196 character character
# Extracting relevant data by row
# Player records start on line 5 (after the header lines) and repeat every
# 3 lines: the first line of a record holds pair number, name, total points
# and round results; the line right after it holds state and ratings.
# The upper bound comes from the data rather than a hard-coded 196, and
# stops one line early so every name row has a matching state/rating row.
stopifnot(length(chess_data) >= 6)
name <- seq(5, length(chess_data) - 1, by = 3)
state_point <- name + 1

# Extracting player ID, player name and points from the name row
# First run of digits on the row is the pair number
player <- as.integer(str_extract(chess_data[name], "\\d+"))
# Text between the first two pipes is the name; strip pipes and padding
name_player <- str_trim(
  str_replace_all(str_extract(chess_data[name], "([|]).+?\\1"), "[|]", "")
)
# Escape the dot so only a real decimal point matches, and keep the total
# as a number (consistent with `rating`) instead of a string
points <- as.numeric(str_extract(chess_data[name], "\\d+\\.\\d"))

# Extracting state and rating from the row below the name row
# First two consecutive capital letters on the row are the state code
state <- str_extract(chess_data[state_point], "[A-Z]{2}")
# Capture only the digits that follow "R:" (the pre-tournament rating);
# any suffix after the digits is ignored
rating <- as.integer(
  str_match(chess_data[state_point], "R:\\s*(\\d+)")[, 2]
)

# Combining the vectors to form a file which has clean data
new_chess_data <- data.frame(player, name_player, state, points, rating)
head(new_chess_data)
## player name_player state points rating
## 1 1 GARY HUA ON 6.0 1794
## 2 2 DAKSHESH DARURI MI 6.0 1553
## 3 3 ADITYA BAJAJ MI 6.0 1384
## 4 4 PATRICK H SCHILLING MI 5.5 1716
## 5 5 HANSHI ZUO MI 5.5 1655
## 6 6 HANSEN SONG OH 5.0 1686
# Calculating average opponent rating:
# For every name row, collect the opponent pair numbers: the digits that sit
# directly before a "|" in the round columns. Working element-wise with
# lapply() keeps each player's opponents as an integer vector and avoids
# passing a list to str_extract_all(), which triggered the
# "argument is not an atomic vector; coercing" warning.
opponent <- lapply(
  str_extract_all(chess_data[name], "\\d+\\|"),
  function(cells) as.integer(str_extract(cells, "\\d+"))
)

# Mean pre-tournament rating of each player's opponents, rounded to a whole
# number. Opponents are looked up by pair number via match(), so the result
# does not depend on pair numbers being equal to row positions. A player
# with no recorded opponents gets NaN.
avg_opp <- vapply(
  opponent,
  function(ids) round(mean(rating[match(ids, player)]), digits = 0),
  numeric(1)
)
avg_opp
## [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515 1484
## [16] 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522 1314 1144
## [31] 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150 1107 1327 1152
## [46] 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414 1363 1391 1319 1330
## [61] 1327 1186 1350 1263
# Adding the avg variable to the new chess data
# Use the parsed player names (name_player) for the name column; `name` only
# holds the row indices of the name lines in chess_data (5, 8, 11, ...).
# The column is still called "name", so the CSV header is unchanged.
newchessdata <- data.frame(
  player,
  name = name_player,
  state,
  points,
  rating,
  avg_opp
)
head(newchessdata)
## player name state points rating avg_opp
## 1 1 GARY HUA ON 6.0 1794 1605
## 2 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 5 HANSHI ZUO MI 5.5 1655 1501
## 6 6 HANSEN SONG OH 5.0 1686 1519
Write to CSV
# Export the combined player table to a CSV file (path is relative, so it
# lands in the current working directory)
write.csv(x = newchessdata, file = "newchessdatainfo.csv")
LS0tDQp0aXRsZTogIldlZWsgNSBQcm9qZWN0IDEgLSBEYXRhIEFuYWx5c2lzIg0KYXV0aG9yOiAiTUQgQXNhZHVsIElzbGFtIg0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiBvcGVuaW50cm86OmxhYl9yZXBvcnQNCi0tLQ0KDQpgYGB7ciBsb2FkLXBhY2thZ2VzLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoc3RyaW5ncikNCmBgYA0KDQojIyMgUmVhZCB0aGUgZGF0YSBmcm9tIHRleHQgZmlsZS4NCg0KYGBge3IgY29kZS1jaHVuay1sYWJlbCBmb3IgcmVhZGluZyB0ZXh0IGZpbGV9DQojIFJlYWQgdGhlIHRleHQgZmlsZQ0KZmlsZV9wYXRoIDwtICJDOlxcVXNlcnNcXEhQXFxEb3dubG9hZHNcXHRvdXJuYW1lbnRpbmZvLnR4dCINCmNoZXNzX2RhdGEgPC0gcmVhZExpbmVzKGZpbGVfcGF0aCwgd2Fybj1GQUxTRSkNCmhlYWQoY2hlc3NfZGF0YSkNCmBgYA0KDQojIyMgUHJvY2Vzc2luZyB0aGUgZGF0YQ0KYGBge3IgY29kZSBjaHVuayBmb3IgcHJvY2Vzc2luZyBkYXRhIH0NCnN1bW1hcnkoY2hlc3NfZGF0YSkNCiMgRXh0cmFjdGluZyByZWxldmFudCBkYXRhIGJ5IHJvdw0KbmFtZSA8LSBzZXEoNSwgMTk2LCAzKSANCnN0YXRlX3BvaW50IDwtIHNlcSg2LCAxOTYsIDMpIA0KDQojRXh0cmFjdGluZyBwbGF5ZXIgSUQgYW5kIHBvaW50cyBmcm9tIG5hbWUNCnBsYXllciA8LSBhcy5pbnRlZ2VyKHN0cl9leHRyYWN0KGNoZXNzX2RhdGFbbmFtZV0sICJcXGQrIikpIA0KbmFtZV9wbGF5ZXIgPC0gc3RyX3JlcGxhY2VfYWxsKHN0cl9leHRyYWN0KGNoZXNzX2RhdGFbbmFtZV0sIihbfF0pLis/XFwxIiksIlt8XSIsIiIpIA0KcG9pbnRzIDwtIHN0cl9leHRyYWN0KGNoZXNzX2RhdGFbbmFtZV0sICJcXGQuXFxkIikgDQoNCiNleHRyYWN0aW5nIHN0YXRlIGFuZCByYXRpbmcNCnN0YXRlIDwtIHN0cl9leHRyYWN0KGNoZXNzX2RhdGFbc3RhdGVfcG9pbnRdLCAiW0EtWl17Mn0iICkgDQpyYXRpbmcgPC0gYXMuaW50ZWdlcihzdHJfcmVwbGFjZV9hbGwoc3RyX2V4dHJhY3QoY2hlc3NfZGF0YVtzdGF0ZV9wb2ludF0sICJSOiBcXHM/XFxkezMsNH0iKSwgIlI6XFxzIiwgIiIpKQ0KDQojIENvbWJpbmluZyB0aGUgdmVjdG9ycyB0byBmb3JtIGEgZmlsZSB3aGljaCBoYXMgY2xlYW4gZGF0YQ0KbmV3X2NoZXNzX2RhdGEgPC0gZGF0YS5mcmFtZShwbGF5ZXIsIG5hbWVfcGxheWVyLCBzdGF0ZSwgcG9pbnRzLCByYXRpbmcpIA0KaGVhZChuZXdfY2hlc3NfZGF0YSkNCg0KIyBDYWxjdWxhdGluZyBhdmVyYWdlIG9wcG9uZW50IHJhdGluZzoNCm9wcG9uZW50IDwtIHN0cl9leHRyYWN0X2FsbChzdHJfZXh0cmFjdF9hbGwoY2hlc3NfZGF0YVtuYW1lXSwgIlxcZCtcXHwiKSwgIlxcZCsiKQ0KYXZnX29wcCA8LSBsZW5ndGgobmFtZSkNCg0KZm9yIChpIGluIDE6bGVuZ3RoKG5hbWUpKSANCnsgDQogIGF2Z19vcHBbaV0gPC0gcm91bmQobWVhbihyYXRpbmdbYXMubnVtZXJpYyh1bmxpc3Qob3Bwb25lbnRbcGxheWVyW2ldXSkp
XSksIGRpZ2l0cyA9IDApDQp9DQphdmdfb3BwDQoNCiMgQWRkaW5nIHRoZSBhdmcgdmFyaWFibGUgdG8gdGhlIG5ldyBjaGVzcyBkYXRhDQpuZXdjaGVzc2RhdGEgPC0gZGF0YS5mcmFtZShwbGF5ZXIsIG5hbWUsIHN0YXRlLCBwb2ludHMsIHJhdGluZywgYXZnX29wcCkNCmhlYWQobmV3Y2hlc3NkYXRhKQ0KDQpgYGANCiMjIyBXcml0ZSB0byBDU1YNCg0KYGBge3IgY29kZSBjaHVuayBmb3Igd3JpdGluZyBkYXRhIHRvIGNzdn0NCiNXcml0ZSBkYXRhIHRvIGNzdg0Kd3JpdGUuY3N2KG5ld2NoZXNzZGF0YSwgZmlsZSA9ICJuZXdjaGVzc2RhdGFpbmZvLmNzdiIpDQpgYGANCg==