The following code transforms an ASCII-formatted chess tournament results table into a .csv file.
library("stringr")
## Warning: package 'stringr' was built under R version 3.1.3
# Switch to the project folder only when it exists on this machine; otherwise
# stay in the current working directory so the script can run elsewhere too.
project_dir <- "C:\\Users\\Andrew\\Desktop\\Cuny\\Data Acquisition\\Project 1"
if (isTRUE(file.info(project_dir)$isdir)) {
  setwd(project_dir)
}
# Read the raw tournament table as a character vector, one element per line.
chess_grid <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
str(chess_grid)
## chr [1:196] "-----------------------------------------------------------------------------------------" ...
# Extracts vector of player names.
# The pattern matches runs of 15-31 letters/spaces (each optionally followed by
# a hyphen) anywhere in the table.
name_cells <- unlist(str_extract_all(chess_grid, "([[:alpha:] ]-?){15,31}"))
# The first match is discarded rather than assuming a fixed number of players.
# NOTE(review): presumably that first match is the header row's name column -
# confirm against the input file.
player_name <- str_trim(name_cells[-1])
str(player_name)
## chr [1:64] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" ...
# Pull out the state/province code for each player and strip the padding.
# NOTE(review): only the codes MI, ON and OH are recognised; any other code in
# the input would be skipped without warning.
state_cells <- str_extract_all(chess_grid, " MI | ON | OH ")
state <- str_trim(unlist(state_cells))
str(state)
## chr [1:64] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" ...
# Extracts vector of player scores (total points), kept as character strings.
# The dot is escaped so that only a digit, a literal ".", and a digit match
# (e.g. "6.0"); a digit/slash/digit sequence is no longer accepted.
results <- unlist(str_extract_all(chess_grid, "[[:digit:]]\\.[[:digit:]]"))
str(results)
## chr [1:64] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" ...
# Extracts vector of player preratings.
# Each match is the literal "R: " followed by four characters that are digits
# or spaces; the prefix is removed and the remainder converted to integer.
rating_cells <- unlist(str_extract_all(chess_grid, "R: [[:digit:] ]{4}"))
prerating <- as.integer(sub(pattern = "R: ", replacement = "", x = rating_cells))
# Name each rating by its position so ratings can be looked up by index;
# seq_along() keeps this correct for any number of players.
names(prerating) <- seq_along(prerating)
str(prerating)
## Named int [1:64] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 ...
## - attr(*, "names")= chr [1:64] "1" "2" "3" "4" ...
# Collects the rest of the row that follows the total-points score, which holds
# the opponent data.
# Each match is 47 characters long: a score (digit, literal ".", digit) followed
# by 44 characters drawn from pipes, the result letters D/L/W/U/X/B/H, digits
# and spaces.
score_rows <- unlist(str_extract_all(
  chess_grid,
  "[[:digit:]]\\.[[:digit:]][|DLWUXBH[:digit:] ]{44}"
))
# Keep characters 10-47 of each match (dropping the score and the 6 characters
# after it), then turn the column separators into spaces.
opp_fields <- str_sub(score_rows, start = 10, end = 47)
opp_dat <- gsub("\\|", " ", opp_fields)
str(opp_dat)
## chr [1:64] "39 W 21 W 18 W 14 W 7 D 12 D 4" ...
# Split every player's round text at the space-flanked result letters
# (B, L, W, H, U, X, D), leaving one string per round with the opponent number.
result_code_pattern <- " B | L | W | H | U | X | D "
opp_list <- strsplit(x = opp_dat, split = result_code_pattern)
str(opp_list)
## List of 64
## $ : chr [1:7] "39" "21" "18" "14" ...
## $ : chr [1:7] "63" "58" " 4" "17" ...
## $ : chr [1:7] " 8" "61" "25" "21" ...
## $ : chr [1:7] "23" "28" " 2" "26" ...
## $ : chr [1:7] "45" "37" "12" "13" ...
## $ : chr [1:7] "34" "29" "11" "35" ...
## $ : chr [1:7] "57" "46" "13" "11" ...
## $ : chr [1:7] " 3" "32" "14" " 9" ...
## $ : chr [1:7] "25" "18" "59" " 8" ...
## $ : chr [1:7] "16" "19" "55" "31" ...
## $ : chr [1:7] "38" "56" " 6" " 7" ...
## $ : chr [1:7] "42" "33" " 5" "38" ...
## $ : chr [1:7] "36" "27" " 7" " 5" ...
## $ : chr [1:7] "54" "44" " 8" " 1" ...
## $ : chr [1:7] "19" "16" "30" "22" ...
## $ : chr [1:7] "10" "15" " " "39" ...
## $ : chr [1:7] "48" "41" "26" " 2" ...
## $ : chr [1:7] "47" " 9" " 1" "32" ...
## $ : chr [1:7] "15" "10" "52" "28" ...
## $ : chr [1:7] "40" "49" "23" "41" ...
## $ : chr [1:7] "43" " 1" "47" " 3" ...
## $ : chr [1:7] "64" "52" "28" "15" ...
## $ : chr [1:7] " 4" "43" "20" "58" ...
## $ : chr [1:7] "28" "47" "43" "25" ...
## $ : chr [1:7] " 9" "53" " 3" "24" ...
## $ : chr [1:7] "49" "40" "17" " 4" ...
## $ : chr [1:7] "51" "13" "46" "37" ...
## $ : chr [1:7] "24" " 4" "22" "19" ...
## $ : chr [1:7] "50" " 6" "38" "34" ...
## $ : chr [1:7] "52" "64" "15" "55" ...
## $ : chr [1:7] "58" "55" "64" "10" ...
## $ : chr [1:7] "61" " 8" "44" "18" ...
## $ : chr [1:7] "60" "12" "50" "36" ...
## $ : chr [1:7] " 6" "60" "37" "29" ...
## $ : chr [1:7] "46" "38" "56" " 6" ...
## $ : chr [1:7] "13" "57" "51" "33" ...
## $ : chr [1:7] " " " 5" "34" "27" ...
## $ : chr [1:7] "11" "35" "29" "12" ...
## $ : chr [1:7] " 1" "54" "40" "16" ...
## $ : chr [1:7] "20" "26" "39" "59" ...
## $ : chr [1:7] "59" "17" "58" "20" ...
## $ : chr [1:7] "12" "50" "57" "60" ...
## $ : chr [1:7] "21" "23" "24" "63" ...
## $ : chr [1:7] " " "14" "32" "53" ...
## $ : chr [1:7] " 5" "51" "60" "56" ...
## $ : chr [1:7] "35" " 7" "27" "50" ...
## $ : chr [1:7] "18" "24" "21" "61" ...
## $ : chr [1:7] "17" "63" " " "52" ...
## $ : chr [1:7] "26" "20" "63" "64" ...
## $ : chr [1:7] "29" "42" "33" "46" ...
## $ : chr [1:7] "27" "45" "36" "57" ...
## $ : chr [1:7] "30" "22" "19" "48" ...
## $ : chr [1:7] " " "25" " " "44" ...
## $ : chr [1:7] "14" "39" "61" " " ...
## $ : chr [1:7] "62" "31" "10" "30" ...
## $ : chr [1:7] " " "11" "35" "45" ...
## $ : chr [1:7] " 7" "36" "42" "51" ...
## $ : chr [1:7] "31" " 2" "41" "23" ...
## $ : chr [1:7] "41" " " " 9" "40" ...
## $ : chr [1:7] "33" "34" "45" "42" ...
## $ : chr [1:7] "32" " 3" "54" "47" ...
## $ : chr [1:7] "55" " " " " " " ...
## $ : chr [1:7] " 2" "48" "49" "43" ...
## $ : chr [1:7] "22" "30" "31" "49" ...
# Simplifies list into numeric matrix of opponent numbers: one column per
# player, one row per round. Entries that hold no number become NA.
# vapply() enforces that every player has the same number of rounds and fails
# with a clear error otherwise, instead of silently returning a list.
n_rounds <- if (length(opp_list) > 0) length(opp_list[[1]]) else 0L
opp_mtx <- vapply(opp_list, as.numeric, numeric(n_rounds))
str(opp_mtx)
## num [1:7, 1:64] 39 21 18 14 7 12 4 63 58 4 ...
# Creates matrix of the corresponding opponent preratings: each opponent number
# in opp_mtx is used as an index into prerating (NA indices yield NA).
# The row count is taken from opp_mtx so the shape follows the number of rounds
# actually parsed instead of a fixed constant.
opp_rating_mtx <- matrix(prerating[opp_mtx], nrow = nrow(opp_mtx))
str(opp_rating_mtx)
## int [1:7, 1:64] 1436 1563 1600 1610 1649 1663 1716 1175 917 1716 ...
# Creates vector of averaged opponent preratings, one value per player
# (column), ignoring NA entries.
# round() is applied explicitly before the integer conversion so every average
# is rounded to the nearest whole number regardless of the magnitude of the
# other values in the vector.
opp_avg_prerating <- as.integer(round(colMeans(opp_rating_mtx, na.rm = TRUE)))
str(opp_avg_prerating)
## int [1:64] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 ...
# Assemble the per-player vectors into a single data frame, one row per player
# and one column per extracted field, then inspect and print it.
tourn_results <- data.frame(
  player_name = player_name,
  state = state,
  results = results,
  prerating = prerating,
  opp_avg_prerating = opp_avg_prerating
)
str(tourn_results)
## 'data.frame': 64 obs. of 5 variables:
## $ player_name : Factor w/ 64 levels "ADITYA BAJAJ",..: 24 12 1 51 28 27 23 21 59 5 ...
## $ state : Factor w/ 3 levels "MI","OH","ON": 3 1 1 1 1 2 1 1 3 1 ...
## $ results : Factor w/ 11 levels "1.0","1.5","2.0",..: 11 11 11 10 10 9 9 9 9 9 ...
## $ prerating : int 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 ...
## $ opp_avg_prerating: int 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 ...
tourn_results
## player_name state results prerating opp_avg_prerating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1411
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1214
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1507
## 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1260
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 JADE GE MI 3.5 1449 1277
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1430
## 40 FOREST ZHANG MI 3.0 1348 1391
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1150
## 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1495
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
# Write the assembled results table to chess_results.csv (relative path, so it
# lands in the current working directory).
write.csv(x = tourn_results, file = "chess_results.csv")