Read txt file
chess = readLines('chess.txt',warn=FALSE)
head(chess,10)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
remove all dash and header
chess = subset(chess,str_detect(chess, '[[:alpha:]]')) ## Keep rows with any alphabetic
chess_rm_header = chess[-c(1:2)]
head(chess_rm_header,10)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [7] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [8] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [9] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [10] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
Separate even index row and odd index row
index = c(1:length(chess_rm_header))
even = subset(chess_rm_header,index%%2==0)
odd = subset(chess_rm_header,index%%2==1)
head(even,10)
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [7] " MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |"
## [8] " MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |"
## [9] " ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |"
## [10] " MI | 14150362 / R: 1365 ->1544 |N:3 |W |W |B |B |W |B |W |"
head(odd,10)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [7] " 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
## [8] " 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"
## [9] " 9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|"
## [10] " 10 | ANVIT RAO |5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|"
Convert string into dataframe
even_df <- read.csv(sep = '|', header = F, text = even, stringsAsFactors = F)
odd_df <- read.csv(sep = '|', header = F, text = odd, stringsAsFactors = F)
head(even_df,10)
## V1 V2 V3 V4 V5 V6 V7
## 1 ON 15445895 / R: 1794 ->1817 N:2 W B W B
## 2 MI 14598900 / R: 1553 ->1663 N:2 B W B W
## 3 MI 14959604 / R: 1384 ->1640 N:2 W B W B
## 4 MI 12616049 / R: 1716 ->1744 N:2 W B W B
## 5 MI 14601533 / R: 1655 ->1690 N:2 B W B W
## 6 OH 15055204 / R: 1686 ->1687 N:3 W B W B
## 7 MI 11146376 / R: 1649 ->1673 N:3 W B W B
## 8 MI 15142253 / R: 1641P17->1657P24 N:3 B W B W
## 9 ON 14954524 / R: 1411 ->1564 N:2 W B W B
## 10 MI 14150362 / R: 1365 ->1544 N:3 W W B B
## V8 V9 V10 V11
## 1 W B W NA
## 2 B W B NA
## 3 W B W NA
## 4 W B B NA
## 5 B W B NA
## 6 B W B NA
## 7 B W W NA
## 8 B W W NA
## 9 W B B NA
## 10 W B W NA
head(odd_df,10)
## V1 V2 V3 V4 V5 V6 V7 V8
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14 W 7
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17 W 16
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21 W 11
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26 D 5
## 5 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13 D 4
## 6 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35 D 10
## 7 7 GARY DEE SWATHELL 5.0 W 57 W 46 W 13 W 11 L 1
## 8 8 EZEKIEL HOUGHTON 5.0 W 3 W 32 L 14 L 9 W 47
## 9 9 STEFANO LEE 5.0 W 25 L 18 W 59 W 8 W 26
## 10 10 ANVIT RAO 5.0 D 16 L 19 W 55 W 31 D 6
## V9 V10 V11
## 1 D 12 D 4 NA
## 2 W 20 W 7 NA
## 3 W 13 W 12 NA
## 4 W 19 D 1 NA
## 5 W 14 W 17 NA
## 6 W 27 W 21 NA
## 7 W 9 L 2 NA
## 8 W 28 W 19 NA
## 9 L 7 W 20 NA
## 10 W 25 W 18 NA
## The shape of even_df and odd_df are the same. We can then merge these 2 dataframe based on index
Extract pre tournament rating, state and create pre tournament rating table
pre_rate_str=unlist(str_extract_all(even_df$V2,'[[:punct:]]\\s*[0-9]{3,4}'))
pre_rate=as.numeric(gsub("[[:punct:][:space:]]", "", pre_rate_str))
state = even_df$V1%>%str_trim(side = "both")
ucsf_id= c(1:length(pre_rate))
rate_df = data.frame(ucsf_id,state,pre_rate)
head(rate_df)
## ucsf_id state pre_rate
## 1 1 ON 1794
## 2 2 MI 1553
## 3 3 MI 1384
## 4 4 MI 1716
## 5 5 MI 1655
## 6 6 OH 1686
Extract all players’ name, total number of point and opponents’ ucsf ID
name = odd_df$V2%>%str_trim(side = "both")
total_number_point = odd_df$V3 %>% str_trim(side = "both")%>%type.convert
Calculate average pre chess rating of opponents
avg_rating <- vector(mode = "double", length = length(ucsf_id))
for (id in ucsf_id){
avg_rating[id] = rate_df$pre_rate[odd_df[id,c(4:10)]%>%
str_sub(2, -1) %>%
str_trim(side = "both")%>%
type.convert]%>%
na.omit()%>%
mean()%>%
round()
}
avg_rating
## [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515
## [15] 1484 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522
## [29] 1314 1144 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150
## [43] 1107 1327 1152 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414
## [57] 1363 1391 1319 1330 1327 1186 1350 1263
Combine to final dataframe
final_df = data.frame(name,state,total_number_point,pre_rate,avg_rating)
final_df
## name state total_number_point pre_rate avg_rating
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1411
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1214
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1507
## 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1260
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 JADE GE MI 3.5 1449 1277
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1430
## 40 FOREST ZHANG MI 3.0 1348 1391
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1150
## 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1495
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
Write to CSV
write.csv(final_df, file = 'chess.csv')