Read txt file

chess = readLines('chess.txt',warn=FALSE)
head(chess,10)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

remove all dash and header

chess = subset(chess,str_detect(chess, '[[:alpha:]]')) ## Keep rows with any alphabetic
chess_rm_header = chess[-c(1:2)]
head(chess_rm_header,10)
##  [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [3] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [4] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [5] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [6] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [7] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
##  [8] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
##  [9] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
## [10] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"

Separate even index row and odd index row

index = c(1:length(chess_rm_header))
even = subset(chess_rm_header,index%%2==0)
odd = subset(chess_rm_header,index%%2==1)
head(even,10)
##  [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##  [4] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
##  [5] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [6] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"
##  [7] "   MI | 11146376 / R: 1649   ->1673     |N:3  |W    |B    |W    |B    |B    |W    |W    |"
##  [8] "   MI | 15142253 / R: 1641P17->1657P24  |N:3  |B    |W    |B    |W    |B    |W    |W    |"
##  [9] "   ON | 14954524 / R: 1411   ->1564     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
## [10] "   MI | 14150362 / R: 1365   ->1544     |N:3  |W    |W    |B    |B    |W    |B    |W    |"
head(odd,10)
##  [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [4] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
##  [5] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
##  [6] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
##  [7] "    7 | GARY DEE SWATHELL               |5.0  |W  57|W  46|W  13|W  11|L   1|W   9|L   2|"
##  [8] "    8 | EZEKIEL HOUGHTON                |5.0  |W   3|W  32|L  14|L   9|W  47|W  28|W  19|"
##  [9] "    9 | STEFANO LEE                     |5.0  |W  25|L  18|W  59|W   8|W  26|L   7|W  20|"
## [10] "   10 | ANVIT RAO                       |5.0  |D  16|L  19|W  55|W  31|D   6|W  25|W  18|"

Convert string into dataframe

even_df <- read.csv(sep = '|', header = F, text = even, stringsAsFactors = F)
odd_df <- read.csv(sep = '|', header = F, text = odd, stringsAsFactors = F)

head(even_df,10)
##        V1                                V2    V3    V4    V5    V6    V7
## 1     ON   15445895 / R: 1794   ->1817      N:2   W     B     W     B    
## 2     MI   14598900 / R: 1553   ->1663      N:2   B     W     B     W    
## 3     MI   14959604 / R: 1384   ->1640      N:2   W     B     W     B    
## 4     MI   12616049 / R: 1716   ->1744      N:2   W     B     W     B    
## 5     MI   14601533 / R: 1655   ->1690      N:2   B     W     B     W    
## 6     OH   15055204 / R: 1686   ->1687      N:3   W     B     W     B    
## 7     MI   11146376 / R: 1649   ->1673      N:3   W     B     W     B    
## 8     MI   15142253 / R: 1641P17->1657P24   N:3   B     W     B     W    
## 9     ON   14954524 / R: 1411   ->1564      N:2   W     B     W     B    
## 10    MI   14150362 / R: 1365   ->1544      N:3   W     W     B     B    
##       V8    V9   V10 V11
## 1  W     B     W      NA
## 2  B     W     B      NA
## 3  W     B     W      NA
## 4  W     B     B      NA
## 5  B     W     B      NA
## 6  B     W     B      NA
## 7  B     W     W      NA
## 8  B     W     W      NA
## 9  W     B     B      NA
## 10 W     B     W      NA
head(odd_df,10)
##    V1                                V2  V3    V4    V5    V6    V7    V8
## 1   1  GARY HUA                         6.0 W  39 W  21 W  18 W  14 W   7
## 2   2  DAKSHESH DARURI                  6.0 W  63 W  58 L   4 W  17 W  16
## 3   3  ADITYA BAJAJ                     6.0 L   8 W  61 W  25 W  21 W  11
## 4   4  PATRICK H SCHILLING              5.5 W  23 D  28 W   2 W  26 D   5
## 5   5  HANSHI ZUO                       5.5 W  45 W  37 D  12 D  13 D   4
## 6   6  HANSEN SONG                      5.0 W  34 D  29 L  11 W  35 D  10
## 7   7  GARY DEE SWATHELL                5.0 W  57 W  46 W  13 W  11 L   1
## 8   8  EZEKIEL HOUGHTON                 5.0 W   3 W  32 L  14 L   9 W  47
## 9   9  STEFANO LEE                      5.0 W  25 L  18 W  59 W   8 W  26
## 10 10  ANVIT RAO                        5.0 D  16 L  19 W  55 W  31 D   6
##       V9   V10 V11
## 1  D  12 D   4  NA
## 2  W  20 W   7  NA
## 3  W  13 W  12  NA
## 4  W  19 D   1  NA
## 5  W  14 W  17  NA
## 6  W  27 W  21  NA
## 7  W   9 L   2  NA
## 8  W  28 W  19  NA
## 9  L   7 W  20  NA
## 10 W  25 W  18  NA
## The shape of even_df and odd_df are the same. We can then merge these 2 dataframe based on index

Extract pre tournament rating, state and create pre tournament rating table

pre_rate_str=unlist(str_extract_all(even_df$V2,'[[:punct:]]\\s*[0-9]{3,4}'))
pre_rate=as.numeric(gsub("[[:punct:][:space:]]", "", pre_rate_str))
state = even_df$V1%>%str_trim(side = "both")
ucsf_id= c(1:length(pre_rate))
rate_df = data.frame(ucsf_id,state,pre_rate)
head(rate_df)
##   ucsf_id state pre_rate
## 1       1    ON     1794
## 2       2    MI     1553
## 3       3    MI     1384
## 4       4    MI     1716
## 5       5    MI     1655
## 6       6    OH     1686

Extract all players’ name, total number of point and opponents’ ucsf ID

name = odd_df$V2%>%str_trim(side = "both")
total_number_point = odd_df$V3 %>% str_trim(side = "both")%>%type.convert

Calculate average pre chess rating of opponents

avg_rating <- vector(mode = "double", length = length(ucsf_id))
for (id in ucsf_id){
  avg_rating[id] = rate_df$pre_rate[odd_df[id,c(4:10)]%>%
                                    str_sub(2, -1) %>% 
                                    str_trim(side = "both")%>%
                                    type.convert]%>%
                                    na.omit()%>%
                                    mean()%>%
                                    round()
    
}

avg_rating
##  [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515
## [15] 1484 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522
## [29] 1314 1144 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150
## [43] 1107 1327 1152 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414
## [57] 1363 1391 1319 1330 1327 1186 1350 1263

Combine to final dataframe

final_df = data.frame(name,state,total_number_point,pre_rate,avg_rating)

final_df
##                          name state total_number_point pre_rate avg_rating
## 1                    GARY HUA    ON                6.0     1794       1605
## 2             DAKSHESH DARURI    MI                6.0     1553       1469
## 3                ADITYA BAJAJ    MI                6.0     1384       1564
## 4         PATRICK H SCHILLING    MI                5.5     1716       1574
## 5                  HANSHI ZUO    MI                5.5     1655       1501
## 6                 HANSEN SONG    OH                5.0     1686       1519
## 7           GARY DEE SWATHELL    MI                5.0     1649       1372
## 8            EZEKIEL HOUGHTON    MI                5.0     1641       1468
## 9                 STEFANO LEE    ON                5.0     1411       1523
## 10                  ANVIT RAO    MI                5.0     1365       1554
## 11   CAMERON WILLIAM MC LEMAN    MI                4.5     1712       1468
## 12             KENNETH J TACK    MI                4.5     1663       1506
## 13          TORRANCE HENRY JR    MI                4.5     1666       1498
## 14               BRADLEY SHAW    MI                4.5     1610       1515
## 15     ZACHARY JAMES HOUGHTON    MI                4.5     1220       1484
## 16               MIKE NIKITIN    MI                4.0     1604       1386
## 17         RONALD GRZEGORCZYK    MI                4.0     1629       1499
## 18              DAVID SUNDEEN    MI                4.0     1600       1480
## 19               DIPANKAR ROY    MI                4.0     1564       1426
## 20                JASON ZHENG    MI                4.0     1595       1411
## 21              DINH DANG BUI    ON                4.0     1563       1470
## 22           EUGENE L MCCLURE    MI                4.0     1555       1300
## 23                   ALAN BUI    ON                4.0     1363       1214
## 24          MICHAEL R ALDRICH    MI                4.0     1229       1357
## 25           LOREN SCHWIEBERT    MI                3.5     1745       1363
## 26                    MAX ZHU    ON                3.5     1579       1507
## 27             GAURAV GIDWANI    MI                3.5     1552       1222
## 28 SOFIA ADINA STANESCU-BELLU    MI                3.5     1507       1522
## 29           CHIEDOZIE OKORIE    MI                3.5     1602       1314
## 30         GEORGE AVERY JONES    ON                3.5     1522       1144
## 31               RISHI SHETTY    MI                3.5     1494       1260
## 32      JOSHUA PHILIP MATHEWS    ON                3.5     1441       1379
## 33                    JADE GE    MI                3.5     1449       1277
## 34     MICHAEL JEFFERY THOMAS    MI                3.5     1399       1375
## 35           JOSHUA DAVID LEE    MI                3.5     1438       1150
## 36              SIDDHARTH JHA    MI                3.5     1355       1388
## 37       AMIYATOSH PWNANANDAM    MI                3.5      980       1385
## 38                  BRIAN LIU    MI                3.0     1423       1539
## 39              JOEL R HENDON    MI                3.0     1436       1430
## 40               FOREST ZHANG    MI                3.0     1348       1391
## 41        KYLE WILLIAM MURPHY    MI                3.0     1403       1248
## 42                   JARED GE    MI                3.0     1332       1150
## 43          ROBERT GLEN VASEY    MI                3.0     1283       1107
## 44         JUSTIN D SCHILLING    MI                3.0     1199       1327
## 45                  DEREK YAN    MI                3.0     1242       1152
## 46   JACOB ALEXANDER LAVALLEY    MI                3.0      377       1358
## 47                ERIC WRIGHT    MI                2.5     1362       1392
## 48               DANIEL KHAIN    MI                2.5     1382       1356
## 49           MICHAEL J MARTIN    MI                2.5     1291       1286
## 50                 SHIVAM JHA    MI                2.5     1056       1296
## 51             TEJAS AYYAGARI    MI                2.5     1011       1356
## 52                  ETHAN GUO    MI                2.5      935       1495
## 53              JOSE C YBARRA    MI                2.0     1393       1345
## 54                LARRY HODGE    MI                2.0     1270       1206
## 55                  ALEX KONG    MI                2.0     1186       1406
## 56               MARISA RICCI    MI                2.0     1153       1414
## 57                 MICHAEL LU    MI                2.0     1092       1363
## 58               VIRAJ MOHILE    MI                2.0      917       1391
## 59          SEAN M MC CORMICK    MI                2.0      853       1319
## 60                 JULIA SHEN    MI                1.5      967       1330
## 61              JEZZEL FARKAS    ON                1.5      955       1327
## 62              ASHWIN BALAJI    MI                1.0     1530       1186
## 63       THOMAS JOSEPH HOSMER    MI                1.0     1175       1350
## 64                     BEN LI    MI                1.0     1163       1263

Write to CSV

write.csv(final_df, file = 'chess.csv')