library(stringr) ## used for string extraction 
library(sqldf) ## used for SQL query 
## Loading required package: gsubfn
## Loading required package: proto
## Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
##   dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
##   Referenced from: /Library/Frameworks/R.framework/Resources/modules//R_X11.so
##   Reason: image not found
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/library/tcltk/libs//tcltk.so'' had status 1
## Could not load tcltk.  Will use slower R code instead.
## Loading required package: RSQLite
## Loading required package: DBI

Description of Task

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:

For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605

1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.

Read in raw .txt file and begin cleaning

First, read file into R

raw_txt <- readLines(con = "tournamentinfo.txt")
## Warning in readLines(con = "tournamentinfo.txt"): incomplete final line
## found on 'tournamentinfo.txt'
head(raw_txt, 15)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------" 
## [11] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
## [12] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
## [13] "-----------------------------------------------------------------------------------------" 
## [14] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
## [15] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"

Subset data to eliminate rows with all dashes and 2 header rows

raw_txt_s1 <- subset(raw_txt, !grepl('^-+$', raw_txt))  ## Remove rows with ----
head(raw_txt_s1, 15)
##  [1] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [2] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [3] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [4] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [5] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [6] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
##  [7] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|" 
##  [8] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [9] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|" 
## [10] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |" 
## [11] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|" 
## [12] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [13] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|" 
## [14] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |" 
## [15] "    7 | GARY DEE SWATHELL               |5.0  |W  57|W  46|W  13|W  11|L   1|W   9|L   2|"
raw_txt_s2 <- raw_txt_s1[3:130] ## Remove header rows

raw_txt_s2
##   [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##   [2] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##   [3] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##   [4] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##   [5] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##   [6] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
##   [7] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
##   [8] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
##   [9] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
##  [10] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
##  [11] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
##  [12] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"
##  [13] "    7 | GARY DEE SWATHELL               |5.0  |W  57|W  46|W  13|W  11|L   1|W   9|L   2|"
##  [14] "   MI | 11146376 / R: 1649   ->1673     |N:3  |W    |B    |W    |B    |B    |W    |W    |"
##  [15] "    8 | EZEKIEL HOUGHTON                |5.0  |W   3|W  32|L  14|L   9|W  47|W  28|W  19|"
##  [16] "   MI | 15142253 / R: 1641P17->1657P24  |N:3  |B    |W    |B    |W    |B    |W    |W    |"
##  [17] "    9 | STEFANO LEE                     |5.0  |W  25|L  18|W  59|W   8|W  26|L   7|W  20|"
##  [18] "   ON | 14954524 / R: 1411   ->1564     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
##  [19] "   10 | ANVIT RAO                       |5.0  |D  16|L  19|W  55|W  31|D   6|W  25|W  18|"
##  [20] "   MI | 14150362 / R: 1365   ->1544     |N:3  |W    |W    |B    |B    |W    |B    |W    |"
##  [21] "   11 | CAMERON WILLIAM MC LEMAN        |4.5  |D  38|W  56|W   6|L   7|L   3|W  34|W  26|"
##  [22] "   MI | 12581589 / R: 1712   ->1696     |N:3  |B    |W    |B    |W    |B    |W    |B    |"
##  [23] "   12 | KENNETH J TACK                  |4.5  |W  42|W  33|D   5|W  38|H    |D   1|L   3|"
##  [24] "   MI | 12681257 / R: 1663   ->1670     |N:3  |W    |B    |W    |B    |     |W    |B    |"
##  [25] "   13 | TORRANCE HENRY JR               |4.5  |W  36|W  27|L   7|D   5|W  33|L   3|W  32|"
##  [26] "   MI | 15082995 / R: 1666   ->1662     |N:3  |B    |W    |B    |B    |W    |W    |B    |"
##  [27] "   14 | BRADLEY SHAW                    |4.5  |W  54|W  44|W   8|L   1|D  27|L   5|W  31|"
##  [28] "   MI | 10131499 / R: 1610   ->1618     |N:3  |W    |B    |W    |W    |B    |B    |W    |"
##  [29] "   15 | ZACHARY JAMES HOUGHTON          |4.5  |D  19|L  16|W  30|L  22|W  54|W  33|W  38|"
##  [30] "   MI | 15619130 / R: 1220P13->1416P20  |N:3  |B    |B    |W    |W    |B    |B    |W    |"
##  [31] "   16 | MIKE NIKITIN                    |4.0  |D  10|W  15|H    |W  39|L   2|W  36|U    |"
##  [32] "   MI | 10295068 / R: 1604   ->1613     |N:3  |B    |W    |     |B    |W    |B    |     |"
##  [33] "   17 | RONALD GRZEGORCZYK              |4.0  |W  48|W  41|L  26|L   2|W  23|W  22|L   5|"
##  [34] "   MI | 10297702 / R: 1629   ->1610     |N:3  |W    |B    |W    |B    |W    |B    |W    |"
##  [35] "   18 | DAVID SUNDEEN                   |4.0  |W  47|W   9|L   1|W  32|L  19|W  38|L  10|"
##  [36] "   MI | 11342094 / R: 1600   ->1600     |N:3  |B    |W    |B    |W    |B    |W    |B    |"
##  [37] "   19 | DIPANKAR ROY                    |4.0  |D  15|W  10|W  52|D  28|W  18|L   4|L   8|"
##  [38] "   MI | 14862333 / R: 1564   ->1570     |N:3  |W    |B    |W    |B    |W    |W    |B    |"
##  [39] "   20 | JASON ZHENG                     |4.0  |L  40|W  49|W  23|W  41|W  28|L   2|L   9|"
##  [40] "   MI | 14529060 / R: 1595   ->1569     |N:4  |W    |B    |W    |B    |W    |B    |W    |"
##  [41] "   21 | DINH DANG BUI                   |4.0  |W  43|L   1|W  47|L   3|W  40|W  39|L   6|"
##  [42] "   ON | 15495066 / R: 1563P22->1562     |N:3  |B    |W    |B    |W    |W    |B    |W    |"
##  [43] "   22 | EUGENE L MCCLURE                |4.0  |W  64|D  52|L  28|W  15|H    |L  17|W  40|"
##  [44] "   MI | 12405534 / R: 1555   ->1529     |N:4  |W    |B    |W    |B    |     |W    |B    |"
##  [45] "   23 | ALAN BUI                        |4.0  |L   4|W  43|L  20|W  58|L  17|W  37|W  46|"
##  [46] "   ON | 15030142 / R: 1363   ->1371     |     |B    |W    |B    |W    |B    |W    |B    |"
##  [47] "   24 | MICHAEL R ALDRICH               |4.0  |L  28|L  47|W  43|L  25|W  60|W  44|W  39|"
##  [48] "   MI | 13469010 / R: 1229   ->1300     |N:4  |B    |W    |B    |B    |W    |W    |B    |"
##  [49] "   25 | LOREN SCHWIEBERT                |3.5  |L   9|W  53|L   3|W  24|D  34|L  10|W  47|"
##  [50] "   MI | 12486656 / R: 1745   ->1681     |N:4  |B    |W    |B    |W    |B    |W    |B    |"
##  [51] "   26 | MAX ZHU                         |3.5  |W  49|W  40|W  17|L   4|L   9|D  32|L  11|"
##  [52] "   ON | 15131520 / R: 1579   ->1564     |N:4  |B    |W    |B    |W    |B    |W    |W    |"
##  [53] "   27 | GAURAV GIDWANI                  |3.5  |W  51|L  13|W  46|W  37|D  14|L   6|U    |"
##  [54] "   MI | 14476567 / R: 1552   ->1539     |N:4  |W    |B    |W    |B    |W    |B    |     |"
##  [55] "   28 | SOFIA ADINA STANESCU-BELLU      |3.5  |W  24|D   4|W  22|D  19|L  20|L   8|D  36|"
##  [56] "   MI | 14882954 / R: 1507   ->1513     |N:3  |W    |W    |B    |W    |B    |B    |W    |"
##  [57] "   29 | CHIEDOZIE OKORIE                |3.5  |W  50|D   6|L  38|L  34|W  52|W  48|U    |"
##  [58] "   MI | 15323285 / R: 1602P6 ->1508P12  |N:4  |B    |W    |B    |W    |W    |B    |     |"
##  [59] "   30 | GEORGE AVERY JONES              |3.5  |L  52|D  64|L  15|W  55|L  31|W  61|W  50|"
##  [60] "   ON | 12577178 / R: 1522   ->1444     |     |W    |B    |B    |W    |W    |B    |B    |"
##  [61] "   31 | RISHI SHETTY                    |3.5  |L  58|D  55|W  64|L  10|W  30|W  50|L  14|"
##  [62] "   MI | 15131618 / R: 1494   ->1444     |     |B    |W    |B    |W    |B    |W    |B    |"
##  [63] "   32 | JOSHUA PHILIP MATHEWS           |3.5  |W  61|L   8|W  44|L  18|W  51|D  26|L  13|"
##  [64] "   ON | 14073750 / R: 1441   ->1433     |N:4  |W    |B    |W    |B    |W    |B    |W    |"
##  [65] "   33 | JADE GE                         |3.5  |W  60|L  12|W  50|D  36|L  13|L  15|W  51|"
##  [66] "   MI | 14691842 / R: 1449   ->1421     |     |B    |W    |B    |W    |B    |W    |B    |"
##  [67] "   34 | MICHAEL JEFFERY THOMAS          |3.5  |L   6|W  60|L  37|W  29|D  25|L  11|W  52|"
##  [68] "   MI | 15051807 / R: 1399   ->1400     |     |B    |W    |B    |B    |W    |B    |W    |"
##  [69] "   35 | JOSHUA DAVID LEE                |3.5  |L  46|L  38|W  56|L   6|W  57|D  52|W  48|"
##  [70] "   MI | 14601397 / R: 1438   ->1392     |     |W    |W    |B    |W    |B    |B    |W    |"
##  [71] "   36 | SIDDHARTH JHA                   |3.5  |L  13|W  57|W  51|D  33|H    |L  16|D  28|"
##  [72] "   MI | 14773163 / R: 1355   ->1367     |N:4  |W    |B    |W    |B    |     |W    |B    |"
##  [73] "   37 | AMIYATOSH PWNANANDAM            |3.5  |B    |L   5|W  34|L  27|H    |L  23|W  61|"
##  [74] "   MI | 15489571 / R:  980P12->1077P17  |     |     |B    |W    |W    |     |B    |W    |"
##  [75] "   38 | BRIAN LIU                       |3.0  |D  11|W  35|W  29|L  12|H    |L  18|L  15|"
##  [76] "   MI | 15108523 / R: 1423   ->1439     |N:4  |W    |B    |W    |W    |     |B    |B    |"
##  [77] "   39 | JOEL R HENDON                   |3.0  |L   1|W  54|W  40|L  16|W  44|L  21|L  24|"
##  [78] "   MI | 12923035 / R: 1436P23->1413     |N:4  |B    |W    |B    |W    |B    |W    |W    |"
##  [79] "   40 | FOREST ZHANG                    |3.0  |W  20|L  26|L  39|W  59|L  21|W  56|L  22|"
##  [80] "   MI | 14892710 / R: 1348   ->1346     |     |B    |B    |W    |W    |B    |W    |W    |"
##  [81] "   41 | KYLE WILLIAM MURPHY             |3.0  |W  59|L  17|W  58|L  20|X    |U    |U    |"
##  [82] "   MI | 15761443 / R: 1403P5 ->1341P9   |     |B    |W    |B    |W    |     |     |     |"
##  [83] "   42 | JARED GE                        |3.0  |L  12|L  50|L  57|D  60|D  61|W  64|W  56|"
##  [84] "   MI | 14462326 / R: 1332   ->1256     |     |B    |W    |B    |B    |W    |W    |B    |"
##  [85] "   43 | ROBERT GLEN VASEY               |3.0  |L  21|L  23|L  24|W  63|W  59|L  46|W  55|"
##  [86] "   MI | 14101068 / R: 1283   ->1244     |     |W    |B    |W    |W    |B    |B    |W    |"
##  [87] "   44 | JUSTIN D SCHILLING              |3.0  |B    |L  14|L  32|W  53|L  39|L  24|W  59|"
##  [88] "   MI | 15323504 / R: 1199   ->1199     |     |     |W    |B    |B    |W    |B    |W    |"
##  [89] "   45 | DEREK YAN                       |3.0  |L   5|L  51|D  60|L  56|W  63|D  55|W  58|"
##  [90] "   MI | 15372807 / R: 1242   ->1191     |     |W    |B    |W    |B    |W    |B    |W    |"
##  [91] "   46 | JACOB ALEXANDER LAVALLEY        |3.0  |W  35|L   7|L  27|L  50|W  64|W  43|L  23|"
##  [92] "   MI | 15490981 / R:  377P3 ->1076P10  |     |B    |W    |B    |W    |B    |W    |W    |"
##  [93] "   47 | ERIC WRIGHT                     |2.5  |L  18|W  24|L  21|W  61|L   8|D  51|L  25|"
##  [94] "   MI | 12533115 / R: 1362   ->1341     |     |W    |B    |W    |B    |W    |B    |W    |"
##  [95] "   48 | DANIEL KHAIN                    |2.5  |L  17|W  63|H    |D  52|H    |L  29|L  35|"
##  [96] "   MI | 14369165 / R: 1382   ->1335     |     |B    |W    |     |B    |     |W    |B    |"
##  [97] "   49 | MICHAEL J MARTIN                |2.5  |L  26|L  20|D  63|D  64|W  58|H    |U    |"
##  [98] "   MI | 12531685 / R: 1291P12->1259P17  |     |W    |W    |B    |W    |B    |     |     |"
##  [99] "   50 | SHIVAM JHA                      |2.5  |L  29|W  42|L  33|W  46|H    |L  31|L  30|"
## [100] "   MI | 14773178 / R: 1056   ->1111     |     |W    |B    |W    |B    |     |B    |W    |"
## [101] "   51 | TEJAS AYYAGARI                  |2.5  |L  27|W  45|L  36|W  57|L  32|D  47|L  33|"
## [102] "   MI | 15205474 / R: 1011   ->1097     |     |B    |W    |B    |W    |B    |W    |W    |"
## [103] "   52 | ETHAN GUO                       |2.5  |W  30|D  22|L  19|D  48|L  29|D  35|L  34|"
## [104] "   MI | 14918803 / R:  935   ->1092     |N:4  |B    |W    |B    |W    |B    |W    |B    |"
## [105] "   53 | JOSE C YBARRA                   |2.0  |H    |L  25|H    |L  44|U    |W  57|U    |"
## [106] "   MI | 12578849 / R: 1393   ->1359     |     |     |B    |     |W    |     |W    |     |"
## [107] "   54 | LARRY HODGE                     |2.0  |L  14|L  39|L  61|B    |L  15|L  59|W  64|"
## [108] "   MI | 12836773 / R: 1270   ->1200     |     |B    |B    |W    |     |W    |B    |W    |"
## [109] "   55 | ALEX KONG                       |2.0  |L  62|D  31|L  10|L  30|B    |D  45|L  43|"
## [110] "   MI | 15412571 / R: 1186   ->1163     |     |W    |B    |W    |B    |     |W    |B    |"
## [111] "   56 | MARISA RICCI                    |2.0  |H    |L  11|L  35|W  45|H    |L  40|L  42|"
## [112] "   MI | 14679887 / R: 1153   ->1140     |     |     |B    |W    |W    |     |B    |W    |"
## [113] "   57 | MICHAEL LU                      |2.0  |L   7|L  36|W  42|L  51|L  35|L  53|B    |"
## [114] "   MI | 15113330 / R: 1092   ->1079     |     |B    |W    |W    |B    |W    |B    |     |"
## [115] "   58 | VIRAJ MOHILE                    |2.0  |W  31|L   2|L  41|L  23|L  49|B    |L  45|"
## [116] "   MI | 14700365 / R:  917   -> 941     |     |W    |B    |W    |B    |W    |     |B    |"
## [117] "   59 | SEAN M MC CORMICK               |2.0  |L  41|B    |L   9|L  40|L  43|W  54|L  44|"
## [118] "   MI | 12841036 / R:  853   -> 878     |     |W    |     |B    |B    |W    |W    |B    |"
## [119] "   60 | JULIA SHEN                      |1.5  |L  33|L  34|D  45|D  42|L  24|H    |U    |"
## [120] "   MI | 14579262 / R:  967   -> 984     |     |W    |B    |B    |W    |B    |     |     |"
## [121] "   61 | JEZZEL FARKAS                   |1.5  |L  32|L   3|W  54|L  47|D  42|L  30|L  37|"
## [122] "   ON | 15771592 / R:  955P11-> 979P18  |     |B    |W    |B    |W    |B    |W    |B    |"
## [123] "   62 | ASHWIN BALAJI                   |1.0  |W  55|U    |U    |U    |U    |U    |U    |"
## [124] "   MI | 15219542 / R: 1530   ->1535     |     |B    |     |     |     |     |     |     |"
## [125] "   63 | THOMAS JOSEPH HOSMER            |1.0  |L   2|L  48|D  49|L  43|L  45|H    |U    |"
## [126] "   MI | 15057092 / R: 1175   ->1125     |     |W    |B    |W    |B    |B    |     |     |"
## [127] "   64 | BEN LI                          |1.0  |L  22|D  30|L  31|D  49|L  46|L  42|L  54|"
## [128] "   MI | 15006561 / R: 1163   ->1112     |     |B    |W    |W    |B    |W    |B    |B    |"
raw_txt_df <- as.data.frame(raw_txt_s2, stringsAsFactors = FALSE) ## Create DataFrame

Since data for each player is on two rows we need to split them apart and them combine them into one row. I used the examples shown here to create even and odd rows:

evenr <- raw_txt_df[seq_len(nrow(raw_txt_df)) %% 2 == 1, ] ## grab even rows
oddr <-  raw_txt_df[!seq_len(nrow(raw_txt_df)) %% 2 == 1, ] ## grab odd rows

raw_txt_final <- as.data.frame(cbind(evenr, oddr), stringsAsFactors = FALSE) ## combine even and odd together

Extract all needed pieces of information from strings

Next extract all the pieces of information we want from each row.

id <- as.numeric(str_extract(raw_txt_final$evenr, "[:space:]+[:digit:]+|"), "[0-9]")
name <-  str_trim(str_replace_all(str_extract(raw_txt_final$evenr, "[:alpha:].+?[|]"), "[|]", ""))
state <-  str_trim(str_replace_all(str_extract(raw_txt_final$oddr, "[:alpha:].+?[|]"), "[|]", ""))
total_points <- as.numeric(str_trim(str_replace_all(str_extract(raw_txt_final$evenr, "[|]+[:digit:][.][:digit:].+?"), "[|]", "")), "[0-9]")
player_pre_rating <- as.numeric(str_replace_all(str_extract(raw_txt_final$oddr, "R:[:space:]+[:digit:]+"), "R:", ""), "[0-9]")

To extract the ID for each opponent I first used a string locate function to find the position of each | and then used those positions to extract each players ID.

str_locate_all(raw_txt_final$evenr, "[|]") # find position of | for opponent ID
## [[1]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[2]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[3]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[4]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[5]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[6]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[7]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[8]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[9]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[10]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[11]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[12]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[13]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[14]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[15]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[16]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[17]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[18]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[19]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[20]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[21]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[22]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[23]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[24]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[25]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[26]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[27]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[28]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[29]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[30]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[31]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[32]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[33]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[34]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[35]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[36]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[37]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[38]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[39]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[40]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[41]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[42]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[43]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[44]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[45]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[46]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[47]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[48]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[49]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[50]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[51]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[52]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[53]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[54]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[55]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[56]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[57]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[58]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[59]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[60]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[61]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[62]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[63]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
## 
## [[64]]
##       start end
##  [1,]     7   7
##  [2,]    41  41
##  [3,]    47  47
##  [4,]    53  53
##  [5,]    59  59
##  [6,]    65  65
##  [7,]    71  71
##  [8,]    77  77
##  [9,]    83  83
## [10,]    89  89
start end
[1,]     7   7
[2,]    41  41
[3,]    47  47 #opp 1
[4,]    53  53 #opp 2
[5,]    59  59 #opp 3
[6,]    65  65 #opp 4
[7,]    71  71 #opp 5
[8,]    77  77 #opp 6
[9,]    83  83 #opp 7
[10,]    89  89 #end
opp1 <- as.numeric(str_sub(raw_txt_final$evenr, start = 50, end = 52), "[0-9]")
opp2 <- as.numeric(str_sub(raw_txt_final$evenr, start = 55, end = 58), "[0-9]")
opp3 <- as.numeric(str_sub(raw_txt_final$evenr, start = 61, end = 64), "[0-9]")
opp4 <- as.numeric(str_sub(raw_txt_final$evenr, start = 67, end = 70), "[0-9]")
opp5 <- as.numeric(str_sub(raw_txt_final$evenr, start = 73, end = 76), "[0-9]")
opp6 <- as.numeric(str_sub(raw_txt_final$evenr, start = 79, end = 82), "[0-9]")
opp7 <- as.numeric(str_sub(raw_txt_final$evenr, start = 85, end = 88), "[0-9]")

Recombine pieces back into one dataframe

Now we combine all the information we got from each step into one data-frame.

final_df <- data.frame(id, name, state, total_points, player_pre_rating, opp1, opp2, opp3, opp4, opp5, opp6, opp7, stringsAsFactors = FALSE)

To capture the opponents average pre rating I used SQL with multiple sub queries.

final_df_opp <- sqldf("SELECT a.*, 
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp1 = b.id) AS opp1_rating,
                      (SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp2 = b.id) AS opp2_rating,
                      (SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp3 = b.id) AS opp3_rating, 
                      (SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp4 = b.id) AS opp4_rating, 
                      (SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp5 = b.id) AS opp5_rating, 
                      (SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp6 = b.id) AS opp6_rating, 
                      (SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp7 = b.id) AS opp7_rating 
                      
                      FROM final_df AS a;", drv='SQLite')

Now we calculate the average pre chess rating for each opponent and add it to our data-frame.

final_df_opp$oppavgrating <- round(rowSums(final_df_opp[, c(13:19)], na.rm = TRUE) / rowSums(!is.na(final_df_opp[, c(13:19)])), 0)

Export information to CSV

Finally we can export this information to CSV

csvexport <- data.frame(final_df_opp$id, final_df_opp$name, final_df_opp$state, final_df_opp$total_points, final_df_opp$player_pre_rating, final_df_opp$oppavgrating)

write.csv(csvexport, file = "Project1BMO.csv", row.names=FALSE)