library(stringr) ## used for string extraction
library(sqldf) ## used for SQL query
## Loading required package: gsubfn
## Loading required package: proto
## Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
## dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
## Referenced from: /Library/Frameworks/R.framework/Resources/modules//R_X11.so
## Reason: image not found
## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/
## R.framework/Resources/library/tcltk/libs//tcltk.so'' had status 1
## Could not load tcltk. Will use slower R code instead.
## Loading required package: RSQLite
## Loading required package: DBI
In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players:
For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605
1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.
First, read file into R
raw_txt <- readLines(con = "tournamentinfo.txt")
## Warning in readLines(con = "tournamentinfo.txt"): incomplete final line
## found on 'tournamentinfo.txt'
head(raw_txt, 15)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
Subset data to eliminate rows with all dashes and 2 header rows
raw_txt_s1 <- subset(raw_txt, !grepl('^-+$', raw_txt)) ## Remove rows with ----
head(raw_txt_s1, 15)
## [1] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [2] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [3] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [4] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [5] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [6] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [7] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [8] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [9] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [10] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [11] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [12] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [13] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [14] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [15] " 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
raw_txt_s2 <- raw_txt_s1[3:130] ## Remove header rows
raw_txt_s2
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [4] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [5] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [6] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [7] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [8] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [9] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [10] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [11] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [12] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [13] " 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
## [14] " MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |"
## [15] " 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"
## [16] " MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |"
## [17] " 9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|"
## [18] " ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |"
## [19] " 10 | ANVIT RAO |5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|"
## [20] " MI | 14150362 / R: 1365 ->1544 |N:3 |W |W |B |B |W |B |W |"
## [21] " 11 | CAMERON WILLIAM MC LEMAN |4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|"
## [22] " MI | 12581589 / R: 1712 ->1696 |N:3 |B |W |B |W |B |W |B |"
## [23] " 12 | KENNETH J TACK |4.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|"
## [24] " MI | 12681257 / R: 1663 ->1670 |N:3 |W |B |W |B | |W |B |"
## [25] " 13 | TORRANCE HENRY JR |4.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|"
## [26] " MI | 15082995 / R: 1666 ->1662 |N:3 |B |W |B |B |W |W |B |"
## [27] " 14 | BRADLEY SHAW |4.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|"
## [28] " MI | 10131499 / R: 1610 ->1618 |N:3 |W |B |W |W |B |B |W |"
## [29] " 15 | ZACHARY JAMES HOUGHTON |4.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|"
## [30] " MI | 15619130 / R: 1220P13->1416P20 |N:3 |B |B |W |W |B |B |W |"
## [31] " 16 | MIKE NIKITIN |4.0 |D 10|W 15|H |W 39|L 2|W 36|U |"
## [32] " MI | 10295068 / R: 1604 ->1613 |N:3 |B |W | |B |W |B | |"
## [33] " 17 | RONALD GRZEGORCZYK |4.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|"
## [34] " MI | 10297702 / R: 1629 ->1610 |N:3 |W |B |W |B |W |B |W |"
## [35] " 18 | DAVID SUNDEEN |4.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|"
## [36] " MI | 11342094 / R: 1600 ->1600 |N:3 |B |W |B |W |B |W |B |"
## [37] " 19 | DIPANKAR ROY |4.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|"
## [38] " MI | 14862333 / R: 1564 ->1570 |N:3 |W |B |W |B |W |W |B |"
## [39] " 20 | JASON ZHENG |4.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|"
## [40] " MI | 14529060 / R: 1595 ->1569 |N:4 |W |B |W |B |W |B |W |"
## [41] " 21 | DINH DANG BUI |4.0 |W 43|L 1|W 47|L 3|W 40|W 39|L 6|"
## [42] " ON | 15495066 / R: 1563P22->1562 |N:3 |B |W |B |W |W |B |W |"
## [43] " 22 | EUGENE L MCCLURE |4.0 |W 64|D 52|L 28|W 15|H |L 17|W 40|"
## [44] " MI | 12405534 / R: 1555 ->1529 |N:4 |W |B |W |B | |W |B |"
## [45] " 23 | ALAN BUI |4.0 |L 4|W 43|L 20|W 58|L 17|W 37|W 46|"
## [46] " ON | 15030142 / R: 1363 ->1371 | |B |W |B |W |B |W |B |"
## [47] " 24 | MICHAEL R ALDRICH |4.0 |L 28|L 47|W 43|L 25|W 60|W 44|W 39|"
## [48] " MI | 13469010 / R: 1229 ->1300 |N:4 |B |W |B |B |W |W |B |"
## [49] " 25 | LOREN SCHWIEBERT |3.5 |L 9|W 53|L 3|W 24|D 34|L 10|W 47|"
## [50] " MI | 12486656 / R: 1745 ->1681 |N:4 |B |W |B |W |B |W |B |"
## [51] " 26 | MAX ZHU |3.5 |W 49|W 40|W 17|L 4|L 9|D 32|L 11|"
## [52] " ON | 15131520 / R: 1579 ->1564 |N:4 |B |W |B |W |B |W |W |"
## [53] " 27 | GAURAV GIDWANI |3.5 |W 51|L 13|W 46|W 37|D 14|L 6|U |"
## [54] " MI | 14476567 / R: 1552 ->1539 |N:4 |W |B |W |B |W |B | |"
## [55] " 28 | SOFIA ADINA STANESCU-BELLU |3.5 |W 24|D 4|W 22|D 19|L 20|L 8|D 36|"
## [56] " MI | 14882954 / R: 1507 ->1513 |N:3 |W |W |B |W |B |B |W |"
## [57] " 29 | CHIEDOZIE OKORIE |3.5 |W 50|D 6|L 38|L 34|W 52|W 48|U |"
## [58] " MI | 15323285 / R: 1602P6 ->1508P12 |N:4 |B |W |B |W |W |B | |"
## [59] " 30 | GEORGE AVERY JONES |3.5 |L 52|D 64|L 15|W 55|L 31|W 61|W 50|"
## [60] " ON | 12577178 / R: 1522 ->1444 | |W |B |B |W |W |B |B |"
## [61] " 31 | RISHI SHETTY |3.5 |L 58|D 55|W 64|L 10|W 30|W 50|L 14|"
## [62] " MI | 15131618 / R: 1494 ->1444 | |B |W |B |W |B |W |B |"
## [63] " 32 | JOSHUA PHILIP MATHEWS |3.5 |W 61|L 8|W 44|L 18|W 51|D 26|L 13|"
## [64] " ON | 14073750 / R: 1441 ->1433 |N:4 |W |B |W |B |W |B |W |"
## [65] " 33 | JADE GE |3.5 |W 60|L 12|W 50|D 36|L 13|L 15|W 51|"
## [66] " MI | 14691842 / R: 1449 ->1421 | |B |W |B |W |B |W |B |"
## [67] " 34 | MICHAEL JEFFERY THOMAS |3.5 |L 6|W 60|L 37|W 29|D 25|L 11|W 52|"
## [68] " MI | 15051807 / R: 1399 ->1400 | |B |W |B |B |W |B |W |"
## [69] " 35 | JOSHUA DAVID LEE |3.5 |L 46|L 38|W 56|L 6|W 57|D 52|W 48|"
## [70] " MI | 14601397 / R: 1438 ->1392 | |W |W |B |W |B |B |W |"
## [71] " 36 | SIDDHARTH JHA |3.5 |L 13|W 57|W 51|D 33|H |L 16|D 28|"
## [72] " MI | 14773163 / R: 1355 ->1367 |N:4 |W |B |W |B | |W |B |"
## [73] " 37 | AMIYATOSH PWNANANDAM |3.5 |B |L 5|W 34|L 27|H |L 23|W 61|"
## [74] " MI | 15489571 / R: 980P12->1077P17 | | |B |W |W | |B |W |"
## [75] " 38 | BRIAN LIU |3.0 |D 11|W 35|W 29|L 12|H |L 18|L 15|"
## [76] " MI | 15108523 / R: 1423 ->1439 |N:4 |W |B |W |W | |B |B |"
## [77] " 39 | JOEL R HENDON |3.0 |L 1|W 54|W 40|L 16|W 44|L 21|L 24|"
## [78] " MI | 12923035 / R: 1436P23->1413 |N:4 |B |W |B |W |B |W |W |"
## [79] " 40 | FOREST ZHANG |3.0 |W 20|L 26|L 39|W 59|L 21|W 56|L 22|"
## [80] " MI | 14892710 / R: 1348 ->1346 | |B |B |W |W |B |W |W |"
## [81] " 41 | KYLE WILLIAM MURPHY |3.0 |W 59|L 17|W 58|L 20|X |U |U |"
## [82] " MI | 15761443 / R: 1403P5 ->1341P9 | |B |W |B |W | | | |"
## [83] " 42 | JARED GE |3.0 |L 12|L 50|L 57|D 60|D 61|W 64|W 56|"
## [84] " MI | 14462326 / R: 1332 ->1256 | |B |W |B |B |W |W |B |"
## [85] " 43 | ROBERT GLEN VASEY |3.0 |L 21|L 23|L 24|W 63|W 59|L 46|W 55|"
## [86] " MI | 14101068 / R: 1283 ->1244 | |W |B |W |W |B |B |W |"
## [87] " 44 | JUSTIN D SCHILLING |3.0 |B |L 14|L 32|W 53|L 39|L 24|W 59|"
## [88] " MI | 15323504 / R: 1199 ->1199 | | |W |B |B |W |B |W |"
## [89] " 45 | DEREK YAN |3.0 |L 5|L 51|D 60|L 56|W 63|D 55|W 58|"
## [90] " MI | 15372807 / R: 1242 ->1191 | |W |B |W |B |W |B |W |"
## [91] " 46 | JACOB ALEXANDER LAVALLEY |3.0 |W 35|L 7|L 27|L 50|W 64|W 43|L 23|"
## [92] " MI | 15490981 / R: 377P3 ->1076P10 | |B |W |B |W |B |W |W |"
## [93] " 47 | ERIC WRIGHT |2.5 |L 18|W 24|L 21|W 61|L 8|D 51|L 25|"
## [94] " MI | 12533115 / R: 1362 ->1341 | |W |B |W |B |W |B |W |"
## [95] " 48 | DANIEL KHAIN |2.5 |L 17|W 63|H |D 52|H |L 29|L 35|"
## [96] " MI | 14369165 / R: 1382 ->1335 | |B |W | |B | |W |B |"
## [97] " 49 | MICHAEL J MARTIN |2.5 |L 26|L 20|D 63|D 64|W 58|H |U |"
## [98] " MI | 12531685 / R: 1291P12->1259P17 | |W |W |B |W |B | | |"
## [99] " 50 | SHIVAM JHA |2.5 |L 29|W 42|L 33|W 46|H |L 31|L 30|"
## [100] " MI | 14773178 / R: 1056 ->1111 | |W |B |W |B | |B |W |"
## [101] " 51 | TEJAS AYYAGARI |2.5 |L 27|W 45|L 36|W 57|L 32|D 47|L 33|"
## [102] " MI | 15205474 / R: 1011 ->1097 | |B |W |B |W |B |W |W |"
## [103] " 52 | ETHAN GUO |2.5 |W 30|D 22|L 19|D 48|L 29|D 35|L 34|"
## [104] " MI | 14918803 / R: 935 ->1092 |N:4 |B |W |B |W |B |W |B |"
## [105] " 53 | JOSE C YBARRA |2.0 |H |L 25|H |L 44|U |W 57|U |"
## [106] " MI | 12578849 / R: 1393 ->1359 | | |B | |W | |W | |"
## [107] " 54 | LARRY HODGE |2.0 |L 14|L 39|L 61|B |L 15|L 59|W 64|"
## [108] " MI | 12836773 / R: 1270 ->1200 | |B |B |W | |W |B |W |"
## [109] " 55 | ALEX KONG |2.0 |L 62|D 31|L 10|L 30|B |D 45|L 43|"
## [110] " MI | 15412571 / R: 1186 ->1163 | |W |B |W |B | |W |B |"
## [111] " 56 | MARISA RICCI |2.0 |H |L 11|L 35|W 45|H |L 40|L 42|"
## [112] " MI | 14679887 / R: 1153 ->1140 | | |B |W |W | |B |W |"
## [113] " 57 | MICHAEL LU |2.0 |L 7|L 36|W 42|L 51|L 35|L 53|B |"
## [114] " MI | 15113330 / R: 1092 ->1079 | |B |W |W |B |W |B | |"
## [115] " 58 | VIRAJ MOHILE |2.0 |W 31|L 2|L 41|L 23|L 49|B |L 45|"
## [116] " MI | 14700365 / R: 917 -> 941 | |W |B |W |B |W | |B |"
## [117] " 59 | SEAN M MC CORMICK |2.0 |L 41|B |L 9|L 40|L 43|W 54|L 44|"
## [118] " MI | 12841036 / R: 853 -> 878 | |W | |B |B |W |W |B |"
## [119] " 60 | JULIA SHEN |1.5 |L 33|L 34|D 45|D 42|L 24|H |U |"
## [120] " MI | 14579262 / R: 967 -> 984 | |W |B |B |W |B | | |"
## [121] " 61 | JEZZEL FARKAS |1.5 |L 32|L 3|W 54|L 47|D 42|L 30|L 37|"
## [122] " ON | 15771592 / R: 955P11-> 979P18 | |B |W |B |W |B |W |B |"
## [123] " 62 | ASHWIN BALAJI |1.0 |W 55|U |U |U |U |U |U |"
## [124] " MI | 15219542 / R: 1530 ->1535 | |B | | | | | | |"
## [125] " 63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |"
## [126] " MI | 15057092 / R: 1175 ->1125 | |W |B |W |B |B | | |"
## [127] " 64 | BEN LI |1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|"
## [128] " MI | 15006561 / R: 1163 ->1112 | |B |W |W |B |W |B |B |"
raw_txt_df <- as.data.frame(raw_txt_s2, stringsAsFactors = FALSE) ## Create DataFrame
Since data for each player is on two rows we need to split them apart and them combine them into one row. I used the examples shown here to create even and odd rows:
evenr <- raw_txt_df[seq_len(nrow(raw_txt_df)) %% 2 == 1, ] ## grab even rows
oddr <- raw_txt_df[!seq_len(nrow(raw_txt_df)) %% 2 == 1, ] ## grab odd rows
raw_txt_final <- as.data.frame(cbind(evenr, oddr), stringsAsFactors = FALSE) ## combine even and odd together
Next extract all the pieces of information we want from each row.
id <- as.numeric(str_extract(raw_txt_final$evenr, "[:space:]+[:digit:]+|"), "[0-9]")
name <- str_trim(str_replace_all(str_extract(raw_txt_final$evenr, "[:alpha:].+?[|]"), "[|]", ""))
state <- str_trim(str_replace_all(str_extract(raw_txt_final$oddr, "[:alpha:].+?[|]"), "[|]", ""))
total_points <- as.numeric(str_trim(str_replace_all(str_extract(raw_txt_final$evenr, "[|]+[:digit:][.][:digit:].+?"), "[|]", "")), "[0-9]")
player_pre_rating <- as.numeric(str_replace_all(str_extract(raw_txt_final$oddr, "R:[:space:]+[:digit:]+"), "R:", ""), "[0-9]")
To extract the ID for each opponent I first used a string locate function to find the position of each | and then used those positions to extract each players ID.
str_locate_all(raw_txt_final$evenr, "[|]") # find position of | for opponent ID
## [[1]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[2]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[3]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[4]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[5]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[6]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[7]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[8]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[9]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[10]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[11]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[12]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[13]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[14]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[15]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[16]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[17]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[18]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[19]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[20]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[21]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[22]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[23]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[24]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[25]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[26]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[27]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[28]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[29]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[30]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[31]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[32]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[33]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[34]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[35]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[36]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[37]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[38]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[39]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[40]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[41]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[42]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[43]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[44]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[45]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[46]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[47]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[48]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[49]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[50]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[51]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[52]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[53]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[54]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[55]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[56]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[57]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[58]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[59]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[60]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[61]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[62]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[63]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
##
## [[64]]
## start end
## [1,] 7 7
## [2,] 41 41
## [3,] 47 47
## [4,] 53 53
## [5,] 59 59
## [6,] 65 65
## [7,] 71 71
## [8,] 77 77
## [9,] 83 83
## [10,] 89 89
start end
[1,] 7 7
[2,] 41 41
[3,] 47 47 #opp 1
[4,] 53 53 #opp 2
[5,] 59 59 #opp 3
[6,] 65 65 #opp 4
[7,] 71 71 #opp 5
[8,] 77 77 #opp 6
[9,] 83 83 #opp 7
[10,] 89 89 #end
opp1 <- as.numeric(str_sub(raw_txt_final$evenr, start = 50, end = 52), "[0-9]")
opp2 <- as.numeric(str_sub(raw_txt_final$evenr, start = 55, end = 58), "[0-9]")
opp3 <- as.numeric(str_sub(raw_txt_final$evenr, start = 61, end = 64), "[0-9]")
opp4 <- as.numeric(str_sub(raw_txt_final$evenr, start = 67, end = 70), "[0-9]")
opp5 <- as.numeric(str_sub(raw_txt_final$evenr, start = 73, end = 76), "[0-9]")
opp6 <- as.numeric(str_sub(raw_txt_final$evenr, start = 79, end = 82), "[0-9]")
opp7 <- as.numeric(str_sub(raw_txt_final$evenr, start = 85, end = 88), "[0-9]")
Now we combine all the information we got from each step into one data-frame.
final_df <- data.frame(id, name, state, total_points, player_pre_rating, opp1, opp2, opp3, opp4, opp5, opp6, opp7, stringsAsFactors = FALSE)
To capture the opponents average pre rating I used SQL with multiple sub queries.
final_df_opp <- sqldf("SELECT a.*,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp1 = b.id) AS opp1_rating,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp2 = b.id) AS opp2_rating,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp3 = b.id) AS opp3_rating,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp4 = b.id) AS opp4_rating,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp5 = b.id) AS opp5_rating,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp6 = b.id) AS opp6_rating,
(SELECT b.player_pre_rating FROM final_df AS b WHERE a.opp7 = b.id) AS opp7_rating
FROM final_df AS a;", drv='SQLite')
Now we calculate the average pre chess rating for each opponent and add it to our data-frame.
final_df_opp$oppavgrating <- round(rowSums(final_df_opp[, c(13:19)], na.rm = TRUE) / rowSums(!is.na(final_df_opp[, c(13:19)])), 0)
Finally we can export this information to CSV
csvexport <- data.frame(final_df_opp$id, final_df_opp$name, final_df_opp$state, final_df_opp$total_points, final_df_opp$player_pre_rating, final_df_opp$oppavgrating)
write.csv(csvexport, file = "Project1BMO.csv", row.names=FALSE)