##Load library and data
library(stringr)
# Read the raw cross-table text from the pinned GitHub revision, one character
# string per line. The hosted file does not end with a newline, which makes
# readLines() warn about an "incomplete final line" on every run;
# warn = FALSE suppresses that notice.
chesstourney <- readLines(
  "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/29394c9cb74160fd53931123c2358019208293bb/chesstournament.txt",
  warn = FALSE
)
# Print the first six lines in a reproducible form to inspect the layout.
dput(head(chesstourney))
## c("-----------------------------------------------------------------------------------------", 
## " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| ", 
## " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | ", 
## "-----------------------------------------------------------------------------------------", 
## "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|", 
## "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## )
##Cleanse data: drop header and separator lines, then split each player's two rows into two vectors
# Drop the four-line column header at the top of the file.
chesstourneynohead <- chesstourney[-(1:4)]
# Flag the dashed separator lines. The removal uses a logical mask because
# negative indexing with an empty index (x[-integer(0)]) returns an empty
# vector, i.e. it would discard every line if no separator were found.
is_separator <- grepl("^-+", chesstourneynohead)
dashes <- which(is_separator)
chessfinal <- chesstourneynohead[!is_separator]
# The split below relies on each player occupying two consecutive lines;
# stop here with a clear error if the line count is odd.
stopifnot(length(chessfinal) %% 2 == 0)
# v1: first line per player (pair number, name, total points, round results).
# v2: second line per player (state, USCF ID, ratings, colours).
v1 <- chessfinal[c(TRUE, FALSE)]
v2 <- chessfinal[c(FALSE, TRUE)]
head(v1)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [4] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [5] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
## [6] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
head(v2)
## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [4] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
## [5] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [6] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"
##Extract Data > create data frame
# Each field is pulled with an extractor that returns exactly one value per
# input line (NA or "" when the field is absent), so the four vectors always
# have one entry per player and stay aligned row-for-row. Flattening
# str_extract_all() results instead would shift every later row if any line
# produced zero or several matches.

# Player name: the second "|"-delimited field of the first line, trimmed.
# Splitting on the delimiter keeps names containing hyphens, apostrophes or
# periods intact.
name <- str_trim(str_split_fixed(v1, "\\|", 3)[, 2])
str(name)
##  chr [1:64] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" ...
# Total points: first "digits.digit" token on the first line, kept as text.
# The dot is escaped so that "/" can no longer match in its place.
totpts <- str_extract(v1, "[[:digit:]]+\\.[[:digit:]]")
str(totpts)
##  chr [1:64] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" ...
# State: the two upper-case letters in the first field of the second line.
# Any two-letter code is accepted rather than a fixed MI/ON/OH list.
state <- str_match(v2, "^\\s*([[:upper:]]{2})\\s*\\|")[, 2]
str(state)
##  chr [1:64] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" ...
# Pre-tournament rating: the run of digits after "R:". Anything following the
# digits (e.g. a provisional-rating suffix) is ignored.
prerate <- as.numeric(str_match(v2, "R:\\s*([[:digit:]]+)")[, 2])
str(prerate)
##  num [1:64] 1794 1553 1384 1716 1655 ...
# One row per player, in pair-number order.
chessrate <- data.frame(name, state, totpts, prerate)
list(chessrate)
## [[1]]
##                          name state totpts prerate
## 1                    GARY HUA    ON    6.0    1794
## 2             DAKSHESH DARURI    MI    6.0    1553
## 3                ADITYA BAJAJ    MI    6.0    1384
## 4         PATRICK H SCHILLING    MI    5.5    1716
## 5                  HANSHI ZUO    MI    5.5    1655
## 6                 HANSEN SONG    OH    5.0    1686
## 7           GARY DEE SWATHELL    MI    5.0    1649
## 8            EZEKIEL HOUGHTON    MI    5.0    1641
## 9                 STEFANO LEE    ON    5.0    1411
## 10                  ANVIT RAO    MI    5.0    1365
## 11   CAMERON WILLIAM MC LEMAN    MI    4.5    1712
## 12             KENNETH J TACK    MI    4.5    1663
## 13          TORRANCE HENRY JR    MI    4.5    1666
## 14               BRADLEY SHAW    MI    4.5    1610
## 15     ZACHARY JAMES HOUGHTON    MI    4.5    1220
## 16               MIKE NIKITIN    MI    4.0    1604
## 17         RONALD GRZEGORCZYK    MI    4.0    1629
## 18              DAVID SUNDEEN    MI    4.0    1600
## 19               DIPANKAR ROY    MI    4.0    1564
## 20                JASON ZHENG    MI    4.0    1595
## 21              DINH DANG BUI    ON    4.0    1563
## 22           EUGENE L MCCLURE    MI    4.0    1555
## 23                   ALAN BUI    ON    4.0    1363
## 24          MICHAEL R ALDRICH    MI    4.0    1229
## 25           LOREN SCHWIEBERT    MI    3.5    1745
## 26                    MAX ZHU    ON    3.5    1579
## 27             GAURAV GIDWANI    MI    3.5    1552
## 28 SOFIA ADINA STANESCU-BELLU    MI    3.5    1507
## 29           CHIEDOZIE OKORIE    MI    3.5    1602
## 30         GEORGE AVERY JONES    ON    3.5    1522
## 31               RISHI SHETTY    MI    3.5    1494
## 32      JOSHUA PHILIP MATHEWS    ON    3.5    1441
## 33                    JADE GE    MI    3.5    1449
## 34     MICHAEL JEFFERY THOMAS    MI    3.5    1399
## 35           JOSHUA DAVID LEE    MI    3.5    1438
## 36              SIDDHARTH JHA    MI    3.5    1355
## 37       AMIYATOSH PWNANANDAM    MI    3.5     980
## 38                  BRIAN LIU    MI    3.0    1423
## 39              JOEL R HENDON    MI    3.0    1436
## 40               FOREST ZHANG    MI    3.0    1348
## 41        KYLE WILLIAM MURPHY    MI    3.0    1403
## 42                   JARED GE    MI    3.0    1332
## 43          ROBERT GLEN VASEY    MI    3.0    1283
## 44         JUSTIN D SCHILLING    MI    3.0    1199
## 45                  DEREK YAN    MI    3.0    1242
## 46   JACOB ALEXANDER LAVALLEY    MI    3.0     377
## 47                ERIC WRIGHT    MI    2.5    1362
## 48               DANIEL KHAIN    MI    2.5    1382
## 49           MICHAEL J MARTIN    MI    2.5    1291
## 50                 SHIVAM JHA    MI    2.5    1056
## 51             TEJAS AYYAGARI    MI    2.5    1011
## 52                  ETHAN GUO    MI    2.5     935
## 53              JOSE C YBARRA    MI    2.0    1393
## 54                LARRY HODGE    MI    2.0    1270
## 55                  ALEX KONG    MI    2.0    1186
## 56               MARISA RICCI    MI    2.0    1153
## 57                 MICHAEL LU    MI    2.0    1092
## 58               VIRAJ MOHILE    MI    2.0     917
## 59          SEAN M MC CORMICK    MI    2.0     853
## 60                 JULIA SHEN    MI    1.5     967
## 61              JEZZEL FARKAS    ON    1.5     955
## 62              ASHWIN BALAJI    MI    1.0    1530
## 63       THOMAS JOSEPH HOSMER    MI    1.0    1175
## 64                     BEN LI    MI    1.0    1163
##Calculate Opponent Average Rating and append data
# Pair numbers are the row positions of the players. seq_along() yields an
# empty vector for empty input, whereas 1:length(v1) would yield c(1, 0).
id <- seq_along(v1)
# Lookup table pairing each pair number with that player's pre-rating.
opp <- data.frame(id, prerate)
head(opp)
##   id prerate
## 1  1    1794
## 2  2    1553
## 3  3    1384
## 4  4    1716
## 5  5    1655
## 6  6    1686
# For each player, collect the opponent pair numbers: every run of digits that
# sits immediately before a "|" delimiter. The pair number and total points
# are followed by a space before their "|", so only round columns match.
# Rounds without an opponent contribute nothing, hence the varying lengths.
# "+" (rather than {1,2}) keeps pair numbers of three or more digits whole.
opponents <- str_extract_all(v1, "[0-9]+(?=\\|)")
str(opponents)
## List of 64
##  $ : chr [1:7] "39" "21" "18" "14" ...
##  $ : chr [1:7] "63" "58" "4" "17" ...
##  $ : chr [1:7] "8" "61" "25" "21" ...
##  $ : chr [1:7] "23" "28" "2" "26" ...
##  $ : chr [1:7] "45" "37" "12" "13" ...
##  $ : chr [1:7] "34" "29" "11" "35" ...
##  $ : chr [1:7] "57" "46" "13" "11" ...
##  $ : chr [1:7] "3" "32" "14" "9" ...
##  $ : chr [1:7] "25" "18" "59" "8" ...
##  $ : chr [1:7] "16" "19" "55" "31" ...
##  $ : chr [1:7] "38" "56" "6" "7" ...
##  $ : chr [1:6] "42" "33" "5" "38" ...
##  $ : chr [1:7] "36" "27" "7" "5" ...
##  $ : chr [1:7] "54" "44" "8" "1" ...
##  $ : chr [1:7] "19" "16" "30" "22" ...
##  $ : chr [1:5] "10" "15" "39" "2" ...
##  $ : chr [1:7] "48" "41" "26" "2" ...
##  $ : chr [1:7] "47" "9" "1" "32" ...
##  $ : chr [1:7] "15" "10" "52" "28" ...
##  $ : chr [1:7] "40" "49" "23" "41" ...
##  $ : chr [1:7] "43" "1" "47" "3" ...
##  $ : chr [1:6] "64" "52" "28" "15" ...
##  $ : chr [1:7] "4" "43" "20" "58" ...
##  $ : chr [1:7] "28" "47" "43" "25" ...
##  $ : chr [1:7] "9" "53" "3" "24" ...
##  $ : chr [1:7] "49" "40" "17" "4" ...
##  $ : chr [1:6] "51" "13" "46" "37" ...
##  $ : chr [1:7] "24" "4" "22" "19" ...
##  $ : chr [1:6] "50" "6" "38" "34" ...
##  $ : chr [1:7] "52" "64" "15" "55" ...
##  $ : chr [1:7] "58" "55" "64" "10" ...
##  $ : chr [1:7] "61" "8" "44" "18" ...
##  $ : chr [1:7] "60" "12" "50" "36" ...
##  $ : chr [1:7] "6" "60" "37" "29" ...
##  $ : chr [1:7] "46" "38" "56" "6" ...
##  $ : chr [1:6] "13" "57" "51" "33" ...
##  $ : chr [1:5] "5" "34" "27" "23" ...
##  $ : chr [1:6] "11" "35" "29" "12" ...
##  $ : chr [1:7] "1" "54" "40" "16" ...
##  $ : chr [1:7] "20" "26" "39" "59" ...
##  $ : chr [1:4] "59" "17" "58" "20"
##  $ : chr [1:7] "12" "50" "57" "60" ...
##  $ : chr [1:7] "21" "23" "24" "63" ...
##  $ : chr [1:6] "14" "32" "53" "39" ...
##  $ : chr [1:7] "5" "51" "60" "56" ...
##  $ : chr [1:7] "35" "7" "27" "50" ...
##  $ : chr [1:7] "18" "24" "21" "61" ...
##  $ : chr [1:5] "17" "63" "52" "29" ...
##  $ : chr [1:5] "26" "20" "63" "64" ...
##  $ : chr [1:6] "29" "42" "33" "46" ...
##  $ : chr [1:7] "27" "45" "36" "57" ...
##  $ : chr [1:7] "30" "22" "19" "48" ...
##  $ : chr [1:3] "25" "44" "57"
##  $ : chr [1:6] "14" "39" "61" "15" ...
##  $ : chr [1:6] "62" "31" "10" "30" ...
##  $ : chr [1:5] "11" "35" "45" "40" ...
##  $ : chr [1:6] "7" "36" "42" "51" ...
##  $ : chr [1:6] "31" "2" "41" "23" ...
##  $ : chr [1:6] "41" "9" "40" "43" ...
##  $ : chr [1:5] "33" "34" "45" "42" ...
##  $ : chr [1:7] "32" "3" "54" "47" ...
##  $ : chr "55"
##  $ : chr [1:5] "2" "48" "49" "43" ...
##  $ : chr [1:7] "22" "30" "31" "49" ...
# Convert each player's opponent pair numbers from text to numbers so they can
# be used as row indices into the ratings table.
opponents <- lapply(opponents, function(pair_ids) as.numeric(pair_ids))
# Mean pre-tournament rating of one player's opponents.
#
# Args:
#   opponents: numeric vector of opponent pair numbers (row positions in the
#              ratings table).
#   ratings:   numeric vector of pre-ratings indexed by pair number. Defaults
#              to the prerate column of the global chessrate data frame, so
#              existing one-argument calls behave as before.
#
# Returns:
#   A single number: the sum of the opponents' ratings divided by the number
#   of opponents. NaN (0 / 0) when opponents is empty; NA when an index is NA
#   or beyond the end of ratings.
avgscore <- function(opponents, ratings = chessrate[["prerate"]]) {
  sum(ratings[opponents]) / length(opponents)
}
# Average opponent pre-rating for every player, in row order. vapply() insists
# on exactly one numeric value per player, so a malformed result raises an
# error instead of silently shortening or shifting the new column.
chessrate[, "avgoppprerate"] <- vapply(opponents, avgscore, numeric(1))
list(chessrate)
## [[1]]
##                          name state totpts prerate avgoppprerate
## 1                    GARY HUA    ON    6.0    1794      1605.286
## 2             DAKSHESH DARURI    MI    6.0    1553      1469.286
## 3                ADITYA BAJAJ    MI    6.0    1384      1563.571
## 4         PATRICK H SCHILLING    MI    5.5    1716      1573.571
## 5                  HANSHI ZUO    MI    5.5    1655      1500.857
## 6                 HANSEN SONG    OH    5.0    1686      1518.714
## 7           GARY DEE SWATHELL    MI    5.0    1649      1372.143
## 8            EZEKIEL HOUGHTON    MI    5.0    1641      1468.429
## 9                 STEFANO LEE    ON    5.0    1411      1523.143
## 10                  ANVIT RAO    MI    5.0    1365      1554.143
## 11   CAMERON WILLIAM MC LEMAN    MI    4.5    1712      1467.571
## 12             KENNETH J TACK    MI    4.5    1663      1506.167
## 13          TORRANCE HENRY JR    MI    4.5    1666      1497.857
## 14               BRADLEY SHAW    MI    4.5    1610      1515.000
## 15     ZACHARY JAMES HOUGHTON    MI    4.5    1220      1483.857
## 16               MIKE NIKITIN    MI    4.0    1604      1385.800
## 17         RONALD GRZEGORCZYK    MI    4.0    1629      1498.571
## 18              DAVID SUNDEEN    MI    4.0    1600      1480.000
## 19               DIPANKAR ROY    MI    4.0    1564      1426.286
## 20                JASON ZHENG    MI    4.0    1595      1410.857
## 21              DINH DANG BUI    ON    4.0    1563      1470.429
## 22           EUGENE L MCCLURE    MI    4.0    1555      1300.333
## 23                   ALAN BUI    ON    4.0    1363      1213.857
## 24          MICHAEL R ALDRICH    MI    4.0    1229      1357.000
## 25           LOREN SCHWIEBERT    MI    3.5    1745      1363.286
## 26                    MAX ZHU    ON    3.5    1579      1506.857
## 27             GAURAV GIDWANI    MI    3.5    1552      1221.667
## 28 SOFIA ADINA STANESCU-BELLU    MI    3.5    1507      1522.143
## 29           CHIEDOZIE OKORIE    MI    3.5    1602      1313.500
## 30         GEORGE AVERY JONES    ON    3.5    1522      1144.143
## 31               RISHI SHETTY    MI    3.5    1494      1259.857
## 32      JOSHUA PHILIP MATHEWS    ON    3.5    1441      1378.714
## 33                    JADE GE    MI    3.5    1449      1276.857
## 34     MICHAEL JEFFERY THOMAS    MI    3.5    1399      1375.286
## 35           JOSHUA DAVID LEE    MI    3.5    1438      1149.714
## 36              SIDDHARTH JHA    MI    3.5    1355      1388.167
## 37       AMIYATOSH PWNANANDAM    MI    3.5     980      1384.800
## 38                  BRIAN LIU    MI    3.0    1423      1539.167
## 39              JOEL R HENDON    MI    3.0    1436      1429.571
## 40               FOREST ZHANG    MI    3.0    1348      1390.571
## 41        KYLE WILLIAM MURPHY    MI    3.0    1403      1248.500
## 42                   JARED GE    MI    3.0    1332      1149.857
## 43          ROBERT GLEN VASEY    MI    3.0    1283      1106.571
## 44         JUSTIN D SCHILLING    MI    3.0    1199      1327.000
## 45                  DEREK YAN    MI    3.0    1242      1152.000
## 46   JACOB ALEXANDER LAVALLEY    MI    3.0     377      1357.714
## 47                ERIC WRIGHT    MI    2.5    1362      1392.000
## 48               DANIEL KHAIN    MI    2.5    1382      1355.800
## 49           MICHAEL J MARTIN    MI    2.5    1291      1285.800
## 50                 SHIVAM JHA    MI    2.5    1056      1296.000
## 51             TEJAS AYYAGARI    MI    2.5    1011      1356.143
## 52                  ETHAN GUO    MI    2.5     935      1494.571
## 53              JOSE C YBARRA    MI    2.0    1393      1345.333
## 54                LARRY HODGE    MI    2.0    1270      1206.167
## 55                  ALEX KONG    MI    2.0    1186      1406.000
## 56               MARISA RICCI    MI    2.0    1153      1414.400
## 57                 MICHAEL LU    MI    2.0    1092      1363.000
## 58               VIRAJ MOHILE    MI    2.0     917      1391.000
## 59          SEAN M MC CORMICK    MI    2.0     853      1319.000
## 60                 JULIA SHEN    MI    1.5     967      1330.200
## 61              JEZZEL FARKAS    ON    1.5     955      1327.286
## 62              ASHWIN BALAJI    MI    1.0    1530      1186.000
## 63       THOMAS JOSEPH HOSMER    MI    1.0    1175      1350.200
## 64                     BEN LI    MI    1.0    1163      1263.000
##Export .csv
# Write the final table to the working directory as comma-separated text.
# NOTE(review): the file name is spelled "ouput.csv" and row names are written
# as a leading unnamed column (write.csv default) - confirm both are intended
# before changing, since downstream readers may rely on them.
write.csv(x = chessrate, file = "ouput.csv")