##Load library and data
library(stringr)
# Read the raw tournament cross-table from GitHub, one element per text line.
# The remote file has no trailing newline, so readLines() used to emit an
# "incomplete final line" warning on every run. The last line is still read in
# that case, so the warning carries no information and is switched off with
# warn = FALSE.
chesstourney <- readLines(
  "https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/29394c9cb74160fd53931123c2358019208293bb/chesstournament.txt",
  warn = FALSE
)
# Dump the first six lines in a reproducible form to confirm the layout:
# a dashed rule, two column-header rows, another rule, then two lines per
# player.
dput(head(chesstourney))
## c("-----------------------------------------------------------------------------------------",
## " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| ",
## " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | ",
## "-----------------------------------------------------------------------------------------",
## " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|",
## " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## )
##cleanse Data > create 2 vectors for each row
# Drop the four-line banner at the top of the file (dashed rule, two
# column-header rows, dashed rule).
chesstourneynohead <- chesstourney[-(1:4)]
# Positions of the dashed separator lines that follow each player's record.
dashes <- grep("^-+", chesstourneynohead)
# Keep every position that is NOT a separator. Indexing with `-dashes` is
# unsafe: when no separator is found, x[-integer(0)] selects nothing and the
# whole data set would silently vanish.
chessfinal <- chesstourneynohead[
  setdiff(seq_along(chesstourneynohead), dashes)
]
# Each player occupies exactly two consecutive lines. An odd count means the
# input is malformed, and the alternating split below would pair the wrong
# lines, so fail loudly instead.
if (length(chessfinal) %% 2L != 0L) {
  stop(
    "Expected two lines per player after removing separators, found ",
    length(chessfinal), " lines.",
    call. = FALSE
  )
}
# The logical masks are recycled along the vector: v1 takes the odd-numbered
# lines (pair number, name, total, round results) and v2 the even-numbered
# lines (state, USCF ID, ratings, colours).
v1 <- chessfinal[c(TRUE, FALSE)]
v2 <- chessfinal[c(FALSE, TRUE)]
head(v1)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
head(v2)
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
##Extract Data > create data frame
# Every extraction below yields exactly one value per input line (NA when a
# line does not match), so the four vectors always stay aligned row by row.
# Flattening str_extract_all() output would instead shift later rows whenever
# a line had zero or several matches.

# The first line of each record is pipe-delimited and its second field is the
# player name. Taking the whole field keeps names of any length and with any
# punctuation, which a fixed-length run of letters does not.
name <- str_trim(str_split_fixed(v1, "\\|", 3)[, 2])
str(name)
## chr [1:64] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" ...
# Total points: the first "digits.digit" number on the line. The separator
# must be a literal dot; the previous character class `[//.]` also accepted
# "/". Kept as character so the values are exported exactly as written in the
# source file (e.g. "6.0").
totpts <- str_extract(v1, "[[:digit:]]+\\.[[:digit:]]")
str(totpts)
## chr [1:64] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" ...
# The state / province code is the first pipe-delimited field of the second
# line; reading the field avoids hard-coding the list of codes.
state <- str_trim(str_split_fixed(v2, "\\|", 2)[, 1])
str(state)
## chr [1:64] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" ...
# Pre-tournament rating: the digits that follow "R:", however many there are
# and whatever the amount of padding. Column 2 of the str_match() result is
# the first capture group.
prerate <- as.numeric(str_match(v2, "R:\\s*([[:digit:]]+)")[, 2])
str(prerate)
## num [1:64] 1794 1553 1384 1716 1655 ...
chessrate <- data.frame(name, state, totpts, prerate)
# Auto-print the table; list() is kept so the console output retains its
# existing "[[1]]" header.
list(chessrate)
## [[1]]
## name state totpts prerate
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
##Calculate Opponent Average Rating and append data
# Lookup table pairing each record's position in v1 with that player's
# pre-tournament rating. seq_along() is used instead of 1:length(v1), which
# would produce c(1, 0) for an empty input.
# NOTE(review): `opp` is not referenced again in the visible part of the
# script -- confirm whether it is still needed.
id <- seq_along(v1)
opp <- data.frame(id, prerate)
head(opp)
## id prerate
## 1 1 1794
## 2 2 1553
## 3 3 1384
## 4 4 1716
## 5 5 1655
## 6 6 1686
# For every player, collect the opponent pair numbers: the digit runs that sit
# immediately before a "|" (the round cells look like "W 39|"). Rounds with no
# opponent number contribute nothing, so the vectors can differ in length.
# "+" is used rather than "{1,2}" so that pair numbers of 100 or more are
# captured whole instead of being cut down to their last two digits.
opponents <- str_extract_all(v1, "[0-9]+(?=\\|)")
str(opponents)
## List of 64
## $ : chr [1:7] "39" "21" "18" "14" ...
## $ : chr [1:7] "63" "58" "4" "17" ...
## $ : chr [1:7] "8" "61" "25" "21" ...
## $ : chr [1:7] "23" "28" "2" "26" ...
## $ : chr [1:7] "45" "37" "12" "13" ...
## $ : chr [1:7] "34" "29" "11" "35" ...
## $ : chr [1:7] "57" "46" "13" "11" ...
## $ : chr [1:7] "3" "32" "14" "9" ...
## $ : chr [1:7] "25" "18" "59" "8" ...
## $ : chr [1:7] "16" "19" "55" "31" ...
## $ : chr [1:7] "38" "56" "6" "7" ...
## $ : chr [1:6] "42" "33" "5" "38" ...
## $ : chr [1:7] "36" "27" "7" "5" ...
## $ : chr [1:7] "54" "44" "8" "1" ...
## $ : chr [1:7] "19" "16" "30" "22" ...
## $ : chr [1:5] "10" "15" "39" "2" ...
## $ : chr [1:7] "48" "41" "26" "2" ...
## $ : chr [1:7] "47" "9" "1" "32" ...
## $ : chr [1:7] "15" "10" "52" "28" ...
## $ : chr [1:7] "40" "49" "23" "41" ...
## $ : chr [1:7] "43" "1" "47" "3" ...
## $ : chr [1:6] "64" "52" "28" "15" ...
## $ : chr [1:7] "4" "43" "20" "58" ...
## $ : chr [1:7] "28" "47" "43" "25" ...
## $ : chr [1:7] "9" "53" "3" "24" ...
## $ : chr [1:7] "49" "40" "17" "4" ...
## $ : chr [1:6] "51" "13" "46" "37" ...
## $ : chr [1:7] "24" "4" "22" "19" ...
## $ : chr [1:6] "50" "6" "38" "34" ...
## $ : chr [1:7] "52" "64" "15" "55" ...
## $ : chr [1:7] "58" "55" "64" "10" ...
## $ : chr [1:7] "61" "8" "44" "18" ...
## $ : chr [1:7] "60" "12" "50" "36" ...
## $ : chr [1:7] "6" "60" "37" "29" ...
## $ : chr [1:7] "46" "38" "56" "6" ...
## $ : chr [1:6] "13" "57" "51" "33" ...
## $ : chr [1:5] "5" "34" "27" "23" ...
## $ : chr [1:6] "11" "35" "29" "12" ...
## $ : chr [1:7] "1" "54" "40" "16" ...
## $ : chr [1:7] "20" "26" "39" "59" ...
## $ : chr [1:4] "59" "17" "58" "20"
## $ : chr [1:7] "12" "50" "57" "60" ...
## $ : chr [1:7] "21" "23" "24" "63" ...
## $ : chr [1:6] "14" "32" "53" "39" ...
## $ : chr [1:7] "5" "51" "60" "56" ...
## $ : chr [1:7] "35" "7" "27" "50" ...
## $ : chr [1:7] "18" "24" "21" "61" ...
## $ : chr [1:5] "17" "63" "52" "29" ...
## $ : chr [1:5] "26" "20" "63" "64" ...
## $ : chr [1:6] "29" "42" "33" "46" ...
## $ : chr [1:7] "27" "45" "36" "57" ...
## $ : chr [1:7] "30" "22" "19" "48" ...
## $ : chr [1:3] "25" "44" "57"
## $ : chr [1:6] "14" "39" "61" "15" ...
## $ : chr [1:6] "62" "31" "10" "30" ...
## $ : chr [1:5] "11" "35" "45" "40" ...
## $ : chr [1:6] "7" "36" "42" "51" ...
## $ : chr [1:6] "31" "2" "41" "23" ...
## $ : chr [1:6] "41" "9" "40" "43" ...
## $ : chr [1:5] "33" "34" "45" "42" ...
## $ : chr [1:7] "32" "3" "54" "47" ...
## $ : chr "55"
## $ : chr [1:5] "2" "48" "49" "43" ...
## $ : chr [1:7] "22" "30" "31" "49" ...
# Turn each player's opponent pair numbers from character into numeric so they
# can be used as row indices when looking up ratings.
opponents <- lapply(opponents, function(pair_numbers) as.numeric(pair_numbers))
# Average pre-tournament rating of one player's opponents.
#
# Arguments:
#   opponents  Numeric vector of opponent pair numbers, used as positions into
#              `ratings`.
#   ratings    Numeric vector of pre-tournament ratings ordered by pair number.
#              Defaults to the `prerate` column of the global `chessrate` data
#              frame, so existing one-argument calls behave as before; pass it
#              explicitly to use the function without that global.
#
# Returns the sum of the selected ratings divided by the number of opponents.
# The result is NaN when `opponents` is empty (0 / 0) and NA when a pair number
# has no matching rating.
avgscore <- function(opponents, ratings = chessrate[["prerate"]]) {
  # Vectorised lookup replaces the element-by-element accumulation loop.
  sum(ratings[opponents]) / length(opponents)
}
# Append each player's average opponent pre-rating as a new column.
# vapply() guarantees exactly one numeric value per player and errors
# otherwise, whereas unlist(lapply(...)) would silently change length or type
# if any result were not a single number.
chessrate[, "avgoppprerate"] <- vapply(opponents, avgscore, numeric(1))
# Auto-print the finished table; list() is kept so the console output retains
# its existing "[[1]]" header.
list(chessrate)
## [[1]]
## name state totpts prerate avgoppprerate
## 1 GARY HUA ON 6.0 1794 1605.286
## 2 DAKSHESH DARURI MI 6.0 1553 1469.286
## 3 ADITYA BAJAJ MI 6.0 1384 1563.571
## 4 PATRICK H SCHILLING MI 5.5 1716 1573.571
## 5 HANSHI ZUO MI 5.5 1655 1500.857
## 6 HANSEN SONG OH 5.0 1686 1518.714
## 7 GARY DEE SWATHELL MI 5.0 1649 1372.143
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468.429
## 9 STEFANO LEE ON 5.0 1411 1523.143
## 10 ANVIT RAO MI 5.0 1365 1554.143
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1467.571
## 12 KENNETH J TACK MI 4.5 1663 1506.167
## 13 TORRANCE HENRY JR MI 4.5 1666 1497.857
## 14 BRADLEY SHAW MI 4.5 1610 1515.000
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1483.857
## 16 MIKE NIKITIN MI 4.0 1604 1385.800
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1498.571
## 18 DAVID SUNDEEN MI 4.0 1600 1480.000
## 19 DIPANKAR ROY MI 4.0 1564 1426.286
## 20 JASON ZHENG MI 4.0 1595 1410.857
## 21 DINH DANG BUI ON 4.0 1563 1470.429
## 22 EUGENE L MCCLURE MI 4.0 1555 1300.333
## 23 ALAN BUI ON 4.0 1363 1213.857
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357.000
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363.286
## 26 MAX ZHU ON 3.5 1579 1506.857
## 27 GAURAV GIDWANI MI 3.5 1552 1221.667
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522.143
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1313.500
## 30 GEORGE AVERY JONES ON 3.5 1522 1144.143
## 31 RISHI SHETTY MI 3.5 1494 1259.857
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1378.714
## 33 JADE GE MI 3.5 1449 1276.857
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375.286
## 35 JOSHUA DAVID LEE MI 3.5 1438 1149.714
## 36 SIDDHARTH JHA MI 3.5 1355 1388.167
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1384.800
## 38 BRIAN LIU MI 3.0 1423 1539.167
## 39 JOEL R HENDON MI 3.0 1436 1429.571
## 40 FOREST ZHANG MI 3.0 1348 1390.571
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248.500
## 42 JARED GE MI 3.0 1332 1149.857
## 43 ROBERT GLEN VASEY MI 3.0 1283 1106.571
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327.000
## 45 DEREK YAN MI 3.0 1242 1152.000
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1357.714
## 47 ERIC WRIGHT MI 2.5 1362 1392.000
## 48 DANIEL KHAIN MI 2.5 1382 1355.800
## 49 MICHAEL J MARTIN MI 2.5 1291 1285.800
## 50 SHIVAM JHA MI 2.5 1056 1296.000
## 51 TEJAS AYYAGARI MI 2.5 1011 1356.143
## 52 ETHAN GUO MI 2.5 935 1494.571
## 53 JOSE C YBARRA MI 2.0 1393 1345.333
## 54 LARRY HODGE MI 2.0 1270 1206.167
## 55 ALEX KONG MI 2.0 1186 1406.000
## 56 MARISA RICCI MI 2.0 1153 1414.400
## 57 MICHAEL LU MI 2.0 1092 1363.000
## 58 VIRAJ MOHILE MI 2.0 917 1391.000
## 59 SEAN M MC CORMICK MI 2.0 853 1319.000
## 60 JULIA SHEN MI 1.5 967 1330.200
## 61 JEZZEL FARKAS ON 1.5 955 1327.286
## 62 ASHWIN BALAJI MI 1.0 1530 1186.000
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350.200
## 64 BEN LI MI 1.0 1163 1263.000
## Export the finished table as a CSV file in the working directory
# NOTE(review): "ouput.csv" looks like a typo for "output.csv" -- confirm that
# nothing downstream reads the current name before renaming it.
write.csv(x = chessrate, file = "ouput.csv")