##Load library and data
library(stringr)
chesstourney = readLines("https://raw.githubusercontent.com/danielhong98/MSDA-Spring-2016/29394c9cb74160fd53931123c2358019208293bb/chesstournament.txt")
## Warning in readLines("https://raw.githubusercontent.com/danielhong98/MSDA-
## Spring-2016/29394c9cb74160fd53931123c2358019208293bb/chesstournament.txt"):
## incomplete final line found on 'https://raw.githubusercontent.com/
## danielhong98/MSDA-Spring-2016/29394c9cb74160fd53931123c2358019208293bb/
## chesstournament.txt'
dput(head(chesstourney))
## c("-----------------------------------------------------------------------------------------",
## " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| ",
## " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | ",
## "-----------------------------------------------------------------------------------------",
## " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|",
## " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## )
##cleanse Data > create 2 vectors for each row
chesstourneynohead = chesstourney[-1:-4]
dashes = grep("^-+", chesstourneynohead)
chessfinal = chesstourneynohead[-dashes]
v1 = chessfinal[c(TRUE, FALSE)]
v2 = chessfinal[c(FALSE, TRUE)]
head(v1)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] " 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] " 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] " 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] " 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
head(v2)
## [1] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [3] " MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [4] " MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [5] " MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [6] " OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
##Extract Data > create data frame
name <- (str_trim(unlist(str_extract_all(v1, "([[:alpha:] ]-?){15,31}"))))
str(name)
## chr [1:64] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ" ...
totpts <- unlist(str_extract_all(v1, "[:digit:][//.][:digit:]"))
str(totpts)
## chr [1:64] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" ...
state <- str_trim(unlist(str_extract_all(v2, " MI | ON | OH ")))
str(state)
## chr [1:64] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" ...
prerate <- as.numeric(sub(pattern = 'R: ', replacement = '', x = unlist(str_extract_all(v2, "R: [[:digit:] ]{4}"))))
str(prerate)
## num [1:64] 1794 1553 1384 1716 1655 ...
chessrate <- data.frame(name, state, totpts, prerate)
list(chessrate)
## [[1]]
## name state totpts prerate
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
##Calculate Opponent Average Rating and append data frame
id = c(1:length(v1))
opp = data.frame (id,prerate)
head(opp)
## id prerate
## 1 1 1794
## 2 2 1553
## 3 3 1384
## 4 4 1716
## 5 5 1655
## 6 6 1686
##Export .csv
write.csv(chessrate,"ouput.csv")