The text file was uploaded to a personal GitHub account.
#load necessary packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(readr)
# Read the tournament report from GitHub as raw text, one string per line.
# The report is a pipe-delimited table with dashed separator rows, not a CSV,
# so read_lines() is used instead of read.csv(); read_lines() accepts the URL
# directly, so the file is downloaded only once.
ELOratings2 <- read_lines("https://raw.githubusercontent.com/sokkarbishoy/DATA607/main/tournamentinfo.txt")
# Collapse runs of whitespace to single spaces and trim both ends so the
# regular expressions used later can rely on single-space separators.
ELOratings3 <- str_squish(ELOratings2)
head(ELOratings3, n = 15)
## [1] "-----------------------------------------------------------------------------------------"
## [2] "Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|"
## [3] "Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |"
## [4] "-----------------------------------------------------------------------------------------"
## [5] "1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] "ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [7] "-----------------------------------------------------------------------------------------"
## [8] "2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [9] "MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [10] "-----------------------------------------------------------------------------------------"
## [11] "3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [12] "MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [13] "-----------------------------------------------------------------------------------------"
## [14] "4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [15] "MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
Extracting the names of the players using REGEX
# Keep only the player rows: " | NAME |<digit>" (a name field followed by the
# points column). str_subset() returns the matching lines unchanged, whereas
# str_view() is a display helper whose result is decorated with match markers
# and should not be fed into further string processing.
elorating4 <- str_subset(ELOratings3, "\\s\\|\\s[A-Z].*\\s\\|\\d")
head(elorating4, n = 40)
## [5] │ 1< | GARY HUA |6>.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## [8] │ 2< | DAKSHESH DARURI |6>.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## [11] │ 3< | ADITYA BAJAJ |6>.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|
## [14] │ 4< | PATRICK H SCHILLING |5>.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|
## [17] │ 5< | HANSHI ZUO |5>.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|
## [20] │ 6< | HANSEN SONG |5>.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|
## [23] │ 7< | GARY DEE SWATHELL |5>.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|
## [26] │ 8< | EZEKIEL HOUGHTON |5>.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|
## [29] │ 9< | STEFANO LEE |5>.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|
## [32] │ 10< | ANVIT RAO |5>.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|
## [35] │ 11< | CAMERON WILLIAM MC LEMAN |4>.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|
## [38] │ 12< | KENNETH J TACK |4>.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|
## [41] │ 13< | TORRANCE HENRY JR |4>.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|
## [44] │ 14< | BRADLEY SHAW |4>.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|
## [47] │ 15< | ZACHARY JAMES HOUGHTON |4>.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|
## [50] │ 16< | MIKE NIKITIN |4>.0 |D 10|W 15|H |W 39|L 2|W 36|U |
## [53] │ 17< | RONALD GRZEGORCZYK |4>.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|
## [56] │ 18< | DAVID SUNDEEN |4>.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|
## [59] │ 19< | DIPANKAR ROY |4>.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|
## [62] │ 20< | JASON ZHENG |4>.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|
## ... and 20 more
# From each player row, pull the first "| ... |" segment that contains no
# digits -- this is the player-name field, still wrapped in its pipes.
elorating4 <- elorating4 %>%
  str_extract("\\|\\s[^0-9]+\\s\\|")
head(elorating4)
## [1] "| GARY HUA |" "| DAKSHESH DARURI |"
## [3] "| ADITYA BAJAJ |" "| PATRICK H SCHILLING |"
## [5] "| HANSHI ZUO |" "| HANSEN SONG |"
# Strip the surrounding pipes: take everything between the first and last
# whitespace character, then trim the leftover spaces to get the bare name.
Player <- elorating4 %>%
  str_extract("\\s.*\\s") %>%
  str_trim()
print(Player)
## [1] "GARY HUA" "DAKSHESH DARURI"
## [3] "ADITYA BAJAJ" "PATRICK H SCHILLING"
## [5] "HANSHI ZUO" "HANSEN SONG"
## [7] "GARY DEE SWATHELL" "EZEKIEL HOUGHTON"
## [9] "STEFANO LEE" "ANVIT RAO"
## [11] "CAMERON WILLIAM MC LEMAN" "KENNETH J TACK"
## [13] "TORRANCE HENRY JR" "BRADLEY SHAW"
## [15] "ZACHARY JAMES HOUGHTON" "MIKE NIKITIN"
## [17] "RONALD GRZEGORCZYK" "DAVID SUNDEEN"
## [19] "DIPANKAR ROY" "JASON ZHENG"
## [21] "DINH DANG BUI" "EUGENE L MCCLURE"
## [23] "ALAN BUI" "MICHAEL R ALDRICH"
## [25] "LOREN SCHWIEBERT" "MAX ZHU"
## [27] "GAURAV GIDWANI" "SOFIA ADINA STANESCU-BELLU"
## [29] "CHIEDOZIE OKORIE" "GEORGE AVERY JONES"
## [31] "RISHI SHETTY" "JOSHUA PHILIP MATHEWS"
## [33] "JADE GE" "MICHAEL JEFFERY THOMAS"
## [35] "JOSHUA DAVID LEE" "SIDDHARTH JHA"
## [37] "AMIYATOSH PWNANANDAM" "BRIAN LIU"
## [39] "JOEL R HENDON" "FOREST ZHANG"
## [41] "KYLE WILLIAM MURPHY" "JARED GE"
## [43] "ROBERT GLEN VASEY" "JUSTIN D SCHILLING"
## [45] "DEREK YAN" "JACOB ALEXANDER LAVALLEY"
## [47] "ERIC WRIGHT" "DANIEL KHAIN"
## [49] "MICHAEL J MARTIN" "SHIVAM JHA"
## [51] "TEJAS AYYAGARI" "ETHAN GUO"
## [53] "JOSE C YBARRA" "LARRY HODGE"
## [55] "ALEX KONG" "MARISA RICCI"
## [57] "MICHAEL LU" "VIRAJ MOHILE"
## [59] "SEAN M MC CORMICK" "JULIA SHEN"
## [61] "JEZZEL FARKAS" "ASHWIN BALAJI"
## [63] "THOMAS JOSEPH HOSMER" "BEN LI"
Extracting States using REGEX
# Keep only the rows that begin with a two-letter state/province code followed
# by a space (the second line of each player record). str_subset() returns the
# matching lines unchanged; str_view() is only meant for displaying matches.
State <- str_subset(ELOratings3, "^[A-Z][A-Z]\\s")
head(State)
## [6] │ <ON >| 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## [9] │ <MI >| 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## [12] │ <MI >| 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |
## [15] │ <MI >| 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |
## [18] │ <MI >| 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |
## [21] │ <OH >| 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |
# Pull the two-letter code out of each row. The pattern needs the trailing
# whitespace to locate the code, so trim it afterwards; otherwise values such
# as "ON " (with a trailing space) end up in the data frame and the CSV.
State <- str_trim(str_extract(State, "[A-Z][A-Z]\\s"))
print(State)
## [1] "ON " "MI " "MI " "MI " "MI " "OH " "MI " "MI " "ON " "MI " "MI " "MI "
## [13] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "ON " "MI " "ON " "MI "
## [25] "MI " "ON " "MI " "MI " "MI " "ON " "MI " "ON " "MI " "MI " "MI " "MI "
## [37] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [49] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [61] "ON " "MI " "MI " "MI "
Extracting total points
# Select the player rows (name field followed by a "d.d" points value) with
# str_subset(), which returns the matching lines unchanged; str_view() is a
# display helper and should not be used as a filter.
Total_points <- str_subset(ELOratings3, "\\s\\|\\s[A-Z].*\\s\\|\\d\\.\\d")
# Extract the points value. The dot is escaped so it only matches a literal
# decimal point, and the look-behind anchors the match to the start of a
# pipe-delimited field so no other digits on the line can be picked up.
# The result is kept as a character vector.
Total_points <- str_extract(Total_points, "(?<=\\|)\\d\\.\\d")
print(Total_points)
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5" "4.5"
## [13] "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [25] "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [37] "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "2.5" "2.5"
## [49] "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "1.5"
## [61] "1.5" "1.0" "1.0" "1.0"
Extracting Pre Rating
# Capture the run of digits that immediately follows "R:" on each rating row;
# this is the pre-tournament rating, and any provisional suffix such as "P17"
# is left out because the capture stops at the first non-digit.
# str_match() returns a matrix whose second column is the capture group.
Pre_rating <- str_match(ELOratings3, "R:\\s*(\\d+)")[, 2]
# Lines without an "R:" field (headers, separators, player-name rows) yield
# NA; drop them so one value per player remains. The result is kept as a
# character vector.
Pre_rating <- Pre_rating[!is.na(Pre_rating)]
print(Pre_rating)
## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175" "1163"
Extracting each player's average opponent pre-rating: I am still having difficulty working out the best way to use the join function.
Build a data frame with the extracted data, except the opponents' pre-rating.
# Assemble the extracted vectors into one table, one row per player.
# Column names are spelled out explicitly and match the vector names.
RatingsFinal <- data.frame(
  Player = Player,
  State = State,
  Total_points = Total_points,
  Pre_rating = Pre_rating
)
RatingsFinal
## Player State Total_points Pre_rating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
Converting into a CSV
# Save the assembled table as ELO_Ratings.csv (path relative to the working
# directory), without the row-name column.
write.csv(
  x = RatingsFinal,
  file = "ELO_Ratings.csv",
  row.names = FALSE
)