Working with Ratings text file

The text file was uploaded to my personal GitHub account.

#load necessary packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(readr)

# Read the tournament cross-table from GitHub as raw lines. The file is a
# pipe-delimited text report rather than a CSV, so it is read line by line
# and parsed with regular expressions further down.
ratings_url <- "https://raw.githubusercontent.com/sokkarbishoy/DATA607/main/tournamentinfo.txt"
ELOratings2 <- read_lines(url(ratings_url))

# Collapse runs of whitespace so every field has predictable single spacing.
ELOratings3 <- str_squish(ELOratings2)
head(ELOratings3, n = 15)
##  [1] "-----------------------------------------------------------------------------------------"
##  [2] "Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|"                     
##  [3] "Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |"                      
##  [4] "-----------------------------------------------------------------------------------------"
##  [5] "1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"                                     
##  [6] "ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"                               
##  [7] "-----------------------------------------------------------------------------------------"
##  [8] "2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"                              
##  [9] "MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"                               
## [10] "-----------------------------------------------------------------------------------------"
## [11] "3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"                                
## [12] "MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"                               
## [13] "-----------------------------------------------------------------------------------------"
## [14] "4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"                           
## [15] "MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"

Extracting the names of the players using REGEX

# Player header lines look like "1 | GARY HUA |6.0 |W 39|...": a name between
# pipes, immediately followed by the points total.
name_line_pattern <- "\\s\\|\\s[A-Z].*\\s\\|\\d"

# str_view() is a display helper: its result has match-highlight markup
# embedded in the strings, so it is used here only for the preview.
head(str_view(ELOratings3, name_line_pattern), n = 40)

# str_subset() keeps the matching lines unmodified for further parsing.
elorating4 <- str_subset(ELOratings3, name_line_pattern)
##  [5] │ 1< | GARY HUA |6>.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
##  [8] │ 2< | DAKSHESH DARURI |6>.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## [11] │ 3< | ADITYA BAJAJ |6>.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|
## [14] │ 4< | PATRICK H SCHILLING |5>.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|
## [17] │ 5< | HANSHI ZUO |5>.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|
## [20] │ 6< | HANSEN SONG |5>.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|
## [23] │ 7< | GARY DEE SWATHELL |5>.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|
## [26] │ 8< | EZEKIEL HOUGHTON |5>.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|
## [29] │ 9< | STEFANO LEE |5>.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|
## [32] │ 10< | ANVIT RAO |5>.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|
## [35] │ 11< | CAMERON WILLIAM MC LEMAN |4>.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|
## [38] │ 12< | KENNETH J TACK |4>.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|
## [41] │ 13< | TORRANCE HENRY JR |4>.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|
## [44] │ 14< | BRADLEY SHAW |4>.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|
## [47] │ 15< | ZACHARY JAMES HOUGHTON |4>.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|
## [50] │ 16< | MIKE NIKITIN |4>.0 |D 10|W 15|H |W 39|L 2|W 36|U |
## [53] │ 17< | RONALD GRZEGORCZYK |4>.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|
## [56] │ 18< | DAVID SUNDEEN |4>.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|
## [59] │ 19< | DIPANKAR ROY |4>.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|
## [62] │ 20< | JASON ZHENG |4>.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|
## ... and 20 more
# Isolate the "| NAME |" field: the first span of non-digit characters that is
# enclosed by pipes (with one whitespace character of padding on each side).
name_field_pattern <- "\\|\\s[^0-9]+\\s\\|"
elorating4 <- elorating4 %>% str_extract(name_field_pattern)
head(elorating4)
## [1] "| GARY HUA |"            "| DAKSHESH DARURI |"    
## [3] "| ADITYA BAJAJ |"        "| PATRICK H SCHILLING |"
## [5] "| HANSHI ZUO |"          "| HANSEN SONG |"
# Drop the surrounding pipes, then the padding, leaving only the player name.
Player <- elorating4 %>%
  str_extract("\\s.*\\s") %>%
  str_trim()
print(Player)
##  [1] "GARY HUA"                   "DAKSHESH DARURI"           
##  [3] "ADITYA BAJAJ"               "PATRICK H SCHILLING"       
##  [5] "HANSHI ZUO"                 "HANSEN SONG"               
##  [7] "GARY DEE SWATHELL"          "EZEKIEL HOUGHTON"          
##  [9] "STEFANO LEE"                "ANVIT RAO"                 
## [11] "CAMERON WILLIAM MC LEMAN"   "KENNETH J TACK"            
## [13] "TORRANCE HENRY JR"          "BRADLEY SHAW"              
## [15] "ZACHARY JAMES HOUGHTON"     "MIKE NIKITIN"              
## [17] "RONALD GRZEGORCZYK"         "DAVID SUNDEEN"             
## [19] "DIPANKAR ROY"               "JASON ZHENG"               
## [21] "DINH DANG BUI"              "EUGENE L MCCLURE"          
## [23] "ALAN BUI"                   "MICHAEL R ALDRICH"         
## [25] "LOREN SCHWIEBERT"           "MAX ZHU"                   
## [27] "GAURAV GIDWANI"             "SOFIA ADINA STANESCU-BELLU"
## [29] "CHIEDOZIE OKORIE"           "GEORGE AVERY JONES"        
## [31] "RISHI SHETTY"               "JOSHUA PHILIP MATHEWS"     
## [33] "JADE GE"                    "MICHAEL JEFFERY THOMAS"    
## [35] "JOSHUA DAVID LEE"           "SIDDHARTH JHA"             
## [37] "AMIYATOSH PWNANANDAM"       "BRIAN LIU"                 
## [39] "JOEL R HENDON"              "FOREST ZHANG"              
## [41] "KYLE WILLIAM MURPHY"        "JARED GE"                  
## [43] "ROBERT GLEN VASEY"          "JUSTIN D SCHILLING"        
## [45] "DEREK YAN"                  "JACOB ALEXANDER LAVALLEY"  
## [47] "ERIC WRIGHT"                "DANIEL KHAIN"              
## [49] "MICHAEL J MARTIN"           "SHIVAM JHA"                
## [51] "TEJAS AYYAGARI"             "ETHAN GUO"                 
## [53] "JOSE C YBARRA"              "LARRY HODGE"               
## [55] "ALEX KONG"                  "MARISA RICCI"              
## [57] "MICHAEL LU"                 "VIRAJ MOHILE"              
## [59] "SEAN M MC CORMICK"          "JULIA SHEN"                
## [61] "JEZZEL FARKAS"              "ASHWIN BALAJI"             
## [63] "THOMAS JOSEPH HOSMER"       "BEN LI"

Extracting States using REGEX

# Detail lines start with a two-letter state/province code followed by a
# space, e.g. "ON | 15445895 / R: 1794 ->1817 |...".
state_line_pattern <- "^[A-Z]{2}\\s"

# str_view() embeds highlight markup in its result, so it is used for the
# preview only; str_subset() supplies the unmodified lines for parsing.
head(str_view(ELOratings3, state_line_pattern))
State <- str_subset(ELOratings3, state_line_pattern)
##  [6] │ <ON >| 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
##  [9] │ <MI >| 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## [12] │ <MI >| 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |
## [15] │ <MI >| 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |
## [18] │ <MI >| 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |
## [21] │ <OH >| 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |
# Keep only the two-letter code. The lookahead requires the space that
# follows the code without capturing it, so values are "ON" rather than "ON "
# and no trailing whitespace ends up in the exported data.
State <- str_extract(State, "[A-Z]{2}(?=\\s)")
print(State)
##  [1] "ON " "MI " "MI " "MI " "MI " "OH " "MI " "MI " "ON " "MI " "MI " "MI "
## [13] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "ON " "MI " "ON " "MI "
## [25] "MI " "ON " "MI " "MI " "MI " "ON " "MI " "ON " "MI " "MI " "MI " "MI "
## [37] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [49] "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI " "MI "
## [61] "ON " "MI " "MI " "MI "

Extracting total points

# Player header lines carry the points total right after the pipe that closes
# the name field, e.g. "... HUA |6.0 |W 39|...".
points_line_pattern <- "\\s\\|\\s[A-Z].*\\s\\|\\d\\.\\d"

# Filter with str_subset() (raw lines, no display markup), then take the
# "d.d" value that directly follows a pipe. The dot is escaped so that it only
# matches a literal decimal point. Values are kept as character strings.
Total_points <- ELOratings3 %>%
  str_subset(points_line_pattern) %>%
  str_extract("(?<=\\|)\\d\\.\\d")
print(Total_points)
##  [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5" "4.5"
## [13] "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [25] "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [37] "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "2.5" "2.5"
## [49] "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "1.5"
## [61] "1.5" "1.0" "1.0" "1.0"

Extracting Pre Rating

# Rating lines contain "R: <pre> -><post>", where a rating may carry a
# provisional suffix such as "1641P17". Select those lines as raw text, then
# capture the first 3-4 digits that follow "R:" in a single step.
rating_matches <- ELOratings3 %>%
  str_subset("R:\\s.*\\-") %>%
  str_match("R:\\s*(\\d{3,4})")

# Column 1 of the match matrix is the whole match; column 2 is the captured
# pre-tournament rating. Values are kept as character strings.
Pre_rating <- rating_matches[, 2]
print(Pre_rating)
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"

Extracting each player's average opponent pre-rating: I am still having difficulty working out the best way to use the join functions.

<!-- TODO: the chunk that computes each player's average opponent pre-rating is not implemented yet. -->


Build a data frame with the extracted data, excluding the opponents' average pre-rating.


```r
# Assemble the per-player vectors into one table, one row per player; the
# column names are spelled out to match the vector names.
RatingsFinal <- data.frame(
  Player = Player,
  State = State,
  Total_points = Total_points,
  Pre_rating = Pre_rating
)
RatingsFinal
##                        Player State Total_points Pre_rating
## 1                    GARY HUA   ON           6.0       1794
## 2             DAKSHESH DARURI   MI           6.0       1553
## 3                ADITYA BAJAJ   MI           6.0       1384
## 4         PATRICK H SCHILLING   MI           5.5       1716
## 5                  HANSHI ZUO   MI           5.5       1655
## 6                 HANSEN SONG   OH           5.0       1686
## 7           GARY DEE SWATHELL   MI           5.0       1649
## 8            EZEKIEL HOUGHTON   MI           5.0       1641
## 9                 STEFANO LEE   ON           5.0       1411
## 10                  ANVIT RAO   MI           5.0       1365
## 11   CAMERON WILLIAM MC LEMAN   MI           4.5       1712
## 12             KENNETH J TACK   MI           4.5       1663
## 13          TORRANCE HENRY JR   MI           4.5       1666
## 14               BRADLEY SHAW   MI           4.5       1610
## 15     ZACHARY JAMES HOUGHTON   MI           4.5       1220
## 16               MIKE NIKITIN   MI           4.0       1604
## 17         RONALD GRZEGORCZYK   MI           4.0       1629
## 18              DAVID SUNDEEN   MI           4.0       1600
## 19               DIPANKAR ROY   MI           4.0       1564
## 20                JASON ZHENG   MI           4.0       1595
## 21              DINH DANG BUI   ON           4.0       1563
## 22           EUGENE L MCCLURE   MI           4.0       1555
## 23                   ALAN BUI   ON           4.0       1363
## 24          MICHAEL R ALDRICH   MI           4.0       1229
## 25           LOREN SCHWIEBERT   MI           3.5       1745
## 26                    MAX ZHU   ON           3.5       1579
## 27             GAURAV GIDWANI   MI           3.5       1552
## 28 SOFIA ADINA STANESCU-BELLU   MI           3.5       1507
## 29           CHIEDOZIE OKORIE   MI           3.5       1602
## 30         GEORGE AVERY JONES   ON           3.5       1522
## 31               RISHI SHETTY   MI           3.5       1494
## 32      JOSHUA PHILIP MATHEWS   ON           3.5       1441
## 33                    JADE GE   MI           3.5       1449
## 34     MICHAEL JEFFERY THOMAS   MI           3.5       1399
## 35           JOSHUA DAVID LEE   MI           3.5       1438
## 36              SIDDHARTH JHA   MI           3.5       1355
## 37       AMIYATOSH PWNANANDAM   MI           3.5        980
## 38                  BRIAN LIU   MI           3.0       1423
## 39              JOEL R HENDON   MI           3.0       1436
## 40               FOREST ZHANG   MI           3.0       1348
## 41        KYLE WILLIAM MURPHY   MI           3.0       1403
## 42                   JARED GE   MI           3.0       1332
## 43          ROBERT GLEN VASEY   MI           3.0       1283
## 44         JUSTIN D SCHILLING   MI           3.0       1199
## 45                  DEREK YAN   MI           3.0       1242
## 46   JACOB ALEXANDER LAVALLEY   MI           3.0        377
## 47                ERIC WRIGHT   MI           2.5       1362
## 48               DANIEL KHAIN   MI           2.5       1382
## 49           MICHAEL J MARTIN   MI           2.5       1291
## 50                 SHIVAM JHA   MI           2.5       1056
## 51             TEJAS AYYAGARI   MI           2.5       1011
## 52                  ETHAN GUO   MI           2.5        935
## 53              JOSE C YBARRA   MI           2.0       1393
## 54                LARRY HODGE   MI           2.0       1270
## 55                  ALEX KONG   MI           2.0       1186
## 56               MARISA RICCI   MI           2.0       1153
## 57                 MICHAEL LU   MI           2.0       1092
## 58               VIRAJ MOHILE   MI           2.0        917
## 59          SEAN M MC CORMICK   MI           2.0        853
## 60                 JULIA SHEN   MI           1.5        967
## 61              JEZZEL FARKAS   ON           1.5        955
## 62              ASHWIN BALAJI   MI           1.0       1530
## 63       THOMAS JOSEPH HOSMER   MI           1.0       1175
## 64                     BEN LI   MI           1.0       1163

Converting into a CSV

# Export the assembled table to the working directory without the row-number
# column.
write.csv(
  x = RatingsFinal,
  file = "ELO_Ratings.csv",
  row.names = FALSE
)