Project 1

by Catherine Cho

Reading in the tournament text file from GitHub.

library(readr)
# Raw GitHub URL of the tournament text file.
urlfile <- "https://raw.githubusercontent.com/catcho1632/607-Project-1/main/tournamentinfo.txt"
# Read the file contents into ELO as a string; the extraction steps below
# run their regular expressions against this variable.
ELO <- read_file(url(urlfile))

Extracting the strings of interest from ELO (the contents of the text file) and creating a character vector for each. The strings are extracted with the stringr package.

library(stringr)
# Player names: an upper-case word followed by one or more further upper-case
# words, each joined by a single whitespace character or a hyphen. One
# open-ended pattern is used instead of enumerating name shapes, because the
# enumerated version stopped after three words and cut off four-word names
# whose second word is not a single initial.
w <- str_extract_all(
  ELO,
  "[A-Z]+(?:(?:\\s|-)[A-Z]+)+",
  simplify = TRUE
)
# simplify = TRUE returns a character matrix; flatten it to a plain vector.
# (This replaces a strsplit() call whose pattern had an unbalanced ")" and
# did not actually split anything.)
names_unlist <- as.vector(w)
# The first match is dropped because it is not a player name -- presumably the
# all-caps "USCF ID" token in the header row; verify against the input file.
names <- names_unlist[-1]
# Player state: every state code in the file is two capital letters preceded
# by at least two whitespace characters. Grab those matches first, then keep
# only the two letters.
x <- str_extract_all(
  string = ELO,
  pattern = "\\s\\s[A-Z]{2}",
  simplify = TRUE
)
state <- unlist(str_extract(string = x, pattern = "[A-Z][A-Z]"))
# Total points: the only values in the file shaped digit, decimal point, digit.
# The result is kept as character strings (e.g. "6.0").
y <- str_extract_all(
  string = ELO,
  pattern = "\\d\\.\\d",
  simplify = TRUE
)
points <- unlist(str_extract(string = y, pattern = "\\d\\.\\d"))
# Pre-rating: match the text that starts at "R:" and runs up to a "-", then
# take the first run of at least three digits inside that match. The digits
# come back as strings, so they are converted to numeric for later arithmetic
# and for use as lookup values.
z <- str_extract_all(
  string = ELO,
  pattern = "[R]\\:\\s.+\\-",
  simplify = TRUE
)
pre_rating <- as.numeric(unlist(str_extract(string = z, pattern = "\\d\\d\\d+")))

This section extracts the opponents that each player faced in every round.

# The file lists each opponent's pair number per round, and "|" is the only
# consistent delimiter. o1 collects every "|" together with what directly
# precedes it: either a run of digits or a single whitespace character.
o1 <- str_extract_all(
  string = ELO,
  pattern = "(\\d+\\|)|(\\s\\|)",
  simplify = TRUE
)
# o2 drops the first 15 matches, i.e. everything that precedes player 1's
# round-1 opponent.
o2 <- o1[-seq_len(15)]
# o3 keeps only the digits; entries with no digits become NA, which lets the
# later average skip them via na.rm = TRUE.
o3 <- unlist(str_extract(string = o2, pattern = "\\d+"))

# Opponent pair numbers for rounds 1-7. Within o3 the entries belonging to one
# player are spaced 20 apart, so round k's opponents are every 20th element
# starting at position k, listed in player order. The strings are converted to
# double so they can be used as numeric indices.
opp_in_round <- function(round) {
  as.double(o3[seq(round, length(o3), 20)])
}
round1_opp <- opp_in_round(1)
round2_opp <- opp_in_round(2)
round3_opp <- opp_in_round(3)
round4_opp <- opp_in_round(4)
round5_opp <- opp_in_round(5)
round6_opp <- opp_in_round(6)
round7_opp <- opp_in_round(7)

# The vectors above hold opponent pair numbers; indexing pre_rating with them
# yields the pre-rating associated with each opponent. An NA opponent (no
# pair number recorded for that round) yields an NA rating.
rating_of <- function(opponent) {
  pre_rating[opponent]
}
round1_pre_r <- rating_of(round1_opp)
round2_pre_r <- rating_of(round2_opp)
round3_pre_r <- rating_of(round3_opp)
round4_pre_r <- rating_of(round4_opp)
round5_pre_r <- rating_of(round5_opp)
round6_pre_r <- rating_of(round6_opp)
round7_pre_r <- rating_of(round7_opp)

Calculating the average pre-tournament chess rating of each player's opponents.

library(matrixStats)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# ELO_rating collects the results generated so far: one row per player with
# pair number, name, state, total points, pre-rating, and the pre-rating of
# the opponent faced in each of rounds 1-7 (NA where no opponent was recorded).
# Pair numbers are derived from the number of extracted names rather than a
# hard-coded 1:64, so the frame still builds if the player count changes.
pair_num <- seq_along(names)
ELO_rating <- data.frame(
  pair_num, names, state, points, pre_rating,
  round1_pre_r, round2_pre_r, round3_pre_r, round4_pre_r,
  round5_pre_r, round6_pre_r, round7_pre_r
)
ELO_rating
##    pair_num                      names state points pre_rating round1_pre_r
## 1         1                   GARY HUA    ON    6.0       1794         1436
## 2         2            DAKSHESH DARURI    MI    6.0       1553         1175
## 3         3               ADITYA BAJAJ    MI    6.0       1384         1641
## 4         4        PATRICK H SCHILLING    MI    5.5       1716         1363
## 5         5                 HANSHI ZUO    MI    5.5       1655         1242
## 6         6                HANSEN SONG    OH    5.0       1686         1399
## 7         7          GARY DEE SWATHELL    MI    5.0       1649         1092
## 8         8           EZEKIEL HOUGHTON    MI    5.0       1641         1384
## 9         9                STEFANO LEE    ON    5.0       1411         1745
## 10       10                  ANVIT RAO    MI    5.0       1365         1604
## 11       11         CAMERON WILLIAM MC    MI    4.5       1712         1423
## 12       12             KENNETH J TACK    MI    4.5       1663         1332
## 13       13          TORRANCE HENRY JR    MI    4.5       1666         1355
## 14       14               BRADLEY SHAW    MI    4.5       1610         1270
## 15       15     ZACHARY JAMES HOUGHTON    MI    4.5       1220         1564
## 16       16               MIKE NIKITIN    MI    4.0       1604         1365
## 17       17         RONALD GRZEGORCZYK    MI    4.0       1629         1382
## 18       18              DAVID SUNDEEN    MI    4.0       1600         1362
## 19       19               DIPANKAR ROY    MI    4.0       1564         1220
## 20       20                JASON ZHENG    MI    4.0       1595         1348
## 21       21              DINH DANG BUI    ON    4.0       1563         1283
## 22       22           EUGENE L MCCLURE    MI    4.0       1555         1163
## 23       23                   ALAN BUI    ON    4.0       1363         1716
## 24       24          MICHAEL R ALDRICH    MI    4.0       1229         1507
## 25       25           LOREN SCHWIEBERT    MI    3.5       1745         1411
## 26       26                    MAX ZHU    ON    3.5       1579         1291
## 27       27             GAURAV GIDWANI    MI    3.5       1552         1011
## 28       28 SOFIA ADINA STANESCU-BELLU    MI    3.5       1507         1229
## 29       29           CHIEDOZIE OKORIE    MI    3.5       1602         1056
## 30       30         GEORGE AVERY JONES    ON    3.5       1522          935
## 31       31               RISHI SHETTY    MI    3.5       1494          917
## 32       32      JOSHUA PHILIP MATHEWS    ON    3.5       1441          955
## 33       33                    JADE GE    MI    3.5       1449          967
## 34       34     MICHAEL JEFFERY THOMAS    MI    3.5       1399         1686
## 35       35           JOSHUA DAVID LEE    MI    3.5       1438          377
## 36       36              SIDDHARTH JHA    MI    3.5       1355         1666
## 37       37       AMIYATOSH PWNANANDAM    MI    3.5        980           NA
## 38       38                  BRIAN LIU    MI    3.0       1423         1712
## 39       39              JOEL R HENDON    MI    3.0       1436         1794
## 40       40               FOREST ZHANG    MI    3.0       1348         1595
## 41       41        KYLE WILLIAM MURPHY    MI    3.0       1403          853
## 42       42                   JARED GE    MI    3.0       1332         1663
## 43       43          ROBERT GLEN VASEY    MI    3.0       1283         1563
## 44       44         JUSTIN D SCHILLING    MI    3.0       1199           NA
## 45       45                  DEREK YAN    MI    3.0       1242         1655
## 46       46   JACOB ALEXANDER LAVALLEY    MI    3.0        377         1438
## 47       47                ERIC WRIGHT    MI    2.5       1362         1600
## 48       48               DANIEL KHAIN    MI    2.5       1382         1629
## 49       49           MICHAEL J MARTIN    MI    2.5       1291         1579
## 50       50                 SHIVAM JHA    MI    2.5       1056         1602
## 51       51             TEJAS AYYAGARI    MI    2.5       1011         1552
## 52       52                  ETHAN GUO    MI    2.5        935         1522
## 53       53              JOSE C YBARRA    MI    2.0       1393           NA
## 54       54                LARRY HODGE    MI    2.0       1270         1610
## 55       55                  ALEX KONG    MI    2.0       1186         1530
## 56       56               MARISA RICCI    MI    2.0       1153           NA
## 57       57                 MICHAEL LU    MI    2.0       1092         1649
## 58       58               VIRAJ MOHILE    MI    2.0        917         1494
## 59       59          SEAN M MC CORMICK    MI    2.0        853         1403
## 60       60                 JULIA SHEN    MI    1.5        967         1449
## 61       61              JEZZEL FARKAS    ON    1.5        955         1441
## 62       62              ASHWIN BALAJI    MI    1.0       1530         1186
## 63       63       THOMAS JOSEPH HOSMER    MI    1.0       1175         1553
## 64       64                     BEN LI    MI    1.0       1163         1555
##    round2_pre_r round3_pre_r round4_pre_r round5_pre_r round6_pre_r
## 1          1563         1600         1610         1649         1663
## 2           917         1716         1629         1604         1595
## 3           955         1745         1563         1712         1666
## 4          1507         1553         1579         1655         1564
## 5           980         1663         1666         1716         1610
## 6          1602         1712         1438         1365         1552
## 7           377         1666         1712         1794         1411
## 8          1441         1610         1411         1362         1507
## 9          1600          853         1641         1579         1649
## 10         1564         1186         1494         1686         1745
## 11         1153         1686         1649         1384         1399
## 12         1449         1655         1423           NA         1794
## 13         1552         1649         1655         1449         1384
## 14         1199         1641         1794         1552         1655
## 15         1604         1522         1555         1270         1449
## 16         1220           NA         1436         1553         1355
## 17         1403         1579         1553         1363         1555
## 18         1411         1794         1441         1564         1423
## 19         1365          935         1507         1600         1716
## 20         1291         1363         1403         1507         1553
## 21         1794         1362         1384         1348         1436
## 22          935         1507         1220           NA         1629
## 23         1283         1595          917         1629          980
## 24         1362         1283         1745          967         1199
## 25         1393         1384         1229         1399         1365
## 26         1348         1629         1716         1411         1441
## 27         1666          377          980         1610         1686
## 28         1716         1555         1564         1595         1641
## 29         1686         1423         1399          935         1382
## 30         1163         1220         1186         1494          955
## 31         1186         1163         1365         1522         1056
## 32         1641         1199         1600         1011         1579
## 33         1663         1056         1355         1666         1220
## 34          967          980         1602         1745         1712
## 35         1423         1153         1686         1092          935
## 36         1092         1011         1449           NA         1604
## 37         1655         1399         1552           NA         1363
## 38         1438         1602         1663           NA         1600
## 39         1270         1348         1604         1199         1563
## 40         1579         1436          853         1563         1153
## 41         1629          917         1595           NA           NA
## 42         1056         1092          967          955         1163
## 43         1363         1229         1175          853          377
## 44         1610         1441         1393         1436         1229
## 45         1011          967         1153         1175         1186
## 46         1649         1552         1056         1163         1283
## 47         1229         1563          955         1641         1011
## 48         1175           NA          935           NA         1602
## 49         1595         1175         1163          917           NA
## 50         1332         1449          377           NA         1494
## 51         1242         1355         1092         1441         1362
## 52         1555         1564         1382         1602         1438
## 53         1745           NA         1199           NA         1092
## 54         1436          955           NA         1220          853
## 55         1494         1365         1522           NA         1242
## 56         1712         1438         1242           NA         1348
## 57         1355         1332         1011         1438         1393
## 58         1553         1403         1363         1291           NA
## 59           NA         1411         1348         1283         1270
## 60         1399         1242         1332         1229           NA
## 61         1384         1270         1362         1332         1522
## 62           NA           NA           NA           NA           NA
## 63         1382         1291         1283         1242           NA
## 64         1522         1494         1291          377         1332
##    round7_pre_r
## 1          1716
## 2          1649
## 3          1663
## 4          1794
## 5          1629
## 6          1563
## 7          1553
## 8          1564
## 9          1595
## 10         1600
## 11         1579
## 12         1384
## 13         1441
## 14         1494
## 15         1423
## 16           NA
## 17         1655
## 18         1365
## 19         1641
## 20         1411
## 21         1686
## 22         1348
## 23          377
## 24         1436
## 25         1362
## 26         1712
## 27           NA
## 28         1355
## 29           NA
## 30         1056
## 31         1610
## 32         1666
## 33         1011
## 34          935
## 35         1382
## 36         1507
## 37          955
## 38         1220
## 39         1229
## 40         1555
## 41           NA
## 42         1153
## 43         1186
## 44          853
## 45          917
## 46         1363
## 47         1745
## 48         1438
## 49           NA
## 50         1522
## 51         1449
## 52         1399
## 53           NA
## 54         1163
## 55         1283
## 56         1332
## 57           NA
## 58         1242
## 59         1199
## 60           NA
## 61          980
## 62           NA
## 63           NA
## 64         1270
# Row-wise mean of the opponents' pre-ratings, added to ELO_rating as column
# Opp_pre_mean; the result is ELO_rating_Mean. The round columns are selected
# by name rather than by position (6:12), and rowMeans() replaces apply() on a
# data frame. na.rm = TRUE skips rounds with no recorded opponent.
round_cols <- paste0("round", 1:7, "_pre_r")
ELO_rating_Mean <- ELO_rating %>%
  mutate(Opp_pre_mean = rowMeans(.[round_cols], na.rm = TRUE))

# Opp_Pre_Mean is a one-column data frame (column Opp_pre_mean) holding each
# player's mean opponent pre-rating for rounds 1-7, rounded to a whole number.
Opp_Pre_Mean <- round(subset(ELO_rating_Mean, select = c(Opp_pre_mean)))

# Final data frame: pair number, name, state, total points, pre-rating, and
# average opponent pre-rating. Opp_Pre_Mean is itself a one-column data frame,
# so it contributes its own column name (Opp_pre_mean).
# Pair numbers follow the number of extracted names instead of a hard-coded
# 1:64, so the frame still builds if the player count changes.
pair_num <- seq_along(names)
ELO_final <- data.frame(pair_num, names, state, points, pre_rating, Opp_Pre_Mean)
ELO_final
##    pair_num                      names state points pre_rating Opp_pre_mean
## 1         1                   GARY HUA    ON    6.0       1794         1605
## 2         2            DAKSHESH DARURI    MI    6.0       1553         1469
## 3         3               ADITYA BAJAJ    MI    6.0       1384         1564
## 4         4        PATRICK H SCHILLING    MI    5.5       1716         1574
## 5         5                 HANSHI ZUO    MI    5.5       1655         1501
## 6         6                HANSEN SONG    OH    5.0       1686         1519
## 7         7          GARY DEE SWATHELL    MI    5.0       1649         1372
## 8         8           EZEKIEL HOUGHTON    MI    5.0       1641         1468
## 9         9                STEFANO LEE    ON    5.0       1411         1523
## 10       10                  ANVIT RAO    MI    5.0       1365         1554
## 11       11         CAMERON WILLIAM MC    MI    4.5       1712         1468
## 12       12             KENNETH J TACK    MI    4.5       1663         1506
## 13       13          TORRANCE HENRY JR    MI    4.5       1666         1498
## 14       14               BRADLEY SHAW    MI    4.5       1610         1515
## 15       15     ZACHARY JAMES HOUGHTON    MI    4.5       1220         1484
## 16       16               MIKE NIKITIN    MI    4.0       1604         1386
## 17       17         RONALD GRZEGORCZYK    MI    4.0       1629         1499
## 18       18              DAVID SUNDEEN    MI    4.0       1600         1480
## 19       19               DIPANKAR ROY    MI    4.0       1564         1426
## 20       20                JASON ZHENG    MI    4.0       1595         1411
## 21       21              DINH DANG BUI    ON    4.0       1563         1470
## 22       22           EUGENE L MCCLURE    MI    4.0       1555         1300
## 23       23                   ALAN BUI    ON    4.0       1363         1214
## 24       24          MICHAEL R ALDRICH    MI    4.0       1229         1357
## 25       25           LOREN SCHWIEBERT    MI    3.5       1745         1363
## 26       26                    MAX ZHU    ON    3.5       1579         1507
## 27       27             GAURAV GIDWANI    MI    3.5       1552         1222
## 28       28 SOFIA ADINA STANESCU-BELLU    MI    3.5       1507         1522
## 29       29           CHIEDOZIE OKORIE    MI    3.5       1602         1314
## 30       30         GEORGE AVERY JONES    ON    3.5       1522         1144
## 31       31               RISHI SHETTY    MI    3.5       1494         1260
## 32       32      JOSHUA PHILIP MATHEWS    ON    3.5       1441         1379
## 33       33                    JADE GE    MI    3.5       1449         1277
## 34       34     MICHAEL JEFFERY THOMAS    MI    3.5       1399         1375
## 35       35           JOSHUA DAVID LEE    MI    3.5       1438         1150
## 36       36              SIDDHARTH JHA    MI    3.5       1355         1388
## 37       37       AMIYATOSH PWNANANDAM    MI    3.5        980         1385
## 38       38                  BRIAN LIU    MI    3.0       1423         1539
## 39       39              JOEL R HENDON    MI    3.0       1436         1430
## 40       40               FOREST ZHANG    MI    3.0       1348         1391
## 41       41        KYLE WILLIAM MURPHY    MI    3.0       1403         1248
## 42       42                   JARED GE    MI    3.0       1332         1150
## 43       43          ROBERT GLEN VASEY    MI    3.0       1283         1107
## 44       44         JUSTIN D SCHILLING    MI    3.0       1199         1327
## 45       45                  DEREK YAN    MI    3.0       1242         1152
## 46       46   JACOB ALEXANDER LAVALLEY    MI    3.0        377         1358
## 47       47                ERIC WRIGHT    MI    2.5       1362         1392
## 48       48               DANIEL KHAIN    MI    2.5       1382         1356
## 49       49           MICHAEL J MARTIN    MI    2.5       1291         1286
## 50       50                 SHIVAM JHA    MI    2.5       1056         1296
## 51       51             TEJAS AYYAGARI    MI    2.5       1011         1356
## 52       52                  ETHAN GUO    MI    2.5        935         1495
## 53       53              JOSE C YBARRA    MI    2.0       1393         1345
## 54       54                LARRY HODGE    MI    2.0       1270         1206
## 55       55                  ALEX KONG    MI    2.0       1186         1406
## 56       56               MARISA RICCI    MI    2.0       1153         1414
## 57       57                 MICHAEL LU    MI    2.0       1092         1363
## 58       58               VIRAJ MOHILE    MI    2.0        917         1391
## 59       59          SEAN M MC CORMICK    MI    2.0        853         1319
## 60       60                 JULIA SHEN    MI    1.5        967         1330
## 61       61              JEZZEL FARKAS    ON    1.5        955         1327
## 62       62              ASHWIN BALAJI    MI    1.0       1530         1186
## 63       63       THOMAS JOSEPH HOSMER    MI    1.0       1175         1350
## 64       64                     BEN LI    MI    1.0       1163         1263