Project1-ChessRankings

#Standard
file <-("https://raw.githubusercontent.com/d-ev-craig/DATA607/main/Projects/Project1%20-%20Chess%20Rankings/chessRankings.txt")


data<-readLines(file)

## Warning in readLines(file): incomplete final line found on
## 'https://raw.githubusercontent.com/d-ev-craig/DATA607/main/Projects/Project1%20-%20Chess%20Rankings/chessRankings.txt'

garbage<-seq(1,196, by = 3)
data<-data[-(garbage)]
data<-data.frame(data)

data

##                                                                                           data
## 1    Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
## 2    Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
## 3        1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 4       ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 5        2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## 6       MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |
## 7        3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|
## 8       MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 9        4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|
## 10      MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |
## 11       5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|
## 12      MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |
## 13       6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|
## 14      OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |
## 15       7 | GARY DEE SWATHELL               |5.0  |W  57|W  46|W  13|W  11|L   1|W   9|L   2|
## 16      MI | 11146376 / R: 1649   ->1673     |N:3  |W    |B    |W    |B    |B    |W    |W    |
## 17       8 | EZEKIEL HOUGHTON                |5.0  |W   3|W  32|L  14|L   9|W  47|W  28|W  19|
## 18      MI | 15142253 / R: 1641P17->1657P24  |N:3  |B    |W    |B    |W    |B    |W    |W    |
## 19       9 | STEFANO LEE                     |5.0  |W  25|L  18|W  59|W   8|W  26|L   7|W  20|
## 20      ON | 14954524 / R: 1411   ->1564     |N:2  |W    |B    |W    |B    |W    |B    |B    |
## 21      10 | ANVIT RAO                       |5.0  |D  16|L  19|W  55|W  31|D   6|W  25|W  18|
## 22      MI | 14150362 / R: 1365   ->1544     |N:3  |W    |W    |B    |B    |W    |B    |W    |
## 23      11 | CAMERON WILLIAM MC LEMAN        |4.5  |D  38|W  56|W   6|L   7|L   3|W  34|W  26|
## 24      MI | 12581589 / R: 1712   ->1696     |N:3  |B    |W    |B    |W    |B    |W    |B    |
## 25      12 | KENNETH J TACK                  |4.5  |W  42|W  33|D   5|W  38|H    |D   1|L   3|
## 26      MI | 12681257 / R: 1663   ->1670     |N:3  |W    |B    |W    |B    |     |W    |B    |
## 27      13 | TORRANCE HENRY JR               |4.5  |W  36|W  27|L   7|D   5|W  33|L   3|W  32|
## 28      MI | 15082995 / R: 1666   ->1662     |N:3  |B    |W    |B    |B    |W    |W    |B    |
## 29      14 | BRADLEY SHAW                    |4.5  |W  54|W  44|W   8|L   1|D  27|L   5|W  31|
## 30      MI | 10131499 / R: 1610   ->1618     |N:3  |W    |B    |W    |W    |B    |B    |W    |
## 31      15 | ZACHARY JAMES HOUGHTON          |4.5  |D  19|L  16|W  30|L  22|W  54|W  33|W  38|
## 32      MI | 15619130 / R: 1220P13->1416P20  |N:3  |B    |B    |W    |W    |B    |B    |W    |
## 33      16 | MIKE NIKITIN                    |4.0  |D  10|W  15|H    |W  39|L   2|W  36|U    |
## 34      MI | 10295068 / R: 1604   ->1613     |N:3  |B    |W    |     |B    |W    |B    |     |
## 35      17 | RONALD GRZEGORCZYK              |4.0  |W  48|W  41|L  26|L   2|W  23|W  22|L   5|
## 36      MI | 10297702 / R: 1629   ->1610     |N:3  |W    |B    |W    |B    |W    |B    |W    |
## 37      18 | DAVID SUNDEEN                   |4.0  |W  47|W   9|L   1|W  32|L  19|W  38|L  10|
## 38      MI | 11342094 / R: 1600   ->1600     |N:3  |B    |W    |B    |W    |B    |W    |B    |
## 39      19 | DIPANKAR ROY                    |4.0  |D  15|W  10|W  52|D  28|W  18|L   4|L   8|
## 40      MI | 14862333 / R: 1564   ->1570     |N:3  |W    |B    |W    |B    |W    |W    |B    |
## 41      20 | JASON ZHENG                     |4.0  |L  40|W  49|W  23|W  41|W  28|L   2|L   9|
## 42      MI | 14529060 / R: 1595   ->1569     |N:4  |W    |B    |W    |B    |W    |B    |W    |
## 43      21 | DINH DANG BUI                   |4.0  |W  43|L   1|W  47|L   3|W  40|W  39|L   6|
## 44      ON | 15495066 / R: 1563P22->1562     |N:3  |B    |W    |B    |W    |W    |B    |W    |
## 45      22 | EUGENE L MCCLURE                |4.0  |W  64|D  52|L  28|W  15|H    |L  17|W  40|
## 46      MI | 12405534 / R: 1555   ->1529     |N:4  |W    |B    |W    |B    |     |W    |B    |
## 47      23 | ALAN BUI                        |4.0  |L   4|W  43|L  20|W  58|L  17|W  37|W  46|
## 48      ON | 15030142 / R: 1363   ->1371     |     |B    |W    |B    |W    |B    |W    |B    |
## 49      24 | MICHAEL R ALDRICH               |4.0  |L  28|L  47|W  43|L  25|W  60|W  44|W  39|
## 50      MI | 13469010 / R: 1229   ->1300     |N:4  |B    |W    |B    |B    |W    |W    |B    |
## 51      25 | LOREN SCHWIEBERT                |3.5  |L   9|W  53|L   3|W  24|D  34|L  10|W  47|
## 52      MI | 12486656 / R: 1745   ->1681     |N:4  |B    |W    |B    |W    |B    |W    |B    |
## 53      26 | MAX ZHU                         |3.5  |W  49|W  40|W  17|L   4|L   9|D  32|L  11|
## 54      ON | 15131520 / R: 1579   ->1564     |N:4  |B    |W    |B    |W    |B    |W    |W    |
## 55      27 | GAURAV GIDWANI                  |3.5  |W  51|L  13|W  46|W  37|D  14|L   6|U    |
## 56      MI | 14476567 / R: 1552   ->1539     |N:4  |W    |B    |W    |B    |W    |B    |     |
## 57      28 | SOFIA ADINA STANESCU-BELLU      |3.5  |W  24|D   4|W  22|D  19|L  20|L   8|D  36|
## 58      MI | 14882954 / R: 1507   ->1513     |N:3  |W    |W    |B    |W    |B    |B    |W    |
## 59      29 | CHIEDOZIE OKORIE                |3.5  |W  50|D   6|L  38|L  34|W  52|W  48|U    |
## 60      MI | 15323285 / R: 1602P6 ->1508P12  |N:4  |B    |W    |B    |W    |W    |B    |     |
## 61      30 | GEORGE AVERY JONES              |3.5  |L  52|D  64|L  15|W  55|L  31|W  61|W  50|
## 62      ON | 12577178 / R: 1522   ->1444     |     |W    |B    |B    |W    |W    |B    |B    |
## 63      31 | RISHI SHETTY                    |3.5  |L  58|D  55|W  64|L  10|W  30|W  50|L  14|
## 64      MI | 15131618 / R: 1494   ->1444     |     |B    |W    |B    |W    |B    |W    |B    |
## 65      32 | JOSHUA PHILIP MATHEWS           |3.5  |W  61|L   8|W  44|L  18|W  51|D  26|L  13|
## 66      ON | 14073750 / R: 1441   ->1433     |N:4  |W    |B    |W    |B    |W    |B    |W    |
## 67      33 | JADE GE                         |3.5  |W  60|L  12|W  50|D  36|L  13|L  15|W  51|
## 68      MI | 14691842 / R: 1449   ->1421     |     |B    |W    |B    |W    |B    |W    |B    |
## 69      34 | MICHAEL JEFFERY THOMAS          |3.5  |L   6|W  60|L  37|W  29|D  25|L  11|W  52|
## 70      MI | 15051807 / R: 1399   ->1400     |     |B    |W    |B    |B    |W    |B    |W    |
## 71      35 | JOSHUA DAVID LEE                |3.5  |L  46|L  38|W  56|L   6|W  57|D  52|W  48|
## 72      MI | 14601397 / R: 1438   ->1392     |     |W    |W    |B    |W    |B    |B    |W    |
## 73      36 | SIDDHARTH JHA                   |3.5  |L  13|W  57|W  51|D  33|H    |L  16|D  28|
## 74      MI | 14773163 / R: 1355   ->1367     |N:4  |W    |B    |W    |B    |     |W    |B    |
## 75      37 | AMIYATOSH PWNANANDAM            |3.5  |B    |L   5|W  34|L  27|H    |L  23|W  61|
## 76      MI | 15489571 / R:  980P12->1077P17  |     |     |B    |W    |W    |     |B    |W    |
## 77      38 | BRIAN LIU                       |3.0  |D  11|W  35|W  29|L  12|H    |L  18|L  15|
## 78      MI | 15108523 / R: 1423   ->1439     |N:4  |W    |B    |W    |W    |     |B    |B    |
## 79      39 | JOEL R HENDON                   |3.0  |L   1|W  54|W  40|L  16|W  44|L  21|L  24|
## 80      MI | 12923035 / R: 1436P23->1413     |N:4  |B    |W    |B    |W    |B    |W    |W    |
## 81      40 | FOREST ZHANG                    |3.0  |W  20|L  26|L  39|W  59|L  21|W  56|L  22|
## 82      MI | 14892710 / R: 1348   ->1346     |     |B    |B    |W    |W    |B    |W    |W    |
## 83      41 | KYLE WILLIAM MURPHY             |3.0  |W  59|L  17|W  58|L  20|X    |U    |U    |
## 84      MI | 15761443 / R: 1403P5 ->1341P9   |     |B    |W    |B    |W    |     |     |     |
## 85      42 | JARED GE                        |3.0  |L  12|L  50|L  57|D  60|D  61|W  64|W  56|
## 86      MI | 14462326 / R: 1332   ->1256     |     |B    |W    |B    |B    |W    |W    |B    |
## 87      43 | ROBERT GLEN VASEY               |3.0  |L  21|L  23|L  24|W  63|W  59|L  46|W  55|
## 88      MI | 14101068 / R: 1283   ->1244     |     |W    |B    |W    |W    |B    |B    |W    |
## 89      44 | JUSTIN D SCHILLING              |3.0  |B    |L  14|L  32|W  53|L  39|L  24|W  59|
## 90      MI | 15323504 / R: 1199   ->1199     |     |     |W    |B    |B    |W    |B    |W    |
## 91      45 | DEREK YAN                       |3.0  |L   5|L  51|D  60|L  56|W  63|D  55|W  58|
## 92      MI | 15372807 / R: 1242   ->1191     |     |W    |B    |W    |B    |W    |B    |W    |
## 93      46 | JACOB ALEXANDER LAVALLEY        |3.0  |W  35|L   7|L  27|L  50|W  64|W  43|L  23|
## 94      MI | 15490981 / R:  377P3 ->1076P10  |     |B    |W    |B    |W    |B    |W    |W    |
## 95      47 | ERIC WRIGHT                     |2.5  |L  18|W  24|L  21|W  61|L   8|D  51|L  25|
## 96      MI | 12533115 / R: 1362   ->1341     |     |W    |B    |W    |B    |W    |B    |W    |
## 97      48 | DANIEL KHAIN                    |2.5  |L  17|W  63|H    |D  52|H    |L  29|L  35|
## 98      MI | 14369165 / R: 1382   ->1335     |     |B    |W    |     |B    |     |W    |B    |
## 99      49 | MICHAEL J MARTIN                |2.5  |L  26|L  20|D  63|D  64|W  58|H    |U    |
## 100     MI | 12531685 / R: 1291P12->1259P17  |     |W    |W    |B    |W    |B    |     |     |
## 101     50 | SHIVAM JHA                      |2.5  |L  29|W  42|L  33|W  46|H    |L  31|L  30|
## 102     MI | 14773178 / R: 1056   ->1111     |     |W    |B    |W    |B    |     |B    |W    |
## 103     51 | TEJAS AYYAGARI                  |2.5  |L  27|W  45|L  36|W  57|L  32|D  47|L  33|
## 104     MI | 15205474 / R: 1011   ->1097     |     |B    |W    |B    |W    |B    |W    |W    |
## 105     52 | ETHAN GUO                       |2.5  |W  30|D  22|L  19|D  48|L  29|D  35|L  34|
## 106     MI | 14918803 / R:  935   ->1092     |N:4  |B    |W    |B    |W    |B    |W    |B    |
## 107     53 | JOSE C YBARRA                   |2.0  |H    |L  25|H    |L  44|U    |W  57|U    |
## 108     MI | 12578849 / R: 1393   ->1359     |     |     |B    |     |W    |     |W    |     |
## 109     54 | LARRY HODGE                     |2.0  |L  14|L  39|L  61|B    |L  15|L  59|W  64|
## 110     MI | 12836773 / R: 1270   ->1200     |     |B    |B    |W    |     |W    |B    |W    |
## 111     55 | ALEX KONG                       |2.0  |L  62|D  31|L  10|L  30|B    |D  45|L  43|
## 112     MI | 15412571 / R: 1186   ->1163     |     |W    |B    |W    |B    |     |W    |B    |
## 113     56 | MARISA RICCI                    |2.0  |H    |L  11|L  35|W  45|H    |L  40|L  42|
## 114     MI | 14679887 / R: 1153   ->1140     |     |     |B    |W    |W    |     |B    |W    |
## 115     57 | MICHAEL LU                      |2.0  |L   7|L  36|W  42|L  51|L  35|L  53|B    |
## 116     MI | 15113330 / R: 1092   ->1079     |     |B    |W    |W    |B    |W    |B    |     |
## 117     58 | VIRAJ MOHILE                    |2.0  |W  31|L   2|L  41|L  23|L  49|B    |L  45|
## 118     MI | 14700365 / R:  917   -> 941     |     |W    |B    |W    |B    |W    |     |B    |
## 119     59 | SEAN M MC CORMICK               |2.0  |L  41|B    |L   9|L  40|L  43|W  54|L  44|
## 120     MI | 12841036 / R:  853   -> 878     |     |W    |     |B    |B    |W    |W    |B    |
## 121     60 | JULIA SHEN                      |1.5  |L  33|L  34|D  45|D  42|L  24|H    |U    |
## 122     MI | 14579262 / R:  967   -> 984     |     |W    |B    |B    |W    |B    |     |     |
## 123     61 | JEZZEL FARKAS                   |1.5  |L  32|L   3|W  54|L  47|D  42|L  30|L  37|
## 124     ON | 15771592 / R:  955P11-> 979P18  |     |B    |W    |B    |W    |B    |W    |B    |
## 125     62 | ASHWIN BALAJI                   |1.0  |W  55|U    |U    |U    |U    |U    |U    |
## 126     MI | 15219542 / R: 1530   ->1535     |     |B    |     |     |     |     |     |     |
## 127     63 | THOMAS JOSEPH HOSMER            |1.0  |L   2|L  48|D  49|L  43|L  45|H    |U    |
## 128     MI | 15057092 / R: 1175   ->1125     |     |W    |B    |W    |B    |B    |     |     |
## 129     64 | BEN LI                          |1.0  |L  22|D  30|L  31|D  49|L  46|L  42|L  54|
## 130     MI | 15006561 / R: 1163   ->1112     |     |B    |W    |W    |B    |W    |B    |B    |

dataTibble <- tibble(1:130,data[,1])


names(dataTibble)[2] <- 'chessText'

#Attempts at generalizing this to work on a tournament of any number of rounds of Chess
#Method2
numRounds <- str_count(dataTibble[1,2], 'Round')

#Set the number of times we counted the word 'Round' to a variable and combine it with 'r' to create our column names to separate columns out based on the number of rounds in the chess tournament
roundText <- seq(from=1, to=numRounds,by =1)
roundText <- str_c('r',roundText)
roundText

## [1] "r1" "r2" "r3" "r4" "r5" "r6" "r7"

#Method1
# data4<-separate_wider_delim(dataTibble,cols=chessText,delim="|",names = c('playerID','playerName','Total','r1','r2','r3','r4','r5','r6','r7','blank'))

#Separate string data into columns and using our variable number of Rounds text as names for columns
data4<-separate_wider_delim(dataTibble,cols=chessText,delim="|",names = c('playerID','playerName','Total',roundText,'blank'))


data4

## # A tibble: 130 × 12
##    `1:130` playe…¹ playe…² Total r1    r2    r3    r4    r5    r6    r7    blank
##      <int> <chr>   <chr>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1       1 " Pair… " Play… "Tot… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… " "  
##  2       2 " Num … " USCF… " Pt… "  1… "  2… "  3… "  4… "  5… "  6… "  7… " "  
##  3       3 "    1… " GARY… "6.0… "W  … "W  … "W  … "W  … "W  … "D  … "D  … ""   
##  4       4 "   ON… " 1544… "N:2… "W  … "B  … "W  … "B  … "W  … "B  … "W  … ""   
##  5       5 "    2… " DAKS… "6.0… "W  … "W  … "L  … "W  … "W  … "W  … "W  … ""   
##  6       6 "   MI… " 1459… "N:2… "B  … "W  … "B  … "W  … "B  … "W  … "B  … ""   
##  7       7 "    3… " ADIT… "6.0… "L  … "W  … "W  … "W  … "W  … "W  … "W  … ""   
##  8       8 "   MI… " 1495… "N:2… "W  … "B  … "W  … "B  … "W  … "B  … "W  … ""   
##  9       9 "    4… " PATR… "5.5… "W  … "D  … "W  … "W  … "D  … "W  … "D  … ""   
## 10      10 "   MI… " 1261… "N:2… "W  … "B  … "W  … "B  … "W  … "B  … "B  … ""   
## # … with 120 more rows, and abbreviated variable names ¹playerID, ²playerName

data4

## # A tibble: 130 × 12
##    `1:130` playe…¹ playe…² Total r1    r2    r3    r4    r5    r6    r7    blank
##      <int> <chr>   <chr>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1       1 " Pair… " Play… "Tot… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… " "  
##  2       2 " Num … " USCF… " Pt… "  1… "  2… "  3… "  4… "  5… "  6… "  7… " "  
##  3       3 "    1… " GARY… "6.0… "W  … "W  … "W  … "W  … "W  … "D  … "D  … ""   
##  4       4 "   ON… " 1544… "N:2… "W  … "B  … "W  … "B  … "W  … "B  … "W  … ""   
##  5       5 "    2… " DAKS… "6.0… "W  … "W  … "L  … "W  … "W  … "W  … "W  … ""   
##  6       6 "   MI… " 1459… "N:2… "B  … "W  … "B  … "W  … "B  … "W  … "B  … ""   
##  7       7 "    3… " ADIT… "6.0… "L  … "W  … "W  … "W  … "W  … "W  … "W  … ""   
##  8       8 "   MI… " 1495… "N:2… "W  … "B  … "W  … "B  … "W  … "B  … "W  … ""   
##  9       9 "    4… " PATR… "5.5… "W  … "D  … "W  … "W  … "D  … "W  … "D  … ""   
## 10      10 "   MI… " 1261… "N:2… "W  … "B  … "W  … "B  … "W  … "B  … "B  … ""   
## # … with 120 more rows, and abbreviated variable names ¹playerID, ²playerName

#Removing unnecessary columns
oddRows <- data4 %>% filter(row_number() %% 2 == 1) %>% select(-blank,-'1:130') %>% slice(-1)
evenRows <- data4 %>% filter(row_number() %% 2 == 0) %>% select(-blank, -'1:130')%>%slice(-1)

oddRows

## # A tibble: 64 × 10
##    playerID playerName           Total r1    r2    r3    r4    r5    r6    r7   
##    <chr>    <chr>                <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1 "    1 " " GARY HUA         … "6.0… W  39 W  21 W  18 W  14 W   7 D  12 D   4
##  2 "    2 " " DAKSHESH DARURI  … "6.0… W  63 W  58 L   4 W  17 W  16 W  20 W   7
##  3 "    3 " " ADITYA BAJAJ     … "6.0… L   8 W  61 W  25 W  21 W  11 W  13 W  12
##  4 "    4 " " PATRICK H SCHILLI… "5.5… W  23 D  28 W   2 W  26 D   5 W  19 D   1
##  5 "    5 " " HANSHI ZUO       … "5.5… W  45 W  37 D  12 D  13 D   4 W  14 W  17
##  6 "    6 " " HANSEN SONG      … "5.0… W  34 D  29 L  11 W  35 D  10 W  27 W  21
##  7 "    7 " " GARY DEE SWATHELL… "5.0… W  57 W  46 W  13 W  11 L   1 W   9 L   2
##  8 "    8 " " EZEKIEL HOUGHTON … "5.0… W   3 W  32 L  14 L   9 W  47 W  28 W  19
##  9 "    9 " " STEFANO LEE      … "5.0… W  25 L  18 W  59 W   8 W  26 L   7 W  20
## 10 "   10 " " ANVIT RAO        … "5.0… D  16 L  19 W  55 W  31 D   6 W  25 W  18
## # … with 54 more rows

#Apply str_replace_all to each column of interest and keeping it dynamic in case rounds played aren't always 7
col <- colnames(oddRows[,4:(3+numRounds)])
oddRows[col] <- apply(oddRows[col],MARGIN =2, function(x) str_replace_all(x, "[WLDB] ", ""))

oddRows[col] <- apply(oddRows[col],2, function(x) as.numeric(x))

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

#Our goal here is to grab the player's pre-rating and state. Pre-rating will require some string manipulation
# First we remove the extra tables and rename them to be accurate
evenRows <- evenRows[,c(1,2)]
evenRows <- rename(evenRows,playerState = playerID)


#separate out our playerName column to section out our ratings
evenRows <- separate_wider_delim(evenRows,cols=c(playerName), delim=":", names = c('uscfID','ratings'))

#perform again to split our pre and post ratings from the ratings column we just made
evenRows <- separate_wider_delim(evenRows,cols=c(ratings), delim="->", names = c('preRating','postRating'))

#Here we want to remove any characters that follow the pattern of a letter followed by any character/number that can repeat itself any number of times. I tried my best to future proof this in case ELO's start to get into 5 digits (probably possible in the coming years via AI). If I didn't want to future proof, I would've removed based on position.
evenRows$preRating <- str_replace_all(evenRows$preRating, "([A-Z]|[a-z])[^.]*$","")
evenRows$postRating <- str_replace_all(evenRows$postRating, "([A-Z]|[a-z])[^.]*$","")

#Dropping excess columns
evenRows <- evenRows %>% select(-uscfID,-postRating)

result <- bind_cols(evenRows,oddRows)
result

## # A tibble: 64 × 12
##    playerState preRa…¹ playe…² playe…³ Total    r1    r2    r3    r4    r5    r6
##    <chr>       <chr>   <chr>   <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 "   ON "    " 1794… "    1… " GARY… "6.0…    39    21    18    14     7    12
##  2 "   MI "    " 1553… "    2… " DAKS… "6.0…    63    58     4    17    16    20
##  3 "   MI "    " 1384… "    3… " ADIT… "6.0…     8    61    25    21    11    13
##  4 "   MI "    " 1716… "    4… " PATR… "5.5…    23    28     2    26     5    19
##  5 "   MI "    " 1655… "    5… " HANS… "5.5…    45    37    12    13     4    14
##  6 "   OH "    " 1686… "    6… " HANS… "5.0…    34    29    11    35    10    27
##  7 "   MI "    " 1649… "    7… " GARY… "5.0…    57    46    13    11     1     9
##  8 "   MI "    " 1641" "    8… " EZEK… "5.0…     3    32    14     9    47    28
##  9 "   ON "    " 1411… "    9… " STEF… "5.0…    25    18    59     8    26     7
## 10 "   MI "    " 1365… "   10… " ANVI… "5.0…    16    19    55    31     6    25
## # … with 54 more rows, 1 more variable: r7 <dbl>, and abbreviated variable
## #   names ¹preRating, ²playerID, ³playerName

#Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents


# Below lines are where I tried to do add a column by performing a 'vlookup' in R but across columns with different names, but similar values. The idea was to use r1ID's column's values to lookup the playerID column and select its appropriate preRating. All functions/methods I found required a column with the same name and since I can't have duplicate column names in my df I can't perform it via self join. I also don't want to make a separate lookup table to perform this action because I think it'd be more efficient this way.

# resultCondensed<-select(result, playerID...16,playerName,preRating,postRating,Total...18,r1ID,r2ID,r3ID,r4ID,r5ID,r6ID,r7ID)
# resultAvgOpp <- mutate(resultCondensed,avgOpp = merge(resultCondensed,resultCondensed, by = playerID...16) )

## Duplicating the rdID columns so that there is a column in common to perform the merge on (will test using by.x and by.y later)
lookupResult<- select(result,playerID,preRating)
# lookupResult <-mutate(lookupResult, r1ID=playerID)
# lookupResult<- mutate(lookupResult, r2ID = r1ID)
# lookupResult<- mutate(lookupResult, r3ID = r1ID)
# lookupResult<- mutate(lookupResult, r4ID = r1ID)
# lookupResult<- mutate(lookupResult, r5ID = r1ID)
# lookupResult<- mutate(lookupResult, r6ID = r1ID)
# lookupResult<- mutate(lookupResult, r7ID = r1ID)

#Dropping unnecessary playerID column since it won't be referenced in the later merge
#lookupResult<-lookupResult[,-1]

#Trimming whitespace from column to ensure a merge later down the road doesn't get blocked
lookupResult

## # A tibble: 64 × 2
##    playerID preRating 
##    <chr>    <chr>     
##  1 "    1 " " 1794   "
##  2 "    2 " " 1553   "
##  3 "    3 " " 1384   "
##  4 "    4 " " 1716   "
##  5 "    5 " " 1655   "
##  6 "    6 " " 1686   "
##  7 "    7 " " 1649   "
##  8 "    8 " " 1641"   
##  9 "    9 " " 1411   "
## 10 "   10 " " 1365   "
## # … with 54 more rows

names(lookupResult)[2] <- 'oppRating'
lookupResult$playerID<-trimws(lookupResult$playerID,which='both')
result

## # A tibble: 64 × 12
##    playerState preRa…¹ playe…² playe…³ Total    r1    r2    r3    r4    r5    r6
##    <chr>       <chr>   <chr>   <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 "   ON "    " 1794… "    1… " GARY… "6.0…    39    21    18    14     7    12
##  2 "   MI "    " 1553… "    2… " DAKS… "6.0…    63    58     4    17    16    20
##  3 "   MI "    " 1384… "    3… " ADIT… "6.0…     8    61    25    21    11    13
##  4 "   MI "    " 1716… "    4… " PATR… "5.5…    23    28     2    26     5    19
##  5 "   MI "    " 1655… "    5… " HANS… "5.5…    45    37    12    13     4    14
##  6 "   OH "    " 1686… "    6… " HANS… "5.0…    34    29    11    35    10    27
##  7 "   MI "    " 1649… "    7… " GARY… "5.0…    57    46    13    11     1     9
##  8 "   MI "    " 1641" "    8… " EZEK… "5.0…     3    32    14     9    47    28
##  9 "   ON "    " 1411… "    9… " STEF… "5.0…    25    18    59     8    26     7
## 10 "   MI "    " 1365… "   10… " ANVI… "5.0…    16    19    55    31     6    25
## # … with 54 more rows, 1 more variable: r7 <dbl>, and abbreviated variable
## #   names ¹preRating, ²playerID, ³playerName

lookupResult

## # A tibble: 64 × 2
##    playerID oppRating 
##    <chr>    <chr>     
##  1 1        " 1794   "
##  2 2        " 1553   "
##  3 3        " 1384   "
##  4 4        " 1716   "
##  5 5        " 1655   "
##  6 6        " 1686   "
##  7 7        " 1649   "
##  8 8        " 1641"   
##  9 9        " 1411   "
## 10 10       " 1365   "
## # … with 54 more rows

#Attempts at getting this to be dynamic. The process is as follows, I need to merge my tables to add in the appropriate opponent rating for each round for each player. Afterwards, I need to rename that column before I can perform another lookup for the next round.Recall that our current table has the following for columns: playerState,preRating,playerID,playerName,Total,r1,r2,..
#When I perform the merge for the values in r1.. I add a column called oppRating which needs to be dynamically named so that there aren't conflicts when this process repeats and names for multiple rounds.
#Attempts: 
#1. Using rename(), rename would throw an error stating the '=' was unexpected, I assume this is due to my attempt at dynamically naming it based off the current column in question.
#2. Tried to pass three total vectors as variables into a homebrew function via apply and hoped R would process the first value of each variables' vector of values, and then the 2nd value in the list of variable vectors... etc. I was able to successfully generate the names dynamically based on dataframe dimensions and column names, but had issues getting apply to recognize all my variables. You'll notice that there are some values that are hard coded to test some things. I was running out of time for this.

#My valiant attempts' remains are below

# colResult <- colnames(result[,6:(5+numRounds)])
# xVar <- colnames(result[,6:(5+numRounds)])
# yVar <- str_c(xVar,'oppRating')
# yVar
# work <- merge(result,lookupResult,by.x='r1',by.y='playerID') %>% rename(work,str_c('"r1','oppRating"') ='oppRating'))
# myFun <- function(data,x){
#   merge(data,lookupResult,by.x="r1",by.y='playerID')}

# newResult <- 
#   apply(result[colResult], MARGIN =2,FUN= myFun,x=xVar,y=yVar)
#   
#   result %>% merge(lookupResult, by.x = )


#newResult

newResult<- result %>% merge(lookupResult, by.x='r1',by.y='playerID') %>% rename(c("rd1OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r2',by.y='playerID') %>% rename(c("rd2OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r3',by.y='playerID') %>% rename(c("rd3OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r4',by.y='playerID') %>% rename(c("rd4OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r5',by.y='playerID') %>% rename(c("rd5OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r6',by.y='playerID') %>% rename(c("rd6OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r7',by.y='playerID') %>% rename(c("rd7OppRating"="oppRating")) %>% arrange(playerID)
newResult <- newResult %>% rename(c("totalPoints"="Total"))


#Drop the added merged cols
newResult <- newResult[,-(1:(numRounds))]

newResult[,6:(5+numRounds)] <- apply(newResult[,6:(5+numRounds)],2, function(x) as.numeric(x))


# isNum <- sapply(newResult, as.numeric)
# avgOpp[isNum] <- lapply(avgOpp[isNum], round, 0)


avgOpp <- newResult %>% mutate(avgOpp = rowMeans(select(.,rd1OppRating:rd7OppRating)))

#Round all values
 is.dub <- sapply(avgOpp, is.double)
 avgOpp[is.dub] <- lapply(avgOpp[is.dub], round, 0)

avgOpp

##    playerState preRating playerID                        playerName totalPoints
## 1          ON    1794          1   GARY HUA                               6.0  
## 2          MI    1553          2   DAKSHESH DARURI                        6.0  
## 3          MI    1384          3   ADITYA BAJAJ                           6.0  
## 4          MI    1716          4   PATRICK H SCHILLING                    5.5  
## 5          MI    1655          5   HANSHI ZUO                             5.5  
## 6          OH    1686          6   HANSEN SONG                            5.0  
## 7          MI    1649          7   GARY DEE SWATHELL                      5.0  
## 8          MI       1641       8   EZEKIEL HOUGHTON                       5.0  
## 9          ON    1411          9   STEFANO LEE                            5.0  
## 10         MI    1365         10   ANVIT RAO                              5.0  
## 11         MI    1712         11   CAMERON WILLIAM MC LEMAN               4.5  
## 12         MI    1666         13   TORRANCE HENRY JR                      4.5  
## 13         MI    1610         14   BRADLEY SHAW                           4.5  
## 14         MI       1220      15   ZACHARY JAMES HOUGHTON                 4.5  
## 15         MI    1629         17   RONALD GRZEGORCZYK                     4.0  
## 16         MI    1600         18   DAVID SUNDEEN                          4.0  
## 17         MI    1564         19   DIPANKAR ROY                           4.0  
## 18         MI    1595         20   JASON ZHENG                            4.0  
## 19         ON       1563      21   DINH DANG BUI                          4.0  
## 20         ON    1363         23   ALAN BUI                               4.0  
## 21         MI    1229         24   MICHAEL R ALDRICH                      4.0  
## 22         MI    1745         25   LOREN SCHWIEBERT                       3.5  
## 23         ON    1579         26   MAX ZHU                                3.5  
## 24         MI    1507         28   SOFIA ADINA STANESCU-BELLU             3.5  
## 25         ON    1522         30   GEORGE AVERY JONES                     3.5  
## 26         MI    1494         31   RISHI SHETTY                           3.5  
## 27         ON    1441         32   JOSHUA PHILIP MATHEWS                  3.5  
## 28         MI    1449         33   JADE GE                                3.5  
## 29         MI    1399         34   MICHAEL JEFFERY THOMAS                 3.5  
## 30         MI    1438         35   JOSHUA DAVID LEE                       3.5  
## 31         MI       1436      39   JOEL R HENDON                          3.0  
## 32         MI    1348         40   FOREST ZHANG                           3.0  
## 33         MI    1332         42   JARED GE                               3.0  
## 34         MI    1283         43   ROBERT GLEN VASEY                      3.0  
## 35         MI    1242         45   DEREK YAN                              3.0  
## 36         MI        377      46   JACOB ALEXANDER LAVALLEY               3.0  
## 37         MI    1362         47   ERIC WRIGHT                            2.5  
## 38         MI    1011         51   TEJAS AYYAGARI                         2.5  
## 39         MI     935         52   ETHAN GUO                              2.5  
## 40         ON        955      61   JEZZEL FARKAS                          1.5  
## 41         MI    1163         64   BEN LI                                 1.0  
##    rd1OppRating rd2OppRating rd3OppRating rd4OppRating rd5OppRating
## 1          1436         1563         1600         1610         1649
## 2          1175          917         1716         1629         1604
## 3          1641          955         1745         1563         1712
## 4          1363         1507         1553         1579         1655
## 5          1242          980         1663         1666         1716
## 6          1399         1602         1712         1438         1365
## 7          1092          377         1666         1712         1794
## 8          1384         1441         1610         1411         1362
## 9          1745         1600          853         1641         1579
## 10         1604         1564         1186         1494         1686
## 11         1423         1153         1686         1649         1384
## 12         1355         1552         1649         1655         1449
## 13         1270         1199         1641         1794         1552
## 14         1564         1604         1522         1555         1270
## 15         1382         1403         1579         1553         1363
## 16         1362         1411         1794         1441         1564
## 17         1220         1365          935         1507         1600
## 18         1348         1291         1363         1403         1507
## 19         1283         1794         1362         1384         1348
## 20         1716         1283         1595          917         1629
## 21         1507         1362         1283         1745          967
## 22         1411         1393         1384         1229         1399
## 23         1291         1348         1629         1716         1411
## 24         1229         1716         1555         1564         1595
## 25          935         1163         1220         1186         1494
## 26          917         1186         1163         1365         1522
## 27          955         1641         1199         1600         1011
## 28          967         1663         1056         1355         1666
## 29         1686          967          980         1602         1745
## 30          377         1423         1153         1686         1092
## 31         1794         1270         1348         1604         1199
## 32         1595         1579         1436          853         1563
## 33         1663         1056         1092          967          955
## 34         1563         1363         1229         1175          853
## 35         1655         1011          967         1153         1175
## 36         1438         1649         1552         1056         1163
## 37         1600         1229         1563          955         1641
## 38         1552         1242         1355         1092         1441
## 39         1522         1555         1564         1382         1602
## 40         1441         1384         1270         1362         1332
## 41         1555         1522         1494         1291          377
##    rd6OppRating rd7OppRating avgOpp
## 1          1663         1716   1605
## 2          1595         1649   1469
## 3          1666         1663   1564
## 4          1564         1794   1574
## 5          1610         1629   1501
## 6          1552         1563   1519
## 7          1411         1553   1372
## 8          1507         1564   1468
## 9          1649         1595   1523
## 10         1745         1600   1554
## 11         1399         1579   1468
## 12         1384         1441   1498
## 13         1655         1494   1515
## 14         1449         1423   1484
## 15         1555         1655   1499
## 16         1423         1365   1480
## 17         1716         1641   1426
## 18         1553         1411   1411
## 19         1436         1686   1470
## 20          980          377   1214
## 21         1199         1436   1357
## 22         1365         1362   1363
## 23         1441         1712   1507
## 24         1641         1355   1522
## 25          955         1056   1144
## 26         1056         1610   1260
## 27         1579         1666   1379
## 28         1220         1011   1277
## 29         1712          935   1375
## 30          935         1382   1150
## 31         1563         1229   1430
## 32         1153         1555   1391
## 33         1163         1153   1150
## 34          377         1186   1107
## 35         1186          917   1152
## 36         1283         1363   1358
## 37         1011         1745   1392
## 38         1362         1449   1356
## 39         1438         1399   1495
## 40         1522          980   1327
## 41         1332         1270   1263

#Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

finalCSV<-avgOpp[,c('playerName','playerState','totalPoints','preRating','avgOpp')]

write_csv(finalCSV,"C:\\Users\\DCraig\\Documents\\repos\\DATA607\\Projects\\Project1 - Chess Rankings\\finalCSV.csv")

Project1-ChessRankings

Daniel Craig

2023-02-18