#Standard
file <-("https://raw.githubusercontent.com/d-ev-craig/DATA607/main/Projects/Project1%20-%20Chess%20Rankings/chessRankings.txt")
data<-readLines(file)
## Warning in readLines(file): incomplete final line found on
## 'https://raw.githubusercontent.com/d-ev-craig/DATA607/main/Projects/Project1%20-%20Chess%20Rankings/chessRankings.txt'
garbage<-seq(1,196, by = 3)
data<-data[-(garbage)]
data<-data.frame(data)
data
## data
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 4 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 5 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## 6 MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
## 7 3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|
## 8 MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |
## 9 4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|
## 10 MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |
## 11 5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|
## 12 MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |
## 13 6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|
## 14 OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |
## 15 7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|
## 16 MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |
## 17 8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|
## 18 MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |
## 19 9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|
## 20 ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |
## 21 10 | ANVIT RAO |5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|
## 22 MI | 14150362 / R: 1365 ->1544 |N:3 |W |W |B |B |W |B |W |
## 23 11 | CAMERON WILLIAM MC LEMAN |4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|
## 24 MI | 12581589 / R: 1712 ->1696 |N:3 |B |W |B |W |B |W |B |
## 25 12 | KENNETH J TACK |4.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|
## 26 MI | 12681257 / R: 1663 ->1670 |N:3 |W |B |W |B | |W |B |
## 27 13 | TORRANCE HENRY JR |4.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|
## 28 MI | 15082995 / R: 1666 ->1662 |N:3 |B |W |B |B |W |W |B |
## 29 14 | BRADLEY SHAW |4.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|
## 30 MI | 10131499 / R: 1610 ->1618 |N:3 |W |B |W |W |B |B |W |
## 31 15 | ZACHARY JAMES HOUGHTON |4.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|
## 32 MI | 15619130 / R: 1220P13->1416P20 |N:3 |B |B |W |W |B |B |W |
## 33 16 | MIKE NIKITIN |4.0 |D 10|W 15|H |W 39|L 2|W 36|U |
## 34 MI | 10295068 / R: 1604 ->1613 |N:3 |B |W | |B |W |B | |
## 35 17 | RONALD GRZEGORCZYK |4.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|
## 36 MI | 10297702 / R: 1629 ->1610 |N:3 |W |B |W |B |W |B |W |
## 37 18 | DAVID SUNDEEN |4.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|
## 38 MI | 11342094 / R: 1600 ->1600 |N:3 |B |W |B |W |B |W |B |
## 39 19 | DIPANKAR ROY |4.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|
## 40 MI | 14862333 / R: 1564 ->1570 |N:3 |W |B |W |B |W |W |B |
## 41 20 | JASON ZHENG |4.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|
## 42 MI | 14529060 / R: 1595 ->1569 |N:4 |W |B |W |B |W |B |W |
## 43 21 | DINH DANG BUI |4.0 |W 43|L 1|W 47|L 3|W 40|W 39|L 6|
## 44 ON | 15495066 / R: 1563P22->1562 |N:3 |B |W |B |W |W |B |W |
## 45 22 | EUGENE L MCCLURE |4.0 |W 64|D 52|L 28|W 15|H |L 17|W 40|
## 46 MI | 12405534 / R: 1555 ->1529 |N:4 |W |B |W |B | |W |B |
## 47 23 | ALAN BUI |4.0 |L 4|W 43|L 20|W 58|L 17|W 37|W 46|
## 48 ON | 15030142 / R: 1363 ->1371 | |B |W |B |W |B |W |B |
## 49 24 | MICHAEL R ALDRICH |4.0 |L 28|L 47|W 43|L 25|W 60|W 44|W 39|
## 50 MI | 13469010 / R: 1229 ->1300 |N:4 |B |W |B |B |W |W |B |
## 51 25 | LOREN SCHWIEBERT |3.5 |L 9|W 53|L 3|W 24|D 34|L 10|W 47|
## 52 MI | 12486656 / R: 1745 ->1681 |N:4 |B |W |B |W |B |W |B |
## 53 26 | MAX ZHU |3.5 |W 49|W 40|W 17|L 4|L 9|D 32|L 11|
## 54 ON | 15131520 / R: 1579 ->1564 |N:4 |B |W |B |W |B |W |W |
## 55 27 | GAURAV GIDWANI |3.5 |W 51|L 13|W 46|W 37|D 14|L 6|U |
## 56 MI | 14476567 / R: 1552 ->1539 |N:4 |W |B |W |B |W |B | |
## 57 28 | SOFIA ADINA STANESCU-BELLU |3.5 |W 24|D 4|W 22|D 19|L 20|L 8|D 36|
## 58 MI | 14882954 / R: 1507 ->1513 |N:3 |W |W |B |W |B |B |W |
## 59 29 | CHIEDOZIE OKORIE |3.5 |W 50|D 6|L 38|L 34|W 52|W 48|U |
## 60 MI | 15323285 / R: 1602P6 ->1508P12 |N:4 |B |W |B |W |W |B | |
## 61 30 | GEORGE AVERY JONES |3.5 |L 52|D 64|L 15|W 55|L 31|W 61|W 50|
## 62 ON | 12577178 / R: 1522 ->1444 | |W |B |B |W |W |B |B |
## 63 31 | RISHI SHETTY |3.5 |L 58|D 55|W 64|L 10|W 30|W 50|L 14|
## 64 MI | 15131618 / R: 1494 ->1444 | |B |W |B |W |B |W |B |
## 65 32 | JOSHUA PHILIP MATHEWS |3.5 |W 61|L 8|W 44|L 18|W 51|D 26|L 13|
## 66 ON | 14073750 / R: 1441 ->1433 |N:4 |W |B |W |B |W |B |W |
## 67 33 | JADE GE |3.5 |W 60|L 12|W 50|D 36|L 13|L 15|W 51|
## 68 MI | 14691842 / R: 1449 ->1421 | |B |W |B |W |B |W |B |
## 69 34 | MICHAEL JEFFERY THOMAS |3.5 |L 6|W 60|L 37|W 29|D 25|L 11|W 52|
## 70 MI | 15051807 / R: 1399 ->1400 | |B |W |B |B |W |B |W |
## 71 35 | JOSHUA DAVID LEE |3.5 |L 46|L 38|W 56|L 6|W 57|D 52|W 48|
## 72 MI | 14601397 / R: 1438 ->1392 | |W |W |B |W |B |B |W |
## 73 36 | SIDDHARTH JHA |3.5 |L 13|W 57|W 51|D 33|H |L 16|D 28|
## 74 MI | 14773163 / R: 1355 ->1367 |N:4 |W |B |W |B | |W |B |
## 75 37 | AMIYATOSH PWNANANDAM |3.5 |B |L 5|W 34|L 27|H |L 23|W 61|
## 76 MI | 15489571 / R: 980P12->1077P17 | | |B |W |W | |B |W |
## 77 38 | BRIAN LIU |3.0 |D 11|W 35|W 29|L 12|H |L 18|L 15|
## 78 MI | 15108523 / R: 1423 ->1439 |N:4 |W |B |W |W | |B |B |
## 79 39 | JOEL R HENDON |3.0 |L 1|W 54|W 40|L 16|W 44|L 21|L 24|
## 80 MI | 12923035 / R: 1436P23->1413 |N:4 |B |W |B |W |B |W |W |
## 81 40 | FOREST ZHANG |3.0 |W 20|L 26|L 39|W 59|L 21|W 56|L 22|
## 82 MI | 14892710 / R: 1348 ->1346 | |B |B |W |W |B |W |W |
## 83 41 | KYLE WILLIAM MURPHY |3.0 |W 59|L 17|W 58|L 20|X |U |U |
## 84 MI | 15761443 / R: 1403P5 ->1341P9 | |B |W |B |W | | | |
## 85 42 | JARED GE |3.0 |L 12|L 50|L 57|D 60|D 61|W 64|W 56|
## 86 MI | 14462326 / R: 1332 ->1256 | |B |W |B |B |W |W |B |
## 87 43 | ROBERT GLEN VASEY |3.0 |L 21|L 23|L 24|W 63|W 59|L 46|W 55|
## 88 MI | 14101068 / R: 1283 ->1244 | |W |B |W |W |B |B |W |
## 89 44 | JUSTIN D SCHILLING |3.0 |B |L 14|L 32|W 53|L 39|L 24|W 59|
## 90 MI | 15323504 / R: 1199 ->1199 | | |W |B |B |W |B |W |
## 91 45 | DEREK YAN |3.0 |L 5|L 51|D 60|L 56|W 63|D 55|W 58|
## 92 MI | 15372807 / R: 1242 ->1191 | |W |B |W |B |W |B |W |
## 93 46 | JACOB ALEXANDER LAVALLEY |3.0 |W 35|L 7|L 27|L 50|W 64|W 43|L 23|
## 94 MI | 15490981 / R: 377P3 ->1076P10 | |B |W |B |W |B |W |W |
## 95 47 | ERIC WRIGHT |2.5 |L 18|W 24|L 21|W 61|L 8|D 51|L 25|
## 96 MI | 12533115 / R: 1362 ->1341 | |W |B |W |B |W |B |W |
## 97 48 | DANIEL KHAIN |2.5 |L 17|W 63|H |D 52|H |L 29|L 35|
## 98 MI | 14369165 / R: 1382 ->1335 | |B |W | |B | |W |B |
## 99 49 | MICHAEL J MARTIN |2.5 |L 26|L 20|D 63|D 64|W 58|H |U |
## 100 MI | 12531685 / R: 1291P12->1259P17 | |W |W |B |W |B | | |
## 101 50 | SHIVAM JHA |2.5 |L 29|W 42|L 33|W 46|H |L 31|L 30|
## 102 MI | 14773178 / R: 1056 ->1111 | |W |B |W |B | |B |W |
## 103 51 | TEJAS AYYAGARI |2.5 |L 27|W 45|L 36|W 57|L 32|D 47|L 33|
## 104 MI | 15205474 / R: 1011 ->1097 | |B |W |B |W |B |W |W |
## 105 52 | ETHAN GUO |2.5 |W 30|D 22|L 19|D 48|L 29|D 35|L 34|
## 106 MI | 14918803 / R: 935 ->1092 |N:4 |B |W |B |W |B |W |B |
## 107 53 | JOSE C YBARRA |2.0 |H |L 25|H |L 44|U |W 57|U |
## 108 MI | 12578849 / R: 1393 ->1359 | | |B | |W | |W | |
## 109 54 | LARRY HODGE |2.0 |L 14|L 39|L 61|B |L 15|L 59|W 64|
## 110 MI | 12836773 / R: 1270 ->1200 | |B |B |W | |W |B |W |
## 111 55 | ALEX KONG |2.0 |L 62|D 31|L 10|L 30|B |D 45|L 43|
## 112 MI | 15412571 / R: 1186 ->1163 | |W |B |W |B | |W |B |
## 113 56 | MARISA RICCI |2.0 |H |L 11|L 35|W 45|H |L 40|L 42|
## 114 MI | 14679887 / R: 1153 ->1140 | | |B |W |W | |B |W |
## 115 57 | MICHAEL LU |2.0 |L 7|L 36|W 42|L 51|L 35|L 53|B |
## 116 MI | 15113330 / R: 1092 ->1079 | |B |W |W |B |W |B | |
## 117 58 | VIRAJ MOHILE |2.0 |W 31|L 2|L 41|L 23|L 49|B |L 45|
## 118 MI | 14700365 / R: 917 -> 941 | |W |B |W |B |W | |B |
## 119 59 | SEAN M MC CORMICK |2.0 |L 41|B |L 9|L 40|L 43|W 54|L 44|
## 120 MI | 12841036 / R: 853 -> 878 | |W | |B |B |W |W |B |
## 121 60 | JULIA SHEN |1.5 |L 33|L 34|D 45|D 42|L 24|H |U |
## 122 MI | 14579262 / R: 967 -> 984 | |W |B |B |W |B | | |
## 123 61 | JEZZEL FARKAS |1.5 |L 32|L 3|W 54|L 47|D 42|L 30|L 37|
## 124 ON | 15771592 / R: 955P11-> 979P18 | |B |W |B |W |B |W |B |
## 125 62 | ASHWIN BALAJI |1.0 |W 55|U |U |U |U |U |U |
## 126 MI | 15219542 / R: 1530 ->1535 | |B | | | | | | |
## 127 63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |
## 128 MI | 15057092 / R: 1175 ->1125 | |W |B |W |B |B | | |
## 129 64 | BEN LI |1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|
## 130 MI | 15006561 / R: 1163 ->1112 | |B |W |W |B |W |B |B |
dataTibble <- tibble(1:130,data[,1])
names(dataTibble)[2] <- 'chessText'
#Attempts at generalizing this to work on a tournament of any number of rounds of Chess
#Method2
numRounds <- str_count(dataTibble[1,2], 'Round')
#Set the number of times we counted the word 'Round' to a variable and combine it with 'r' to create our column names to separate columns out based on the number of rounds in the chess tournament
roundText <- seq(from=1, to=numRounds,by =1)
roundText <- str_c('r',roundText)
roundText
## [1] "r1" "r2" "r3" "r4" "r5" "r6" "r7"
#Method1
# data4<-separate_wider_delim(dataTibble,cols=chessText,delim="|",names = c('playerID','playerName','Total','r1','r2','r3','r4','r5','r6','r7','blank'))
#Separate string data into columns and using our variable number of Rounds text as names for columns
data4<-separate_wider_delim(dataTibble,cols=chessText,delim="|",names = c('playerID','playerName','Total',roundText,'blank'))
data4
## # A tibble: 130 × 12
## `1:130` playe…¹ playe…² Total r1 r2 r3 r4 r5 r6 r7 blank
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 " Pair… " Play… "Tot… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… " "
## 2 2 " Num … " USCF… " Pt… " 1… " 2… " 3… " 4… " 5… " 6… " 7… " "
## 3 3 " 1… " GARY… "6.0… "W … "W … "W … "W … "W … "D … "D … ""
## 4 4 " ON… " 1544… "N:2… "W … "B … "W … "B … "W … "B … "W … ""
## 5 5 " 2… " DAKS… "6.0… "W … "W … "L … "W … "W … "W … "W … ""
## 6 6 " MI… " 1459… "N:2… "B … "W … "B … "W … "B … "W … "B … ""
## 7 7 " 3… " ADIT… "6.0… "L … "W … "W … "W … "W … "W … "W … ""
## 8 8 " MI… " 1495… "N:2… "W … "B … "W … "B … "W … "B … "W … ""
## 9 9 " 4… " PATR… "5.5… "W … "D … "W … "W … "D … "W … "D … ""
## 10 10 " MI… " 1261… "N:2… "W … "B … "W … "B … "W … "B … "B … ""
## # … with 120 more rows, and abbreviated variable names ¹playerID, ²playerName
data4
## # A tibble: 130 × 12
## `1:130` playe…¹ playe…² Total r1 r2 r3 r4 r5 r6 r7 blank
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 " Pair… " Play… "Tot… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… "Rou… " "
## 2 2 " Num … " USCF… " Pt… " 1… " 2… " 3… " 4… " 5… " 6… " 7… " "
## 3 3 " 1… " GARY… "6.0… "W … "W … "W … "W … "W … "D … "D … ""
## 4 4 " ON… " 1544… "N:2… "W … "B … "W … "B … "W … "B … "W … ""
## 5 5 " 2… " DAKS… "6.0… "W … "W … "L … "W … "W … "W … "W … ""
## 6 6 " MI… " 1459… "N:2… "B … "W … "B … "W … "B … "W … "B … ""
## 7 7 " 3… " ADIT… "6.0… "L … "W … "W … "W … "W … "W … "W … ""
## 8 8 " MI… " 1495… "N:2… "W … "B … "W … "B … "W … "B … "W … ""
## 9 9 " 4… " PATR… "5.5… "W … "D … "W … "W … "D … "W … "D … ""
## 10 10 " MI… " 1261… "N:2… "W … "B … "W … "B … "W … "B … "B … ""
## # … with 120 more rows, and abbreviated variable names ¹playerID, ²playerName
#Removing unnecessary columns
oddRows <- data4 %>% filter(row_number() %% 2 == 1) %>% select(-blank,-'1:130') %>% slice(-1)
evenRows <- data4 %>% filter(row_number() %% 2 == 0) %>% select(-blank, -'1:130')%>%slice(-1)
oddRows
## # A tibble: 64 × 10
## playerID playerName Total r1 r2 r3 r4 r5 r6 r7
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 " 1 " " GARY HUA … "6.0… W 39 W 21 W 18 W 14 W 7 D 12 D 4
## 2 " 2 " " DAKSHESH DARURI … "6.0… W 63 W 58 L 4 W 17 W 16 W 20 W 7
## 3 " 3 " " ADITYA BAJAJ … "6.0… L 8 W 61 W 25 W 21 W 11 W 13 W 12
## 4 " 4 " " PATRICK H SCHILLI… "5.5… W 23 D 28 W 2 W 26 D 5 W 19 D 1
## 5 " 5 " " HANSHI ZUO … "5.5… W 45 W 37 D 12 D 13 D 4 W 14 W 17
## 6 " 6 " " HANSEN SONG … "5.0… W 34 D 29 L 11 W 35 D 10 W 27 W 21
## 7 " 7 " " GARY DEE SWATHELL… "5.0… W 57 W 46 W 13 W 11 L 1 W 9 L 2
## 8 " 8 " " EZEKIEL HOUGHTON … "5.0… W 3 W 32 L 14 L 9 W 47 W 28 W 19
## 9 " 9 " " STEFANO LEE … "5.0… W 25 L 18 W 59 W 8 W 26 L 7 W 20
## 10 " 10 " " ANVIT RAO … "5.0… D 16 L 19 W 55 W 31 D 6 W 25 W 18
## # … with 54 more rows
#Apply str_replace_all to each column of interest and keeping it dynamic in case rounds played aren't always 7
col <- colnames(oddRows[,4:(3+numRounds)])
oddRows[col] <- apply(oddRows[col],MARGIN =2, function(x) str_replace_all(x, "[WLDB] ", ""))
oddRows[col] <- apply(oddRows[col],2, function(x) as.numeric(x))
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): NAs introduced by coercion
#Our goal here is to grab the player's pre-rating and state. Pre-rating will require some string manipulation
# First we remove the extra tables and rename them to be accurate
evenRows <- evenRows[,c(1,2)]
evenRows <- rename(evenRows,playerState = playerID)
#separate out our playerName column to section out our ratings
evenRows <- separate_wider_delim(evenRows,cols=c(playerName), delim=":", names = c('uscfID','ratings'))
#perform again to split our pre and post ratings from the ratings column we just made
evenRows <- separate_wider_delim(evenRows,cols=c(ratings), delim="->", names = c('preRating','postRating'))
#Here we want to remove any characters that follow the pattern of a letter followed by any character/number that can repeat itself any number of times. I tried my best to future proof this in case ELO's start to get into 5 digits (probably possible in the coming years via AI). If I didn't want to future proof, I would've removed based on position.
evenRows$preRating <- str_replace_all(evenRows$preRating, "([A-Z]|[a-z])[^.]*$","")
evenRows$postRating <- str_replace_all(evenRows$postRating, "([A-Z]|[a-z])[^.]*$","")
#Dropping excess columns
evenRows <- evenRows %>% select(-uscfID,-postRating)
result <- bind_cols(evenRows,oddRows)
result
## # A tibble: 64 × 12
## playerState preRa…¹ playe…² playe…³ Total r1 r2 r3 r4 r5 r6
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 " ON " " 1794… " 1… " GARY… "6.0… 39 21 18 14 7 12
## 2 " MI " " 1553… " 2… " DAKS… "6.0… 63 58 4 17 16 20
## 3 " MI " " 1384… " 3… " ADIT… "6.0… 8 61 25 21 11 13
## 4 " MI " " 1716… " 4… " PATR… "5.5… 23 28 2 26 5 19
## 5 " MI " " 1655… " 5… " HANS… "5.5… 45 37 12 13 4 14
## 6 " OH " " 1686… " 6… " HANS… "5.0… 34 29 11 35 10 27
## 7 " MI " " 1649… " 7… " GARY… "5.0… 57 46 13 11 1 9
## 8 " MI " " 1641" " 8… " EZEK… "5.0… 3 32 14 9 47 28
## 9 " ON " " 1411… " 9… " STEF… "5.0… 25 18 59 8 26 7
## 10 " MI " " 1365… " 10… " ANVI… "5.0… 16 19 55 31 6 25
## # … with 54 more rows, 1 more variable: r7 <dbl>, and abbreviated variable
## # names ¹preRating, ²playerID, ³playerName
#Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
# Below lines are where I tried to do add a column by performing a 'vlookup' in R but across columns with different names, but similar values. The idea was to use r1ID's column's values to lookup the playerID column and select its appropriate preRating. All functions/methods I found required a column with the same name and since I can't have duplicate column names in my df I can't perform it via self join. I also don't want to make a separate lookup table to perform this action because I think it'd be more efficient this way.
# resultCondensed<-select(result, playerID...16,playerName,preRating,postRating,Total...18,r1ID,r2ID,r3ID,r4ID,r5ID,r6ID,r7ID)
# resultAvgOpp <- mutate(resultCondensed,avgOpp = merge(resultCondensed,resultCondensed, by = playerID...16) )
## Duplicating the rdID columns so that there is a column in common to perform the merge on (will test using by.x and by.y later)
lookupResult<- select(result,playerID,preRating)
# lookupResult <-mutate(lookupResult, r1ID=playerID)
# lookupResult<- mutate(lookupResult, r2ID = r1ID)
# lookupResult<- mutate(lookupResult, r3ID = r1ID)
# lookupResult<- mutate(lookupResult, r4ID = r1ID)
# lookupResult<- mutate(lookupResult, r5ID = r1ID)
# lookupResult<- mutate(lookupResult, r6ID = r1ID)
# lookupResult<- mutate(lookupResult, r7ID = r1ID)
#Dropping unnecessary playerID column since it won't be referenced in the later merge
#lookupResult<-lookupResult[,-1]
#Trimming whitespace from column to ensure a merge later down the road doesn't get blocked
lookupResult
## # A tibble: 64 × 2
## playerID preRating
## <chr> <chr>
## 1 " 1 " " 1794 "
## 2 " 2 " " 1553 "
## 3 " 3 " " 1384 "
## 4 " 4 " " 1716 "
## 5 " 5 " " 1655 "
## 6 " 6 " " 1686 "
## 7 " 7 " " 1649 "
## 8 " 8 " " 1641"
## 9 " 9 " " 1411 "
## 10 " 10 " " 1365 "
## # … with 54 more rows
names(lookupResult)[2] <- 'oppRating'
lookupResult$playerID<-trimws(lookupResult$playerID,which='both')
result
## # A tibble: 64 × 12
## playerState preRa…¹ playe…² playe…³ Total r1 r2 r3 r4 r5 r6
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 " ON " " 1794… " 1… " GARY… "6.0… 39 21 18 14 7 12
## 2 " MI " " 1553… " 2… " DAKS… "6.0… 63 58 4 17 16 20
## 3 " MI " " 1384… " 3… " ADIT… "6.0… 8 61 25 21 11 13
## 4 " MI " " 1716… " 4… " PATR… "5.5… 23 28 2 26 5 19
## 5 " MI " " 1655… " 5… " HANS… "5.5… 45 37 12 13 4 14
## 6 " OH " " 1686… " 6… " HANS… "5.0… 34 29 11 35 10 27
## 7 " MI " " 1649… " 7… " GARY… "5.0… 57 46 13 11 1 9
## 8 " MI " " 1641" " 8… " EZEK… "5.0… 3 32 14 9 47 28
## 9 " ON " " 1411… " 9… " STEF… "5.0… 25 18 59 8 26 7
## 10 " MI " " 1365… " 10… " ANVI… "5.0… 16 19 55 31 6 25
## # … with 54 more rows, 1 more variable: r7 <dbl>, and abbreviated variable
## # names ¹preRating, ²playerID, ³playerName
lookupResult
## # A tibble: 64 × 2
## playerID oppRating
## <chr> <chr>
## 1 1 " 1794 "
## 2 2 " 1553 "
## 3 3 " 1384 "
## 4 4 " 1716 "
## 5 5 " 1655 "
## 6 6 " 1686 "
## 7 7 " 1649 "
## 8 8 " 1641"
## 9 9 " 1411 "
## 10 10 " 1365 "
## # … with 54 more rows
#Attempts at getting this to be dynamic. The process is as follows, I need to merge my tables to add in the appropriate opponent rating for each round for each player. Afterwards, I need to rename that column before I can perform another lookup for the next round.Recall that our current table has the following for columns: playerState,preRating,playerID,playerName,Total,r1,r2,..
#When I perform the merge for the values in r1.. I add a column called oppRating which needs to be dynamically named so that there aren't conflicts when this process repeats and names for multiple rounds.
#Attempts:
#1. Using rename(), rename would throw an error stating the '=' was unexpected, I assume this is due to my attempt at dynamically naming it based off the current column in question.
#2. Tried to pass three total vectors as variables into a homebrew function via apply and hoped R would process the first value of each variables' vector of values, and then the 2nd value in the list of variable vectors... etc. I was able to successfully generate the names dynamically based on dataframe dimensions and column names, but had issues getting apply to recognize all my variables. You'll notice that there are some values that are hard coded to test some things. I was running out of time for this.
#My valiant attempts' remains are below
# colResult <- colnames(result[,6:(5+numRounds)])
# xVar <- colnames(result[,6:(5+numRounds)])
# yVar <- str_c(xVar,'oppRating')
# yVar
# work <- merge(result,lookupResult,by.x='r1',by.y='playerID') %>% rename(work,str_c('"r1','oppRating"') ='oppRating'))
# myFun <- function(data,x){
# merge(data,lookupResult,by.x="r1",by.y='playerID')}
# newResult <-
# apply(result[colResult], MARGIN =2,FUN= myFun,x=xVar,y=yVar)
#
# result %>% merge(lookupResult, by.x = )
#newResult
newResult<- result %>% merge(lookupResult, by.x='r1',by.y='playerID') %>% rename(c("rd1OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r2',by.y='playerID') %>% rename(c("rd2OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r3',by.y='playerID') %>% rename(c("rd3OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r4',by.y='playerID') %>% rename(c("rd4OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r5',by.y='playerID') %>% rename(c("rd5OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r6',by.y='playerID') %>% rename(c("rd6OppRating"="oppRating")) %>% arrange(playerID)
newResult<- newResult %>% merge(lookupResult, by.x='r7',by.y='playerID') %>% rename(c("rd7OppRating"="oppRating")) %>% arrange(playerID)
newResult <- newResult %>% rename(c("totalPoints"="Total"))
#Drop the added merged cols
newResult <- newResult[,-(1:(numRounds))]
newResult[,6:(5+numRounds)] <- apply(newResult[,6:(5+numRounds)],2, function(x) as.numeric(x))
# isNum <- sapply(newResult, as.numeric)
# avgOpp[isNum] <- lapply(avgOpp[isNum], round, 0)
avgOpp <- newResult %>% mutate(avgOpp = rowMeans(select(.,rd1OppRating:rd7OppRating)))
#Round all values
is.dub <- sapply(avgOpp, is.double)
avgOpp[is.dub] <- lapply(avgOpp[is.dub], round, 0)
avgOpp
## playerState preRating playerID playerName totalPoints
## 1 ON 1794 1 GARY HUA 6.0
## 2 MI 1553 2 DAKSHESH DARURI 6.0
## 3 MI 1384 3 ADITYA BAJAJ 6.0
## 4 MI 1716 4 PATRICK H SCHILLING 5.5
## 5 MI 1655 5 HANSHI ZUO 5.5
## 6 OH 1686 6 HANSEN SONG 5.0
## 7 MI 1649 7 GARY DEE SWATHELL 5.0
## 8 MI 1641 8 EZEKIEL HOUGHTON 5.0
## 9 ON 1411 9 STEFANO LEE 5.0
## 10 MI 1365 10 ANVIT RAO 5.0
## 11 MI 1712 11 CAMERON WILLIAM MC LEMAN 4.5
## 12 MI 1666 13 TORRANCE HENRY JR 4.5
## 13 MI 1610 14 BRADLEY SHAW 4.5
## 14 MI 1220 15 ZACHARY JAMES HOUGHTON 4.5
## 15 MI 1629 17 RONALD GRZEGORCZYK 4.0
## 16 MI 1600 18 DAVID SUNDEEN 4.0
## 17 MI 1564 19 DIPANKAR ROY 4.0
## 18 MI 1595 20 JASON ZHENG 4.0
## 19 ON 1563 21 DINH DANG BUI 4.0
## 20 ON 1363 23 ALAN BUI 4.0
## 21 MI 1229 24 MICHAEL R ALDRICH 4.0
## 22 MI 1745 25 LOREN SCHWIEBERT 3.5
## 23 ON 1579 26 MAX ZHU 3.5
## 24 MI 1507 28 SOFIA ADINA STANESCU-BELLU 3.5
## 25 ON 1522 30 GEORGE AVERY JONES 3.5
## 26 MI 1494 31 RISHI SHETTY 3.5
## 27 ON 1441 32 JOSHUA PHILIP MATHEWS 3.5
## 28 MI 1449 33 JADE GE 3.5
## 29 MI 1399 34 MICHAEL JEFFERY THOMAS 3.5
## 30 MI 1438 35 JOSHUA DAVID LEE 3.5
## 31 MI 1436 39 JOEL R HENDON 3.0
## 32 MI 1348 40 FOREST ZHANG 3.0
## 33 MI 1332 42 JARED GE 3.0
## 34 MI 1283 43 ROBERT GLEN VASEY 3.0
## 35 MI 1242 45 DEREK YAN 3.0
## 36 MI 377 46 JACOB ALEXANDER LAVALLEY 3.0
## 37 MI 1362 47 ERIC WRIGHT 2.5
## 38 MI 1011 51 TEJAS AYYAGARI 2.5
## 39 MI 935 52 ETHAN GUO 2.5
## 40 ON 955 61 JEZZEL FARKAS 1.5
## 41 MI 1163 64 BEN LI 1.0
## rd1OppRating rd2OppRating rd3OppRating rd4OppRating rd5OppRating
## 1 1436 1563 1600 1610 1649
## 2 1175 917 1716 1629 1604
## 3 1641 955 1745 1563 1712
## 4 1363 1507 1553 1579 1655
## 5 1242 980 1663 1666 1716
## 6 1399 1602 1712 1438 1365
## 7 1092 377 1666 1712 1794
## 8 1384 1441 1610 1411 1362
## 9 1745 1600 853 1641 1579
## 10 1604 1564 1186 1494 1686
## 11 1423 1153 1686 1649 1384
## 12 1355 1552 1649 1655 1449
## 13 1270 1199 1641 1794 1552
## 14 1564 1604 1522 1555 1270
## 15 1382 1403 1579 1553 1363
## 16 1362 1411 1794 1441 1564
## 17 1220 1365 935 1507 1600
## 18 1348 1291 1363 1403 1507
## 19 1283 1794 1362 1384 1348
## 20 1716 1283 1595 917 1629
## 21 1507 1362 1283 1745 967
## 22 1411 1393 1384 1229 1399
## 23 1291 1348 1629 1716 1411
## 24 1229 1716 1555 1564 1595
## 25 935 1163 1220 1186 1494
## 26 917 1186 1163 1365 1522
## 27 955 1641 1199 1600 1011
## 28 967 1663 1056 1355 1666
## 29 1686 967 980 1602 1745
## 30 377 1423 1153 1686 1092
## 31 1794 1270 1348 1604 1199
## 32 1595 1579 1436 853 1563
## 33 1663 1056 1092 967 955
## 34 1563 1363 1229 1175 853
## 35 1655 1011 967 1153 1175
## 36 1438 1649 1552 1056 1163
## 37 1600 1229 1563 955 1641
## 38 1552 1242 1355 1092 1441
## 39 1522 1555 1564 1382 1602
## 40 1441 1384 1270 1362 1332
## 41 1555 1522 1494 1291 377
## rd6OppRating rd7OppRating avgOpp
## 1 1663 1716 1605
## 2 1595 1649 1469
## 3 1666 1663 1564
## 4 1564 1794 1574
## 5 1610 1629 1501
## 6 1552 1563 1519
## 7 1411 1553 1372
## 8 1507 1564 1468
## 9 1649 1595 1523
## 10 1745 1600 1554
## 11 1399 1579 1468
## 12 1384 1441 1498
## 13 1655 1494 1515
## 14 1449 1423 1484
## 15 1555 1655 1499
## 16 1423 1365 1480
## 17 1716 1641 1426
## 18 1553 1411 1411
## 19 1436 1686 1470
## 20 980 377 1214
## 21 1199 1436 1357
## 22 1365 1362 1363
## 23 1441 1712 1507
## 24 1641 1355 1522
## 25 955 1056 1144
## 26 1056 1610 1260
## 27 1579 1666 1379
## 28 1220 1011 1277
## 29 1712 935 1375
## 30 935 1382 1150
## 31 1563 1229 1430
## 32 1153 1555 1391
## 33 1163 1153 1150
## 34 377 1186 1107
## 35 1186 917 1152
## 36 1283 1363 1358
## 37 1011 1745 1392
## 38 1362 1449 1356
## 39 1438 1399 1495
## 40 1522 980 1327
## 41 1332 1270 1263
#Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents
finalCSV<-avgOpp[,c('playerName','playerState','totalPoints','preRating','avgOpp')]
write_csv(finalCSV,"C:\\Users\\DCraig\\Documents\\repos\\DATA607\\Projects\\Project1 - Chess Rankings\\finalCSV.csv")