The goal of this project is to create an R Markdown file that generates a .CSV file containing all of the information for all of the chess players.
# Read the text file
data <- read_lines("https://raw.githubusercontent.com/pujaroy280/DATA607Project1/main/tournamentinfo.txt", skip=4)
head(data)## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [3] "-----------------------------------------------------------------------------------------"
## [4] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [5] " MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [6] "-----------------------------------------------------------------------------------------"
I cleaned and transformed the data by deleting the divider lines and extracting the values into a dataframe. I also worked on categorizing the dataframe into even and odd rows and later merging the dataframes by each column.
data <- read.table(text = data_transformed, sep = "|", col.names = c("Pair", "Name", "Points", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "NA"))
data <- data[1:(length(data) - 1)]
print(data)## Pair Name Points R1 R2 R3 R4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 ON 15445895 / R: 1794 ->1817 N:2 W B W B
## 3 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 4 MI 14598900 / R: 1553 ->1663 N:2 B W B W
## 5 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 6 MI 14959604 / R: 1384 ->1640 N:2 W B W B
## 7 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 8 MI 12616049 / R: 1716 ->1744 N:2 W B W B
## 9 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 10 MI 14601533 / R: 1655 ->1690 N:2 B W B W
## 11 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## 12 OH 15055204 / R: 1686 ->1687 N:3 W B W B
## 13 7 GARY DEE SWATHELL 5.0 W 57 W 46 W 13 W 11
## 14 MI 11146376 / R: 1649 ->1673 N:3 W B W B
## 15 8 EZEKIEL HOUGHTON 5.0 W 3 W 32 L 14 L 9
## 16 MI 15142253 / R: 1641P17->1657P24 N:3 B W B W
## 17 9 STEFANO LEE 5.0 W 25 L 18 W 59 W 8
## 18 ON 14954524 / R: 1411 ->1564 N:2 W B W B
## 19 10 ANVIT RAO 5.0 D 16 L 19 W 55 W 31
## 20 MI 14150362 / R: 1365 ->1544 N:3 W W B B
## 21 11 CAMERON WILLIAM MC LEMAN 4.5 D 38 W 56 W 6 L 7
## 22 MI 12581589 / R: 1712 ->1696 N:3 B W B W
## 23 12 KENNETH J TACK 4.5 W 42 W 33 D 5 W 38
## 24 MI 12681257 / R: 1663 ->1670 N:3 W B W B
## 25 13 TORRANCE HENRY JR 4.5 W 36 W 27 L 7 D 5
## 26 MI 15082995 / R: 1666 ->1662 N:3 B W B B
## 27 14 BRADLEY SHAW 4.5 W 54 W 44 W 8 L 1
## 28 MI 10131499 / R: 1610 ->1618 N:3 W B W W
## 29 15 ZACHARY JAMES HOUGHTON 4.5 D 19 L 16 W 30 L 22
## 30 MI 15619130 / R: 1220P13->1416P20 N:3 B B W W
## 31 16 MIKE NIKITIN 4.0 D 10 W 15 H W 39
## 32 MI 10295068 / R: 1604 ->1613 N:3 B W B
## 33 17 RONALD GRZEGORCZYK 4.0 W 48 W 41 L 26 L 2
## 34 MI 10297702 / R: 1629 ->1610 N:3 W B W B
## 35 18 DAVID SUNDEEN 4.0 W 47 W 9 L 1 W 32
## 36 MI 11342094 / R: 1600 ->1600 N:3 B W B W
## 37 19 DIPANKAR ROY 4.0 D 15 W 10 W 52 D 28
## 38 MI 14862333 / R: 1564 ->1570 N:3 W B W B
## 39 20 JASON ZHENG 4.0 L 40 W 49 W 23 W 41
## 40 MI 14529060 / R: 1595 ->1569 N:4 W B W B
## 41 21 DINH DANG BUI 4.0 W 43 L 1 W 47 L 3
## 42 ON 15495066 / R: 1563P22->1562 N:3 B W B W
## 43 22 EUGENE L MCCLURE 4.0 W 64 D 52 L 28 W 15
## 44 MI 12405534 / R: 1555 ->1529 N:4 W B W B
## 45 23 ALAN BUI 4.0 L 4 W 43 L 20 W 58
## 46 ON 15030142 / R: 1363 ->1371 B W B W
## 47 24 MICHAEL R ALDRICH 4.0 L 28 L 47 W 43 L 25
## 48 MI 13469010 / R: 1229 ->1300 N:4 B W B B
## 49 25 LOREN SCHWIEBERT 3.5 L 9 W 53 L 3 W 24
## 50 MI 12486656 / R: 1745 ->1681 N:4 B W B W
## 51 26 MAX ZHU 3.5 W 49 W 40 W 17 L 4
## 52 ON 15131520 / R: 1579 ->1564 N:4 B W B W
## 53 27 GAURAV GIDWANI 3.5 W 51 L 13 W 46 W 37
## 54 MI 14476567 / R: 1552 ->1539 N:4 W B W B
## 55 28 SOFIA ADINA STANESCU-BELLU 3.5 W 24 D 4 W 22 D 19
## 56 MI 14882954 / R: 1507 ->1513 N:3 W W B W
## 57 29 CHIEDOZIE OKORIE 3.5 W 50 D 6 L 38 L 34
## 58 MI 15323285 / R: 1602P6 ->1508P12 N:4 B W B W
## 59 30 GEORGE AVERY JONES 3.5 L 52 D 64 L 15 W 55
## 60 ON 12577178 / R: 1522 ->1444 W B B W
## 61 31 RISHI SHETTY 3.5 L 58 D 55 W 64 L 10
## 62 MI 15131618 / R: 1494 ->1444 B W B W
## 63 32 JOSHUA PHILIP MATHEWS 3.5 W 61 L 8 W 44 L 18
## 64 ON 14073750 / R: 1441 ->1433 N:4 W B W B
## 65 33 JADE GE 3.5 W 60 L 12 W 50 D 36
## 66 MI 14691842 / R: 1449 ->1421 B W B W
## 67 34 MICHAEL JEFFERY THOMAS 3.5 L 6 W 60 L 37 W 29
## 68 MI 15051807 / R: 1399 ->1400 B W B B
## 69 35 JOSHUA DAVID LEE 3.5 L 46 L 38 W 56 L 6
## 70 MI 14601397 / R: 1438 ->1392 W W B W
## 71 36 SIDDHARTH JHA 3.5 L 13 W 57 W 51 D 33
## 72 MI 14773163 / R: 1355 ->1367 N:4 W B W B
## 73 37 AMIYATOSH PWNANANDAM 3.5 B L 5 W 34 L 27
## 74 MI 15489571 / R: 980P12->1077P17 B W W
## 75 38 BRIAN LIU 3.0 D 11 W 35 W 29 L 12
## 76 MI 15108523 / R: 1423 ->1439 N:4 W B W W
## 77 39 JOEL R HENDON 3.0 L 1 W 54 W 40 L 16
## 78 MI 12923035 / R: 1436P23->1413 N:4 B W B W
## 79 40 FOREST ZHANG 3.0 W 20 L 26 L 39 W 59
## 80 MI 14892710 / R: 1348 ->1346 B B W W
## 81 41 KYLE WILLIAM MURPHY 3.0 W 59 L 17 W 58 L 20
## 82 MI 15761443 / R: 1403P5 ->1341P9 B W B W
## 83 42 JARED GE 3.0 L 12 L 50 L 57 D 60
## 84 MI 14462326 / R: 1332 ->1256 B W B B
## 85 43 ROBERT GLEN VASEY 3.0 L 21 L 23 L 24 W 63
## 86 MI 14101068 / R: 1283 ->1244 W B W W
## 87 44 JUSTIN D SCHILLING 3.0 B L 14 L 32 W 53
## 88 MI 15323504 / R: 1199 ->1199 W B B
## 89 45 DEREK YAN 3.0 L 5 L 51 D 60 L 56
## 90 MI 15372807 / R: 1242 ->1191 W B W B
## 91 46 JACOB ALEXANDER LAVALLEY 3.0 W 35 L 7 L 27 L 50
## 92 MI 15490981 / R: 377P3 ->1076P10 B W B W
## 93 47 ERIC WRIGHT 2.5 L 18 W 24 L 21 W 61
## 94 MI 12533115 / R: 1362 ->1341 W B W B
## 95 48 DANIEL KHAIN 2.5 L 17 W 63 H D 52
## 96 MI 14369165 / R: 1382 ->1335 B W B
## 97 49 MICHAEL J MARTIN 2.5 L 26 L 20 D 63 D 64
## 98 MI 12531685 / R: 1291P12->1259P17 W W B W
## 99 50 SHIVAM JHA 2.5 L 29 W 42 L 33 W 46
## 100 MI 14773178 / R: 1056 ->1111 W B W B
## 101 51 TEJAS AYYAGARI 2.5 L 27 W 45 L 36 W 57
## 102 MI 15205474 / R: 1011 ->1097 B W B W
## 103 52 ETHAN GUO 2.5 W 30 D 22 L 19 D 48
## 104 MI 14918803 / R: 935 ->1092 N:4 B W B W
## 105 53 JOSE C YBARRA 2.0 H L 25 H L 44
## 106 MI 12578849 / R: 1393 ->1359 B W
## 107 54 LARRY HODGE 2.0 L 14 L 39 L 61 B
## 108 MI 12836773 / R: 1270 ->1200 B B W
## 109 55 ALEX KONG 2.0 L 62 D 31 L 10 L 30
## 110 MI 15412571 / R: 1186 ->1163 W B W B
## 111 56 MARISA RICCI 2.0 H L 11 L 35 W 45
## 112 MI 14679887 / R: 1153 ->1140 B W W
## 113 57 MICHAEL LU 2.0 L 7 L 36 W 42 L 51
## 114 MI 15113330 / R: 1092 ->1079 B W W B
## 115 58 VIRAJ MOHILE 2.0 W 31 L 2 L 41 L 23
## 116 MI 14700365 / R: 917 -> 941 W B W B
## 117 59 SEAN M MC CORMICK 2.0 L 41 B L 9 L 40
## 118 MI 12841036 / R: 853 -> 878 W B B
## 119 60 JULIA SHEN 1.5 L 33 L 34 D 45 D 42
## 120 MI 14579262 / R: 967 -> 984 W B B W
## 121 61 JEZZEL FARKAS 1.5 L 32 L 3 W 54 L 47
## 122 ON 15771592 / R: 955P11-> 979P18 B W B W
## 123 62 ASHWIN BALAJI 1.0 W 55 U U U
## 124 MI 15219542 / R: 1530 ->1535 B
## 125 63 THOMAS JOSEPH HOSMER 1.0 L 2 L 48 D 49 L 43
## 126 MI 15057092 / R: 1175 ->1125 W B W B
## 127 64 BEN LI 1.0 L 22 D 30 L 31 D 49
## 128 MI 15006561 / R: 1163 ->1112 B W W B
## R5 R6 R7
## 1 W 7 D 12 D 4
## 2 W B W
## 3 W 16 W 20 W 7
## 4 B W B
## 5 W 11 W 13 W 12
## 6 W B W
## 7 D 5 W 19 D 1
## 8 W B B
## 9 D 4 W 14 W 17
## 10 B W B
## 11 D 10 W 27 W 21
## 12 B W B
## 13 L 1 W 9 L 2
## 14 B W W
## 15 W 47 W 28 W 19
## 16 B W W
## 17 W 26 L 7 W 20
## 18 W B B
## 19 D 6 W 25 W 18
## 20 W B W
## 21 L 3 W 34 W 26
## 22 B W B
## 23 H D 1 L 3
## 24 W B
## 25 W 33 L 3 W 32
## 26 W W B
## 27 D 27 L 5 W 31
## 28 B B W
## 29 W 54 W 33 W 38
## 30 B B W
## 31 L 2 W 36 U
## 32 W B
## 33 W 23 W 22 L 5
## 34 W B W
## 35 L 19 W 38 L 10
## 36 B W B
## 37 W 18 L 4 L 8
## 38 W W B
## 39 W 28 L 2 L 9
## 40 W B W
## 41 W 40 W 39 L 6
## 42 W B W
## 43 H L 17 W 40
## 44 W B
## 45 L 17 W 37 W 46
## 46 B W B
## 47 W 60 W 44 W 39
## 48 W W B
## 49 D 34 L 10 W 47
## 50 B W B
## 51 L 9 D 32 L 11
## 52 B W W
## 53 D 14 L 6 U
## 54 W B
## 55 L 20 L 8 D 36
## 56 B B W
## 57 W 52 W 48 U
## 58 W B
## 59 L 31 W 61 W 50
## 60 W B B
## 61 W 30 W 50 L 14
## 62 B W B
## 63 W 51 D 26 L 13
## 64 W B W
## 65 L 13 L 15 W 51
## 66 B W B
## 67 D 25 L 11 W 52
## 68 W B W
## 69 W 57 D 52 W 48
## 70 B B W
## 71 H L 16 D 28
## 72 W B
## 73 H L 23 W 61
## 74 B W
## 75 H L 18 L 15
## 76 B B
## 77 W 44 L 21 L 24
## 78 B W W
## 79 L 21 W 56 L 22
## 80 B W W
## 81 X U U
## 82
## 83 D 61 W 64 W 56
## 84 W W B
## 85 W 59 L 46 W 55
## 86 B B W
## 87 L 39 L 24 W 59
## 88 W B W
## 89 W 63 D 55 W 58
## 90 W B W
## 91 W 64 W 43 L 23
## 92 B W W
## 93 L 8 D 51 L 25
## 94 W B W
## 95 H L 29 L 35
## 96 W B
## 97 W 58 H U
## 98 B
## 99 H L 31 L 30
## 100 B W
## 101 L 32 D 47 L 33
## 102 B W W
## 103 L 29 D 35 L 34
## 104 B W B
## 105 U W 57 U
## 106 W
## 107 L 15 L 59 W 64
## 108 W B W
## 109 B D 45 L 43
## 110 W B
## 111 H L 40 L 42
## 112 B W
## 113 L 35 L 53 B
## 114 W B
## 115 L 49 B L 45
## 116 W B
## 117 L 43 W 54 L 44
## 118 W W B
## 119 L 24 H U
## 120 B
## 121 D 42 L 30 L 37
## 122 B W B
## 123 U U U
## 124
## 125 L 45 H U
## 126 B
## 127 L 46 L 42 L 54
## 128 W B B
row_1 <- data %>% filter(row_number() %%2 == 1)
row_2 <- data %>% filter(row_number() %%2 == 0)
data_w <- cbind(row_1, row_2)
colnames(data_w) = c("Pair", "Name", "Points", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "State", "ID_Rating", "Points_2", "R1_2", "R2_2", "R3_2", "R4_2", "R5_2", "R6_2", "R7_2")
print(data_w)## Pair Name Points R1 R2 R3 R4
## 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## 4 4 PATRICK H SCHILLING 5.5 W 23 D 28 W 2 W 26
## 5 5 HANSHI ZUO 5.5 W 45 W 37 D 12 D 13
## 6 6 HANSEN SONG 5.0 W 34 D 29 L 11 W 35
## 7 7 GARY DEE SWATHELL 5.0 W 57 W 46 W 13 W 11
## 8 8 EZEKIEL HOUGHTON 5.0 W 3 W 32 L 14 L 9
## 9 9 STEFANO LEE 5.0 W 25 L 18 W 59 W 8
## 10 10 ANVIT RAO 5.0 D 16 L 19 W 55 W 31
## 11 11 CAMERON WILLIAM MC LEMAN 4.5 D 38 W 56 W 6 L 7
## 12 12 KENNETH J TACK 4.5 W 42 W 33 D 5 W 38
## 13 13 TORRANCE HENRY JR 4.5 W 36 W 27 L 7 D 5
## 14 14 BRADLEY SHAW 4.5 W 54 W 44 W 8 L 1
## 15 15 ZACHARY JAMES HOUGHTON 4.5 D 19 L 16 W 30 L 22
## 16 16 MIKE NIKITIN 4.0 D 10 W 15 H W 39
## 17 17 RONALD GRZEGORCZYK 4.0 W 48 W 41 L 26 L 2
## 18 18 DAVID SUNDEEN 4.0 W 47 W 9 L 1 W 32
## 19 19 DIPANKAR ROY 4.0 D 15 W 10 W 52 D 28
## 20 20 JASON ZHENG 4.0 L 40 W 49 W 23 W 41
## 21 21 DINH DANG BUI 4.0 W 43 L 1 W 47 L 3
## 22 22 EUGENE L MCCLURE 4.0 W 64 D 52 L 28 W 15
## 23 23 ALAN BUI 4.0 L 4 W 43 L 20 W 58
## 24 24 MICHAEL R ALDRICH 4.0 L 28 L 47 W 43 L 25
## 25 25 LOREN SCHWIEBERT 3.5 L 9 W 53 L 3 W 24
## 26 26 MAX ZHU 3.5 W 49 W 40 W 17 L 4
## 27 27 GAURAV GIDWANI 3.5 W 51 L 13 W 46 W 37
## 28 28 SOFIA ADINA STANESCU-BELLU 3.5 W 24 D 4 W 22 D 19
## 29 29 CHIEDOZIE OKORIE 3.5 W 50 D 6 L 38 L 34
## 30 30 GEORGE AVERY JONES 3.5 L 52 D 64 L 15 W 55
## 31 31 RISHI SHETTY 3.5 L 58 D 55 W 64 L 10
## 32 32 JOSHUA PHILIP MATHEWS 3.5 W 61 L 8 W 44 L 18
## 33 33 JADE GE 3.5 W 60 L 12 W 50 D 36
## 34 34 MICHAEL JEFFERY THOMAS 3.5 L 6 W 60 L 37 W 29
## 35 35 JOSHUA DAVID LEE 3.5 L 46 L 38 W 56 L 6
## 36 36 SIDDHARTH JHA 3.5 L 13 W 57 W 51 D 33
## 37 37 AMIYATOSH PWNANANDAM 3.5 B L 5 W 34 L 27
## 38 38 BRIAN LIU 3.0 D 11 W 35 W 29 L 12
## 39 39 JOEL R HENDON 3.0 L 1 W 54 W 40 L 16
## 40 40 FOREST ZHANG 3.0 W 20 L 26 L 39 W 59
## 41 41 KYLE WILLIAM MURPHY 3.0 W 59 L 17 W 58 L 20
## 42 42 JARED GE 3.0 L 12 L 50 L 57 D 60
## 43 43 ROBERT GLEN VASEY 3.0 L 21 L 23 L 24 W 63
## 44 44 JUSTIN D SCHILLING 3.0 B L 14 L 32 W 53
## 45 45 DEREK YAN 3.0 L 5 L 51 D 60 L 56
## 46 46 JACOB ALEXANDER LAVALLEY 3.0 W 35 L 7 L 27 L 50
## 47 47 ERIC WRIGHT 2.5 L 18 W 24 L 21 W 61
## 48 48 DANIEL KHAIN 2.5 L 17 W 63 H D 52
## 49 49 MICHAEL J MARTIN 2.5 L 26 L 20 D 63 D 64
## 50 50 SHIVAM JHA 2.5 L 29 W 42 L 33 W 46
## 51 51 TEJAS AYYAGARI 2.5 L 27 W 45 L 36 W 57
## 52 52 ETHAN GUO 2.5 W 30 D 22 L 19 D 48
## 53 53 JOSE C YBARRA 2.0 H L 25 H L 44
## 54 54 LARRY HODGE 2.0 L 14 L 39 L 61 B
## 55 55 ALEX KONG 2.0 L 62 D 31 L 10 L 30
## 56 56 MARISA RICCI 2.0 H L 11 L 35 W 45
## 57 57 MICHAEL LU 2.0 L 7 L 36 W 42 L 51
## 58 58 VIRAJ MOHILE 2.0 W 31 L 2 L 41 L 23
## 59 59 SEAN M MC CORMICK 2.0 L 41 B L 9 L 40
## 60 60 JULIA SHEN 1.5 L 33 L 34 D 45 D 42
## 61 61 JEZZEL FARKAS 1.5 L 32 L 3 W 54 L 47
## 62 62 ASHWIN BALAJI 1.0 W 55 U U U
## 63 63 THOMAS JOSEPH HOSMER 1.0 L 2 L 48 D 49 L 43
## 64 64 BEN LI 1.0 L 22 D 30 L 31 D 49
## R5 R6 R7 State ID_Rating Points_2 R1_2
## 1 W 7 D 12 D 4 ON 15445895 / R: 1794 ->1817 N:2 W
## 2 W 16 W 20 W 7 MI 14598900 / R: 1553 ->1663 N:2 B
## 3 W 11 W 13 W 12 MI 14959604 / R: 1384 ->1640 N:2 W
## 4 D 5 W 19 D 1 MI 12616049 / R: 1716 ->1744 N:2 W
## 5 D 4 W 14 W 17 MI 14601533 / R: 1655 ->1690 N:2 B
## 6 D 10 W 27 W 21 OH 15055204 / R: 1686 ->1687 N:3 W
## 7 L 1 W 9 L 2 MI 11146376 / R: 1649 ->1673 N:3 W
## 8 W 47 W 28 W 19 MI 15142253 / R: 1641P17->1657P24 N:3 B
## 9 W 26 L 7 W 20 ON 14954524 / R: 1411 ->1564 N:2 W
## 10 D 6 W 25 W 18 MI 14150362 / R: 1365 ->1544 N:3 W
## 11 L 3 W 34 W 26 MI 12581589 / R: 1712 ->1696 N:3 B
## 12 H D 1 L 3 MI 12681257 / R: 1663 ->1670 N:3 W
## 13 W 33 L 3 W 32 MI 15082995 / R: 1666 ->1662 N:3 B
## 14 D 27 L 5 W 31 MI 10131499 / R: 1610 ->1618 N:3 W
## 15 W 54 W 33 W 38 MI 15619130 / R: 1220P13->1416P20 N:3 B
## 16 L 2 W 36 U MI 10295068 / R: 1604 ->1613 N:3 B
## 17 W 23 W 22 L 5 MI 10297702 / R: 1629 ->1610 N:3 W
## 18 L 19 W 38 L 10 MI 11342094 / R: 1600 ->1600 N:3 B
## 19 W 18 L 4 L 8 MI 14862333 / R: 1564 ->1570 N:3 W
## 20 W 28 L 2 L 9 MI 14529060 / R: 1595 ->1569 N:4 W
## 21 W 40 W 39 L 6 ON 15495066 / R: 1563P22->1562 N:3 B
## 22 H L 17 W 40 MI 12405534 / R: 1555 ->1529 N:4 W
## 23 L 17 W 37 W 46 ON 15030142 / R: 1363 ->1371 B
## 24 W 60 W 44 W 39 MI 13469010 / R: 1229 ->1300 N:4 B
## 25 D 34 L 10 W 47 MI 12486656 / R: 1745 ->1681 N:4 B
## 26 L 9 D 32 L 11 ON 15131520 / R: 1579 ->1564 N:4 B
## 27 D 14 L 6 U MI 14476567 / R: 1552 ->1539 N:4 W
## 28 L 20 L 8 D 36 MI 14882954 / R: 1507 ->1513 N:3 W
## 29 W 52 W 48 U MI 15323285 / R: 1602P6 ->1508P12 N:4 B
## 30 L 31 W 61 W 50 ON 12577178 / R: 1522 ->1444 W
## 31 W 30 W 50 L 14 MI 15131618 / R: 1494 ->1444 B
## 32 W 51 D 26 L 13 ON 14073750 / R: 1441 ->1433 N:4 W
## 33 L 13 L 15 W 51 MI 14691842 / R: 1449 ->1421 B
## 34 D 25 L 11 W 52 MI 15051807 / R: 1399 ->1400 B
## 35 W 57 D 52 W 48 MI 14601397 / R: 1438 ->1392 W
## 36 H L 16 D 28 MI 14773163 / R: 1355 ->1367 N:4 W
## 37 H L 23 W 61 MI 15489571 / R: 980P12->1077P17
## 38 H L 18 L 15 MI 15108523 / R: 1423 ->1439 N:4 W
## 39 W 44 L 21 L 24 MI 12923035 / R: 1436P23->1413 N:4 B
## 40 L 21 W 56 L 22 MI 14892710 / R: 1348 ->1346 B
## 41 X U U MI 15761443 / R: 1403P5 ->1341P9 B
## 42 D 61 W 64 W 56 MI 14462326 / R: 1332 ->1256 B
## 43 W 59 L 46 W 55 MI 14101068 / R: 1283 ->1244 W
## 44 L 39 L 24 W 59 MI 15323504 / R: 1199 ->1199
## 45 W 63 D 55 W 58 MI 15372807 / R: 1242 ->1191 W
## 46 W 64 W 43 L 23 MI 15490981 / R: 377P3 ->1076P10 B
## 47 L 8 D 51 L 25 MI 12533115 / R: 1362 ->1341 W
## 48 H L 29 L 35 MI 14369165 / R: 1382 ->1335 B
## 49 W 58 H U MI 12531685 / R: 1291P12->1259P17 W
## 50 H L 31 L 30 MI 14773178 / R: 1056 ->1111 W
## 51 L 32 D 47 L 33 MI 15205474 / R: 1011 ->1097 B
## 52 L 29 D 35 L 34 MI 14918803 / R: 935 ->1092 N:4 B
## 53 U W 57 U MI 12578849 / R: 1393 ->1359
## 54 L 15 L 59 W 64 MI 12836773 / R: 1270 ->1200 B
## 55 B D 45 L 43 MI 15412571 / R: 1186 ->1163 W
## 56 H L 40 L 42 MI 14679887 / R: 1153 ->1140
## 57 L 35 L 53 B MI 15113330 / R: 1092 ->1079 B
## 58 L 49 B L 45 MI 14700365 / R: 917 -> 941 W
## 59 L 43 W 54 L 44 MI 12841036 / R: 853 -> 878 W
## 60 L 24 H U MI 14579262 / R: 967 -> 984 W
## 61 D 42 L 30 L 37 ON 15771592 / R: 955P11-> 979P18 B
## 62 U U U MI 15219542 / R: 1530 ->1535 B
## 63 L 45 H U MI 15057092 / R: 1175 ->1125 W
## 64 L 46 L 42 L 54 MI 15006561 / R: 1163 ->1112 B
## R2_2 R3_2 R4_2 R5_2 R6_2 R7_2
## 1 B W B W B W
## 2 W B W B W B
## 3 B W B W B W
## 4 B W B W B B
## 5 W B W B W B
## 6 B W B B W B
## 7 B W B B W W
## 8 W B W B W W
## 9 B W B W B B
## 10 W B B W B W
## 11 W B W B W B
## 12 B W B W B
## 13 W B B W W B
## 14 B W W B B W
## 15 B W W B B W
## 16 W B W B
## 17 B W B W B W
## 18 W B W B W B
## 19 B W B W W B
## 20 B W B W B W
## 21 W B W W B W
## 22 B W B W B
## 23 W B W B W B
## 24 W B B W W B
## 25 W B W B W B
## 26 W B W B W W
## 27 B W B W B
## 28 W B W B B W
## 29 W B W W B
## 30 B B W W B B
## 31 W B W B W B
## 32 B W B W B W
## 33 W B W B W B
## 34 W B B W B W
## 35 W B W B B W
## 36 B W B W B
## 37 B W W B W
## 38 B W W B B
## 39 W B W B W W
## 40 B W W B W W
## 41 W B W
## 42 W B B W W B
## 43 B W W B B W
## 44 W B B W B W
## 45 B W B W B W
## 46 W B W B W W
## 47 B W B W B W
## 48 W B W B
## 49 W B W B
## 50 B W B B W
## 51 W B W B W W
## 52 W B W B W B
## 53 B W W
## 54 B W W B W
## 55 B W B W B
## 56 B W W B W
## 57 W W B W B
## 58 B W B W B
## 59 B B W W B
## 60 B B W B
## 61 W B W B W B
## 62
## 63 B W B B
## 64 W W B W B B
I leveraged Regex to create new columns of each round to identify Wins, Losses or Draws and likewise for the column pre_ratings.
data_w <- data_w %>%
separate_wider_regex(
c(R1:R7),
patterns = c(
"[BDHLUWX] {2,4}",
# Create new columns by extracting data using Regex expressions to identify Wins, Losses and Draws from Rounds 1-7.
opponent_ID = "(?:\\d+)?"
),
# Include new colums to identify which Rounds in the chess game had Wins, Losses or Draws
names_sep = "_"
)
print(data_w)## # A tibble: 64 × 20
## Pair Name Points R1_opponent_ID R2_opponent_ID R3_opponent_ID
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 " 1 " " GARY HUA … "6.0 … 39 21 18
## 2 " 2 " " DAKSHESH DARU… "6.0 … 63 58 4
## 3 " 3 " " ADITYA BAJAJ … "6.0 … 8 61 25
## 4 " 4 " " PATRICK H SCH… "5.5 … 23 28 2
## 5 " 5 " " HANSHI ZUO … "5.5 … 45 37 12
## 6 " 6 " " HANSEN SONG … "5.0 … 34 29 11
## 7 " 7 " " GARY DEE SWAT… "5.0 … 57 46 13
## 8 " 8 " " EZEKIEL HOUGH… "5.0 … 3 32 14
## 9 " 9 " " STEFANO LEE … "5.0 … 25 18 59
## 10 " 10 " " ANVIT RAO … "5.0 … 16 19 55
## # ℹ 54 more rows
## # ℹ 14 more variables: R4_opponent_ID <chr>, R5_opponent_ID <chr>,
## # R6_opponent_ID <chr>, R7_opponent_ID <chr>, State <chr>, ID_Rating <chr>,
## # Points_2 <chr>, R1_2 <chr>, R2_2 <chr>, R3_2 <chr>, R4_2 <chr>, R5_2 <chr>,
## # R6_2 <chr>, R7_2 <chr>
data_w <- data_w %>%
separate_wider_regex(
ID_Rating,
patterns = c(
" \\d{8} / R: +",
pre_ratings = "\\d+",
"[ P0-9]*-> *",
"\\d+[ P0-9]*"
)
)
print(data_w)## # A tibble: 64 × 20
## Pair Name Points R1_opponent_ID R2_opponent_ID R3_opponent_ID
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 " 1 " " GARY HUA … "6.0 … 39 21 18
## 2 " 2 " " DAKSHESH DARU… "6.0 … 63 58 4
## 3 " 3 " " ADITYA BAJAJ … "6.0 … 8 61 25
## 4 " 4 " " PATRICK H SCH… "5.5 … 23 28 2
## 5 " 5 " " HANSHI ZUO … "5.5 … 45 37 12
## 6 " 6 " " HANSEN SONG … "5.0 … 34 29 11
## 7 " 7 " " GARY DEE SWAT… "5.0 … 57 46 13
## 8 " 8 " " EZEKIEL HOUGH… "5.0 … 3 32 14
## 9 " 9 " " STEFANO LEE … "5.0 … 25 18 59
## 10 " 10 " " ANVIT RAO … "5.0 … 16 19 55
## # ℹ 54 more rows
## # ℹ 14 more variables: R4_opponent_ID <chr>, R5_opponent_ID <chr>,
## # R6_opponent_ID <chr>, R7_opponent_ID <chr>, State <chr>, pre_ratings <chr>,
## # Points_2 <chr>, R1_2 <chr>, R2_2 <chr>, R3_2 <chr>, R4_2 <chr>, R5_2 <chr>,
## # R6_2 <chr>, R7_2 <chr>
## # A tibble: 64 × 20
## Pair Name Points R1_opponent_ID R2_opponent_ID R3_opponent_ID
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 " 1 " " GARY HUA … "6.0 … 39 21 18
## 2 " 2 " " DAKSHESH DARU… "6.0 … 63 58 4
## 3 " 3 " " ADITYA BAJAJ … "6.0 … 8 61 25
## 4 " 4 " " PATRICK H SCH… "5.5 … 23 28 2
## 5 " 5 " " HANSHI ZUO … "5.5 … 45 37 12
## 6 " 6 " " HANSEN SONG … "5.0 … 34 29 11
## 7 " 7 " " GARY DEE SWAT… "5.0 … 57 46 13
## 8 " 8 " " EZEKIEL HOUGH… "5.0 … 3 32 14
## 9 " 9 " " STEFANO LEE … "5.0 … 25 18 59
## 10 " 10 " " ANVIT RAO … "5.0 … 16 19 55
## # ℹ 54 more rows
## # ℹ 14 more variables: R4_opponent_ID <chr>, R5_opponent_ID <chr>,
## # R6_opponent_ID <chr>, R7_opponent_ID <chr>, State <chr>, pre_ratings <chr>,
## # Points_2 <chr>, R1_2 <chr>, R2_2 <chr>, R3_2 <chr>, R4_2 <chr>, R5_2 <chr>,
## # R6_2 <chr>, R7_2 <chr>
Once the data was cleaned and transformed, I calculated the average pre-rating of each player’s opponents.
pre_ratings_updated <- data_w$pre_ratings
names(pre_ratings_updated) <- data_w$Pair
print(pre_ratings_updated)## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 1712 1663 1666 1610 1220 1604
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
## 1629 1600 1564 1595 1563 1555 1363 1229 1745 1579 1552 1507 1602 1522 1494 1441
## 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 1449 1399 1438 1355 980 1423 1436 1348 1403 1332 1283 1199 1242 377 1362 1382
## 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
## 1291 1056 1011 935 1393 1270 1186 1153 1092 917 853 967 955 1530 1175 1163
data_w <- data_w %>%
rowwise %>%
mutate(
total_opponent_pre_ratings = sum(across(
.cols = ends_with("_opponent_ID"),
.fns = ~ unname(pre_ratings_updated[.x])),
na.rm = TRUE),
num_of_rounds = ncol(select(data_w, starts_with("R"))), # Count the number of rounds
average_opponent_pre_rating = ifelse(num_of_rounds == 0, NA,
floor(total_opponent_pre_ratings / 7)) # Divide by 7 to get the average
)
print(data_w)## # A tibble: 64 × 23
## # Rowwise:
## Pair Name Points R1_opponent_ID R2_opponent_ID R3_opponent_ID
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 " GARY HUA … "6.0 … 39 21 18
## 2 2 " DAKSHESH DARURI … "6.0 … 63 58 4
## 3 3 " ADITYA BAJAJ … "6.0 … 8 61 25
## 4 4 " PATRICK H SCHILL… "5.5 … 23 28 2
## 5 5 " HANSHI ZUO … "5.5 … 45 37 12
## 6 6 " HANSEN SONG … "5.0 … 34 29 11
## 7 7 " GARY DEE SWATHEL… "5.0 … 57 46 13
## 8 8 " EZEKIEL HOUGHTON… "5.0 … 3 32 14
## 9 9 " STEFANO LEE … "5.0 … 25 18 59
## 10 10 " ANVIT RAO … "5.0 … 16 19 55
## # ℹ 54 more rows
## # ℹ 17 more variables: R4_opponent_ID <chr>, R5_opponent_ID <chr>,
## # R6_opponent_ID <chr>, R7_opponent_ID <chr>, State <chr>, pre_ratings <dbl>,
## # Points_2 <chr>, R1_2 <chr>, R2_2 <chr>, R3_2 <chr>, R4_2 <chr>, R5_2 <chr>,
## # R6_2 <chr>, R7_2 <chr>, total_opponent_pre_ratings <dbl>,
## # num_of_rounds <int>, average_opponent_pre_rating <dbl>
Before converting the file into a CSV file, I organized the data by sorting the data by total points and after that pre_ratings in descending order.
I converted the complete_results data into a CSV file.
# Read the text file
results_data <- read_lines("https://raw.githubusercontent.com/pujaroy280/DATA607Project1/main/tournamentinfo_updatedsummary.csv")
print(results_data)## [1] "Name,State,Points,pre_ratings,average_opponent_pre_rating"
## [2] " GARY HUA , ON ,6.0 ,1794,1605"
## [3] " DAKSHESH DARURI , MI ,6.0 ,1553,1469"
## [4] " ADITYA BAJAJ , MI ,6.0 ,1384,1563"
## [5] " PATRICK H SCHILLING , MI ,5.5 ,1716,1573"
## [6] " HANSHI ZUO , MI ,5.5 ,1655,1500"
## [7] " HANSEN SONG , OH ,5.0 ,1686,1518"
## [8] " GARY DEE SWATHELL , MI ,5.0 ,1649,1372"
## [9] " EZEKIEL HOUGHTON , MI ,5.0 ,1641,1468"
## [10] " STEFANO LEE , ON ,5.0 ,1411,1523"
## [11] " ANVIT RAO , MI ,5.0 ,1365,1554"
## [12] " CAMERON WILLIAM MC LEMAN , MI ,4.5 ,1712,1467"
## [13] " TORRANCE HENRY JR , MI ,4.5 ,1666,1497"
## [14] " KENNETH J TACK , MI ,4.5 ,1663,1291"
## [15] " BRADLEY SHAW , MI ,4.5 ,1610,1515"
## [16] " ZACHARY JAMES HOUGHTON , MI ,4.5 ,1220,1483"
## [17] " RONALD GRZEGORCZYK , MI ,4.0 ,1629,1498"
## [18] " MIKE NIKITIN , MI ,4.0 ,1604,989"
## [19] " DAVID SUNDEEN , MI ,4.0 ,1600,1480"
## [20] " JASON ZHENG , MI ,4.0 ,1595,1410"
## [21] " DIPANKAR ROY , MI ,4.0 ,1564,1426"
## [22] " DINH DANG BUI , ON ,4.0 ,1563,1470"
## [23] " EUGENE L MCCLURE , MI ,4.0 ,1555,1114"
## [24] " ALAN BUI , ON ,4.0 ,1363,1213"
## [25] " MICHAEL R ALDRICH , MI ,4.0 ,1229,1357"
## [26] " LOREN SCHWIEBERT , MI ,3.5 ,1745,1363"
## [27] " CHIEDOZIE OKORIE , MI ,3.5 ,1602,1125"
## [28] " MAX ZHU , ON ,3.5 ,1579,1506"
## [29] " GAURAV GIDWANI , MI ,3.5 ,1552,1047"
## [30] " GEORGE AVERY JONES , ON ,3.5 ,1522,1144"
## [31] " SOFIA ADINA STANESCU-BELLU , MI ,3.5 ,1507,1522"
## [32] " RISHI SHETTY , MI ,3.5 ,1494,1259"
## [33] " JADE GE , MI ,3.5 ,1449,1276"
## [34] " JOSHUA PHILIP MATHEWS , ON ,3.5 ,1441,1378"
## [35] " JOSHUA DAVID LEE , MI ,3.5 ,1438,1149"
## [36] " MICHAEL JEFFERY THOMAS , MI ,3.5 ,1399,1375"
## [37] " SIDDHARTH JHA , MI ,3.5 ,1355,1189"
## [38] " AMIYATOSH PWNANANDAM , MI ,3.5 ,980,989"
## [39] " JOEL R HENDON , MI ,3.0 ,1436,1429"
## [40] " BRIAN LIU , MI ,3.0 ,1423,1319"
## [41] " KYLE WILLIAM MURPHY , MI ,3.0 ,1403,713"
## [42] " FOREST ZHANG , MI ,3.0 ,1348,1390"
## [43] " JARED GE , MI ,3.0 ,1332,1149"
## [44] " ROBERT GLEN VASEY , MI ,3.0 ,1283,1106"
## [45] " DEREK YAN , MI ,3.0 ,1242,1152"
## [46] " JUSTIN D SCHILLING , MI ,3.0 ,1199,1137"
## [47] " JACOB ALEXANDER LAVALLEY , MI ,3.0 ,377,1357"
## [48] " DANIEL KHAIN , MI ,2.5 ,1382,968"
## [49] " ERIC WRIGHT , MI ,2.5 ,1362,1392"
## [50] " MICHAEL J MARTIN , MI ,2.5 ,1291,918"
## [51] " SHIVAM JHA , MI ,2.5 ,1056,1110"
## [52] " TEJAS AYYAGARI , MI ,2.5 ,1011,1356"
## [53] " ETHAN GUO , MI ,2.5 ,935,1494"
## [54] " JOSE C YBARRA , MI ,2.0 ,1393,576"
## [55] " LARRY HODGE , MI ,2.0 ,1270,1033"
## [56] " ALEX KONG , MI ,2.0 ,1186,1205"
## [57] " MARISA RICCI , MI ,2.0 ,1153,1010"
## [58] " MICHAEL LU , MI ,2.0 ,1092,1168"
## [59] " VIRAJ MOHILE , MI ,2.0 ,917,1192"
## [60] " SEAN M MC CORMICK , MI ,2.0 ,853,1130"
## [61] " JULIA SHEN , MI ,1.5 ,967,950"
## [62] " JEZZEL FARKAS , ON ,1.5 ,955,1327"
## [63] " ASHWIN BALAJI , MI ,1.0 ,1530,169"
## [64] " THOMAS JOSEPH HOSMER , MI ,1.0 ,1175,964"
## [65] " BEN LI , MI ,1.0 ,1163,1263"
Throughout this project, I learned how to load, extract, clean and transform and convert data into a CSV file.