Reading file and creating two data frames -“readLines” function is used to read the data from input file. In this step the line with hyphen is removed. - The records were split based on new line to create a vector - Next, two data frames were created and data from vector was pulled into data frames. s we can notice, thare are rows with different columns. So we are creating unique dataframe.
library("stringr")
rawfile <- trimws(str_replace_all(readLines("~/Desktop/CUNY/Project - 1/tournamentinfo.txt"),"-----------------------------------------------------------------------------------------",""))
## Warning in readLines("~/Desktop/CUNY/Project - 1/tournamentinfo.txt"):
## incomplete final line found on '~/Desktop/CUNY/Project - 1/
## tournamentinfo.txt'
input_vector <- trimws(unlist(strsplit(rawfile, "\n")))
input_vector
## [1] "Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|"
## [2] "Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |"
## [3] "1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [4] "ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## [5] "2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [6] "MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |"
## [7] "3 | ADITYA BAJAJ |6.0 |L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [8] "MI | 14959604 / R: 1384 ->1640 |N:2 |W |B |W |B |W |B |W |"
## [9] "4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [10] "MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |"
## [11] "5 | HANSHI ZUO |5.5 |W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [12] "MI | 14601533 / R: 1655 ->1690 |N:2 |B |W |B |W |B |W |B |"
## [13] "6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [14] "OH | 15055204 / R: 1686 ->1687 |N:3 |W |B |W |B |B |W |B |"
## [15] "7 | GARY DEE SWATHELL |5.0 |W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
## [16] "MI | 11146376 / R: 1649 ->1673 |N:3 |W |B |W |B |B |W |W |"
## [17] "8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|"
## [18] "MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |"
## [19] "9 | STEFANO LEE |5.0 |W 25|L 18|W 59|W 8|W 26|L 7|W 20|"
## [20] "ON | 14954524 / R: 1411 ->1564 |N:2 |W |B |W |B |W |B |B |"
## [21] "10 | ANVIT RAO |5.0 |D 16|L 19|W 55|W 31|D 6|W 25|W 18|"
## [22] "MI | 14150362 / R: 1365 ->1544 |N:3 |W |W |B |B |W |B |W |"
## [23] "11 | CAMERON WILLIAM MC LEMAN |4.5 |D 38|W 56|W 6|L 7|L 3|W 34|W 26|"
## [24] "MI | 12581589 / R: 1712 ->1696 |N:3 |B |W |B |W |B |W |B |"
## [25] "12 | KENNETH J TACK |4.5 |W 42|W 33|D 5|W 38|H |D 1|L 3|"
## [26] "MI | 12681257 / R: 1663 ->1670 |N:3 |W |B |W |B | |W |B |"
## [27] "13 | TORRANCE HENRY JR |4.5 |W 36|W 27|L 7|D 5|W 33|L 3|W 32|"
## [28] "MI | 15082995 / R: 1666 ->1662 |N:3 |B |W |B |B |W |W |B |"
## [29] "14 | BRADLEY SHAW |4.5 |W 54|W 44|W 8|L 1|D 27|L 5|W 31|"
## [30] "MI | 10131499 / R: 1610 ->1618 |N:3 |W |B |W |W |B |B |W |"
## [31] "15 | ZACHARY JAMES HOUGHTON |4.5 |D 19|L 16|W 30|L 22|W 54|W 33|W 38|"
## [32] "MI | 15619130 / R: 1220P13->1416P20 |N:3 |B |B |W |W |B |B |W |"
## [33] "16 | MIKE NIKITIN |4.0 |D 10|W 15|H |W 39|L 2|W 36|U |"
## [34] "MI | 10295068 / R: 1604 ->1613 |N:3 |B |W | |B |W |B | |"
## [35] "17 | RONALD GRZEGORCZYK |4.0 |W 48|W 41|L 26|L 2|W 23|W 22|L 5|"
## [36] "MI | 10297702 / R: 1629 ->1610 |N:3 |W |B |W |B |W |B |W |"
## [37] "18 | DAVID SUNDEEN |4.0 |W 47|W 9|L 1|W 32|L 19|W 38|L 10|"
## [38] "MI | 11342094 / R: 1600 ->1600 |N:3 |B |W |B |W |B |W |B |"
## [39] "19 | DIPANKAR ROY |4.0 |D 15|W 10|W 52|D 28|W 18|L 4|L 8|"
## [40] "MI | 14862333 / R: 1564 ->1570 |N:3 |W |B |W |B |W |W |B |"
## [41] "20 | JASON ZHENG |4.0 |L 40|W 49|W 23|W 41|W 28|L 2|L 9|"
## [42] "MI | 14529060 / R: 1595 ->1569 |N:4 |W |B |W |B |W |B |W |"
## [43] "21 | DINH DANG BUI |4.0 |W 43|L 1|W 47|L 3|W 40|W 39|L 6|"
## [44] "ON | 15495066 / R: 1563P22->1562 |N:3 |B |W |B |W |W |B |W |"
## [45] "22 | EUGENE L MCCLURE |4.0 |W 64|D 52|L 28|W 15|H |L 17|W 40|"
## [46] "MI | 12405534 / R: 1555 ->1529 |N:4 |W |B |W |B | |W |B |"
## [47] "23 | ALAN BUI |4.0 |L 4|W 43|L 20|W 58|L 17|W 37|W 46|"
## [48] "ON | 15030142 / R: 1363 ->1371 | |B |W |B |W |B |W |B |"
## [49] "24 | MICHAEL R ALDRICH |4.0 |L 28|L 47|W 43|L 25|W 60|W 44|W 39|"
## [50] "MI | 13469010 / R: 1229 ->1300 |N:4 |B |W |B |B |W |W |B |"
## [51] "25 | LOREN SCHWIEBERT |3.5 |L 9|W 53|L 3|W 24|D 34|L 10|W 47|"
## [52] "MI | 12486656 / R: 1745 ->1681 |N:4 |B |W |B |W |B |W |B |"
## [53] "26 | MAX ZHU |3.5 |W 49|W 40|W 17|L 4|L 9|D 32|L 11|"
## [54] "ON | 15131520 / R: 1579 ->1564 |N:4 |B |W |B |W |B |W |W |"
## [55] "27 | GAURAV GIDWANI |3.5 |W 51|L 13|W 46|W 37|D 14|L 6|U |"
## [56] "MI | 14476567 / R: 1552 ->1539 |N:4 |W |B |W |B |W |B | |"
## [57] "28 | SOFIA ADINA STANESCU-BELLU |3.5 |W 24|D 4|W 22|D 19|L 20|L 8|D 36|"
## [58] "MI | 14882954 / R: 1507 ->1513 |N:3 |W |W |B |W |B |B |W |"
## [59] "29 | CHIEDOZIE OKORIE |3.5 |W 50|D 6|L 38|L 34|W 52|W 48|U |"
## [60] "MI | 15323285 / R: 1602P6 ->1508P12 |N:4 |B |W |B |W |W |B | |"
## [61] "30 | GEORGE AVERY JONES |3.5 |L 52|D 64|L 15|W 55|L 31|W 61|W 50|"
## [62] "ON | 12577178 / R: 1522 ->1444 | |W |B |B |W |W |B |B |"
## [63] "31 | RISHI SHETTY |3.5 |L 58|D 55|W 64|L 10|W 30|W 50|L 14|"
## [64] "MI | 15131618 / R: 1494 ->1444 | |B |W |B |W |B |W |B |"
## [65] "32 | JOSHUA PHILIP MATHEWS |3.5 |W 61|L 8|W 44|L 18|W 51|D 26|L 13|"
## [66] "ON | 14073750 / R: 1441 ->1433 |N:4 |W |B |W |B |W |B |W |"
## [67] "33 | JADE GE |3.5 |W 60|L 12|W 50|D 36|L 13|L 15|W 51|"
## [68] "MI | 14691842 / R: 1449 ->1421 | |B |W |B |W |B |W |B |"
## [69] "34 | MICHAEL JEFFERY THOMAS |3.5 |L 6|W 60|L 37|W 29|D 25|L 11|W 52|"
## [70] "MI | 15051807 / R: 1399 ->1400 | |B |W |B |B |W |B |W |"
## [71] "35 | JOSHUA DAVID LEE |3.5 |L 46|L 38|W 56|L 6|W 57|D 52|W 48|"
## [72] "MI | 14601397 / R: 1438 ->1392 | |W |W |B |W |B |B |W |"
## [73] "36 | SIDDHARTH JHA |3.5 |L 13|W 57|W 51|D 33|H |L 16|D 28|"
## [74] "MI | 14773163 / R: 1355 ->1367 |N:4 |W |B |W |B | |W |B |"
## [75] "37 | AMIYATOSH PWNANANDAM |3.5 |B |L 5|W 34|L 27|H |L 23|W 61|"
## [76] "MI | 15489571 / R: 980P12->1077P17 | | |B |W |W | |B |W |"
## [77] "38 | BRIAN LIU |3.0 |D 11|W 35|W 29|L 12|H |L 18|L 15|"
## [78] "MI | 15108523 / R: 1423 ->1439 |N:4 |W |B |W |W | |B |B |"
## [79] "39 | JOEL R HENDON |3.0 |L 1|W 54|W 40|L 16|W 44|L 21|L 24|"
## [80] "MI | 12923035 / R: 1436P23->1413 |N:4 |B |W |B |W |B |W |W |"
## [81] "40 | FOREST ZHANG |3.0 |W 20|L 26|L 39|W 59|L 21|W 56|L 22|"
## [82] "MI | 14892710 / R: 1348 ->1346 | |B |B |W |W |B |W |W |"
## [83] "41 | KYLE WILLIAM MURPHY |3.0 |W 59|L 17|W 58|L 20|X |U |U |"
## [84] "MI | 15761443 / R: 1403P5 ->1341P9 | |B |W |B |W | | | |"
## [85] "42 | JARED GE |3.0 |L 12|L 50|L 57|D 60|D 61|W 64|W 56|"
## [86] "MI | 14462326 / R: 1332 ->1256 | |B |W |B |B |W |W |B |"
## [87] "43 | ROBERT GLEN VASEY |3.0 |L 21|L 23|L 24|W 63|W 59|L 46|W 55|"
## [88] "MI | 14101068 / R: 1283 ->1244 | |W |B |W |W |B |B |W |"
## [89] "44 | JUSTIN D SCHILLING |3.0 |B |L 14|L 32|W 53|L 39|L 24|W 59|"
## [90] "MI | 15323504 / R: 1199 ->1199 | | |W |B |B |W |B |W |"
## [91] "45 | DEREK YAN |3.0 |L 5|L 51|D 60|L 56|W 63|D 55|W 58|"
## [92] "MI | 15372807 / R: 1242 ->1191 | |W |B |W |B |W |B |W |"
## [93] "46 | JACOB ALEXANDER LAVALLEY |3.0 |W 35|L 7|L 27|L 50|W 64|W 43|L 23|"
## [94] "MI | 15490981 / R: 377P3 ->1076P10 | |B |W |B |W |B |W |W |"
## [95] "47 | ERIC WRIGHT |2.5 |L 18|W 24|L 21|W 61|L 8|D 51|L 25|"
## [96] "MI | 12533115 / R: 1362 ->1341 | |W |B |W |B |W |B |W |"
## [97] "48 | DANIEL KHAIN |2.5 |L 17|W 63|H |D 52|H |L 29|L 35|"
## [98] "MI | 14369165 / R: 1382 ->1335 | |B |W | |B | |W |B |"
## [99] "49 | MICHAEL J MARTIN |2.5 |L 26|L 20|D 63|D 64|W 58|H |U |"
## [100] "MI | 12531685 / R: 1291P12->1259P17 | |W |W |B |W |B | | |"
## [101] "50 | SHIVAM JHA |2.5 |L 29|W 42|L 33|W 46|H |L 31|L 30|"
## [102] "MI | 14773178 / R: 1056 ->1111 | |W |B |W |B | |B |W |"
## [103] "51 | TEJAS AYYAGARI |2.5 |L 27|W 45|L 36|W 57|L 32|D 47|L 33|"
## [104] "MI | 15205474 / R: 1011 ->1097 | |B |W |B |W |B |W |W |"
## [105] "52 | ETHAN GUO |2.5 |W 30|D 22|L 19|D 48|L 29|D 35|L 34|"
## [106] "MI | 14918803 / R: 935 ->1092 |N:4 |B |W |B |W |B |W |B |"
## [107] "53 | JOSE C YBARRA |2.0 |H |L 25|H |L 44|U |W 57|U |"
## [108] "MI | 12578849 / R: 1393 ->1359 | | |B | |W | |W | |"
## [109] "54 | LARRY HODGE |2.0 |L 14|L 39|L 61|B |L 15|L 59|W 64|"
## [110] "MI | 12836773 / R: 1270 ->1200 | |B |B |W | |W |B |W |"
## [111] "55 | ALEX KONG |2.0 |L 62|D 31|L 10|L 30|B |D 45|L 43|"
## [112] "MI | 15412571 / R: 1186 ->1163 | |W |B |W |B | |W |B |"
## [113] "56 | MARISA RICCI |2.0 |H |L 11|L 35|W 45|H |L 40|L 42|"
## [114] "MI | 14679887 / R: 1153 ->1140 | | |B |W |W | |B |W |"
## [115] "57 | MICHAEL LU |2.0 |L 7|L 36|W 42|L 51|L 35|L 53|B |"
## [116] "MI | 15113330 / R: 1092 ->1079 | |B |W |W |B |W |B | |"
## [117] "58 | VIRAJ MOHILE |2.0 |W 31|L 2|L 41|L 23|L 49|B |L 45|"
## [118] "MI | 14700365 / R: 917 -> 941 | |W |B |W |B |W | |B |"
## [119] "59 | SEAN M MC CORMICK |2.0 |L 41|B |L 9|L 40|L 43|W 54|L 44|"
## [120] "MI | 12841036 / R: 853 -> 878 | |W | |B |B |W |W |B |"
## [121] "60 | JULIA SHEN |1.5 |L 33|L 34|D 45|D 42|L 24|H |U |"
## [122] "MI | 14579262 / R: 967 -> 984 | |W |B |B |W |B | | |"
## [123] "61 | JEZZEL FARKAS |1.5 |L 32|L 3|W 54|L 47|D 42|L 30|L 37|"
## [124] "ON | 15771592 / R: 955P11-> 979P18 | |B |W |B |W |B |W |B |"
## [125] "62 | ASHWIN BALAJI |1.0 |W 55|U |U |U |U |U |U |"
## [126] "MI | 15219542 / R: 1530 ->1535 | |B | | | | | | |"
## [127] "63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |"
## [128] "MI | 15057092 / R: 1175 ->1125 | |W |B |W |B |B | | |"
## [129] "64 | BEN LI |1.0 |L 22|D 30|L 31|D 49|L 46|L 42|L 54|"
## [130] "MI | 15006561 / R: 1163 ->1112 | |B |W |W |B |W |B |B |"
# Creating unique Dataframe for rowtpe 1
i <- 3
df_row1<-data.frame(matrix(ncol = 10, nrow = 0))
names(df_row1)<-c("Pair_Num", "Player_Name", "Total","Round1","Round2","Round3","Round4","Round5","Round6","Round7")
while(i <= length(input_vector))
{
vec <- unlist(as.vector(strsplit(input_vector[i], "[|]")))
df_row1 <- rbind(df_row1,data.frame(Pair_Num= trimws(vec[1]), Player_Name=trimws(vec[2]), Total=trimws(vec[3]),Round1=as.numeric(as.character(str_extract(vec[4],"[0-9]+"))),Round2=as.numeric(as.character(str_extract(vec[5],"[0-9]+"))),Round3=as.numeric(as.character(str_extract(vec[6],"[0-9]+"))),Round4=as.numeric(as.character(str_extract(vec[7],"[0-9]+"))),Round5=as.numeric(as.character(str_extract(vec[8],"[0-9]+"))),Round6=as.numeric(as.character(str_extract(vec[9],"[0-9]+"))),Round7=as.numeric(as.character(str_extract(vec[10],"[0-9]+")))))
i<-i+2
}
df_row1
## Pair_Num Player_Name Total Round1 Round2 Round3 Round4
## 1 1 GARY HUA 6.0 39 21 18 14
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26
## 5 5 HANSHI ZUO 5.5 45 37 12 13
## 6 6 HANSEN SONG 5.0 34 29 11 35
## 7 7 GARY DEE SWATHELL 5.0 57 46 13 11
## 8 8 EZEKIEL HOUGHTON 5.0 3 32 14 9
## 9 9 STEFANO LEE 5.0 25 18 59 8
## 10 10 ANVIT RAO 5.0 16 19 55 31
## 11 11 CAMERON WILLIAM MC LEMAN 4.5 38 56 6 7
## 12 12 KENNETH J TACK 4.5 42 33 5 38
## 13 13 TORRANCE HENRY JR 4.5 36 27 7 5
## 14 14 BRADLEY SHAW 4.5 54 44 8 1
## 15 15 ZACHARY JAMES HOUGHTON 4.5 19 16 30 22
## 16 16 MIKE NIKITIN 4.0 10 15 NA 39
## 17 17 RONALD GRZEGORCZYK 4.0 48 41 26 2
## 18 18 DAVID SUNDEEN 4.0 47 9 1 32
## 19 19 DIPANKAR ROY 4.0 15 10 52 28
## 20 20 JASON ZHENG 4.0 40 49 23 41
## 21 21 DINH DANG BUI 4.0 43 1 47 3
## 22 22 EUGENE L MCCLURE 4.0 64 52 28 15
## 23 23 ALAN BUI 4.0 4 43 20 58
## 24 24 MICHAEL R ALDRICH 4.0 28 47 43 25
## 25 25 LOREN SCHWIEBERT 3.5 9 53 3 24
## 26 26 MAX ZHU 3.5 49 40 17 4
## 27 27 GAURAV GIDWANI 3.5 51 13 46 37
## 28 28 SOFIA ADINA STANESCU-BELLU 3.5 24 4 22 19
## 29 29 CHIEDOZIE OKORIE 3.5 50 6 38 34
## 30 30 GEORGE AVERY JONES 3.5 52 64 15 55
## 31 31 RISHI SHETTY 3.5 58 55 64 10
## 32 32 JOSHUA PHILIP MATHEWS 3.5 61 8 44 18
## 33 33 JADE GE 3.5 60 12 50 36
## 34 34 MICHAEL JEFFERY THOMAS 3.5 6 60 37 29
## 35 35 JOSHUA DAVID LEE 3.5 46 38 56 6
## 36 36 SIDDHARTH JHA 3.5 13 57 51 33
## 37 37 AMIYATOSH PWNANANDAM 3.5 NA 5 34 27
## 38 38 BRIAN LIU 3.0 11 35 29 12
## 39 39 JOEL R HENDON 3.0 1 54 40 16
## 40 40 FOREST ZHANG 3.0 20 26 39 59
## 41 41 KYLE WILLIAM MURPHY 3.0 59 17 58 20
## 42 42 JARED GE 3.0 12 50 57 60
## 43 43 ROBERT GLEN VASEY 3.0 21 23 24 63
## 44 44 JUSTIN D SCHILLING 3.0 NA 14 32 53
## 45 45 DEREK YAN 3.0 5 51 60 56
## 46 46 JACOB ALEXANDER LAVALLEY 3.0 35 7 27 50
## 47 47 ERIC WRIGHT 2.5 18 24 21 61
## 48 48 DANIEL KHAIN 2.5 17 63 NA 52
## 49 49 MICHAEL J MARTIN 2.5 26 20 63 64
## 50 50 SHIVAM JHA 2.5 29 42 33 46
## 51 51 TEJAS AYYAGARI 2.5 27 45 36 57
## 52 52 ETHAN GUO 2.5 30 22 19 48
## 53 53 JOSE C YBARRA 2.0 NA 25 NA 44
## 54 54 LARRY HODGE 2.0 14 39 61 NA
## 55 55 ALEX KONG 2.0 62 31 10 30
## 56 56 MARISA RICCI 2.0 NA 11 35 45
## 57 57 MICHAEL LU 2.0 7 36 42 51
## 58 58 VIRAJ MOHILE 2.0 31 2 41 23
## 59 59 SEAN M MC CORMICK 2.0 41 NA 9 40
## 60 60 JULIA SHEN 1.5 33 34 45 42
## 61 61 JEZZEL FARKAS 1.5 32 3 54 47
## 62 62 ASHWIN BALAJI 1.0 55 NA NA NA
## 63 63 THOMAS JOSEPH HOSMER 1.0 2 48 49 43
## 64 64 BEN LI 1.0 22 30 31 49
## Round5 Round6 Round7
## 1 7 12 4
## 2 16 20 7
## 3 11 13 12
## 4 5 19 1
## 5 4 14 17
## 6 10 27 21
## 7 1 9 2
## 8 47 28 19
## 9 26 7 20
## 10 6 25 18
## 11 3 34 26
## 12 NA 1 3
## 13 33 3 32
## 14 27 5 31
## 15 54 33 38
## 16 2 36 NA
## 17 23 22 5
## 18 19 38 10
## 19 18 4 8
## 20 28 2 9
## 21 40 39 6
## 22 NA 17 40
## 23 17 37 46
## 24 60 44 39
## 25 34 10 47
## 26 9 32 11
## 27 14 6 NA
## 28 20 8 36
## 29 52 48 NA
## 30 31 61 50
## 31 30 50 14
## 32 51 26 13
## 33 13 15 51
## 34 25 11 52
## 35 57 52 48
## 36 NA 16 28
## 37 NA 23 61
## 38 NA 18 15
## 39 44 21 24
## 40 21 56 22
## 41 NA NA NA
## 42 61 64 56
## 43 59 46 55
## 44 39 24 59
## 45 63 55 58
## 46 64 43 23
## 47 8 51 25
## 48 NA 29 35
## 49 58 NA NA
## 50 NA 31 30
## 51 32 47 33
## 52 29 35 34
## 53 NA 57 NA
## 54 15 59 64
## 55 NA 45 43
## 56 NA 40 42
## 57 35 53 NA
## 58 49 NA 45
## 59 43 54 44
## 60 24 NA NA
## 61 42 30 37
## 62 NA NA NA
## 63 45 NA NA
## 64 46 42 54
# Creating unique Dataframe for rowtpe 2
j <- 4
df_row2<-data.frame(matrix(ncol = 12, nrow = 0))
names(df_row2)<-c("state", "USCF_ID","Rtg_Pre","Rtg_Post", "Total","Round1","Round2","Round3","Round4","Round5","Round6","Round7")
while(j <= length(input_vector))
{
vec <- unlist(as.vector(strsplit(input_vector[j], "[|]")))
df_row2 <- rbind(df_row2,data.frame(state=trimws(vec[1]), USCF_ID=trimws(str_extract(vec[2], "[ ][0-9]+")), Rtg_Pre= trimws(str_extract(str_extract(vec[2], "\\:\\s*[0-9]+"),"[0-9]+")), Rtg_Post=trimws(str_extract(str_extract(vec[2], "\\>\\s*[0-9]+"),"[0-9]+")), Total=trimws(vec[3]),Round1=trimws(vec[4]),Round2=trimws(vec[5]),Round3=trimws(vec[6]),Round4=trimws(vec[7]),Round5=trimws(vec[8]),Round6=trimws(vec[9]),Round7=trimws(vec[10])))
j<-j+2
}
df_row2
## state USCF_ID Rtg_Pre Rtg_Post Total Round1 Round2 Round3 Round4
## 1 ON 15445895 1794 1817 N:2 W B W B
## 2 MI 14598900 1553 1663 N:2 B W B W
## 3 MI 14959604 1384 1640 N:2 W B W B
## 4 MI 12616049 1716 1744 N:2 W B W B
## 5 MI 14601533 1655 1690 N:2 B W B W
## 6 OH 15055204 1686 1687 N:3 W B W B
## 7 MI 11146376 1649 1673 N:3 W B W B
## 8 MI 15142253 1641 1657 N:3 B W B W
## 9 ON 14954524 1411 1564 N:2 W B W B
## 10 MI 14150362 1365 1544 N:3 W W B B
## 11 MI 12581589 1712 1696 N:3 B W B W
## 12 MI 12681257 1663 1670 N:3 W B W B
## 13 MI 15082995 1666 1662 N:3 B W B B
## 14 MI 10131499 1610 1618 N:3 W B W W
## 15 MI 15619130 1220 1416 N:3 B B W W
## 16 MI 10295068 1604 1613 N:3 B W B
## 17 MI 10297702 1629 1610 N:3 W B W B
## 18 MI 11342094 1600 1600 N:3 B W B W
## 19 MI 14862333 1564 1570 N:3 W B W B
## 20 MI 14529060 1595 1569 N:4 W B W B
## 21 ON 15495066 1563 1562 N:3 B W B W
## 22 MI 12405534 1555 1529 N:4 W B W B
## 23 ON 15030142 1363 1371 B W B W
## 24 MI 13469010 1229 1300 N:4 B W B B
## 25 MI 12486656 1745 1681 N:4 B W B W
## 26 ON 15131520 1579 1564 N:4 B W B W
## 27 MI 14476567 1552 1539 N:4 W B W B
## 28 MI 14882954 1507 1513 N:3 W W B W
## 29 MI 15323285 1602 1508 N:4 B W B W
## 30 ON 12577178 1522 1444 W B B W
## 31 MI 15131618 1494 1444 B W B W
## 32 ON 14073750 1441 1433 N:4 W B W B
## 33 MI 14691842 1449 1421 B W B W
## 34 MI 15051807 1399 1400 B W B B
## 35 MI 14601397 1438 1392 W W B W
## 36 MI 14773163 1355 1367 N:4 W B W B
## 37 MI 15489571 980 1077 B W W
## 38 MI 15108523 1423 1439 N:4 W B W W
## 39 MI 12923035 1436 1413 N:4 B W B W
## 40 MI 14892710 1348 1346 B B W W
## 41 MI 15761443 1403 1341 B W B W
## 42 MI 14462326 1332 1256 B W B B
## 43 MI 14101068 1283 1244 W B W W
## 44 MI 15323504 1199 1199 W B B
## 45 MI 15372807 1242 1191 W B W B
## 46 MI 15490981 377 1076 B W B W
## 47 MI 12533115 1362 1341 W B W B
## 48 MI 14369165 1382 1335 B W B
## 49 MI 12531685 1291 1259 W W B W
## 50 MI 14773178 1056 1111 W B W B
## 51 MI 15205474 1011 1097 B W B W
## 52 MI 14918803 935 1092 N:4 B W B W
## 53 MI 12578849 1393 1359 B W
## 54 MI 12836773 1270 1200 B B W
## 55 MI 15412571 1186 1163 W B W B
## 56 MI 14679887 1153 1140 B W W
## 57 MI 15113330 1092 1079 B W W B
## 58 MI 14700365 917 941 W B W B
## 59 MI 12841036 853 878 W B B
## 60 MI 14579262 967 984 W B B W
## 61 ON 15771592 955 979 B W B W
## 62 MI 15219542 1530 1535 B
## 63 MI 15057092 1175 1125 W B W B
## 64 MI 15006561 1163 1112 B W W B
## Round5 Round6 Round7
## 1 W B W
## 2 B W B
## 3 W B W
## 4 W B B
## 5 B W B
## 6 B W B
## 7 B W W
## 8 B W W
## 9 W B B
## 10 W B W
## 11 B W B
## 12 W B
## 13 W W B
## 14 B B W
## 15 B B W
## 16 W B
## 17 W B W
## 18 B W B
## 19 W W B
## 20 W B W
## 21 W B W
## 22 W B
## 23 B W B
## 24 W W B
## 25 B W B
## 26 B W W
## 27 W B
## 28 B B W
## 29 W B
## 30 W B B
## 31 B W B
## 32 W B W
## 33 B W B
## 34 W B W
## 35 B B W
## 36 W B
## 37 B W
## 38 B B
## 39 B W W
## 40 B W W
## 41
## 42 W W B
## 43 B B W
## 44 W B W
## 45 W B W
## 46 B W W
## 47 W B W
## 48 W B
## 49 B
## 50 B W
## 51 B W W
## 52 B W B
## 53 W
## 54 W B W
## 55 W B
## 56 B W
## 57 W B
## 58 W B
## 59 W W B
## 60 B
## 61 B W B
## 62
## 63 B
## 64 W B B
# Creating final dataframe by doing column bind with required columns from data frames df_row1 and df_row2
df_final <- cbind(df_row1, df_row2[1:4])
df_final
## Pair_Num Player_Name Total Round1 Round2 Round3 Round4
## 1 1 GARY HUA 6.0 39 21 18 14
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26
## 5 5 HANSHI ZUO 5.5 45 37 12 13
## 6 6 HANSEN SONG 5.0 34 29 11 35
## 7 7 GARY DEE SWATHELL 5.0 57 46 13 11
## 8 8 EZEKIEL HOUGHTON 5.0 3 32 14 9
## 9 9 STEFANO LEE 5.0 25 18 59 8
## 10 10 ANVIT RAO 5.0 16 19 55 31
## 11 11 CAMERON WILLIAM MC LEMAN 4.5 38 56 6 7
## 12 12 KENNETH J TACK 4.5 42 33 5 38
## 13 13 TORRANCE HENRY JR 4.5 36 27 7 5
## 14 14 BRADLEY SHAW 4.5 54 44 8 1
## 15 15 ZACHARY JAMES HOUGHTON 4.5 19 16 30 22
## 16 16 MIKE NIKITIN 4.0 10 15 NA 39
## 17 17 RONALD GRZEGORCZYK 4.0 48 41 26 2
## 18 18 DAVID SUNDEEN 4.0 47 9 1 32
## 19 19 DIPANKAR ROY 4.0 15 10 52 28
## 20 20 JASON ZHENG 4.0 40 49 23 41
## 21 21 DINH DANG BUI 4.0 43 1 47 3
## 22 22 EUGENE L MCCLURE 4.0 64 52 28 15
## 23 23 ALAN BUI 4.0 4 43 20 58
## 24 24 MICHAEL R ALDRICH 4.0 28 47 43 25
## 25 25 LOREN SCHWIEBERT 3.5 9 53 3 24
## 26 26 MAX ZHU 3.5 49 40 17 4
## 27 27 GAURAV GIDWANI 3.5 51 13 46 37
## 28 28 SOFIA ADINA STANESCU-BELLU 3.5 24 4 22 19
## 29 29 CHIEDOZIE OKORIE 3.5 50 6 38 34
## 30 30 GEORGE AVERY JONES 3.5 52 64 15 55
## 31 31 RISHI SHETTY 3.5 58 55 64 10
## 32 32 JOSHUA PHILIP MATHEWS 3.5 61 8 44 18
## 33 33 JADE GE 3.5 60 12 50 36
## 34 34 MICHAEL JEFFERY THOMAS 3.5 6 60 37 29
## 35 35 JOSHUA DAVID LEE 3.5 46 38 56 6
## 36 36 SIDDHARTH JHA 3.5 13 57 51 33
## 37 37 AMIYATOSH PWNANANDAM 3.5 NA 5 34 27
## 38 38 BRIAN LIU 3.0 11 35 29 12
## 39 39 JOEL R HENDON 3.0 1 54 40 16
## 40 40 FOREST ZHANG 3.0 20 26 39 59
## 41 41 KYLE WILLIAM MURPHY 3.0 59 17 58 20
## 42 42 JARED GE 3.0 12 50 57 60
## 43 43 ROBERT GLEN VASEY 3.0 21 23 24 63
## 44 44 JUSTIN D SCHILLING 3.0 NA 14 32 53
## 45 45 DEREK YAN 3.0 5 51 60 56
## 46 46 JACOB ALEXANDER LAVALLEY 3.0 35 7 27 50
## 47 47 ERIC WRIGHT 2.5 18 24 21 61
## 48 48 DANIEL KHAIN 2.5 17 63 NA 52
## 49 49 MICHAEL J MARTIN 2.5 26 20 63 64
## 50 50 SHIVAM JHA 2.5 29 42 33 46
## 51 51 TEJAS AYYAGARI 2.5 27 45 36 57
## 52 52 ETHAN GUO 2.5 30 22 19 48
## 53 53 JOSE C YBARRA 2.0 NA 25 NA 44
## 54 54 LARRY HODGE 2.0 14 39 61 NA
## 55 55 ALEX KONG 2.0 62 31 10 30
## 56 56 MARISA RICCI 2.0 NA 11 35 45
## 57 57 MICHAEL LU 2.0 7 36 42 51
## 58 58 VIRAJ MOHILE 2.0 31 2 41 23
## 59 59 SEAN M MC CORMICK 2.0 41 NA 9 40
## 60 60 JULIA SHEN 1.5 33 34 45 42
## 61 61 JEZZEL FARKAS 1.5 32 3 54 47
## 62 62 ASHWIN BALAJI 1.0 55 NA NA NA
## 63 63 THOMAS JOSEPH HOSMER 1.0 2 48 49 43
## 64 64 BEN LI 1.0 22 30 31 49
## Round5 Round6 Round7 state USCF_ID Rtg_Pre Rtg_Post
## 1 7 12 4 ON 15445895 1794 1817
## 2 16 20 7 MI 14598900 1553 1663
## 3 11 13 12 MI 14959604 1384 1640
## 4 5 19 1 MI 12616049 1716 1744
## 5 4 14 17 MI 14601533 1655 1690
## 6 10 27 21 OH 15055204 1686 1687
## 7 1 9 2 MI 11146376 1649 1673
## 8 47 28 19 MI 15142253 1641 1657
## 9 26 7 20 ON 14954524 1411 1564
## 10 6 25 18 MI 14150362 1365 1544
## 11 3 34 26 MI 12581589 1712 1696
## 12 NA 1 3 MI 12681257 1663 1670
## 13 33 3 32 MI 15082995 1666 1662
## 14 27 5 31 MI 10131499 1610 1618
## 15 54 33 38 MI 15619130 1220 1416
## 16 2 36 NA MI 10295068 1604 1613
## 17 23 22 5 MI 10297702 1629 1610
## 18 19 38 10 MI 11342094 1600 1600
## 19 18 4 8 MI 14862333 1564 1570
## 20 28 2 9 MI 14529060 1595 1569
## 21 40 39 6 ON 15495066 1563 1562
## 22 NA 17 40 MI 12405534 1555 1529
## 23 17 37 46 ON 15030142 1363 1371
## 24 60 44 39 MI 13469010 1229 1300
## 25 34 10 47 MI 12486656 1745 1681
## 26 9 32 11 ON 15131520 1579 1564
## 27 14 6 NA MI 14476567 1552 1539
## 28 20 8 36 MI 14882954 1507 1513
## 29 52 48 NA MI 15323285 1602 1508
## 30 31 61 50 ON 12577178 1522 1444
## 31 30 50 14 MI 15131618 1494 1444
## 32 51 26 13 ON 14073750 1441 1433
## 33 13 15 51 MI 14691842 1449 1421
## 34 25 11 52 MI 15051807 1399 1400
## 35 57 52 48 MI 14601397 1438 1392
## 36 NA 16 28 MI 14773163 1355 1367
## 37 NA 23 61 MI 15489571 980 1077
## 38 NA 18 15 MI 15108523 1423 1439
## 39 44 21 24 MI 12923035 1436 1413
## 40 21 56 22 MI 14892710 1348 1346
## 41 NA NA NA MI 15761443 1403 1341
## 42 61 64 56 MI 14462326 1332 1256
## 43 59 46 55 MI 14101068 1283 1244
## 44 39 24 59 MI 15323504 1199 1199
## 45 63 55 58 MI 15372807 1242 1191
## 46 64 43 23 MI 15490981 377 1076
## 47 8 51 25 MI 12533115 1362 1341
## 48 NA 29 35 MI 14369165 1382 1335
## 49 58 NA NA MI 12531685 1291 1259
## 50 NA 31 30 MI 14773178 1056 1111
## 51 32 47 33 MI 15205474 1011 1097
## 52 29 35 34 MI 14918803 935 1092
## 53 NA 57 NA MI 12578849 1393 1359
## 54 15 59 64 MI 12836773 1270 1200
## 55 NA 45 43 MI 15412571 1186 1163
## 56 NA 40 42 MI 14679887 1153 1140
## 57 35 53 NA MI 15113330 1092 1079
## 58 49 NA 45 MI 14700365 917 941
## 59 43 54 44 MI 12841036 853 878
## 60 24 NA NA MI 14579262 967 984
## 61 42 30 37 ON 15771592 955 979
## 62 NA NA NA MI 15219542 1530 1535
## 63 45 NA NA MI 15057092 1175 1125
## 64 46 42 54 MI 15006561 1163 1112
In this step we calculate the average opponent prerating and create a CSV file with desired columns - We make use of while anf for oops - converted the data types from factor to numeric to calulate average - used write.table function to create CSV file
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# defining vector
colvalues<-c(1:7)
# creating a 1*n matrix
Average_Opponent_Pre_Rating <- data.frame(matrix(nrow = 0, ncol = 1))
k<-1
while(k <= nrow(df_final))
{
colnames_1<-c("Round1","Round2","Round3","Round4","Round5","Round6","Round7")
i<-1
for (variable in colnames_1) {
colvalues[i]<-as.numeric(levels(df_final[df_final[k,][[variable]], "Rtg_Pre"]))[df_final[df_final[k,][[variable]], "Rtg_Pre"]]
i <- i+1
}
mean_df <- data.frame(round(mean(colvalues, na.rm = TRUE), digits = 0))
Average_Opponent_Pre_Rating <- rbind(Average_Opponent_Pre_Rating, mean_df)
k <- k+1
}
#doing a column bind to add our averages rating to exisitng data frame
df_final <- cbind(df_final, Average_Opponent_Pre_Rating)
# Renaming the newly added column
colnames(df_final)[15] <- "Average_Opponent_Rtg_Pre"
# creating a Dataframe with required columns
df_result <- select(df_final, Player_Name, state, Total , Rtg_Pre, Average_Opponent_Rtg_Pre)
df_result
## Player_Name state Total Rtg_Pre Average_Opponent_Rtg_Pre
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1411
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1214
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1507
## 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1260
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 JADE GE MI 3.5 1449 1277
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1430
## 40 FOREST ZHANG MI 3.0 1348 1391
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1150
## 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1495
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
# writing the results to a CSV file to local folder
write.table(df_result, file = "~/Desktop/CUNY/Project - 1/df_result.csv", row.names = F, col.names=FALSE, sep = ",")
Analysis and Visualization -Convert data into numeric -Analysis summary on elements and visual graphs
#Analyze and Visualization
#Convert entire data frame to numeric
df_result <- as.data.frame(sapply(df_result, as.numeric))
#Summary of Players Pre-Ratings
summary(df_result$'Rtg_Pre')
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 16.75 32.50 32.50 48.25 64.00
#Summary of Average Opponent Ratings
summary(df_result$'Average_Opponent_Rtg_Pre')
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1107 1310 1382 1379 1481 1605
#install.packages("histogram")
library(histogram)
hist(df_result$'Total', breaks = 30, main = "Distribution of Player Ratings Pre-Tournament", xlab = "Total", ylab = "Count")