Line1 is extracted using Regex and used couple of replace code to remove unwanted spaces and characters. Use lapply to apply a series of replace functions to remove unwanted characters and extra space and end result will be delimited with “|”
mystring <- read_file("https://raw.githubusercontent.com/charlsjoseph/CUNY-Data607/master/week3/tournamentinfo.txt")
#extract the 1st Line using regex
lines1 <- str_extract_all(mystring, "\\w+.+[|]\\d+[.]\\d.+\\|")
firstlines <- lapply(lines1, function (x){
x <- str_replace_all(x, "[|][A-Z]\\s{1,}", "|")
x <- str_replace_all(x, "\\s{2,}" , "")
x<- str_replace_all(x, "[|]{2}" , "|0|")
str_replace_all(x, "[|]{2}" , "|0|")
})
head(firstlines)
## [[1]]
## [1] "1 | GARY HUA|6.0|39|21|18|14|7|12|4|"
## [2] "2 | DAKSHESH DARURI|6.0|63|58|4|17|16|20|7|"
## [3] "3 | ADITYA BAJAJ|6.0|8|61|25|21|11|13|12|"
## [4] "4 | PATRICK H SCHILLING|5.5|23|28|2|26|5|19|1|"
## [5] "5 | HANSHI ZUO|5.5|45|37|12|13|4|14|17|"
## [6] "6 | HANSEN SONG|5.0|34|29|11|35|10|27|21|"
## [7] "7 | GARY DEE SWATHELL|5.0|57|46|13|11|1|9|2|"
## [8] "8 | EZEKIEL HOUGHTON|5.0|3|32|14|9|47|28|19|"
## [9] "9 | STEFANO LEE|5.0|25|18|59|8|26|7|20|"
## [10] "10 | ANVIT RAO|5.0|16|19|55|31|6|25|18|"
## [11] "11 | CAMERON WILLIAM MC LEMAN|4.5|38|56|6|7|3|34|26|"
## [12] "12 | KENNETH J TACK|4.5|42|33|5|38|0|1|3|"
## [13] "13 | TORRANCE HENRY JR|4.5|36|27|7|5|33|3|32|"
## [14] "14 | BRADLEY SHAW|4.5|54|44|8|1|27|5|31|"
## [15] "15 | ZACHARY JAMES HOUGHTON|4.5|19|16|30|22|54|33|38|"
## [16] "16 | MIKE NIKITIN|4.0|10|15|0|39|2|36|0|"
## [17] "17 | RONALD GRZEGORCZYK|4.0|48|41|26|2|23|22|5|"
## [18] "18 | DAVID SUNDEEN|4.0|47|9|1|32|19|38|10|"
## [19] "19 | DIPANKAR ROY|4.0|15|10|52|28|18|4|8|"
## [20] "20 | JASON ZHENG|4.0|40|49|23|41|28|2|9|"
## [21] "21 | DINH DANG BUI|4.0|43|1|47|3|40|39|6|"
## [22] "22 | EUGENE L MCCLURE|4.0|64|52|28|15|0|17|40|"
## [23] "23 | ALAN BUI|4.0|4|43|20|58|17|37|46|"
## [24] "24 | MICHAEL R ALDRICH|4.0|28|47|43|25|60|44|39|"
## [25] "25 | LOREN SCHWIEBERT|3.5|9|53|3|24|34|10|47|"
## [26] "26 | MAX ZHU|3.5|49|40|17|4|9|32|11|"
## [27] "27 | GAURAV GIDWANI|3.5|51|13|46|37|14|6|0|"
## [28] "28 | SOFIA ADINA STANESCU-BELLU|3.5|24|4|22|19|20|8|36|"
## [29] "29 | CHIEDOZIE OKORIE|3.5|50|6|38|34|52|48|0|"
## [30] "30 | GEORGE AVERY JONES|3.5|52|64|15|55|31|61|50|"
## [31] "31 | RISHI SHETTY|3.5|58|55|64|10|30|50|14|"
## [32] "32 | JOSHUA PHILIP MATHEWS|3.5|61|8|44|18|51|26|13|"
## [33] "33 | JADE GE|3.5|60|12|50|36|13|15|51|"
## [34] "34 | MICHAEL JEFFERY THOMAS|3.5|6|60|37|29|25|11|52|"
## [35] "35 | JOSHUA DAVID LEE|3.5|46|38|56|6|57|52|48|"
## [36] "36 | SIDDHARTH JHA|3.5|13|57|51|33|0|16|28|"
## [37] "37 | AMIYATOSH PWNANANDAM|3.5|0|5|34|27|0|23|61|"
## [38] "38 | BRIAN LIU|3.0|11|35|29|12|0|18|15|"
## [39] "39 | JOEL R HENDON|3.0|1|54|40|16|44|21|24|"
## [40] "40 | FOREST ZHANG|3.0|20|26|39|59|21|56|22|"
## [41] "41 | KYLE WILLIAM MURPHY|3.0|59|17|58|20|0|0|0|"
## [42] "42 | JARED GE|3.0|12|50|57|60|61|64|56|"
## [43] "43 | ROBERT GLEN VASEY|3.0|21|23|24|63|59|46|55|"
## [44] "44 | JUSTIN D SCHILLING|3.0|0|14|32|53|39|24|59|"
## [45] "45 | DEREK YAN|3.0|5|51|60|56|63|55|58|"
## [46] "46 | JACOB ALEXANDER LAVALLEY|3.0|35|7|27|50|64|43|23|"
## [47] "47 | ERIC WRIGHT|2.5|18|24|21|61|8|51|25|"
## [48] "48 | DANIEL KHAIN|2.5|17|63|0|52|0|29|35|"
## [49] "49 | MICHAEL J MARTIN|2.5|26|20|63|64|58|0|0|"
## [50] "50 | SHIVAM JHA|2.5|29|42|33|46|0|31|30|"
## [51] "51 | TEJAS AYYAGARI|2.5|27|45|36|57|32|47|33|"
## [52] "52 | ETHAN GUO|2.5|30|22|19|48|29|35|34|"
## [53] "53 | JOSE C YBARRA|2.0|0|25|0|44|0|57|0|"
## [54] "54 | LARRY HODGE|2.0|14|39|61|0|15|59|64|"
## [55] "55 | ALEX KONG|2.0|62|31|10|30|0|45|43|"
## [56] "56 | MARISA RICCI|2.0|0|11|35|45|0|40|42|"
## [57] "57 | MICHAEL LU|2.0|7|36|42|51|35|53|0|"
## [58] "58 | VIRAJ MOHILE|2.0|31|2|41|23|49|0|45|"
## [59] "59 | SEAN M MC CORMICK|2.0|41|0|9|40|43|54|44|"
## [60] "60 | JULIA SHEN|1.5|33|34|45|42|24|0|0|"
## [61] "61 | JEZZEL FARKAS|1.5|32|3|54|47|42|30|37|"
## [62] "62 | ASHWIN BALAJI|1.0|55|0|0|0|0|0|0|"
## [63] "63 | THOMAS JOSEPH HOSMER|1.0|2|48|49|43|45|0|0|"
## [64] "64 | BEN LI|1.0|22|30|31|49|46|42|54|"
Format the firstLines and extract the data into names, points and averagePoints vector variables. notes: used str_split() function to split the line with the delimiter “|”
For e.g 1 | GARY HUA|6.0|39|21|18|14|7|12|4| Extract following into names, points, averagePoints vector variables
names =“GARY HUA” points = 6.0 averagePoints = mean(21,18,14,7,12,4)
names <- c()
id <- c()
points <- c()
round1 <- c()
round2 <- c()
round3 <- c()
round4 <- c()
round5 <- c()
round6 <- c()
round7 <- c()
for (row in unlist(firstlines)) {
ar<- unlist(str_split(row, fixed("|")))
id <- c(id, as.integer(ar[1]))
names <- c(names, ar[2])
points <- c(points, ar[3])
print(row)
round1 <- c(round1, as.integer(ar[4]))
round2 <- c(round2, as.integer(ar[5]))
round3 <- c(round3, as.integer(ar[6]))
round4 <- c(round4, as.integer(ar[7]))
round5 <- c(round5, as.integer(ar[8]))
round6 <- c(round6, as.integer(ar[9]))
round7 <- c(round7, as.integer(ar[10]))
}
## [1] "1 | GARY HUA|6.0|39|21|18|14|7|12|4|"
## [1] "2 | DAKSHESH DARURI|6.0|63|58|4|17|16|20|7|"
## [1] "3 | ADITYA BAJAJ|6.0|8|61|25|21|11|13|12|"
## [1] "4 | PATRICK H SCHILLING|5.5|23|28|2|26|5|19|1|"
## [1] "5 | HANSHI ZUO|5.5|45|37|12|13|4|14|17|"
## [1] "6 | HANSEN SONG|5.0|34|29|11|35|10|27|21|"
## [1] "7 | GARY DEE SWATHELL|5.0|57|46|13|11|1|9|2|"
## [1] "8 | EZEKIEL HOUGHTON|5.0|3|32|14|9|47|28|19|"
## [1] "9 | STEFANO LEE|5.0|25|18|59|8|26|7|20|"
## [1] "10 | ANVIT RAO|5.0|16|19|55|31|6|25|18|"
## [1] "11 | CAMERON WILLIAM MC LEMAN|4.5|38|56|6|7|3|34|26|"
## [1] "12 | KENNETH J TACK|4.5|42|33|5|38|0|1|3|"
## [1] "13 | TORRANCE HENRY JR|4.5|36|27|7|5|33|3|32|"
## [1] "14 | BRADLEY SHAW|4.5|54|44|8|1|27|5|31|"
## [1] "15 | ZACHARY JAMES HOUGHTON|4.5|19|16|30|22|54|33|38|"
## [1] "16 | MIKE NIKITIN|4.0|10|15|0|39|2|36|0|"
## [1] "17 | RONALD GRZEGORCZYK|4.0|48|41|26|2|23|22|5|"
## [1] "18 | DAVID SUNDEEN|4.0|47|9|1|32|19|38|10|"
## [1] "19 | DIPANKAR ROY|4.0|15|10|52|28|18|4|8|"
## [1] "20 | JASON ZHENG|4.0|40|49|23|41|28|2|9|"
## [1] "21 | DINH DANG BUI|4.0|43|1|47|3|40|39|6|"
## [1] "22 | EUGENE L MCCLURE|4.0|64|52|28|15|0|17|40|"
## [1] "23 | ALAN BUI|4.0|4|43|20|58|17|37|46|"
## [1] "24 | MICHAEL R ALDRICH|4.0|28|47|43|25|60|44|39|"
## [1] "25 | LOREN SCHWIEBERT|3.5|9|53|3|24|34|10|47|"
## [1] "26 | MAX ZHU|3.5|49|40|17|4|9|32|11|"
## [1] "27 | GAURAV GIDWANI|3.5|51|13|46|37|14|6|0|"
## [1] "28 | SOFIA ADINA STANESCU-BELLU|3.5|24|4|22|19|20|8|36|"
## [1] "29 | CHIEDOZIE OKORIE|3.5|50|6|38|34|52|48|0|"
## [1] "30 | GEORGE AVERY JONES|3.5|52|64|15|55|31|61|50|"
## [1] "31 | RISHI SHETTY|3.5|58|55|64|10|30|50|14|"
## [1] "32 | JOSHUA PHILIP MATHEWS|3.5|61|8|44|18|51|26|13|"
## [1] "33 | JADE GE|3.5|60|12|50|36|13|15|51|"
## [1] "34 | MICHAEL JEFFERY THOMAS|3.5|6|60|37|29|25|11|52|"
## [1] "35 | JOSHUA DAVID LEE|3.5|46|38|56|6|57|52|48|"
## [1] "36 | SIDDHARTH JHA|3.5|13|57|51|33|0|16|28|"
## [1] "37 | AMIYATOSH PWNANANDAM|3.5|0|5|34|27|0|23|61|"
## [1] "38 | BRIAN LIU|3.0|11|35|29|12|0|18|15|"
## [1] "39 | JOEL R HENDON|3.0|1|54|40|16|44|21|24|"
## [1] "40 | FOREST ZHANG|3.0|20|26|39|59|21|56|22|"
## [1] "41 | KYLE WILLIAM MURPHY|3.0|59|17|58|20|0|0|0|"
## [1] "42 | JARED GE|3.0|12|50|57|60|61|64|56|"
## [1] "43 | ROBERT GLEN VASEY|3.0|21|23|24|63|59|46|55|"
## [1] "44 | JUSTIN D SCHILLING|3.0|0|14|32|53|39|24|59|"
## [1] "45 | DEREK YAN|3.0|5|51|60|56|63|55|58|"
## [1] "46 | JACOB ALEXANDER LAVALLEY|3.0|35|7|27|50|64|43|23|"
## [1] "47 | ERIC WRIGHT|2.5|18|24|21|61|8|51|25|"
## [1] "48 | DANIEL KHAIN|2.5|17|63|0|52|0|29|35|"
## [1] "49 | MICHAEL J MARTIN|2.5|26|20|63|64|58|0|0|"
## [1] "50 | SHIVAM JHA|2.5|29|42|33|46|0|31|30|"
## [1] "51 | TEJAS AYYAGARI|2.5|27|45|36|57|32|47|33|"
## [1] "52 | ETHAN GUO|2.5|30|22|19|48|29|35|34|"
## [1] "53 | JOSE C YBARRA|2.0|0|25|0|44|0|57|0|"
## [1] "54 | LARRY HODGE|2.0|14|39|61|0|15|59|64|"
## [1] "55 | ALEX KONG|2.0|62|31|10|30|0|45|43|"
## [1] "56 | MARISA RICCI|2.0|0|11|35|45|0|40|42|"
## [1] "57 | MICHAEL LU|2.0|7|36|42|51|35|53|0|"
## [1] "58 | VIRAJ MOHILE|2.0|31|2|41|23|49|0|45|"
## [1] "59 | SEAN M MC CORMICK|2.0|41|0|9|40|43|54|44|"
## [1] "60 | JULIA SHEN|1.5|33|34|45|42|24|0|0|"
## [1] "61 | JEZZEL FARKAS|1.5|32|3|54|47|42|30|37|"
## [1] "62 | ASHWIN BALAJI|1.0|55|0|0|0|0|0|0|"
## [1] "63 | THOMAS JOSEPH HOSMER|1.0|2|48|49|43|45|0|0|"
## [1] "64 | BEN LI|1.0|22|30|31|49|46|42|54|"
length(round7)
## [1] 64
statesData <- str_extract_all(mystring, ".+[A-Z]{2}\\s{1}[|]")
states = c()
for (state in unlist(statesData)) {
state <- str_replace_all(state, "\\s+" , "")
state <- str_replace_all(state, "[|]" , "")
states <- c(states, state)
}
states
## [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI"
## [15] "MI" "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI"
## [29] "MI" "ON" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [43] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [57] "MI" "MI" "MI" "MI" "ON" "MI" "MI" "MI"
length(states)
## [1] 64
preRating = c()
prepostData <- str_extract_all(mystring, "[0-9,A-Z]+\\s*->\\s?[0-9,A-Z]+")
for (prepost in unlist(prepostData)) {
prepost <- str_replace_all(prepost, "P?[\\d+]?.?->" , "|")
ar<- unlist(str_split(prepost, fixed("|")))
preRating <- c(preRating,as.integer(ar[1]))
}
preRating
## [1] 1794 1553 1384 1716 1655 1686 1649 1641 1411 1365 1712 1663 1666 1610
## [15] 1220 1604 1629 1600 1564 1595 1563 1555 1363 1229 1745 1579 1552 1507
## [29] 1602 1522 1494 1441 1449 1399 1438 1355 980 1423 1436 1348 1403 1332
## [43] 1283 1199 1242 377 1362 1382 1291 1056 1011 935 1393 1270 1186 1153
## [57] 1092 917 853 967 955 1530 1175 1163
id=id names points oponent1 oponent2 oponent3 oponent4 oponent5 oponent6 oponent7 state preRating
players.data <- data.frame(id=id,names=names,points=points, oponent1=round1,
oponent2=round2,
oponent3=round3,
oponent4=round4,
oponent5=round5,
oponent6=round6,
oponent7=round7,
state=states ,preRating=preRating )
head(players.data)
## id names points oponent1 oponent2 oponent3 oponent4
## 1 1 GARY HUA 6.0 39 21 18 14
## 2 2 DAKSHESH DARURI 6.0 63 58 4 17
## 3 3 ADITYA BAJAJ 6.0 8 61 25 21
## 4 4 PATRICK H SCHILLING 5.5 23 28 2 26
## 5 5 HANSHI ZUO 5.5 45 37 12 13
## 6 6 HANSEN SONG 5.0 34 29 11 35
## oponent5 oponent6 oponent7 state preRating
## 1 7 12 4 ON 1794
## 2 16 20 7 MI 1553
## 3 11 13 12 MI 1384
## 4 5 19 1 MI 1716
## 5 4 14 17 MI 1655
## 6 10 27 21 OH 1686
Created a fn called fetch_PreRating_oponent to fetch the prerating for the opponent
Input Parameter: Opponent Id
OutPut Parameter : PreRating Value
use Apply function to generate a new calculated column called “avgPreRating”
fetch_PreRating_oponent <- function(oponent) {
df= players.data[players.data[1] == oponent ]
c <- as.integer(df[12])
if (is.na(c)) {
c <- 0
}
return(c)
}
players.data$avgPreRating <- apply(players.data, MARGIN=1, function(x){
trunc(mean(c(fetch_PreRating_oponent(as.integer(x[4])), fetch_PreRating_oponent(as.integer(x[5])), fetch_PreRating_oponent(as.integer(x[6])), fetch_PreRating_oponent(as.integer(x[7])), fetch_PreRating_oponent(as.integer(x[8])), fetch_PreRating_oponent(as.integer(x[9])), fetch_PreRating_oponent(as.integer(x[10])))))
})
finalResult <- players.data[,c("names", "state" , "points", "avgPreRating")]
write.csv(finalResult, "finalResults.csv")
finalResult
## names state points avgPreRating
## 1 GARY HUA ON 6.0 1605
## 2 DAKSHESH DARURI MI 6.0 1469
## 3 ADITYA BAJAJ MI 6.0 1563
## 4 PATRICK H SCHILLING MI 5.5 1573
## 5 HANSHI ZUO MI 5.5 1500
## 6 HANSEN SONG OH 5.0 1518
## 7 GARY DEE SWATHELL MI 5.0 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1468
## 9 STEFANO LEE ON 5.0 1523
## 10 ANVIT RAO MI 5.0 1554
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1467
## 12 KENNETH J TACK MI 4.5 1291
## 13 TORRANCE HENRY JR MI 4.5 1497
## 14 BRADLEY SHAW MI 4.5 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1483
## 16 MIKE NIKITIN MI 4.0 989
## 17 RONALD GRZEGORCZYK MI 4.0 1498
## 18 DAVID SUNDEEN MI 4.0 1480
## 19 DIPANKAR ROY MI 4.0 1426
## 20 JASON ZHENG MI 4.0 1410
## 21 DINH DANG BUI ON 4.0 1470
## 22 EUGENE L MCCLURE MI 4.0 1114
## 23 ALAN BUI ON 4.0 1213
## 24 MICHAEL R ALDRICH MI 4.0 1357
## 25 LOREN SCHWIEBERT MI 3.5 1363
## 26 MAX ZHU ON 3.5 1506
## 27 GAURAV GIDWANI MI 3.5 1047
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1125
## 30 GEORGE AVERY JONES ON 3.5 1144
## 31 RISHI SHETTY MI 3.5 1259
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1378
## 33 JADE GE MI 3.5 1276
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1375
## 35 JOSHUA DAVID LEE MI 3.5 1149
## 36 SIDDHARTH JHA MI 3.5 1189
## 37 AMIYATOSH PWNANANDAM MI 3.5 989
## 38 BRIAN LIU MI 3.0 1319
## 39 JOEL R HENDON MI 3.0 1429
## 40 FOREST ZHANG MI 3.0 1390
## 41 KYLE WILLIAM MURPHY MI 3.0 713
## 42 JARED GE MI 3.0 1149
## 43 ROBERT GLEN VASEY MI 3.0 1106
## 44 JUSTIN D SCHILLING MI 3.0 1137
## 45 DEREK YAN MI 3.0 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 1357
## 47 ERIC WRIGHT MI 2.5 1392
## 48 DANIEL KHAIN MI 2.5 968
## 49 MICHAEL J MARTIN MI 2.5 918
## 50 SHIVAM JHA MI 2.5 1110
## 51 TEJAS AYYAGARI MI 2.5 1356
## 52 ETHAN GUO MI 2.5 1494
## 53 JOSE C YBARRA MI 2.0 576
## 54 LARRY HODGE MI 2.0 1033
## 55 ALEX KONG MI 2.0 1205
## 56 MARISA RICCI MI 2.0 1010
## 57 MICHAEL LU MI 2.0 1168
## 58 VIRAJ MOHILE MI 2.0 1192
## 59 SEAN M MC CORMICK MI 2.0 1130
## 60 JULIA SHEN MI 1.5 950
## 61 JEZZEL FARKAS ON 1.5 1327
## 62 ASHWIN BALAJI MI 1.0 169
## 63 THOMAS JOSEPH HOSMER MI 1.0 964
## 64 BEN LI MI 1.0 1263