library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Load the raw tournament listing. read.delim() uses the first line of the
# file as the header, which is why the single column is named "X....".
df <- "tournamentinfo.txt" %>%
  read.delim() %>%
  data.frame()
head(df)
## X.........................................................................................
## 1 Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round|
## 2 Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
## 3 -----------------------------------------------------------------------------------------
## 4 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 5 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 6 -----------------------------------------------------------------------------------------
# Flatten the parsed table into a single string so that each field can be
# pulled out with a regular expression.
text <- toString(df)

# Player names: runs of two or more upper-case words separated by single
# spaces. The number of words is left open because a pattern limited to three
# words cuts longer names short (it yields e.g. "CAMERON WILLIAM MC").
name <- unlist(str_extract_all(text, '[A-Z]+( [A-Z]+)+'))
# The column heading "USCF ID" has the same shape as a name. Drop it by value
# rather than by position so that a real player can never be discarded.
name <- name[name != "USCF ID"]
name <- str_trim(name)
head(name)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
length(name)
## [1] 64
# Two-letter codes that are followed by " |"; strtrim() keeps only the first
# two characters of each match, i.e. the code itself.
state <- text %>%
  str_extract_all("([A-Z][A-Z]) \\|") %>%
  unlist() %>%
  strtrim(2)
head(state)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
length(state)
## [1] 64
# Total points: every "digit.digit" token in the text, converted to numeric.
tot_pts <- text %>%
  str_extract_all('[0-9][\\.][0-9]') %>%
  unlist() %>%
  as.double()
head(tot_pts)
## [1] 6.0 6.0 6.0 5.5 5.5 5.0
length(tot_pts)
## [1] 64
# Pre-tournament rating, extracted in two passes:
#   1. grab each ": <digits>...-" fragment (the part in front of the "->"
#      arrow of the "Pre->Post" column),
#   2. join the fragments and keep only the digit run that follows a space,
#      which leaves out any suffix glued to the rating.
pr <- text %>%
  str_extract_all(': *[0-9]+...-') %>%
  unlist() %>%
  toString() %>%
  str_extract_all(' [0-9]+') %>%
  unlist() %>%
  as.integer()
head(pr)
## [1] 1794 1553 1384 1716 1655 1686
length(pr)
## [1] 64
# Pair numbers: a space, any one character, a digit and a trailing space.
# as.integer() ignores the surrounding blanks of each match.
num <- as.integer(unlist(str_extract_all(text, ' .[0-9] ')))
head(num)
## [1] 1 2 3 4 5 6
length(num)
## [1] 64
# One string per player holding the tail of the total-points field followed
# by the seven round cells, each cell terminated by "|".
x <- unlist(str_extract_all(text, '\\..{3}\\|[A-Z]....\\|[A-Z]....\\|[A-Z]....\\|[A-Z]....\\|[A-Z]....\\|[A-Z]....\\|[A-Z]....\\|'))

# Split every string on "|": piece 1 is the points remainder, pieces 2-8 are
# the round cells and piece 9 is the empty text after the final "|".
round_cols <- paste0("R", 1:7)
cells <- str_split_fixed(x, fixed("|"), 9)[, 2:8, drop = FALSE]

# The opponent number is the digit run inside a cell. Cells without digits
# become NA. Extracting the digits (instead of a fixed-width slice) keeps the
# result independent of how the cells are padded, and filling one matrix
# avoids growing a data frame row by row.
opp_ids <- as.integer(str_extract(as.vector(cells), "[0-9]+"))
y <- as.data.frame(
  matrix(opp_ids, nrow = length(x), ncol = length(round_cols))
)
colnames(y) <- round_cols
cols <- colnames(y)
head(y)
## R1 R2 R3 R4 R5 R6 R7
## 1 39 21 18 14 7 12 4
## 2 63 58 4 17 16 20 7
## 3 8 61 25 21 11 13 12
## 4 23 28 2 26 5 19 1
## 5 45 37 12 13 4 14 17
## 6 34 29 11 35 10 27 21
# Average pre-tournament rating of each player's opponents.
pre_rating <- data.frame(Num = num, Rating = pr)
opponents <- data.frame(
  R1 = y$R1, R2 = y$R2, R3 = y$R3, R4 = y$R4,
  R5 = y$R5, R6 = y$R6, R7 = y$R7
)

# Translate every opponent number into that opponent's rating in one
# vectorised lookup. match() returns NA for missing or unknown numbers, so
# those cells simply stay NA instead of breaking a running total.
opp_ids <- as.matrix(opponents)
opp_ratings <- matrix(
  pre_rating$Rating[match(opp_ids, pre_rating$Num)],
  nrow = nrow(opp_ids),
  ncol = ncol(opp_ids)
)

# Mean over the rounds that have a rating, rounded to a whole number.
# A row with no rated opponent gives 0 / 0 and therefore NA.
games_played <- rowSums(!is.na(opp_ratings))
rating_total <- rowSums(opp_ratings, na.rm = TRUE)
avg <- data.frame(avg = as.integer(round(rating_total / games_played)))
head(avg)
## avg
## 1 1605
## 2 1469
## 3 1564
## 4 1574
## 5 1501
## 6 1519
# Wide per-player table: identity fields followed by the opponent number
# faced in each of the seven rounds.
info <- data.frame(
  Name = name,
  State = state,
  Total_Pts = tot_pts,
  Pre_Rt = pr,
  R1 = y$R1,
  R2 = y$R2,
  R3 = y$R3,
  R4 = y$R4,
  R5 = y$R5,
  R6 = y$R6,
  R7 = y$R7
)
head(info)
## Name State Total_Pts Pre_Rt R1 R2 R3 R4 R5 R6 R7
## 1 GARY HUA ON 6.0 1794 39 21 18 14 7 12 4
## 2 DAKSHESH DARURI MI 6.0 1553 63 58 4 17 16 20 7
## 3 ADITYA BAJAJ MI 6.0 1384 8 61 25 21 11 13 12
## 4 PATRICK H SCHILLING MI 5.5 1716 23 28 2 26 5 19 1
## 5 HANSHI ZUO MI 5.5 1655 45 37 12 13 4 14 17
## 6 HANSEN SONG OH 5.0 1686 34 29 11 35 10 27 21
# Final summary table. The columns are renamed after construction because
# "Pre-Rating" is not a syntactic R name and would otherwise be altered by
# data.frame().
player_info <- setNames(
  data.frame(
    Name = name,
    State = state,
    Total_Pts = tot_pts,
    Pre_Rating = pr,
    Avg_Op = avg
  ),
  c("Name", "State", "Total_Pts", "Pre-Rating", "Avg_Opp")
)
player_info
## Name State Total_Pts Pre-Rating Avg_Opp
## 1 GARY HUA ON 6.0 1794 1605
## 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 HANSHI ZUO MI 5.5 1655 1501
## 6 HANSEN SONG OH 5.0 1686 1519
## 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 STEFANO LEE ON 5.0 1411 1523
## 10 ANVIT RAO MI 5.0 1365 1554
## 11 CAMERON WILLIAM MC MI 4.5 1712 1468
## 12 KENNETH J TACK MI 4.5 1663 1506
## 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 JASON ZHENG MI 4.0 1595 1411
## 21 DINH DANG BUI ON 4.0 1563 1470
## 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 ALAN BUI ON 4.0 1363 1214
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 MAX ZHU ON 3.5 1579 1507
## 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 SOFIA ADINA STANESCU MI 3.5 1507 1522
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 RISHI SHETTY MI 3.5 1494 1260
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 JADE GE MI 3.5 1449 1277
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 BRIAN LIU MI 3.0 1423 1539
## 39 JOEL R HENDON MI 3.0 1436 1430
## 40 FOREST ZHANG MI 3.0 1348 1391
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 JARED GE MI 3.0 1332 1150
## 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 DEREK YAN MI 3.0 1242 1152
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 SHIVAM JHA MI 2.5 1056 1296
## 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 ETHAN GUO MI 2.5 935 1495
## 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 LARRY HODGE MI 2.0 1270 1206
## 55 ALEX KONG MI 2.0 1186 1406
## 56 MARISA RICCI MI 2.0 1153 1414
## 57 MICHAEL LU MI 2.0 1092 1363
## 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 SEAN M MC MI 2.0 853 1319
## 60 JULIA SHEN MI 1.5 967 1330
## 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 BEN LI MI 1.0 1163 1263
# Save the summary table as CSV without the row-name column.
write.csv(
  x = player_info,
  file = "Player_Info.csv",
  row.names = FALSE
)