library("RCurl")
# Location of the raw tournament cross-table text file.
url <- "https://raw.githubusercontent.com/christinataylor/IS/master/tournamentinfo.txt"
# setInternet2() is only provided by Windows builds of R and is obsolete in
# newer releases, so only call it where it can still matter. Calling it
# unconditionally stops the script on every other platform.
if (.Platform$OS.type == "windows" && getRversion() < "3.3.0") {
  setInternet2(TRUE)
}
# Save a local copy in the working directory, then read it line by line.
download.file(url, destfile = "./info.txt")
v <- readLines("info.txt")
## Warning in readLines("info.txt"): incomplete final line found on 'info.txt'
# Keep only lines 5 through 196 of the file.
v <- v[5:196]
summary(v) # character vector of length 192
## Length Class Mode
## 192 character character
# The kept lines repeat in groups of three. The first and second line of each
# group are handled separately; the third line of each group is ignored.
i <- seq_along(v)
line_kind <- i %% 3
v1 <- v[line_kind == 1]
v2 <- v[line_kind == 2]
# Break every kept line into its pipe-delimited fields.
l1 <- strsplit(v1, "\\|")
l2 <- strsplit(v2, "\\|")
# Remove leading and trailing whitespace from every element of `x`.
# Returns a character vector of the same length as `x`.
trim <- function(x) {
  edge_whitespace <- "^\\s+|\\s+$"
  gsub(pattern = edge_whitespace, replacement = "", x = x)
}
# Build one data-frame row per split record from the first line of each group.
# Fields are whitespace-trimmed. The row count comes from the number of split
# records rather than a hard-coded 64, and TRUE is spelled out because `T` can
# be reassigned.
df1 <- data.frame(
  matrix(trim(unlist(l1)), nrow = length(l1), byrow = TRUE),
  stringsAsFactors = FALSE
)
tail(df1)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 59 59 SEAN M MC CORMICK 2.0 L 41 B L 9 L 40 L 43 W 54 L 44
## 60 60 JULIA SHEN 1.5 L 33 L 34 D 45 D 42 L 24 H U
## 61 61 JEZZEL FARKAS 1.5 L 32 L 3 W 54 L 47 D 42 L 30 L 37
## 62 62 ASHWIN BALAJI 1.0 W 55 U U U U U U
## 63 63 THOMAS JOSEPH HOSMER 1.0 L 2 L 48 D 49 L 43 L 45 H U
## 64 64 BEN LI 1.0 L 22 D 30 L 31 D 49 L 46 L 42 L 54
# Same construction for the second line of each group.
df2 <- data.frame(
  matrix(trim(unlist(l2)), nrow = length(l2), byrow = TRUE),
  stringsAsFactors = FALSE
)
head(df2)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 1 ON 15445895 / R: 1794 ->1817 N:2 W B W B W B W
## 2 MI 14598900 / R: 1553 ->1663 N:2 B W B W B W B
## 3 MI 14959604 / R: 1384 ->1640 N:2 W B W B W B W
## 4 MI 12616049 / R: 1716 ->1744 N:2 W B W B W B B
## 5 MI 14601533 / R: 1655 ->1690 N:2 B W B W B W B
## 6 OH 15055204 / R: 1686 ->1687 N:3 W B W B B W B
# Pull the "R:" token out of column X2: "R:", one or two whitespace
# characters, then three or four digits.
pattern <- "R:\\s{1,2}\\d{3,4}"
m2 <- regexpr(pattern, df2$X2)
rank <- regmatches(df2$X2, m2)
# Keep only the part after the colon and overwrite X2 with the trimmed digits.
rating_parts <- strsplit(rank, ":")
df2$X2 <- trim(vapply(rating_parts, function(parts) parts[[2]], character(1)))
tail(df2)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 59 MI 853 W B B W W B
## 60 MI 967 W B B W B
## 61 ON 955 B W B W B W B
## 62 MI 1530 B
## 63 MI 1175 W B W B B
## 64 MI 1163 B W W B W B B
# Extract the run of digits at the very end of each element of `x`.
#
# x: a character vector.
# Returns a character vector the same length as `x`; elements that are NA or
# do not end in digits yield NA.
findid <- function(x) {
  pattern <- "\\d+$"
  m1 <- regexpr(pattern, x)
  # regexpr() gives NA for NA input and -1 for no match; treat both as misses
  # so the logical subscript below never contains NA.
  hit <- !is.na(m1) & m1 != -1
  # regmatches() drops non-matches, so fill a full-length vector instead.
  # Starting from NA_character_ keeps the result character even when nothing
  # matches.
  id <- rep(NA_character_, length(x))
  id[hit] <- regmatches(x, m1)
  id
}
# Replace each of columns 4 through 10 with the trailing number that findid()
# extracts from it (NA where there is none).
round_cols <- 4:10
trailing_ids <- lapply(df1[, round_cols], findid)
df1[, round_cols] <- do.call(cbind, trailing_ids)
tail(df1)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 59 59 SEAN M MC CORMICK 2.0 41 <NA> 9 40 43 54 44
## 60 60 JULIA SHEN 1.5 33 34 45 42 24 <NA> <NA>
## 61 61 JEZZEL FARKAS 1.5 32 3 54 47 42 30 37
## 62 62 ASHWIN BALAJI 1.0 55 <NA> <NA> <NA> <NA> <NA> <NA>
## 63 63 THOMAS JOSEPH HOSMER 1.0 2 48 49 43 45 <NA> <NA>
## 64 64 BEN LI 1.0 22 30 31 49 46 42 54
# Combine both parsed tables into one: the first two columns of df1, the first
# column of df2, numeric versions of df1 column 3 and df2 column 2, then df1
# columns 4 through 10.
info <- setNames(
  cbind(
    df1[, 1:2],
    df2[, 1],
    as.numeric(df1[, 3]),
    as.numeric(df2[, 2]),
    df1[, 4:10]
  ),
  c("id", "name", "state", "pts", "p_rating",
    "r1", "r2", "r3", "r4", "r5", "r6", "r7")
)
head(info)
## id name state pts p_rating r1 r2 r3 r4 r5 r6 r7
## 1 1 GARY HUA ON 6.0 1794 39 21 18 14 7 12 4
## 2 2 DAKSHESH DARURI MI 6.0 1553 63 58 4 17 16 20 7
## 3 3 ADITYA BAJAJ MI 6.0 1384 8 61 25 21 11 13 12
## 4 4 PATRICK H SCHILLING MI 5.5 1716 23 28 2 26 5 19 1
## 5 5 HANSHI ZUO MI 5.5 1655 45 37 12 13 4 14 17
## 6 6 HANSEN SONG OH 5.0 1686 34 29 11 35 10 27 21
# Average p_rating of the players listed in columns 6 through 12 of one row of
# the global `info` table.
#
# id: a single row index into `info`.
# Returns the mean p_rating of the rows whose `id` appears in that row's
# columns 6:12, rounded to a whole number; NaN when all of those columns are NA.
#
# The lookup is plain set membership, so it is done with %in% instead of
# attaching sqldf and building a SQL string on every call.
findavg <- function(id) {
  opponents <- unlist(lapply(info[id, 6:12], as.character), use.names = FALSE)
  opponents <- opponents[!is.na(opponents)] # drop empty slots
  faced <- info$id %in% opponents
  round(mean(info$p_rating[faced]))
}
# Sample calculations for players 1 and 62:
findavg(1)
## [1] 1605
findavg(62)
## [1] 1186
# Evaluate findavg() for every row of `info`. The index vector is taken from
# the table instead of a hard-coded 1:64 and assigned on its own line rather
# than inside the call. vapply() guarantees one numeric value per row.
id <- seq_len(nrow(info))
avg_opprating <- vapply(id, findavg, numeric(1))
# Show the first five columns of `info` next to the computed averages.
cbind(info[1:5], avg_opprating)
## id name state pts p_rating avg_opprating
## 1 1 GARY HUA ON 6.0 1794 1605
## 2 2 DAKSHESH DARURI MI 6.0 1553 1469
## 3 3 ADITYA BAJAJ MI 6.0 1384 1564
## 4 4 PATRICK H SCHILLING MI 5.5 1716 1574
## 5 5 HANSHI ZUO MI 5.5 1655 1501
## 6 6 HANSEN SONG OH 5.0 1686 1519
## 7 7 GARY DEE SWATHELL MI 5.0 1649 1372
## 8 8 EZEKIEL HOUGHTON MI 5.0 1641 1468
## 9 9 STEFANO LEE ON 5.0 1411 1523
## 10 10 ANVIT RAO MI 5.0 1365 1554
## 11 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712 1468
## 12 12 KENNETH J TACK MI 4.5 1663 1506
## 13 13 TORRANCE HENRY JR MI 4.5 1666 1498
## 14 14 BRADLEY SHAW MI 4.5 1610 1515
## 15 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1484
## 16 16 MIKE NIKITIN MI 4.0 1604 1386
## 17 17 RONALD GRZEGORCZYK MI 4.0 1629 1499
## 18 18 DAVID SUNDEEN MI 4.0 1600 1480
## 19 19 DIPANKAR ROY MI 4.0 1564 1426
## 20 20 JASON ZHENG MI 4.0 1595 1411
## 21 21 DINH DANG BUI ON 4.0 1563 1470
## 22 22 EUGENE L MCCLURE MI 4.0 1555 1300
## 23 23 ALAN BUI ON 4.0 1363 1214
## 24 24 MICHAEL R ALDRICH MI 4.0 1229 1357
## 25 25 LOREN SCHWIEBERT MI 3.5 1745 1363
## 26 26 MAX ZHU ON 3.5 1579 1507
## 27 27 GAURAV GIDWANI MI 3.5 1552 1222
## 28 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522
## 29 29 CHIEDOZIE OKORIE MI 3.5 1602 1314
## 30 30 GEORGE AVERY JONES ON 3.5 1522 1144
## 31 31 RISHI SHETTY MI 3.5 1494 1260
## 32 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1379
## 33 33 JADE GE MI 3.5 1449 1277
## 34 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375
## 35 35 JOSHUA DAVID LEE MI 3.5 1438 1150
## 36 36 SIDDHARTH JHA MI 3.5 1355 1388
## 37 37 AMIYATOSH PWNANANDAM MI 3.5 980 1385
## 38 38 BRIAN LIU MI 3.0 1423 1539
## 39 39 JOEL R HENDON MI 3.0 1436 1430
## 40 40 FOREST ZHANG MI 3.0 1348 1391
## 41 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248
## 42 42 JARED GE MI 3.0 1332 1150
## 43 43 ROBERT GLEN VASEY MI 3.0 1283 1107
## 44 44 JUSTIN D SCHILLING MI 3.0 1199 1327
## 45 45 DEREK YAN MI 3.0 1242 1152
## 46 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1358
## 47 47 ERIC WRIGHT MI 2.5 1362 1392
## 48 48 DANIEL KHAIN MI 2.5 1382 1356
## 49 49 MICHAEL J MARTIN MI 2.5 1291 1286
## 50 50 SHIVAM JHA MI 2.5 1056 1296
## 51 51 TEJAS AYYAGARI MI 2.5 1011 1356
## 52 52 ETHAN GUO MI 2.5 935 1495
## 53 53 JOSE C YBARRA MI 2.0 1393 1345
## 54 54 LARRY HODGE MI 2.0 1270 1206
## 55 55 ALEX KONG MI 2.0 1186 1406
## 56 56 MARISA RICCI MI 2.0 1153 1414
## 57 57 MICHAEL LU MI 2.0 1092 1363
## 58 58 VIRAJ MOHILE MI 2.0 917 1391
## 59 59 SEAN M MC CORMICK MI 2.0 853 1319
## 60 60 JULIA SHEN MI 1.5 967 1330
## 61 61 JEZZEL FARKAS ON 1.5 955 1327
## 62 62 ASHWIN BALAJI MI 1.0 1530 1186
## 63 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350
## 64 64 BEN LI MI 1.0 1163 1263