## Player State_or_Province Tournament_Points Prior_Rating Opponents'_Average_Rating
## 1 GARY HUA ON 6.0 1794 1605.29
## 2 DAKSHESH DARURI MI 6.0 1553 1469.29
## 3 ADITYA BAJAJ MI 6.0 1384 1563.57
## 4 PATRICK H SCHILLING MI 5.5 1716 1573.57
## 5 HANSHI ZUO MI 5.5 1655 1500.86
## 6 HANSEN SONG OH 5.0 1686 1518.71
## 7 GARY DEE SWATHELL MI 5.0 1649 1372.14
## 8 EZEKIEL HOUGHTON MI 5.0 1641 1468.43
## 9 STEFANO LEE ON 5.0 1411 1523.14
## 10 ANVIT RAO MI 5.0 1365 1554.14
## 11 CAMERON WILLIAM MCLEMAN MI 4.5 1712 1467.57
## 12 KENNETH J TACK MI 4.5 1663 1506.17
## 13 TORRANCE HENRY JR MI 4.5 1666 1497.86
## 14 BRADLEY SHAW MI 4.5 1610 1515.00
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220 1483.86
## 16 MIKE NIKITIN MI 4.0 1604 1385.80
## 17 RONALD GRZEGORCZYK MI 4.0 1629 1498.57
## 18 DAVID SUNDEEN MI 4.0 1600 1480.00
## 19 DIPANKAR ROY MI 4.0 1564 1426.29
## 20 JASON ZHENG MI 4.0 1595 1410.86
## 21 DINH DANG BUI ON 4.0 1563 1470.43
## 22 EUGENE L MCCLURE MI 4.0 1555 1300.33
## 23 ALAN BUI ON 4.0 1363 1213.86
## 24 MICHAEL R ALDRICH MI 4.0 1229 1357.00
## 25 LOREN SCHWIEBERT MI 3.5 1745 1363.29
## 26 MAX ZHU ON 3.5 1579 1506.86
## 27 GAURAV GIDWANI MI 3.5 1552 1221.67
## 28 SOFIA ADINA STANESCU-BELLU MI 3.5 1507 1522.14
## 29 CHIEDOZIE OKORIE MI 3.5 1602 1313.50
## 30 GEORGE AVERY JONES ON 3.5 1522 1144.14
## 31 RISHI SHETTY MI 3.5 1494 1259.86
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441 1378.71
## 33 JADE GE MI 3.5 1449 1276.86
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399 1375.29
## 35 JOSHUA DAVID LEE MI 3.5 1438 1149.71
## 36 SIDDHARTH JHA MI 3.5 1355 1388.17
## 37 AMIYATOSH PWNANANDAM MI 3.5 980 1384.80
## 38 BRIAN LIU MI 3.0 1423 1539.17
## 39 JOEL R HENDON MI 3.0 1436 1429.57
## 40 FOREST ZHANG MI 3.0 1348 1390.57
## 41 KYLE WILLIAM MURPHY MI 3.0 1403 1248.50
## 42 JARED GE MI 3.0 1332 1149.86
## 43 ROBERT GLEN VASEY MI 3.0 1283 1106.57
## 44 JUSTIN D SCHILLING MI 3.0 1199 1327.00
## 45 DEREK YAN MI 3.0 1242 1152.00
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377 1357.71
## 47 ERIC WRIGHT MI 2.5 1362 1392.00
## 48 DANIEL KHAIN MI 2.5 1382 1355.80
## 49 MICHAEL J MARTIN MI 2.5 1291 1285.80
## 50 SHIVAM JHA MI 2.5 1056 1296.00
## 51 TEJAS AYYAGARI MI 2.5 1011 1356.14
## 52 ETHAN GUO MI 2.5 935 1494.57
## 53 JOSE C YBARRA MI 2.0 1393 1345.33
## 54 LARRY HODGE MI 2.0 1270 1206.17
## 55 ALEX KONG MI 2.0 1186 1406.00
## 56 MARISA RICCI MI 2.0 1153 1414.40
## 57 MICHAEL LU MI 2.0 1092 1363.00
## 58 VIRAJ MOHILE MI 2.0 917 1391.00
## 59 SEAN M MCCORMICK MI 2.0 853 1319.00
## 60 JULIA SHEN MI 1.5 967 1330.20
## 61 JEZZEL FARKAS ON 1.5 955 1327.29
## 62 ASHWIN BALAJI MI 1.0 1530 1186.00
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175 1350.20
## 64 BEN LI MI 1.0 1163 1263.00
##
## Call:
## lm(formula = as.numeric(final.data[[3]]) ~ as.numeric(final.data[[4]]))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.86623 -0.56507 -0.06999 0.34002 2.54694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4635332 0.6562549 -0.706 0.483
## as.numeric(final.data[[4]]) 0.0028299 0.0004676 6.052 9.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9853 on 62 degrees of freedom
## Multiple R-squared: 0.3714, Adjusted R-squared: 0.3612
## F-statistic: 36.63 on 1 and 62 DF, p-value: 9.093e-08
library(stringr)
library(ggplot2)
# Read the whole tournament file into one character string.
fileName <- 'C:/Users/dawig/Documents/tournamentinfo.txt'
OurData <- readChar(fileName, nchars = file.info(fileName)$size)

# Work on the text from character 375 onwards (presumably this skips the
# table header -- confirm against the input file), and delete the space that
# follows every "MC " so that a surname written as "MC LEMAN" becomes the
# single word "MCLEMAN".
split.string <- str_replace_all(
  str_sub(OurData, start = 375, end = -1),
  "MC ",
  "MC"
)

# Collect, in order of appearance, every run of three (or else two) words
# made only of capital letters and separated by exactly one space. These are
# used further down as the player names that delimit each entry.
split.string2 <- str_extract_all(
  split.string,
  "([A-Z]{1,20}[ ]{1}[A-Z]{1,20}[ ]{1}[A-Z]{1,20})|[A-Z]{1,20}[ ]{1}[A-Z]{1,20}"
)[[1]]
# Split the tournament text into one entry per player, one row each.
#
# Reads:  split.string  - the tournament text, starting at the first player
#         split.string2 - player names in order of appearance
# Writes: split.by.player - data frame with one character column holding the
#                           raw text of each player's entry
#         split.string    - consumed as the loop runs; afterwards it holds
#                           the last player's entry
n.players <- length(split.string2)
if (n.players == 0) {
  stop("No player names were found in the tournament text.", call. = FALSE)
}

# Allocate every row up front as character. The column keeps the name `nrow`
# only because that is what the earlier `data.frame(nrow=64)` call produced;
# the rest of the script addresses columns by position.
split.by.player <- data.frame(
  nrow = rep(NA_character_, n.players),
  stringsAsFactors = FALSE
)

# Each entry runs from the current start of split.string up to (but not
# including) the next player's name; that text is then removed so the next
# pass starts at the next player's name.
for (i in seq_len(n.players - 1)) {
  next.name.start <- str_locate(split.string, split.string2[[i + 1]])[1, 1]
  if (is.na(next.name.start)) {
    stop(
      "Could not locate player name '", split.string2[[i + 1]],
      "' in the remaining tournament text.",
      call. = FALSE
    )
  }
  split.by.player[i, 1] <- str_sub(split.string, 1, next.name.start - 1)
  split.string <- str_sub(split.string, next.name.start, -1)
}

# The last entry has no following name to stop at: it is whatever remains.
split.by.player[n.players, 1] <- split.string
# Break each player's raw entry (column 1) into separate fields, for every
# row at once (stringr functions are vectorised over their input).
#
# Resulting columns of split.by.player:
#   1 - player name
#   2 - the rest of the entry after the name (parsed further below)
#   3 - state or province code
#   4 - tournament points, as text (e.g. "6.0")
#   5 - prior rating, as text
entry.text <- split.by.player[, 1]

# The name ends at the first run of two or more whitespace characters.
name.end <- str_locate(entry.text, "\\s{2,25}")[, 1]
details <- str_sub(entry.text, name.end, -1)

# The cut at name.end includes one whitespace character; trim it so the name
# is stored without trailing padding.
split.by.player[, 1] <- str_trim(str_sub(entry.text, 1, name.end))
split.by.player[, 2] <- details

# Only the codes OH, ON and MI are recognised; anything else yields NA. The
# pattern matches the surrounding spaces too, so they are trimmed off.
split.by.player[, 3] <- str_trim(str_extract(details, " OH | ON | MI "))

# First "digit.digit" in the entry.
split.by.player[, 4] <- str_extract(details, "[[:digit:]]\\.[[:digit:]]")

# The prior rating is the 3-4 digit number that follows "R:"; isolate the
# "R: nnnn" fragment first, then keep only its digits.
rating.field <- str_extract(details, "R\\: {1,2}[[:digit:]]{3,4}")
split.by.player[, 5] <- str_extract(rating.field, "[[:digit:]]{3,4}")
# Pull the opponent numbers out of each player's entry and store them in
# columns 6-12 of split.by.player (seven slots, presumably one per round --
# confirm against the input format).
#
# Each pass finds the next "|" followed by one of W, L, D, B or U and then,
# lazily, the first digit after it. That digit starts an opponent number of
# up to three digits. Column 2 is then cut back to start at that digit, so
# the next pass moves on to the following result. Once no further match
# exists, str_locate() returns NA and the remaining slots are filled with NA.
n.result.slots <- 7
for (j in seq_len(n.result.slots)) {
  match.end <- str_locate(
    split.by.player[, 2],
    "\\|[WLDBU].+?[[:digit:]]"
  )[, 2]
  remaining.text <- str_sub(split.by.player[, 2], match.end, -1)
  split.by.player[, j + 5] <- str_extract(remaining.text, "[[:digit:]]{0,3}")
  split.by.player[, 2] <- remaining.text
}
# Compute, for every player, the average prior rating of the opponents
# listed in columns 6-12.
#
# Columns written:
#   13 - number of opponents found (non-NA entries in columns 6-12)
#   14 - sum of those opponents' prior ratings (column 5 of the opponent row)
#   15 - column 14 / column 13, rounded to 2 decimals (NaN if no opponents)
#
# Opponent numbers are stored as text; convert them to integers so they
# index rows by position rather than relying on row-name matching.
opponent.ids <- as.matrix(split.by.player[, 6:12])
storage.mode(opponent.ids) <- "integer"
has.opponent <- !is.na(opponent.ids)

# Look up each opponent's prior rating, keeping the players-by-slots layout.
prior.ratings <- as.numeric(split.by.player[, 5])
opponent.ratings <- array(
  prior.ratings[as.vector(opponent.ids)],
  dim = dim(opponent.ids)
)
# Empty slots contribute nothing to the sum. A real opponent whose rating
# could not be parsed stays NA and makes that player's average NA.
opponent.ratings[!has.opponent] <- 0

split.by.player[, 13] <- rowSums(has.opponent)
split.by.player[, 14] <- rowSums(opponent.ratings)
split.by.player[, 15] <- round(
  split.by.player[, 14] / split.by.player[, 13],
  digits = 2
)
# Assemble the report table from the parsed columns: name (1), state (3),
# points (4), prior rating (5) and opponents' average rating (15).
final.data <- setNames(
  split.by.player[, c(1, 3, 4, 5, 15)],
  c("Player", "State_or_Province", "Tournament_Points", "Prior_Rating",
    "Opponents'_Average_Rating")
)

# Show the table, then save it as CSV in the working directory.
final.data
write.csv(final.data, file = "Chess_Tournament_Data.csv")