First, load the libraries, read the text file, and clean up the excess lines.
The output should be a CSV file with the following columns:
“Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents”
library(RCurl)
## Loading required package: bitops
library(foreign)
library(stringr)
library(splitstackshape)
## Loading required package: data.table
# Location of the raw tournament results text file.
tournament_url <- "https://raw.githubusercontent.com/scottogden10/607-Assignment2/master/tournamentinfo.txt"
# Download the file once, one element per line. warn = FALSE silences the
# harmless "incomplete final line" warning raised because the file does not
# end with a newline; the last line is still read.
data <- readLines(tournament_url, warn = FALSE)
# Drop the four header lines at the top of the file.
data2 <- data[-c(1:4)]
# Discard separator and blank lines as whole lines (anything made up only of
# dashes and whitespace) instead of deleting every "-" character, so that
# hyphenated player names are kept intact.
data2 <- data2[!str_detect(data2, "^[\\-\\s]*$")]
# Reduce the "->" arrow to ">" so the text matches what the later steps expect
# (they turn ">" into a "|" delimiter).
data3 <- str_replace_all(data2, fixed("->"), ">")
# Combine rows for each person by subsetting odd and even lines: the odd line
# of each pair is pasted in front of the even line that follows it.
stopifnot(length(data3) %% 2 == 0) # an unpaired line would misalign every record
line_no <- seq_along(data3)
odd <- line_no[line_no %% 2 == 1]
even <- line_no[line_no %% 2 == 0]
names <- data3[odd]
second <- data3[even]
conc <- paste(names, second)
head(conc, 2)
## [1] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4| ON | 15445895 / R: 1794 >1817 |N:2 |W |B |W |B |W |B |W |"
## [2] " 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7| MI | 14598900 / R: 1553 >1663 |N:2 |B |W |B |W |B |W |B |"
##You can see the data is now row-specific to players.
## Make all columns "|" delimited: the ">" before the second rating and the
## "/ R:" label in front of the first rating both become separators.
conc2 <- str_replace_all(conc, ">", "|")
conc3 <- str_replace_all(conc2, "/ R:", "|")
## Get the non-calculated fields
# Each pattern below captures a field together with a "|" delimiter; the bar
# is removed and the surrounding blanks trimmed afterwards.
# Name: from the first letter of the record up to the next "|".
players <- str_extract(conc3, "[[:alpha:]].+?[|]")
players <- str_trim(str_replace_all(players, "[|]", ""))
# Id: the leading number of the record, i.e. the digits before the first "|".
idno <- str_extract(conc3, "^\\s+(\\d+)\\s+\\|")
idno <- str_trim(str_replace_all(idno, "[|]", ""))
# State: from the first letter of the second line up to the next "|".
states <- str_extract(second, "[[:alpha:]].+?[|]")
states <- str_trim(str_replace_all(states, "[|]", ""))
# Points: digit, punctuation, digit (e.g. "6.0") right after a "|".
pts <- str_extract(conc3, "[|][:digit:][:punct:][:digit:]")
pts <- str_trim(str_replace_all(pts, "[|]", ""))
# Pre-rating: the digits following "R:". The digit run is taken directly
# rather than deleting the literal "R: ", because the pattern allows any amount
# of whitespace after the colon and extra padding would otherwise be left in
# front of the number.
pre <- str_extract(second, "R:[:space:]+[:digit:]+")
pre <- str_extract(pre, "[:digit:]+")
## Get who they played
# A played game appears as a result letter (W, D or L), whitespace and the
# opponent's id. Entries without a number produce no match, so each player
# gets one token per matched game.
games <- str_extract_all(conc3, "[WDL]{1}[:space:]+[:digit:]+")
# Always provide at least the seven columns z1..z7; widen if any record holds
# more games than that.
n_cols <- max(7L, lengths(games))
# Turn each player's tokens into numeric ids and pad with NA up to n_cols.
# Parsing token by token keeps every opponent regardless of how many games a
# player has or how many digits the id has.
opp_ids <- vapply(
  games,
  function(tokens) {
    ids <- as.numeric(str_extract(tokens, "[:digit:]+"))
    length(ids) <- n_cols
    ids
  },
  numeric(n_cols)
)
# vapply() lays the ids out player after player, so fill the matrix by row:
# one row per player, one column per game in the order the games were listed.
opp <- as.data.frame(
  matrix(
    opp_ids,
    ncol = n_cols,
    byrow = TRUE,
    dimnames = list(NULL, paste0("z", seq_len(n_cols)))
  )
)
head(opp, 4)
## z1 z2 z3 z4 z5 z6 z7
## 1 39 21 18 14 7 12 4
## 2 63 58 4 17 16 20 7
## 3 8 61 25 21 11 13 12
## 4 23 28 2 26 5 19 1
##We see a list of opponents' ids
##Now: "Lookup" the value of the opponents' scores
# Lookup table: one row per player holding the id and pre-rating (both kept as
# character, exactly as extracted).
scoresID <- data.frame(idno, pre, stringsAsFactors = FALSE)
# Translate a vector of opponent ids into those opponents' pre-ratings.
# match() returns the row of scoresID whose id equals each opponent id;
# incomparables = NA stops a missing opponent from matching a missing id, so
# an NA opponent always yields an NA rating.
lookup_pre <- function(ids) {
  rows <- match(
    as.numeric(ids),
    as.numeric(scoresID$idno),
    incomparables = NA
  )
  as.numeric(scoresID$pre[rows])
}
# Apply the lookup to every column of opp; the result keeps opp's column names
# and has one row per player.
oppScore <- as.data.frame(lapply(opp, lookup_pre))
# We see all the values for each person
head(oppScore)
## as.numeric.paste.scoresID.index1....pre..
## 1 1436
## 2 1175
## 3 1641
## 4 1363
## 5 1242
## 6 1399
## as.numeric.paste.scoresID.index2....pre..
## 1 1563
## 2 917
## 3 955
## 4 1507
## 5 980
## 6 1602
## as.numeric.paste.scoresID.index3....pre..
## 1 1600
## 2 1716
## 3 1745
## 4 1553
## 5 1663
## 6 1712
## as.numeric.paste.scoresID.index4....pre..
## 1 1610
## 2 1629
## 3 1563
## 4 1579
## 5 1666
## 6 1438
## as.numeric.paste.scoresID.index5....pre..
## 1 1649
## 2 1604
## 3 1712
## 4 1655
## 5 1716
## 6 1365
## as.numeric.paste.scoresID.index6....pre..
## 1 1663
## 2 1595
## 3 1666
## 4 1564
## 5 1610
## 6 1552
## as.numeric.paste.scoresID.index7....pre..
## 1 1716
## 2 1649
## 3 1663
## 4 1794
## 5 1629
## 6 1563
# Average each player's row of opponent pre-ratings, skipping missing entries
opp_avg <- rowMeans(oppScore, na.rm = TRUE)
## Assemble the report table, one row per player
results <- data.frame(
  Players_Name = players,
  Players_State = states,
  Total_Points = pts,
  PreRating = pre,
  Avg_Opp_Pre = opp_avg
)
results
## Players_Name Players_State Total_Points PreRating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCUBELLU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC CORMICK MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
## Avg_Opp_Pre
## 1 1605.286
## 2 1469.286
## 3 1563.571
## 4 1573.571
## 5 1500.857
## 6 1518.714
## 7 1372.143
## 8 1468.429
## 9 1523.143
## 10 1554.143
## 11 1467.571
## 12 1530.600
## 13 1497.857
## 14 1515.000
## 15 1483.857
## 16 1385.800
## 17 1498.571
## 18 1480.000
## 19 1426.286
## 20 1410.857
## 21 1470.429
## 22 1300.333
## 23 1213.857
## 24 1357.000
## 25 1363.286
## 26 1506.857
## 27 1128.800
## 28 1522.143
## 29 1313.500
## 30 1144.143
## 31 1259.857
## 32 1378.714
## 33 1276.857
## 34 1375.286
## 35 1149.714
## 36 1388.167
## 37 1384.800
## 38 1539.167
## 39 1429.571
## 40 1390.571
## 41 1133.000
## 42 1149.857
## 43 1106.571
## 44 1327.000
## 45 1152.000
## 46 1357.714
## 47 1392.000
## 48 1355.800
## 49 1285.800
## 50 1296.000
## 51 1356.143
## 52 1494.571
## 53 1472.000
## 54 1206.167
## 55 1406.000
## 56 1414.400
## 57 1363.000
## 58 1391.000
## 59 1319.000
## 60 1330.200
## 61 1327.286
## 62 1655.000
## 63 1350.200
## 64 1263.000
# Write the report to disk. row.names = FALSE keeps the file to the five
# requested columns; by default write.csv() also writes the row names as an
# extra unnamed first column.
write.csv(results, file = "Project1.csv", row.names = FALSE)