Install the two packages used below: downloader and stringr.
# Install downloader and stringr only when they are not already available,
# so that re-running the script does not reinstall them (and does not need
# network access) every time.
needed_pkgs <- c("downloader", "stringr")
missing_pkgs <- needed_pkgs[
  !vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(downloader)
# Where the tournament cross table lives and the local file name it is saved
# under; both values are echoed for the record.
tourfile <- "tournamentinfo.txt"
url <- "https://raw.githubusercontent.com/jrovalino/jrovalino-Data607-proj1/master/tournamentinfo.txt"
url
## [1] "https://raw.githubusercontent.com/jrovalino/jrovalino-Data607-proj1/master/tournamentinfo.txt"
tourfile
## [1] "tournamentinfo.txt"
# Fetch the file into the working directory (see ?download).
download(url = url, destfile = tourfile)
library(stringr)
# Read the downloaded cross table, one element per line of text (see
# ?readLines). Reuse `tourfile` instead of repeating the file name, and pass
# warn = FALSE: the file's last line has no trailing newline, which
# readLines() otherwise reports as an "incomplete final line" warning even
# though the line is still read.
rslts <- readLines(tourfile, warn = FALSE)
# Player names ----
# References: Automated Data Collection with R (ADC in R), chapter 8
# (table 8.3 on special symbols, p. 204 on negated character classes) and
# https://stringr.tidyverse.org/articles/regular-expressions.html
#
# Capture the whole field between the first and second "|" on lines that
# start with a number, then trim the padding. The earlier pattern matched at
# most three words and so cut longer names short (e.g. "CAMERON WILLIAM MC",
# "SEAN M MC" in the data frame listing further down, which was captured
# before this change).
# NOTE(review): assumes each player's name line looks like
# "<pair number> | <name> |..." -- confirm against tournamentinfo.txt.
pname <- str_match(rslts, "^\\s*\\d+\\s*\\|\\s*([^|]*?)\\s*\\|")[, 2]
# Lines that are not player name lines give NA; drop them.
pname <- pname[!is.na(pname)]
#pname
# Player states ----
# Pull every "XX |" fragment (two capital letters, a space, a pipe) out of
# the file, then split on the " |" tail. With simplify = TRUE, str_split()
# returns a character matrix (simplify = FALSE would return a list), and the
# state code is in its first column.
pstate <- str_split(
  string = unlist(str_extract_all(string = rslts, pattern = "[A-Z][A-Z][[ ]][\\|]")),
  pattern = "[[ ]][\\|]",
  simplify = TRUE
)[, 1]
pstate
## [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI"
## [15] "MI" "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI"
## [29] "MI" "ON" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [43] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [57] "MI" "MI" "MI" "MI" "ON" "MI" "MI" "MI"
# Total points: every "<digit>.<digit>" token found in the file, flattened
# into one character vector.
totpts <- unlist(str_extract_all(string = rslts, pattern = "\\d\\.\\d"))
totpts
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [34] "3.5" "3.5" "3.5" "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0"
## [45] "3.0" "3.0" "2.5" "2.5" "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0"
## [56] "2.0" "2.0" "2.0" "2.0" "1.5" "1.5" "1.0" "1.0" "1.0"
# Player pre-ratings ----
# The pre-rating is the number that follows "R:". Some entries carry a
# "P<digits>" suffix (e.g. "1641P17" -- presumably a provisional rating);
# capture only the leading digits and store the result as integers so the
# ratings can be averaged or compared. The earlier two-pass extraction kept
# the suffix and left the values as text; the data frame listing further
# down was captured before this change.
prerating <- str_match(rslts, "R:\\s*(\\d+)")[, 2]
# Lines without an "R:" field give NA; drop them before converting.
prerating <- as.integer(prerating[!is.na(prerating)])
prerating
# Opponent numbers per round ----
# Background reading on iteration: https://r4ds.had.co.nz/iteration.html
#
# For each player, take the results line from the "|<digit>" that opens the
# total-points field through to the end of the line (greedy ".*"; ADC in R
# p. 203, table 8.2).
meanprep <- unlist(str_extract_all(rslts, "\\|[0-9].*"))
meanprep[1]
## [1] "|6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
# The opponent numbers are used later as an index into prerating. Replace
# the one or two whitespace characters that sit directly before a "|" with
# "00", so that a round field with no opponent number still yields a number
# (0) in the extraction below.
meanprep2 <- str_replace_all(meanprep, "\\s{1,2}\\|", "00|")
meanprep2[1]
## [1] "|6.000|W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
# simplify = TRUE returns a character matrix: one row per player line, one
# column per whitespace-prefixed number found on that line.
meanprep3 <- str_extract_all(meanprep2, "\\s\\d{1,2}", simplify = TRUE)
meanprep3[1,]
## [1] " 39" " 21" " 18" " 14" " 7" " 12" " 4"
# Strip the leading whitespace, then convert to numbers in place; `mode<-`
# keeps the matrix dimensions.
meanprep3 <- gsub("\\s+", "", meanprep3)
mode(meanprep3) <- "numeric"
meanprep3[1, ]
## [1] 39 21 18 14 7 12 4
ncol(meanprep3)
## [1] 7
nrow(meanprep3)
## [1] 64
length(meanprep3)
## [1] 448
# tmean starts out as a copy of the opponent-number matrix.
tmean <- meanprep3
tmean
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 39 21 18 14 7 12 4
## [2,] 63 58 4 17 16 20 7
## [3,] 8 61 25 21 11 13 12
## [4,] 23 28 2 26 5 19 1
## [5,] 45 37 12 13 4 14 17
## [6,] 34 29 11 35 10 27 21
## [7,] 57 46 13 11 1 9 2
## [8,] 3 32 14 9 47 28 19
## [9,] 25 18 59 8 26 7 20
## [10,] 16 19 55 31 6 25 18
## [11,] 38 56 6 7 3 34 26
## [12,] 42 33 5 38 0 1 3
## [13,] 36 27 7 5 33 3 32
## [14,] 54 44 8 1 27 5 31
## [15,] 19 16 30 22 54 33 38
## [16,] 10 15 0 39 2 36 0
## [17,] 48 41 26 2 23 22 5
## [18,] 47 9 1 32 19 38 10
## [19,] 15 10 52 28 18 4 8
## [20,] 40 49 23 41 28 2 9
## [21,] 43 1 47 3 40 39 6
## [22,] 64 52 28 15 0 17 40
## [23,] 4 43 20 58 17 37 46
## [24,] 28 47 43 25 60 44 39
## [25,] 9 53 3 24 34 10 47
## [26,] 49 40 17 4 9 32 11
## [27,] 51 13 46 37 14 6 0
## [28,] 24 4 22 19 20 8 36
## [29,] 50 6 38 34 52 48 0
## [30,] 52 64 15 55 31 61 50
## [31,] 58 55 64 10 30 50 14
## [32,] 61 8 44 18 51 26 13
## [33,] 60 12 50 36 13 15 51
## [34,] 6 60 37 29 25 11 52
## [35,] 46 38 56 6 57 52 48
## [36,] 13 57 51 33 0 16 28
## [37,] 0 5 34 27 0 23 61
## [38,] 11 35 29 12 0 18 15
## [39,] 1 54 40 16 44 21 24
## [40,] 20 26 39 59 21 56 22
## [41,] 59 17 58 20 0 0 0
## [42,] 12 50 57 60 61 64 56
## [43,] 21 23 24 63 59 46 55
## [44,] 0 14 32 53 39 24 59
## [45,] 5 51 60 56 63 55 58
## [46,] 35 7 27 50 64 43 23
## [47,] 18 24 21 61 8 51 25
## [48,] 17 63 0 52 0 29 35
## [49,] 26 20 63 64 58 0 0
## [50,] 29 42 33 46 0 31 30
## [51,] 27 45 36 57 32 47 33
## [52,] 30 22 19 48 29 35 34
## [53,] 0 25 0 44 0 57 0
## [54,] 14 39 61 0 15 59 64
## [55,] 62 31 10 30 0 45 43
## [56,] 0 11 35 45 0 40 42
## [57,] 7 36 42 51 35 53 0
## [58,] 31 2 41 23 49 0 45
## [59,] 41 0 9 40 43 54 44
## [60,] 33 34 45 42 24 0 0
## [61,] 32 3 54 47 42 30 37
## [62,] 55 0 0 0 0 0 0
## [63,] 2 48 49 43 45 0 0
## [64,] 22 30 31 49 46 42 54
#m <- matrix(unlist(meanprep3), nrow(meanprep3), dimnames = dimnames(meanprep3))
#rowMeans(m)
#data.frame(datamatrix)
#going to use the row and column to populate the matrix so that average can be done.
#class(nrow(tmean))
# Print the opponent-number matrix once more; nothing has modified tmean
# since it was last printed, so the output is the same as before.
tmean
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 39 21 18 14 7 12 4
## [2,] 63 58 4 17 16 20 7
## [3,] 8 61 25 21 11 13 12
## [4,] 23 28 2 26 5 19 1
## [5,] 45 37 12 13 4 14 17
## [6,] 34 29 11 35 10 27 21
## [7,] 57 46 13 11 1 9 2
## [8,] 3 32 14 9 47 28 19
## [9,] 25 18 59 8 26 7 20
## [10,] 16 19 55 31 6 25 18
## [11,] 38 56 6 7 3 34 26
## [12,] 42 33 5 38 0 1 3
## [13,] 36 27 7 5 33 3 32
## [14,] 54 44 8 1 27 5 31
## [15,] 19 16 30 22 54 33 38
## [16,] 10 15 0 39 2 36 0
## [17,] 48 41 26 2 23 22 5
## [18,] 47 9 1 32 19 38 10
## [19,] 15 10 52 28 18 4 8
## [20,] 40 49 23 41 28 2 9
## [21,] 43 1 47 3 40 39 6
## [22,] 64 52 28 15 0 17 40
## [23,] 4 43 20 58 17 37 46
## [24,] 28 47 43 25 60 44 39
## [25,] 9 53 3 24 34 10 47
## [26,] 49 40 17 4 9 32 11
## [27,] 51 13 46 37 14 6 0
## [28,] 24 4 22 19 20 8 36
## [29,] 50 6 38 34 52 48 0
## [30,] 52 64 15 55 31 61 50
## [31,] 58 55 64 10 30 50 14
## [32,] 61 8 44 18 51 26 13
## [33,] 60 12 50 36 13 15 51
## [34,] 6 60 37 29 25 11 52
## [35,] 46 38 56 6 57 52 48
## [36,] 13 57 51 33 0 16 28
## [37,] 0 5 34 27 0 23 61
## [38,] 11 35 29 12 0 18 15
## [39,] 1 54 40 16 44 21 24
## [40,] 20 26 39 59 21 56 22
## [41,] 59 17 58 20 0 0 0
## [42,] 12 50 57 60 61 64 56
## [43,] 21 23 24 63 59 46 55
## [44,] 0 14 32 53 39 24 59
## [45,] 5 51 60 56 63 55 58
## [46,] 35 7 27 50 64 43 23
## [47,] 18 24 21 61 8 51 25
## [48,] 17 63 0 52 0 29 35
## [49,] 26 20 63 64 58 0 0
## [50,] 29 42 33 46 0 31 30
## [51,] 27 45 36 57 32 47 33
## [52,] 30 22 19 48 29 35 34
## [53,] 0 25 0 44 0 57 0
## [54,] 14 39 61 0 15 59 64
## [55,] 62 31 10 30 0 45 43
## [56,] 0 11 35 45 0 40 42
## [57,] 7 36 42 51 35 53 0
## [58,] 31 2 41 23 49 0 45
## [59,] 41 0 9 40 43 54 44
## [60,] 33 34 45 42 24 0 0
## [61,] 32 3 54 47 42 30 37
## [62,] 55 0 0 0 0 0 0
## [63,] 2 48 49 43 45 0 0
## [64,] 22 30 31 49 46 42 54
nrow(tmean)
## [1] 64
ncol(tmean)
## [1] 7
is.numeric(tmean[1,1])
## [1] TRUE
# Replace every opponent number in tmean with that opponent's pre-rating.
#
# The earlier cell-by-cell loop wrote character ratings (e.g. "1436P23")
# into the numeric matrix. The first such write turned the whole matrix into
# character, so every later lookup indexed prerating by name and came back
# NA. Convert the ratings to numbers once (leading digits only, dropping any
# "P<n>" suffix) and look all opponents up in one vectorised step.
opp_rating <- as.numeric(
  sub("^\\s*([0-9]+).*$", "\\1", as.character(prerating))
)
opp_index <- tmean
opp_index[opp_index == 0] <- NA  # 0 marks a round with no opponent
# An NA index returns NA, so rounds without an opponent stay NA; tmean[]
# keeps the matrix dimensions while the values are replaced.
tmean[] <- opp_rating[opp_index]
# NOTE(review): tmean is not used when df is built below;
# rowMeans(tmean, na.rm = TRUE) would give each player's average opponent
# pre-rating if that column is wanted.
tmean[1,1]
## [1] 1436
# Assemble the per-player summary table.
# data.frame() silently recycles a shorter vector when its length divides
# the longest one, so first check that all four extractions found the same
# number of players. stringsAsFactors = FALSE keeps the text columns as
# character whatever the running R version's default is.
stopifnot(
  length(pname) == length(pstate),
  length(pname) == length(totpts),
  length(pname) == length(prerating)
)
df <- data.frame(pname, pstate, totpts, prerating, stringsAsFactors = FALSE)
df
## pname pstate totpts prerating
## 1 GARY HUA ON 6.0 1794
## 2 DAKSHESH DARURI MI 6.0 1553
## 3 ADITYA BAJAJ MI 6.0 1384
## 4 PATRICK H SCHILLING MI 5.5 1716
## 5 HANSHI ZUO MI 5.5 1655
## 6 HANSEN SONG OH 5.0 1686
## 7 GARY DEE SWATHELL MI 5.0 1649
## 8 EZEKIEL HOUGHTON MI 5.0 1641P17
## 9 STEFANO LEE ON 5.0 1411
## 10 ANVIT RAO MI 5.0 1365
## 11 CAMERON WILLIAM MC MI 4.5 1712
## 12 KENNETH J TACK MI 4.5 1663
## 13 TORRANCE HENRY JR MI 4.5 1666
## 14 BRADLEY SHAW MI 4.5 1610
## 15 ZACHARY JAMES HOUGHTON MI 4.5 1220P13
## 16 MIKE NIKITIN MI 4.0 1604
## 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18 DAVID SUNDEEN MI 4.0 1600
## 19 DIPANKAR ROY MI 4.0 1564
## 20 JASON ZHENG MI 4.0 1595
## 21 DINH DANG BUI ON 4.0 1563P22
## 22 EUGENE L MCCLURE MI 4.0 1555
## 23 ALAN BUI ON 4.0 1363
## 24 MICHAEL R ALDRICH MI 4.0 1229
## 25 LOREN SCHWIEBERT MI 3.5 1745
## 26 MAX ZHU ON 3.5 1579
## 27 GAURAV GIDWANI MI 3.5 1552
## 28 SOFIA ADINA STANESCU MI 3.5 1507
## 29 CHIEDOZIE OKORIE MI 3.5 1602P6
## 30 GEORGE AVERY JONES ON 3.5 1522
## 31 RISHI SHETTY MI 3.5 1494
## 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33 JADE GE MI 3.5 1449
## 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35 JOSHUA DAVID LEE MI 3.5 1438
## 36 SIDDHARTH JHA MI 3.5 1355
## 37 AMIYATOSH PWNANANDAM MI 3.5 980P12
## 38 BRIAN LIU MI 3.0 1423
## 39 JOEL R HENDON MI 3.0 1436P23
## 40 FOREST ZHANG MI 3.0 1348
## 41 KYLE WILLIAM MURPHY MI 3.0 1403P5
## 42 JARED GE MI 3.0 1332
## 43 ROBERT GLEN VASEY MI 3.0 1283
## 44 JUSTIN D SCHILLING MI 3.0 1199
## 45 DEREK YAN MI 3.0 1242
## 46 JACOB ALEXANDER LAVALLEY MI 3.0 377P3
## 47 ERIC WRIGHT MI 2.5 1362
## 48 DANIEL KHAIN MI 2.5 1382
## 49 MICHAEL J MARTIN MI 2.5 1291P12
## 50 SHIVAM JHA MI 2.5 1056
## 51 TEJAS AYYAGARI MI 2.5 1011
## 52 ETHAN GUO MI 2.5 935
## 53 JOSE C YBARRA MI 2.0 1393
## 54 LARRY HODGE MI 2.0 1270
## 55 ALEX KONG MI 2.0 1186
## 56 MARISA RICCI MI 2.0 1153
## 57 MICHAEL LU MI 2.0 1092
## 58 VIRAJ MOHILE MI 2.0 917
## 59 SEAN M MC MI 2.0 853
## 60 JULIA SHEN MI 1.5 967
## 61 JEZZEL FARKAS ON 1.5 955P11
## 62 ASHWIN BALAJI MI 1.0 1530
## 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64 BEN LI MI 1.0 1163
# Save the summary table to a CSV file in the working directory, without
# the row-name column.
write.csv(x = df, file = "chesssummary.csv", row.names = FALSE)