## import all the libraries
library(readr)
library(stringr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Download the tournament text file; each line of the file becomes one
## element of the character vector `data` (n = -1 reads to the end).
data <- readLines(
  con = "https://raw.githubusercontent.com/AldataSci/Project-One-Data-607/main/tournament.txt",
  n = -1
)
## Preview the first lines to check the layout.
head(data)
## [1] "-----------------------------------------------------------------------------------------"
## [2] " Pair | Player Name |Total|Round|Round|Round|Round|Round|Round|Round| "
## [3] " Num | USCF ID / Rtg (Pre->Post) | Pts | 1 | 2 | 3 | 4 | 5 | 6 | 7 | "
## [4] "-----------------------------------------------------------------------------------------"
## [5] " 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [6] " ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |"
## Player names sit on the first line of each three-line player record; the
## records start at line 5, after the four header lines. Take the whole field
## between the first two pipes instead of a fixed 27-character window, so a
## long name is never silently truncated, then strip the column padding.
player_rows <- data[seq(5, length(data), 3)]
Names <- str_trim(str_match(player_rows, "\\|([^|]*)\\|")[, 2])
head(Names)
## [1] "GARY HUA" "DAKSHESH DARURI" "ADITYA BAJAJ"
## [4] "PATRICK H SCHILLING" "HANSHI ZUO" "HANSEN SONG"
## The second line of every player record (lines 6, 9, 12, ...) is scanned
## for its first run of two capital letters, which is stored as the state.
State <- str_extract(
  data[seq(from = 6, to = length(data), by = 3)],
  "[A-Z]{2}"
)
head(State)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
## Total points: the first digit-dot-digit number (such as 6.0) found on the
## first line of each player record, converted from text to numeric.
score_rows <- data[seq(from = 5, to = length(data), by = 3)]
totalpts <- as.numeric(str_extract(score_rows, '(\\d{1})(\\.)(\\d{1})'))
totalpts
## [1] 6.0 6.0 6.0 5.5 5.5 5.0 5.0 5.0 5.0 5.0 4.5 4.5 4.5 4.5 4.5 4.0 4.0 4.0 4.0
## [20] 4.0 4.0 4.0 4.0 4.0 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.5 3.0
## [39] 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 2.5 2.5 2.5 2.5 2.5 2.5 2.0 2.0 2.0 2.0 2.0
## [58] 2.0 2.0 1.5 1.5 1.0 1.0 1.0
## Pre-tournament rating: the run of digits that follows "R:" on the second
## line of each player record. Only those lines are read (instead of scanning
## every line and dropping the NAs), so ActualElo stays aligned one-to-one
## with the player records; the opponent lookup that indexes ActualElo by
## pair number relies on that alignment. Capturing the digit run also means
## the result does not depend on the rating being exactly four characters
## wide.
rating_rows <- data[seq(6, length(data), 3)]
ActualElo <- as.integer(str_match(rating_rows, "R:\\s*(\\d+)")[, 2])
head(ActualElo)
## [1] 1794 1553 1384 1716 1655 1686
## Opponent pair numbers for every player. On the first line of a player
## record each played round appears as a capital letter, at least two spaces,
## and the opponent's number. The number is captured directly with
## str_match_all(), so the list result is never fed back into a string
## function (doing so coerces the list to text and raises a warning).
## Result: a list with one character vector of opponent numbers per player.
## NOTE: the name `round` shadows base::round(); it is kept because later
## code reads this variable.
game_rows <- data[seq(5, length(data), 3)]
round <- lapply(
  str_match_all(game_rows, "[A-Z]\\s{2,}(\\d+)"),
  function(hits) hits[, 2]
)
head(round)
## [[1]]
## [1] "39" "21" "18" "14" "7" "12" "4"
##
## [[2]]
## [1] "63" "58" "4" "17" "16" "20" "7"
##
## [[3]]
## [1] "8" "61" "25" "21" "11" "13" "12"
##
## [[4]]
## [1] "23" "28" "2" "26" "5" "19" "1"
##
## [[5]]
## [1] "45" "37" "12" "13" "4" "14" "17"
##
## [[6]]
## [1] "34" "29" "11" "35" "10" "27" "21"
## Average pre-tournament rating of each player's opponents. Every element of
## `round` holds one player's opponent numbers as text; converted to integers
## they index ActualElo directly. vapply() builds the result in a single pass
## (no vector grown inside a loop) and enforces exactly one numeric value per
## player.
opponent_means <- vapply(
  round,
  function(opponents) mean(ActualElo[as.integer(opponents)]),
  numeric(1)
)
## base::round() is spelled out because the list `round` shadows the name.
AvgElo <- base::round(opponent_means)
AvgElo
## [1] 1605 1469 1564 1574 1501 1519 1372 1468 1523 1554 1468 1506 1498 1515 1484
## [16] 1386 1499 1480 1426 1411 1470 1300 1214 1357 1363 1507 1222 1522 1314 1144
## [31] 1260 1379 1277 1375 1150 1388 1385 1539 1430 1391 1248 1150 1107 1327 1152
## [46] 1358 1392 1356 1286 1296 1356 1495 1345 1206 1406 1414 1363 1391 1319 1330
## [61] 1327 1186 1350 1263
## Combine the five per-player vectors into one data frame (one row per
## player) and write it to project1.csv, replacing any existing file.
project_1 <- data.frame(
  Names = Names,
  State = State,
  totalpts = totalpts,
  ActualElo = ActualElo,
  AvgElo = AvgElo
)
write_csv(project_1, 'project1.csv', append = FALSE)
LS0tDQp0aXRsZTogIlByb2plY3QgT25lIERhdGEgNjA3Ig0KYXV0aG9yOiAiQWwgSGFxdWUiDQpkYXRlOiAiYEZlYiAyNSwgMjAyMmAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyfQ0KIyMgaW1wb3J0IGFsbCB0aGUgbGlicmFyaWVzIA0KbGlicmFyeShyZWFkcikNCmxpYnJhcnkoc3RyaW5ncikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQpkYXRhIDwtIHJlYWRMaW5lcygiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL0FsZGF0YVNjaS9Qcm9qZWN0LU9uZS1EYXRhLTYwNy9tYWluL3RvdXJuYW1lbnQudHh0IixuPS0xKQ0KaGVhZChkYXRhKQ0KYGBgDQpgYGB7cn0NCiMjU3RhcnQgdGhlIHNlcXVlbmNlIGF0IHRoZSA1dGggcm93IGluIHRoZSBsaXN0IGFuZCBjb3VudCBldmVyeSAzIHJvd3MNCk5hbWVzIDwtZGF0YVtzZXEoNSxsZW5ndGgoZGF0YSksMyldDQpOYW1lcyA8LXN0cl90cmltKHN0cl9leHRyYWN0KE5hbWVzLCdbQS1aXS57MSwyNn0nKSkNCmhlYWQoTmFtZXMpDQpgYGANCg0KYGBge3J9DQojIyBTdGFydCB0aGUgc2VxdWVuY2Ugb2YgdGhlIGRhdGEgYXQgdGhlIDYgcm93IGFuZCBjb3VudCBldmVyeSAzIHJvd3MgDQpzdWJzZXQgPC0gZGF0YVtzZXEoNixsZW5ndGgoZGF0YSksMyldDQpTdGF0ZSA8LSBzdHJfZXh0cmFjdChzdWJzZXQsIltBLVpdezJ9IikNCmhlYWQoU3RhdGUpDQpgYGANCg0KYGBge3J9DQojIyBFeHRyYWN0IGFsbCB0aGUgZmxvYXRpbmcgZGlnaXRzIGFuZCBjb252ZXJ0IGl0IGFzIGEgbnVtZXJpYw0KcG9pbnRzIDwtIGRhdGFbc2VxKDUsbGVuZ3RoKGRhdGEpLDMpXQ0KdG90YWxwdHMgPC0gc3RyX2V4dHJhY3QocG9pbnRzLCcoXFxkezF9KShcXC4pKFxcZHsxfSknKQ0KdG90YWxwdHMgPC0gYXMubnVtZXJpYyh0b3RhbHB0cykNCnRvdGFscHRzDQpgYGANCmBgYHtyfQ0KIyMgdXNlIHRoZSBzcGVjaWZpYyByZWdleCB0byBmaXJzdCBnZXQgdGhlIG51bWJlcnMgd2l0aCB0aGUgUjogdGhlbiBleHRyYWN0IHRoZSA0IGRpZ2l0IGVsbyBmcm9tIHRoZSBwcmV2aW91cyBzdHJpbmcsIGZpbHRlciBvdXQgdGhlIG5hIGluIHRoZSBsaXN0IGFuZCB0aGVuIGNvbnZlcnQgdGhlIEVsbyBhcyBhIGludGVnZXIgdmFsdWUNCkVsbyA8LSBzdHJfZXh0cmFjdChkYXRhLCJSOiAuLi4uIikNCkVsbyA8LSBzdHJfZXh0cmFjdChFbG8sIi4uLi4kIikNCkFjdHVhbEVsbyA8LSBFbG9bIWlzLm5hKEVsbyldDQpBY3R1YWxFbG8gPC1hcy5pbnRlZ2VyKEFjdHVhbEVsbykNCmhlYWQoQWN0dWFsRWxvKQ0KYGBgDQoNCmBgYHtyfQ0KIyMgY2FsY3VsYXRlIHRoZSBpbmRpdmlkdWFsIHBlcnNvbiBlYWNoIHBsYXllciBoYXMgZmFjZWQsIGV4dHJhY3QgdGhlIHN1YnNldCBleHRyYWN0IGFsbCB0aGUgbWF0Y2hlcyBub3cgSSBoYXZlIHRvIGdldCByaWQgb2YgdGhlIGxldHRlcnMgYW5kIHdoaXRlIHNwYWNlIG5leHQgdG8gdGhlIG51bWJlcnMsIGJ5IGV4dHJhY3RpbmcgYWxsIHRoZSBudW1iZXJz
IGFuZCB0aGVuIGNoZWNraW5nIGlmIGkgZ290IHRoZSB2YWx1ZXMNCnJvdW5kIDwtIGRhdGFbc2VxKDUsbGVuZ3RoKGRhdGEpLDMpXQ0Kcm91bmQgPC0gc3RyX2V4dHJhY3RfYWxsKHJvdW5kLCdbQS1aXVxcc3syLH1cXGQrJykNCnJvdW5kIDwtIHN0cl9leHRyYWN0X2FsbChyb3VuZCwnXFxkezEsfScpDQpoZWFkKHJvdW5kKQ0KDQpgYGANCg0KDQpgYGB7cn0NCiMjIENyZWF0aW5nIGFuIGVtcHR5IGxpc3QgYW5kIGZpbmRpbmcgdGhlIHN1YnNldCwgY29udmVydCBpdCB0byBpbnRlZ2VyIGFuZCBsb29wZWQgaXQgdG8gZ2V0IHRoZSBlbG8gYW5kIGFwcGVuZCBpdCB0byBhIHBvc2l0aW9uIG9mIGEgbGlzdCwgdGhlbiByb3VuZGVkIHRoZSBsaXN0IA0KDQpsaXN0IDwtIGMoKQ0KaSA8LSAxDQpmb3IoaSBpbiBjKDE6bGVuZ3RoKHJvdW5kKSkpew0Kcm91bmRbW2ldXQ0KYXMuaW50ZWdlcih1bmxpc3Qocm91bmRbW2ldXSkpDQpBY3R1YWxFbG9bYXMuaW50ZWdlcih1bmxpc3Qocm91bmRbW2ldXSkpXQ0KbGlzdFtpXSA8LSBtZWFuKEFjdHVhbEVsb1thcy5pbnRlZ2VyKHVubGlzdChyb3VuZFtbaV1dKSldKSAgICMjIGxpc3RbaV0gaGVscHMgYXBwZW5kIHRoZSBudW1iZXIgYXQgYSBjZXJ0YWluIHBvc2l0aW9uDQp9DQpBdmdFbG8gPC0gcm91bmQobGlzdCkNCkF2Z0Vsbw0KYGBgDQpgYGB7cn0NCnByb2plY3RfMSA8LSBkYXRhLmZyYW1lKE5hbWVzLFN0YXRlLHRvdGFscHRzLEFjdHVhbEVsbyxBdmdFbG8pDQoNCndyaXRlX2Nzdihwcm9qZWN0XzEsJ3Byb2plY3QxLmNzdicsYXBwZW5kPUZBTFNFKQ0KYGBgDQoNCg==