This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Raw data link for the GitHub file:
# https://raw.githubusercontent.com/schmalmr/Project-1-607/main/tournamentinfo.txt
# Download the tournament report as a single string, then split it into lines.
# The report is a pipe-delimited text table, not a CSV, so it is kept as a
# plain character vector. stringr then receives an atomic vector and does not
# have to coerce a data frame (which raises the warning
# "argument is not an atomic vector; coercing" on every extraction).
chessresult <- getURL('https://raw.githubusercontent.com/schmalmr/Project-1-607/main/tournamentinfo.txt')
extract_chessresults <- strsplit(chessresult, "\r?\n")[[1]]
# Show the raw lines for a quick visual check.
tibble(extract_chessresults)
Following the approach in R for Data Science (page 209), divide the data into individual target variables and write a separate regular expression for each one so that it can be pulled from the dataset.
# Pattern for "<pair number> | <NAME>": one or more digits, whitespace, a
# pipe, then at least two whitespace-separated alphabetic words.
regname <- "[[:digit:]]+\\s+[|](\\s+[:alpha:]+){2,}"
# Collect every match across the input into one flat character vector.
player_name <- extract_chessresults %>%
  str_extract_all(regname) %>%
  unlist()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# Pattern for the state code: exactly two uppercase letters that follow a
# whitespace character and are followed by whitespace and a pipe.
regstate <- "(?<=\\s)[:upper:]{2}(?=\\s[|])"
# Collect every state code into one flat character vector.
state <- extract_chessresults %>%
  str_extract_all(regstate) %>%
  unlist()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# Pattern for total points: a digit, one punctuation character, a digit
# (e.g. "6.0").
regtotalpoints <- "[:digit:][:punct:][:digit:]"
# Collect every total-points match into one flat character vector.
total_points <- extract_chessresults %>%
  str_extract_all(regtotalpoints) %>%
  unlist()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# Pattern for the pre-tournament rating: the digits that follow "R:" and one
# or two whitespace characters.
rplayerstartingscore <- "(?<=R:\\s{1,2})[:digit:]+"
player_starting_score <- extract_chessresults %>%
  str_extract_all(rplayerstartingscore) %>%
  unlist()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# Pattern for the post-tournament rating: the digits that follow "->" and an
# optional whitespace character.
regplayersfinalscore <- "(?<=->\\s?)[:digit:]+"
player_final_score <- extract_chessresults %>%
  str_extract_all(regplayersfinalscore) %>%
  unlist()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# Pattern for the per-round results: the 42 characters that come right after
# the "|<digit><punct><digit><1-2 spaces>|" total-points cell.
reggame <- "(?<=[|][:digit:][:punct:][:digit:][:space:]{1,2}[|]{1}).{42}"
game <- extract_chessresults %>%
  str_extract_all(reggame) %>%
  unlist()
## Warning in stri_extract_all_regex(string, pattern, simplify = simplify, :
## argument is not an atomic vector; coercing
# Pattern for a digit (plus one or two following characters) that comes right
# after a letter and a whitespace character. The whitespace class must be
# written "\\s"; the previous "//s" matched two literal slashes and an "s".
# NOTE(review): regdigit is not used anywhere in the visible part of the file.
regdigit <- "(?<=[:alpha:]\\s)[:digit:].{1,2}"
# Print the extracted player names for a visual check.
player_name
## [1] "1 | GARY HUA" "2 | DAKSHESH DARURI"
## [3] "3 | ADITYA BAJAJ" "4 | PATRICK H SCHILLING"
## [5] "5 | HANSHI ZUO" "6 | HANSEN SONG"
## [7] "7 | GARY DEE SWATHELL" "8 | EZEKIEL HOUGHTON"
## [9] "9 | STEFANO LEE" "10 | ANVIT RAO"
## [11] "11 | CAMERON WILLIAM MC LEMAN" "12 | KENNETH J TACK"
## [13] "13 | TORRANCE HENRY JR" "14 | BRADLEY SHAW"
## [15] "15 | ZACHARY JAMES HOUGHTON" "16 | MIKE NIKITIN"
## [17] "17 | RONALD GRZEGORCZYK" "18 | DAVID SUNDEEN"
## [19] "19 | DIPANKAR ROY" "20 | JASON ZHENG"
## [21] "21 | DINH DANG BUI" "22 | EUGENE L MCCLURE"
## [23] "23 | ALAN BUI" "24 | MICHAEL R ALDRICH"
## [25] "25 | LOREN SCHWIEBERT" "26 | MAX ZHU"
## [27] "27 | GAURAV GIDWANI" "28 | SOFIA ADINA STANESCU"
## [29] "29 | CHIEDOZIE OKORIE" "30 | GEORGE AVERY JONES"
## [31] "31 | RISHI SHETTY" "32 | JOSHUA PHILIP MATHEWS"
## [33] "33 | JADE GE" "34 | MICHAEL JEFFERY THOMAS"
## [35] "35 | JOSHUA DAVID LEE" "36 | SIDDHARTH JHA"
## [37] "37 | AMIYATOSH PWNANANDAM" "38 | BRIAN LIU"
## [39] "39 | JOEL R HENDON" "40 | FOREST ZHANG"
## [41] "41 | KYLE WILLIAM MURPHY" "42 | JARED GE"
## [43] "43 | ROBERT GLEN VASEY" "44 | JUSTIN D SCHILLING"
## [45] "45 | DEREK YAN" "46 | JACOB ALEXANDER LAVALLEY"
## [47] "47 | ERIC WRIGHT" "48 | DANIEL KHAIN"
## [49] "49 | MICHAEL J MARTIN" "50 | SHIVAM JHA"
## [51] "51 | TEJAS AYYAGARI" "52 | ETHAN GUO"
## [53] "53 | JOSE C YBARRA" "54 | LARRY HODGE"
## [55] "55 | ALEX KONG" "56 | MARISA RICCI"
## [57] "57 | MICHAEL LU" "58 | VIRAJ MOHILE"
## [59] "59 | SEAN M MC CORMICK" "60 | JULIA SHEN"
## [61] "61 | JEZZEL FARKAS" "62 | ASHWIN BALAJI"
## [63] "63 | THOMAS JOSEPH HOSMER" "64 | BEN LI"
# Print the extracted state codes for a visual check.
state
## [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI"
## [16] "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI" "MI" "ON"
## [31] "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [46] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [61] "ON" "MI" "MI" "MI"
# Print the extracted pre-tournament ratings for a visual check.
player_starting_score
## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175" "1163"
# Print the extracted total points for a visual check.
total_points
## [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5" "4.5"
## [13] "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [25] "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [37] "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "2.5" "2.5"
## [49] "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "1.5"
## [61] "1.5" "1.0" "1.0" "1.0"
# Print the extracted per-round result strings for a visual check.
game
## [1] "W 39|W 21|W 18|W 14|W 7|D 12|D 4|"
## [2] "W 63|W 58|L 4|W 17|W 16|W 20|W 7|"
## [3] "L 8|W 61|W 25|W 21|W 11|W 13|W 12|"
## [4] "W 23|D 28|W 2|W 26|D 5|W 19|D 1|"
## [5] "W 45|W 37|D 12|D 13|D 4|W 14|W 17|"
## [6] "W 34|D 29|L 11|W 35|D 10|W 27|W 21|"
## [7] "W 57|W 46|W 13|W 11|L 1|W 9|L 2|"
## [8] "W 3|W 32|L 14|L 9|W 47|W 28|W 19|"
## [9] "W 25|L 18|W 59|W 8|W 26|L 7|W 20|"
## [10] "D 16|L 19|W 55|W 31|D 6|W 25|W 18|"
## [11] "D 38|W 56|W 6|L 7|L 3|W 34|W 26|"
## [12] "W 42|W 33|D 5|W 38|H |D 1|L 3|"
## [13] "W 36|W 27|L 7|D 5|W 33|L 3|W 32|"
## [14] "W 54|W 44|W 8|L 1|D 27|L 5|W 31|"
## [15] "D 19|L 16|W 30|L 22|W 54|W 33|W 38|"
## [16] "D 10|W 15|H |W 39|L 2|W 36|U |"
## [17] "W 48|W 41|L 26|L 2|W 23|W 22|L 5|"
## [18] "W 47|W 9|L 1|W 32|L 19|W 38|L 10|"
## [19] "D 15|W 10|W 52|D 28|W 18|L 4|L 8|"
## [20] "L 40|W 49|W 23|W 41|W 28|L 2|L 9|"
## [21] "W 43|L 1|W 47|L 3|W 40|W 39|L 6|"
## [22] "W 64|D 52|L 28|W 15|H |L 17|W 40|"
## [23] "L 4|W 43|L 20|W 58|L 17|W 37|W 46|"
## [24] "L 28|L 47|W 43|L 25|W 60|W 44|W 39|"
## [25] "L 9|W 53|L 3|W 24|D 34|L 10|W 47|"
## [26] "W 49|W 40|W 17|L 4|L 9|D 32|L 11|"
## [27] "W 51|L 13|W 46|W 37|D 14|L 6|U |"
## [28] "W 24|D 4|W 22|D 19|L 20|L 8|D 36|"
## [29] "W 50|D 6|L 38|L 34|W 52|W 48|U |"
## [30] "L 52|D 64|L 15|W 55|L 31|W 61|W 50|"
## [31] "L 58|D 55|W 64|L 10|W 30|W 50|L 14|"
## [32] "W 61|L 8|W 44|L 18|W 51|D 26|L 13|"
## [33] "W 60|L 12|W 50|D 36|L 13|L 15|W 51|"
## [34] "L 6|W 60|L 37|W 29|D 25|L 11|W 52|"
## [35] "L 46|L 38|W 56|L 6|W 57|D 52|W 48|"
## [36] "L 13|W 57|W 51|D 33|H |L 16|D 28|"
## [37] "B |L 5|W 34|L 27|H |L 23|W 61|"
## [38] "D 11|W 35|W 29|L 12|H |L 18|L 15|"
## [39] "L 1|W 54|W 40|L 16|W 44|L 21|L 24|"
## [40] "W 20|L 26|L 39|W 59|L 21|W 56|L 22|"
## [41] "W 59|L 17|W 58|L 20|X |U |U |"
## [42] "L 12|L 50|L 57|D 60|D 61|W 64|W 56|"
## [43] "L 21|L 23|L 24|W 63|W 59|L 46|W 55|"
## [44] "B |L 14|L 32|W 53|L 39|L 24|W 59|"
## [45] "L 5|L 51|D 60|L 56|W 63|D 55|W 58|"
## [46] "W 35|L 7|L 27|L 50|W 64|W 43|L 23|"
## [47] "L 18|W 24|L 21|W 61|L 8|D 51|L 25|"
## [48] "L 17|W 63|H |D 52|H |L 29|L 35|"
## [49] "L 26|L 20|D 63|D 64|W 58|H |U |"
## [50] "L 29|W 42|L 33|W 46|H |L 31|L 30|"
## [51] "L 27|W 45|L 36|W 57|L 32|D 47|L 33|"
## [52] "W 30|D 22|L 19|D 48|L 29|D 35|L 34|"
## [53] "H |L 25|H |L 44|U |W 57|U |"
## [54] "L 14|L 39|L 61|B |L 15|L 59|W 64|"
## [55] "L 62|D 31|L 10|L 30|B |D 45|L 43|"
## [56] "H |L 11|L 35|W 45|H |L 40|L 42|"
## [57] "L 7|L 36|W 42|L 51|L 35|L 53|B |"
## [58] "W 31|L 2|L 41|L 23|L 49|B |L 45|"
## [59] "L 41|B |L 9|L 40|L 43|W 54|L 44|"
## [60] "L 33|L 34|D 45|D 42|L 24|H |U |"
## [61] "L 32|L 3|W 54|L 47|D 42|L 30|L 37|"
## [62] "W 55|U |U |U |U |U |U |"
## [63] "L 2|L 48|D 49|L 43|L 45|H |U |"
## [64] "L 22|D 30|L 31|D 49|L 46|L 42|L 54|"
# Combine the extracted vectors into a data frame, one column per vector.
chess_table <- data.frame(
  player_name, state, player_starting_score,
  player_final_score, total_points, game
)
# Split the per-round string into one column per round. Every game string
# ends with a trailing "|", so splitting on "|" yields an eighth, empty piece.
# extra = "drop" discards it explicitly instead of relying on separate()'s
# default, which drops it with a warning on every row.
col_names <- c("game_1", "game_2", "game_3", "game_4", "game_5", "game_6", "game_7")
chess_table <- chess_table %>%
  separate(game, col_names, sep = "[|]", extra = "drop")
# Display only the seven per-round columns to check the split.
select(
  chess_table,
  game_1, game_2, game_3, game_4, game_5, game_6, game_7
)
# Convert each game column from a result string (e.g. "W  39") to the number
# it contains. str_extract() returns NA when a cell holds no digits (e.g. "H"
# or "U" rounds), so those cells become NA directly and as.numeric() has no
# non-numeric text to warn about. One lapply() replaces seven copy-pasted
# statements.
game_cols <- paste0("game_", 1:7)
chess_table[game_cols] <- lapply(
  chess_table[game_cols],
  function(x) as.numeric(str_extract(x, "[0-9]+"))
)
# Copy of chess_table with the count column that add_count() appends.
# NOTE(review): index is not used anywhere in the visible part of the file.
index <- chess_table %>% add_count()
# Display the starting rating next to the seven per-round columns.
select(
  chess_table,
  player_starting_score,
  game_1, game_2, game_3, game_4, game_5, game_6, game_7
)
# Convert the rating and points columns to numeric. as.character() comes
# first so that a factor column would be converted by its labels rather than
# its integer codes.
chess_table$player_starting_score <-
  as.numeric(as.character(chess_table$player_starting_score))
# Assign back to total_points itself. The previous misspelled target
# ("toal_points") created a stray column and left total_points as text.
chess_table$total_points <-
  as.numeric(as.character(chess_table$total_points))
# Separate the player_name column ("<index> | <NAME>") into the player index
# and the name. The separator also consumes the whitespace on both sides of
# the pipe, so Name no longer starts with a stray space (" GARY HUA").
chess_table <- separate(
  data = chess_table,
  col = player_name,
  into = c("player_index", "Name"),
  sep = "\\s*\\|\\s*"
)
# Convert the index from character to numeric.
chess_table$player_index <- as.numeric(as.character(chess_table$player_index))
chess_table
# Create a copy of the chess table to hold the opponent starting scores.
chess_table_oppscores <- chess_table
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(dplyr)
library(tidyr)
# Developing approach to index against the player index and the game columns to find and replace with the approach starting player scores before taking row means
#chess_table_oppscores %>%
#if (chess_table_oppscores$player_index==chess_table_oppscores$game_1), #chess_table_oppscores$game_1==chess_table_oppscores$player_starting_score, #{chess_table_oppscores$game_1==0}
#chess_table_oppscores <- chess_table_oppscores %>% mutate (chess_table_oppscores$game_1 = case_when(chess_table_oppscores$player_index== chess_table_oppscores%game_1,TRUE ~ chess_table_oppscores$game1=chess_table_oppscores$player_starting_score))
#chess_table_oppscores %>%
# gather(key = "player_index") %>%
# left_join(lookup, by = "col_names") %>%
# spread(key = player_index, value = player_starting_score)
# Build the output table: name, state, total points and starting rating,
# with duplicate rows removed.
# TODO: add an overall average opponent pre-rating once the lookup above is
# finished.
result <- distinct(
  ungroup(
    select(chess_table_oppscores, Name, state, total_points, player_starting_score)
  )
)
# Write the output table to a CSV file.
# NOTE(review): the destination is an absolute path under /Users/mark;
# presumably this only exists on the author's machine - confirm before running.
write_csv(result, "/Users/mark/607_Project_1_output.csv")