R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Source: raw tournament results file hosted on GitHub
# https://raw.githubusercontent.com/schmalmr/Project-1-607/main/tournamentinfo.txt

# Download the file as one text string, then parse that string with read.csv().
# The first line of the text is consumed as the header row (read.csv default).
chessresult <- getURL(
  "https://raw.githubusercontent.com/schmalmr/Project-1-607/main/tournamentinfo.txt"
)

extract_chessresults <- read.csv(text = chessresult, header = TRUE)

# Show the parsed data as a tibble for a quick visual check.
print(tibble(extract_chessresults))

Following the approach in *R for Data Science* (page 209), break the target data into individual variables and define a regular expression for each one, so that each variable can be extracted from the dataset separately.

# Extract each target variable from the tournament text with its own regex.
#
# str_extract_all() expects an atomic character vector. Handing it the whole
# data frame makes stringi deparse the data frame into a string and emit
# "argument is not an atomic vector; coercing" on every call, so the parsed
# lines are flattened into a plain character vector once and reused below.
chess_lines <- unlist(
  lapply(extract_chessresults, as.character),
  use.names = FALSE
)

# Pair number, the "|" separator, then two or more whitespace-separated words
# (the player's name), e.g. "1 | GARY HUA".
regname <- "[[:digit:]]+\\s+[|](\\s+[:alpha:]+){2,}"
player_name <- unlist(str_extract_all(chess_lines, regname))

# State: two capital letters preceded by whitespace and followed by " |".
regstate <- "(?<=\\s)[:upper:]{2}(?=\\s[|])"
state <- unlist(str_extract_all(chess_lines, regstate))

# Total points is the only digit-decimal point-digit value in the file (e.g.
# "6.0"). Match a literal "." rather than any punctuation character so that
# other digit/punctuation/digit sequences cannot be picked up by accident.
regtotalpoints <- "[:digit:]\\.[:digit:]"
total_points <- unlist(str_extract_all(chess_lines, regtotalpoints))

# Starting (pre-tournament) rating: the digits after "R:" and 1-2 spaces.
rplayerstartingscore <- "(?<=R:\\s{1,2})[:digit:]+"
player_starting_score <- unlist(
  str_extract_all(chess_lines, rplayerstartingscore)
)

# Final (post-tournament) rating: the digits after "->" and an optional space.
regplayersfinalscore <- "(?<=->\\s?)[:digit:]+"
player_final_score <- unlist(
  str_extract_all(chess_lines, regplayersfinalscore)
)

# Games section: the 42 characters that follow the "|<points>  |" field,
# i.e. seven 6-character round results such as "W  39|".
reggame <- "(?<=[|][:digit:][:punct:][:digit:][:space:]{1,2}[|]{1}).{42}"
game <- unlist(str_extract_all(chess_lines, reggame))

# Not used by the extractions above. The whitespace escape was written "//s",
# which matches two literal slashes followed by "s"; "\\s" is the regex class.
regdigit <- "(?<=[:alpha:]\\s)[:digit:].{1,2}"



# Display the extracted "<pair number> | <name>" strings.
print(player_name)
##  [1] "1 | GARY HUA"                  "2 | DAKSHESH DARURI"          
##  [3] "3 | ADITYA BAJAJ"              "4 | PATRICK H SCHILLING"      
##  [5] "5 | HANSHI ZUO"                "6 | HANSEN SONG"              
##  [7] "7 | GARY DEE SWATHELL"         "8 | EZEKIEL HOUGHTON"         
##  [9] "9 | STEFANO LEE"               "10 | ANVIT RAO"               
## [11] "11 | CAMERON WILLIAM MC LEMAN" "12 | KENNETH J TACK"          
## [13] "13 | TORRANCE HENRY JR"        "14 | BRADLEY SHAW"            
## [15] "15 | ZACHARY JAMES HOUGHTON"   "16 | MIKE NIKITIN"            
## [17] "17 | RONALD GRZEGORCZYK"       "18 | DAVID SUNDEEN"           
## [19] "19 | DIPANKAR ROY"             "20 | JASON ZHENG"             
## [21] "21 | DINH DANG BUI"            "22 | EUGENE L MCCLURE"        
## [23] "23 | ALAN BUI"                 "24 | MICHAEL R ALDRICH"       
## [25] "25 | LOREN SCHWIEBERT"         "26 | MAX ZHU"                 
## [27] "27 | GAURAV GIDWANI"           "28 | SOFIA ADINA STANESCU"    
## [29] "29 | CHIEDOZIE OKORIE"         "30 | GEORGE AVERY JONES"      
## [31] "31 | RISHI SHETTY"             "32 | JOSHUA PHILIP MATHEWS"   
## [33] "33 | JADE GE"                  "34 | MICHAEL JEFFERY THOMAS"  
## [35] "35 | JOSHUA DAVID LEE"         "36 | SIDDHARTH JHA"           
## [37] "37 | AMIYATOSH PWNANANDAM"     "38 | BRIAN LIU"               
## [39] "39 | JOEL R HENDON"            "40 | FOREST ZHANG"            
## [41] "41 | KYLE WILLIAM MURPHY"      "42 | JARED GE"                
## [43] "43 | ROBERT GLEN VASEY"        "44 | JUSTIN D SCHILLING"      
## [45] "45 | DEREK YAN"                "46 | JACOB ALEXANDER LAVALLEY"
## [47] "47 | ERIC WRIGHT"              "48 | DANIEL KHAIN"            
## [49] "49 | MICHAEL J MARTIN"         "50 | SHIVAM JHA"              
## [51] "51 | TEJAS AYYAGARI"           "52 | ETHAN GUO"               
## [53] "53 | JOSE C YBARRA"            "54 | LARRY HODGE"             
## [55] "55 | ALEX KONG"                "56 | MARISA RICCI"            
## [57] "57 | MICHAEL LU"               "58 | VIRAJ MOHILE"            
## [59] "59 | SEAN M MC CORMICK"        "60 | JULIA SHEN"              
## [61] "61 | JEZZEL FARKAS"            "62 | ASHWIN BALAJI"           
## [63] "63 | THOMAS JOSEPH HOSMER"     "64 | BEN LI"
# Display the extracted two-letter state codes.
print(state)
##  [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI"
## [16] "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI" "MI" "ON"
## [31] "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [46] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [61] "ON" "MI" "MI" "MI"
# Display the extracted starting ratings (still character strings here).
print(player_starting_score)
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"
# Display the extracted total points (still character strings here).
print(total_points)
##  [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5" "4.5"
## [13] "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [25] "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [37] "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "2.5" "2.5"
## [49] "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "2.0" "1.5"
## [61] "1.5" "1.0" "1.0" "1.0"
# Display the extracted 42-character games section for each player.
print(game)
##  [1] "W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
##  [2] "W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
##  [3] "L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
##  [4] "W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
##  [5] "W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
##  [6] "W  34|D  29|L  11|W  35|D  10|W  27|W  21|"
##  [7] "W  57|W  46|W  13|W  11|L   1|W   9|L   2|"
##  [8] "W   3|W  32|L  14|L   9|W  47|W  28|W  19|"
##  [9] "W  25|L  18|W  59|W   8|W  26|L   7|W  20|"
## [10] "D  16|L  19|W  55|W  31|D   6|W  25|W  18|"
## [11] "D  38|W  56|W   6|L   7|L   3|W  34|W  26|"
## [12] "W  42|W  33|D   5|W  38|H    |D   1|L   3|"
## [13] "W  36|W  27|L   7|D   5|W  33|L   3|W  32|"
## [14] "W  54|W  44|W   8|L   1|D  27|L   5|W  31|"
## [15] "D  19|L  16|W  30|L  22|W  54|W  33|W  38|"
## [16] "D  10|W  15|H    |W  39|L   2|W  36|U    |"
## [17] "W  48|W  41|L  26|L   2|W  23|W  22|L   5|"
## [18] "W  47|W   9|L   1|W  32|L  19|W  38|L  10|"
## [19] "D  15|W  10|W  52|D  28|W  18|L   4|L   8|"
## [20] "L  40|W  49|W  23|W  41|W  28|L   2|L   9|"
## [21] "W  43|L   1|W  47|L   3|W  40|W  39|L   6|"
## [22] "W  64|D  52|L  28|W  15|H    |L  17|W  40|"
## [23] "L   4|W  43|L  20|W  58|L  17|W  37|W  46|"
## [24] "L  28|L  47|W  43|L  25|W  60|W  44|W  39|"
## [25] "L   9|W  53|L   3|W  24|D  34|L  10|W  47|"
## [26] "W  49|W  40|W  17|L   4|L   9|D  32|L  11|"
## [27] "W  51|L  13|W  46|W  37|D  14|L   6|U    |"
## [28] "W  24|D   4|W  22|D  19|L  20|L   8|D  36|"
## [29] "W  50|D   6|L  38|L  34|W  52|W  48|U    |"
## [30] "L  52|D  64|L  15|W  55|L  31|W  61|W  50|"
## [31] "L  58|D  55|W  64|L  10|W  30|W  50|L  14|"
## [32] "W  61|L   8|W  44|L  18|W  51|D  26|L  13|"
## [33] "W  60|L  12|W  50|D  36|L  13|L  15|W  51|"
## [34] "L   6|W  60|L  37|W  29|D  25|L  11|W  52|"
## [35] "L  46|L  38|W  56|L   6|W  57|D  52|W  48|"
## [36] "L  13|W  57|W  51|D  33|H    |L  16|D  28|"
## [37] "B    |L   5|W  34|L  27|H    |L  23|W  61|"
## [38] "D  11|W  35|W  29|L  12|H    |L  18|L  15|"
## [39] "L   1|W  54|W  40|L  16|W  44|L  21|L  24|"
## [40] "W  20|L  26|L  39|W  59|L  21|W  56|L  22|"
## [41] "W  59|L  17|W  58|L  20|X    |U    |U    |"
## [42] "L  12|L  50|L  57|D  60|D  61|W  64|W  56|"
## [43] "L  21|L  23|L  24|W  63|W  59|L  46|W  55|"
## [44] "B    |L  14|L  32|W  53|L  39|L  24|W  59|"
## [45] "L   5|L  51|D  60|L  56|W  63|D  55|W  58|"
## [46] "W  35|L   7|L  27|L  50|W  64|W  43|L  23|"
## [47] "L  18|W  24|L  21|W  61|L   8|D  51|L  25|"
## [48] "L  17|W  63|H    |D  52|H    |L  29|L  35|"
## [49] "L  26|L  20|D  63|D  64|W  58|H    |U    |"
## [50] "L  29|W  42|L  33|W  46|H    |L  31|L  30|"
## [51] "L  27|W  45|L  36|W  57|L  32|D  47|L  33|"
## [52] "W  30|D  22|L  19|D  48|L  29|D  35|L  34|"
## [53] "H    |L  25|H    |L  44|U    |W  57|U    |"
## [54] "L  14|L  39|L  61|B    |L  15|L  59|W  64|"
## [55] "L  62|D  31|L  10|L  30|B    |D  45|L  43|"
## [56] "H    |L  11|L  35|W  45|H    |L  40|L  42|"
## [57] "L   7|L  36|W  42|L  51|L  35|L  53|B    |"
## [58] "W  31|L   2|L  41|L  23|L  49|B    |L  45|"
## [59] "L  41|B    |L   9|L  40|L  43|W  54|L  44|"
## [60] "L  33|L  34|D  45|D  42|L  24|H    |U    |"
## [61] "L  32|L   3|W  54|L  47|D  42|L  30|L  37|"
## [62] "W  55|U    |U    |U    |U    |U    |U    |"
## [63] "L   2|L  48|D  49|L  43|L  45|H    |U    |"
## [64] "L  22|D  30|L  31|D  49|L  46|L  42|L  54|"
# Assemble one row per player from the vectors extracted above.
chess_table <- data.frame(
  player_name, state, player_starting_score,
  player_final_score, total_points, game
)

# Split the games string into one column per round. Every round result ends
# with "|" (e.g. "W  39|"), so splitting on "|" produces an empty eighth piece;
# extra = "drop" discards it explicitly instead of warning on every row.
col_names <- c("game_1", "game_2", "game_3", "game_4", "game_5", "game_6", "game_7")
chess_table <- chess_table %>%
  separate(game, col_names, sep = "[|]", extra = "drop")

# Show the seven round columns.
chess_table %>%
  select(game_1:game_7)

# Reduce each round result to the opponent's pair number, as numeric.
# Entries that contain no digits (such as "H    " or "U    ") have no opponent
# number; str_extract() returns NA for them, so the conversion yields NA
# without the "NAs introduced by coercion" warning.
chess_table[col_names] <- lapply(
  chess_table[col_names],
  function(round_result) as.numeric(str_extract(round_result, "[0-9]+"))
)

# Copy of the table with an added column `n` holding the total row count.
index <- add_count(chess_table)

# Show the starting rating next to the opponent pair numbers for each round.
chess_table %>%
  select(player_starting_score, game_1:game_7)

# Convert the rating and points columns from text to numeric. as.character()
# guards against the columns having been created as factors.
chess_table$player_starting_score <-
  as.numeric(as.character(chess_table$player_starting_score))
# Assign back to `total_points`; the misspelt name `toal_points` added a stray
# column and left `total_points` as character.
chess_table$total_points <-
  as.numeric(as.character(chess_table$total_points))

# Separate "<pair number> | <name>" into player index and name. The separator
# also consumes the whitespace around "|", so neither piece keeps a stray
# leading or trailing space.
chess_table <- separate(
  data = chess_table,
  col = player_name,
  into = c("player_index", "Name"),
  sep = "\\s*\\|\\s*"
)

# Convert the index from character to numeric.
chess_table$player_index <- as.numeric(as.character(chess_table$player_index))
chess_table

# Working copy of the table for the opponent-rating calculation.
chess_table_oppscores <- chess_table

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(dplyr)
library(tidyr)

# Average opponent pre-tournament rating.
#
# The game_1..game_7 columns hold each opponent's pair number (NA when the
# round has no opponent). Look every pair number up against `player_index` to
# get that opponent's starting rating, then average across the seven rounds,
# ignoring the NA entries.
game_cols <- paste0("game_", seq_len(7))
opponent_ids <- as.matrix(chess_table_oppscores[game_cols])

# match() flattens the matrix column by column; matrix() with the same number
# of rows restores the original player-by-round layout.
opponent_ratings <- matrix(
  chess_table_oppscores$player_starting_score[
    match(opponent_ids, chess_table_oppscores$player_index)
  ],
  nrow = nrow(opponent_ids)
)

chess_table_oppscores$avg_opponent_pre_rating <-
  round(rowMeans(opponent_ratings, na.rm = TRUE))

# Final output: one row per player with name, state, total points, starting
# rating, and the average starting rating of the opponents played.
result <- chess_table_oppscores %>%
  select(
    Name, state, total_points, player_starting_score,
    avg_opponent_pre_rating
  ) %>%
  distinct()

# Store a csv.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running on a different machine.
write_csv(result, "/Users/mark/607_Project_1_output.csv")