packages used
library(tidyverse)
library(stringr)
library(readr)
Import the raw data from GitHub and parse it. This code discards most of the data I don't need, keeping only the total points, rounds 1-7, the state, the name, and the pre-tournament rating. The pre- and post-tournament ratings are still joined; I still need to figure out how to separate them.
# Read the raw tournament text file. `skip = 3` drops the first three lines of
# the file and `header = FALSE` keeps the first remaining line as data.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
my_data_raw <- read.csv(
  "https://raw.githubusercontent.com/jonburns2454/DATA607/main/project%201%20data.txt",
  header = FALSE,
  skip = 3
)
# Split every raw line on "-" into a character matrix (one row per line).
# NOTE(review): `my_data_raw[, ]` only collapses to a character vector when the
# data frame has a single column - confirm the file contains no commas.
chess_data <- str_split(my_data_raw[, ], "-", simplify = TRUE)
# print(chess_data[1:20])
Pulling all of the data we need
# Names
# Extract the name-like text from every cell of chess_data. With
# simplify = TRUE the result is already a character matrix, so the former
# unlist() wrapper was a no-op and has been removed.
Names <- str_extract_all(
  chess_data[, ],
  "\\D+\\w+[[:space:]]\\w+([[:space:]]\\w+)*",
  simplify = TRUE
)
# Remove the "|" (and any other non-alphanumeric, non-space character) from
# the front of the names.
Names <- gsub("[^[:alnum:] ]", "", Names)
# Keep only the rows that contain at least one non-empty match.
Names <- Names[!apply(Names == "", 1, all), ]
print(Names)
## [1] " GARY HUA" " DAKSHESH DARURI"
## [3] " ADITYA BAJAJ" " PATRICK H SCHILLING"
## [5] " HANSHI ZUO" " HANSEN SONG"
## [7] " GARY DEE SWATHELL" " EZEKIEL HOUGHTON"
## [9] " STEFANO LEE" " ANVIT RAO"
## [11] " CAMERON WILLIAM MC LEMAN" " KENNETH J TACK"
## [13] " TORRANCE HENRY JR" " BRADLEY SHAW"
## [15] " ZACHARY JAMES HOUGHTON" " MIKE NIKITIN"
## [17] " RONALD GRZEGORCZYK" " DAVID SUNDEEN"
## [19] " DIPANKAR ROY" " JASON ZHENG"
## [21] " DINH DANG BUI" " EUGENE L MCCLURE"
## [23] " ALAN BUI" " MICHAEL R ALDRICH"
## [25] " LOREN SCHWIEBERT" " MAX ZHU"
## [27] " GAURAV GIDWANI" " SOFIA ADINA STANESCU"
## [29] " CHIEDOZIE OKORIE" " GEORGE AVERY JONES"
## [31] " RISHI SHETTY" " JOSHUA PHILIP MATHEWS"
## [33] " JADE GE" " MICHAEL JEFFERY THOMAS"
## [35] " JOSHUA DAVID LEE" " SIDDHARTH JHA"
## [37] " AMIYATOSH PWNANANDAM" " BRIAN LIU"
## [39] " JOEL R HENDON" " FOREST ZHANG"
## [41] " KYLE WILLIAM MURPHY" " JARED GE"
## [43] " ROBERT GLEN VASEY" " JUSTIN D SCHILLING"
## [45] " DEREK YAN" " JACOB ALEXANDER LAVALLEY"
## [47] " ERIC WRIGHT" " DANIEL KHAIN"
## [49] " MICHAEL J MARTIN" " SHIVAM JHA"
## [51] " TEJAS AYYAGARI" " ETHAN GUO"
## [53] " JOSE C YBARRA" " LARRY HODGE"
## [55] " ALEX KONG" " MARISA RICCI"
## [57] " MICHAEL LU" " VIRAJ MOHILE"
## [59] " SEAN M MC CORMICK" " JULIA SHEN"
## [61] " JEZZEL FARKAS" " ASHWIN BALAJI"
## [63] " THOMAS JOSEPH HOSMER" " BEN LI"
# State data
# Two upper-case letters immediately followed by whitespace and "|".
# The character column (`my_data_raw[, ]`) is passed instead of the whole data
# frame, so stringr receives an atomic vector and no longer emits the
# "argument is not an atomic vector; coercing" warning.
States <- unlist(str_extract_all(my_data_raw[, ], "[[:upper:]]{2}(?=\\s\\|)"))
# Total points
# Collect every "digit.digit" token found in the raw lines and flatten the
# per-line matches into a single character vector.
Total_Points <- my_data_raw[, ] %>%
  str_extract_all("\\d\\.\\d") %>%
  unlist()
# Pre-rating
# Match an "R" or ":" followed by whitespace and a run of digits. The character
# column (`my_data_raw[, ]`) is passed instead of the whole data frame, so
# stringr receives an atomic vector and no longer emits the
# "argument is not an atomic vector; coercing" warning.
Pre_Ratings <- unlist(str_extract_all(my_data_raw[, ], "[R:]([[:space:]]+\\d+)"))
# Strip everything that is not alphanumeric or a space (e.g. the leading ":").
# The values stay character strings and keep their leading whitespace.
Pre_Ratings <- gsub("[^[:alnum:] ]", "", Pre_Ratings)
print(Pre_Ratings)
## [1] " 1794" " 1553" " 1384" " 1716" " 1655" " 1686" " 1649" " 1641" " 1411"
## [10] " 1365" " 1712" " 1663" " 1666" " 1610" " 1220" " 1604" " 1629" " 1600"
## [19] " 1564" " 1595" " 1563" " 1555" " 1363" " 1229" " 1745" " 1579" " 1552"
## [28] " 1507" " 1602" " 1522" " 1494" " 1441" " 1449" " 1399" " 1438" " 1355"
## [37] " 980" " 1423" " 1436" " 1348" " 1403" " 1332" " 1283" " 1199" " 1242"
## [46] " 377" " 1362" " 1382" " 1291" " 1056" " 1011" " 935" " 1393" " 1270"
## [55] " 1186" " 1153" " 1092" " 917" " 853" " 967" " 955" " 1530" " 1175"
## [64] " 1163"
# Data frame creation + average pre-chess rating of opponents
# Combine the extracted vectors into one table and prepend a running pair
# number. The numbering is derived from the row count instead of a hard-coded
# 1:64, and `.before` places the column directly, replacing the separate
# relocate() step.
Tournament_df <- data.frame(Names, States, Total_Points, Pre_Ratings) %>%
  add_column(Pair_Num = seq_len(nrow(.)), .before = "Names")
Opponents - I started this section, but I could not for the life of me figure out the final opponent rating.
# Unfinished attempt, left commented out, at extracting opponent numbers:
# one- or two-digit numbers followed by "|" are matched, then reduced to
# their digits, and all-empty rows are filtered out.
# NOTE(review): Opponents_Matrix is created but never used below - confirm
# whether it was meant to hold the seven rounds per player.
# Opponents <- unlist(str_extract_all(my_data_raw[,], "\\d{2}+[|]|\\d{1}+[|]", simplify = T))
#
# Opponents_Matrix <- matrix(ncol = 7)
#
# Opponents <- unlist(str_extract_all(Opponents[,], "\\d+", simplify=TRUE))
# Opponents_Filled_Matrix <- Opponents[rowSums(Opponents=="")!=ncol(Opponents)]
#
# print(Opponents)
# Display the assembled tournament table as a data.table.
data.table::data.table(Tournament_df)
## Pair_Num Names States Total_Points Pre_Ratings
## 1: 1 GARY HUA ON 6.0 1794
## 2: 2 DAKSHESH DARURI MI 6.0 1553
## 3: 3 ADITYA BAJAJ MI 6.0 1384
## 4: 4 PATRICK H SCHILLING MI 5.5 1716
## 5: 5 HANSHI ZUO MI 5.5 1655
## 6: 6 HANSEN SONG OH 5.0 1686
## 7: 7 GARY DEE SWATHELL MI 5.0 1649
## 8: 8 EZEKIEL HOUGHTON MI 5.0 1641
## 9: 9 STEFANO LEE ON 5.0 1411
## 10: 10 ANVIT RAO MI 5.0 1365
## 11: 11 CAMERON WILLIAM MC LEMAN MI 4.5 1712
## 12: 12 KENNETH J TACK MI 4.5 1663
## 13: 13 TORRANCE HENRY JR MI 4.5 1666
## 14: 14 BRADLEY SHAW MI 4.5 1610
## 15: 15 ZACHARY JAMES HOUGHTON MI 4.5 1220
## 16: 16 MIKE NIKITIN MI 4.0 1604
## 17: 17 RONALD GRZEGORCZYK MI 4.0 1629
## 18: 18 DAVID SUNDEEN MI 4.0 1600
## 19: 19 DIPANKAR ROY MI 4.0 1564
## 20: 20 JASON ZHENG MI 4.0 1595
## 21: 21 DINH DANG BUI ON 4.0 1563
## 22: 22 EUGENE L MCCLURE MI 4.0 1555
## 23: 23 ALAN BUI ON 4.0 1363
## 24: 24 MICHAEL R ALDRICH MI 4.0 1229
## 25: 25 LOREN SCHWIEBERT MI 3.5 1745
## 26: 26 MAX ZHU ON 3.5 1579
## 27: 27 GAURAV GIDWANI MI 3.5 1552
## 28: 28 SOFIA ADINA STANESCU MI 3.5 1507
## 29: 29 CHIEDOZIE OKORIE MI 3.5 1602
## 30: 30 GEORGE AVERY JONES ON 3.5 1522
## 31: 31 RISHI SHETTY MI 3.5 1494
## 32: 32 JOSHUA PHILIP MATHEWS ON 3.5 1441
## 33: 33 JADE GE MI 3.5 1449
## 34: 34 MICHAEL JEFFERY THOMAS MI 3.5 1399
## 35: 35 JOSHUA DAVID LEE MI 3.5 1438
## 36: 36 SIDDHARTH JHA MI 3.5 1355
## 37: 37 AMIYATOSH PWNANANDAM MI 3.5 980
## 38: 38 BRIAN LIU MI 3.0 1423
## 39: 39 JOEL R HENDON MI 3.0 1436
## 40: 40 FOREST ZHANG MI 3.0 1348
## 41: 41 KYLE WILLIAM MURPHY MI 3.0 1403
## 42: 42 JARED GE MI 3.0 1332
## 43: 43 ROBERT GLEN VASEY MI 3.0 1283
## 44: 44 JUSTIN D SCHILLING MI 3.0 1199
## 45: 45 DEREK YAN MI 3.0 1242
## 46: 46 JACOB ALEXANDER LAVALLEY MI 3.0 377
## 47: 47 ERIC WRIGHT MI 2.5 1362
## 48: 48 DANIEL KHAIN MI 2.5 1382
## 49: 49 MICHAEL J MARTIN MI 2.5 1291
## 50: 50 SHIVAM JHA MI 2.5 1056
## 51: 51 TEJAS AYYAGARI MI 2.5 1011
## 52: 52 ETHAN GUO MI 2.5 935
## 53: 53 JOSE C YBARRA MI 2.0 1393
## 54: 54 LARRY HODGE MI 2.0 1270
## 55: 55 ALEX KONG MI 2.0 1186
## 56: 56 MARISA RICCI MI 2.0 1153
## 57: 57 MICHAEL LU MI 2.0 1092
## 58: 58 VIRAJ MOHILE MI 2.0 917
## 59: 59 SEAN M MC CORMICK MI 2.0 853
## 60: 60 JULIA SHEN MI 1.5 967
## 61: 61 JEZZEL FARKAS ON 1.5 955
## 62: 62 ASHWIN BALAJI MI 1.0 1530
## 63: 63 THOMAS JOSEPH HOSMER MI 1.0 1175
## 64: 64 BEN LI MI 1.0 1163
## Pair_Num Names States Total_Points Pre_Ratings