Load data

library(stringr)
# Read the tournament report as raw text: one data-frame row per line of the
# file, all held in the single column `V1`.
# `sep = ","` is used only to keep each line in one field (assumes the report
# itself contains no commas -- TODO confirm).
# Quote and comment processing are disabled so that a line containing an
# apostrophe, a double quote or "#" is read verbatim instead of being merged
# with following lines or truncated. `stringsAsFactors = FALSE` keeps the lines
# as character strings regardless of the R version's default.
chess_data <- read.table(
  url("https://raw.githubusercontent.com/Zchen116/assignment-2/master/tournamentinfo.txt"),
  sep = ",",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)
head(chess_data)
##                                                                                           V1
## 1  -----------------------------------------------------------------------------------------
## 2  Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
## 3  Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
## 4  -----------------------------------------------------------------------------------------
## 5      1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 6     ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |

Clean line dividers

# Remove every "-" character from each line. This empties the dashed divider
# rows and also turns the "->" rating arrow into ">".
# NOTE(review): a hyphen inside a player name is removed as well -- confirm
# that this is acceptable for the exported names.
# The column is named explicitly and kept as character so that the result does
# not depend on an auto-generated column name or on the R version's
# `stringsAsFactors` default.
chess_data1 <- data.frame(
  line = str_replace_all(chess_data$V1, fixed("-"), ""),
  stringsAsFactors = FALSE
)
head(chess_data1)
##                                                      str_replace_all.chess_data.V1..........
## 1                                                                                           
## 2  Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| 
## 3   Num  | USCF ID / Rtg (Pre>Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | 
## 4                                                                                           
## 5      1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 6      ON | 15445895 / R: 1794   >1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
# Skip the first four rows (divider, two header rows, divider) so the data
# starts at the first player. Because the data frame has a single column,
# selecting rows with `[rows, ]` drops the data-frame wrapper and leaves a
# plain vector of text lines.
player_rows <- seq(5, nrow(chess_data1))
chess_data1 <- chess_data1[player_rows, ]
head(chess_data1)
## [1]     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## [2]    ON | 15445895 / R: 1794   >1817     |N:2  |W    |B    |W    |B    |W    |B    |W    | 
## [3]                                                                                          
## [4]     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## [5]    MI | 14598900 / R: 1553   >1663     |N:2  |B    |W    |B    |W    |B    |W    |B    | 
## [6]                                                                                          
## 131 Levels:  ...

Extract ID

# Pair number: one or two digits that are immediately followed by a single
# whitespace character and a "|".
id <- "\\d{1,2}(?=\\s\\|)"
id_matches <- str_extract_all(unlist(chess_data1), id)
# Flatten the per-line match lists into one character vector.
chess_id <- unlist(id_matches)

Extract name

# Player name: two or more runs of upper-case letters, each run followed by
# one whitespace character.
name <- "([[:upper:]]+\\s){2,}"
chess_names <- unlist(str_extract_all(unlist(chess_data1), name))
# Every match ends with the whitespace consumed by the final `\\s`, so the
# trimmed result must be stored; otherwise the trailing blank ends up in the
# data frame and in the exported CSV.
chess_names <- str_trim(chess_names, side = "right")
chess_names
##  [1] "GARY HUA"                  "DAKSHESH DARURI"          
##  [3] "ADITYA BAJAJ"              "PATRICK H SCHILLING"      
##  [5] "HANSHI ZUO"                "HANSEN SONG"              
##  [7] "GARY DEE SWATHELL"         "EZEKIEL HOUGHTON"         
##  [9] "STEFANO LEE"               "ANVIT RAO"                
## [11] "CAMERON WILLIAM MC LEMAN"  "KENNETH J TACK"           
## [13] "TORRANCE HENRY JR"         "BRADLEY SHAW"             
## [15] "ZACHARY JAMES HOUGHTON"    "MIKE NIKITIN"             
## [17] "RONALD GRZEGORCZYK"        "DAVID SUNDEEN"            
## [19] "DIPANKAR ROY"              "JASON ZHENG"              
## [21] "DINH DANG BUI"             "EUGENE L MCCLURE"         
## [23] "ALAN BUI"                  "MICHAEL R ALDRICH"        
## [25] "LOREN SCHWIEBERT"          "MAX ZHU"                  
## [27] "GAURAV GIDWANI"            "SOFIA ADINA STANESCUBELLU"
## [29] "CHIEDOZIE OKORIE"          "GEORGE AVERY JONES"       
## [31] "RISHI SHETTY"              "JOSHUA PHILIP MATHEWS"    
## [33] "JADE GE"                   "MICHAEL JEFFERY THOMAS"   
## [35] "JOSHUA DAVID LEE"          "SIDDHARTH JHA"            
## [37] "AMIYATOSH PWNANANDAM"      "BRIAN LIU"                
## [39] "JOEL R HENDON"             "FOREST ZHANG"             
## [41] "KYLE WILLIAM MURPHY"       "JARED GE"                 
## [43] "ROBERT GLEN VASEY"         "JUSTIN D SCHILLING"       
## [45] "DEREK YAN"                 "JACOB ALEXANDER LAVALLEY" 
## [47] "ERIC WRIGHT"               "DANIEL KHAIN"             
## [49] "MICHAEL J MARTIN"          "SHIVAM JHA"               
## [51] "TEJAS AYYAGARI"            "ETHAN GUO"                
## [53] "JOSE C YBARRA"             "LARRY HODGE"              
## [55] "ALEX KONG"                 "MARISA RICCI"             
## [57] "MICHAEL LU"                "VIRAJ MOHILE"             
## [59] "SEAN M MC CORMICK"         "JULIA SHEN"               
## [61] "JEZZEL FARKAS"             "ASHWIN BALAJI"            
## [63] "THOMAS JOSEPH HOSMER"      "BEN LI"

Extract State

# State / province code: exactly two upper-case letters plus one whitespace
# character, located directly in front of a "|".
state <- "([[:upper:]]){2}\\s(?=\\|)"
chess_states <- unlist(str_extract_all(unlist(chess_data1), state))
# The pattern captures the whitespace after the code, so the trimmed result
# must be stored; otherwise values such as "ON " reach the data frame and the
# exported CSV.
chess_states <- str_trim(chess_states, side = "right")
chess_states
##  [1] "ON" "MI" "MI" "MI" "MI" "OH" "MI" "MI" "ON" "MI" "MI" "MI" "MI" "MI"
## [15] "MI" "MI" "MI" "MI" "MI" "MI" "ON" "MI" "ON" "MI" "MI" "ON" "MI" "MI"
## [29] "MI" "ON" "MI" "ON" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [43] "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI" "MI"
## [57] "MI" "MI" "MI" "MI" "ON" "MI" "MI" "MI"

Extract Total Number of Points

# Total points: a digit, a literal ".", and a digit (for example "6.0").
# The values are kept as character strings.
points <- "\\d\\.\\d"
point_matches <- str_extract_all(unlist(chess_data1), points)
chess_point <- unlist(point_matches)
chess_point
##  [1] "6.0" "6.0" "6.0" "5.5" "5.5" "5.0" "5.0" "5.0" "5.0" "5.0" "4.5"
## [12] "4.5" "4.5" "4.5" "4.5" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0" "4.0"
## [23] "4.0" "4.0" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5" "3.5"
## [34] "3.5" "3.5" "3.5" "3.5" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0" "3.0"
## [45] "3.0" "3.0" "2.5" "2.5" "2.5" "2.5" "2.5" "2.5" "2.0" "2.0" "2.0"
## [56] "2.0" "2.0" "2.0" "2.0" "1.5" "1.5" "1.0" "1.0" "1.0"

Extract Player’s Pre-Rating

# Pre-tournament rating: a run of 3-4 digits that
#   * is not preceded by ">" plus one whitespace character,
#   * is preceded by one or two whitespace characters, or by a whitespace
#     character followed by ":",
#   * is followed by a whitespace character or by "P".
# The lookbehinds presumably exist to skip the post-tournament rating that
# follows the ">" -- verify against the raw lines.
pre_rating <- "(?<!\\>\\s)(?<=\\s{1,2}|\\s\\:)(\\d{3,4}(?=\\s|P))"
chess_pre_rating <- str_trim(
  unlist(str_extract_all(unlist(chess_data1), pre_rating))
)
chess_pre_rating
##  [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980"  "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377"  "1362" "1382" "1291" "1056"
## [51] "1011" "935"  "1393" "1270" "1186" "1153" "1092" "917"  "853"  "967" 
## [61] "955"  "1530" "1175" "1163"

Compute Average Pre-Tournament Chess Rating of Opponents

# Every field that ends directly in front of a "|" contributes one match:
# the digits at the end of the field, or a single blank when the field does
# not end in a digit. Blanks carry no opponent and are turned into NA.
opponents <- "(\\d{1,}|[[:blank:]]{1})(?=\\|)"
round_fields <- unlist(str_extract_all(unlist(chess_data1), opponents))
round_fields[round_fields == " "] <- NA

# Layout assumed by the indexing below (TODO confirm against the raw file):
# each of a player's two text rows yields 10 matches, and matches 4 to 10 of
# the first row hold the opponents' pair numbers for rounds 1 to 7.
fields_per_row <- 10
round_positions <- 4:10

# Build a players x rounds matrix of opponent pair numbers. The number of
# rows comes from the data instead of a hard-coded player count.
player_opponents <- do.call(cbind, lapply(round_positions, function(pos) {
  # Matches at this position, taken from every text row ...
  per_row <- round_fields[seq(pos, length(round_fields), by = fields_per_row)]
  # ... of which only the first of each player's two rows is kept.
  as.numeric(per_row[seq(1, length(per_row), by = 2)])
}))

# Average pre-rating of each player's opponents. An opponent's pair number is
# used as the position of that opponent in `chess_pre_rating`; rounds without
# an opponent (NA) are ignored.
chess_avg <- vapply(
  seq_len(nrow(player_opponents)),
  function(i) {
    mean(as.numeric(chess_pre_rating[player_opponents[i, ]]), na.rm = TRUE)
  },
  numeric(1)
)

Create a Data Frame with All the Information

# Assemble one row per player: name, state, total points, pre-rating and the
# average pre-rating of the player's opponents.
# Names and states are trimmed here because the extraction patterns capture a
# trailing whitespace character. `chess_avg` is included so that the computed
# opponent average is actually part of the exported result.
# `stringsAsFactors = FALSE` keeps the text columns as character on every R
# version.
ChessResults <- data.frame(
  chess_names = str_trim(chess_names),
  chess_states = str_trim(chess_states),
  chess_point,
  chess_pre_rating,
  chess_avg,
  stringsAsFactors = FALSE
)
head(ChessResults, 5)
##            chess_names chess_states chess_point chess_pre_rating
## 1            GARY HUA           ON          6.0             1794
## 2     DAKSHESH DARURI           MI          6.0             1553
## 3        ADITYA BAJAJ           MI          6.0             1384
## 4 PATRICK H SCHILLING           MI          5.5             1716
## 5          HANSHI ZUO           MI          5.5             1655

Write csv in R

# Export the assembled results to a CSV file in the working directory.
# NOTE(review): the file name is spelled "Porject" -- confirm whether any
# consumer relies on this exact name before correcting it.
write.csv(
  x = ChessResults,
  file = "Porject_ChessResults.csv"
)