Exporting Chess Tournament Data to .csv

In this project, you’re given a text file with chess tournament results where the information has some structure. Your job is to create an R Markdown file that generates a .CSV file (that could for example be imported into a SQL database) with the following information for all of the players: Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and Average Pre Chess Rating of Opponents

For the first player, the information would be: Gary Hua, ON, 6.0, 1794, 1605

1605 was calculated by using the pre-tournament opponents’ ratings of 1436, 1563, 1600, 1610, 1649, 1663, 1716, and dividing by the total number of games played.

library(stringr)

filename <- "tournamentinfo.txt"

# Create a function to read in and structure the data

readTournamentFile <- function(filename, skip = 4, warn = FALSE) {
  lines <- readLines(filename, warn = warn)
  
  # Make one record by line
  recordPerLine <- c()
  for(i in seq(from=skip+1, to=length(lines), by=3)) {
    recordPerLine <- c(recordPerLine,paste(lines[i],lines[i+1]) ) 
  }
  
  # Split the records up based on the | delimiter
  records <- str_split(recordPerLine, "\\|")
  
  # Make a data frame from the table, one record at a time
  dd <- as.data.frame( t( unlist(records[1]) ) )
  for(i in 2:length(records)) {
    dd_t <- as.data.frame( t( unlist(records[i]) ) )
    dd <- rbind(dd, dd_t)
  }
  
  # Add column names 
  colnames( dd ) <- c("Player Number", "Player Name", "Total", 
                      "Rnd 1 Result", "Rnd 2 Result", "Rnd 3 Result", "Rnd 4 Result", "Rnd 5 Result", "Rnd 6 Result", "Rnd 7 Result", 
                      "Player State", "USCF ID / Rtg (Pre->Post)", "DONTKNOW",
                      "Rnd 1 Color", "Rnd 2 Color", "Rnd 3 Color", "Rnd 4 Color", "Rnd 5 Color", "Rnd 6 Color", "Rnd 7 Color",
                      "DONTCARE") 
  
  # Get rid of whitespace
  dd2 <- as.data.frame(apply(dd,2,function(x)gsub('\\s+', '',x)))
  
  # Fix player names (too much white space removed)
  dd2$`Player Name` <- trimws( as.character(dd$`Player Name`) )
  
  # Do extractions
  dd2 <- cbind( dd2, `UCSF ID`=str_extract( dd2$`USCF ID / Rtg (Pre->Post)`, "[[:digit:]]+") ) 
  
  # Pre tournament rating
  dd2 <- cbind( dd2, `Pre Rating`=str_extract( dd2$`USCF ID / Rtg (Pre->Post)`, "(?<=\\:)[[:digit:]]+") )
  
  # Round results 

  dd2 <- cbind( dd2, `Round 1 Opponent`=str_extract(dd2$`Rnd 1 Result`, "[[:digit:]]+") )
  dd2 <- cbind( dd2, `Round 2 Opponent`=str_extract(dd2$`Rnd 2 Result`, "[[:digit:]]+") )
  dd2 <- cbind( dd2, `Round 3 Opponent`=str_extract(dd2$`Rnd 3 Result`, "[[:digit:]]+") )
  dd2 <- cbind( dd2, `Round 4 Opponent`=str_extract(dd2$`Rnd 4 Result`, "[[:digit:]]+") )
  dd2 <- cbind( dd2, `Round 5 Opponent`=str_extract(dd2$`Rnd 5 Result`, "[[:digit:]]+") )
  dd2 <- cbind( dd2, `Round 6 Opponent`=str_extract(dd2$`Rnd 6 Result`, "[[:digit:]]+") )
  dd2 <- cbind( dd2, `Round 7 Opponent`=str_extract(dd2$`Rnd 7 Result`, "[[:digit:]]+") )
  
  # Fix types
  
  dd2$`Pre Rating` <- as.numeric(as.character(dd2$`Pre Rating`))
  dd2$`Total` <- as.numeric(as.character(dd2$`Total`))
  dd2$`Player Number` <- as.character(dd2$`Player Number`)
  dd2$`Round 1 Opponent` <- as.character(dd2$`Round 1 Opponent`)
  dd2$`Round 2 Opponent` <- as.character(dd2$`Round 2 Opponent`)
  dd2$`Round 3 Opponent` <- as.character(dd2$`Round 3 Opponent`)
  dd2$`Round 4 Opponent` <- as.character(dd2$`Round 4 Opponent`)
  dd2$`Round 5 Opponent` <- as.character(dd2$`Round 5 Opponent`)
  dd2$`Round 6 Opponent` <- as.character(dd2$`Round 6 Opponent`)
  dd2$`Round 7 Opponent` <- as.character(dd2$`Round 7 Opponent`)

  # Remove unneeded columns 
  
  dd2$DONTCARE <- NULL
  dd2$DONTKNOW <- NULL
  dd2$`USCF ID / Rtg (Pre->Post)` <- NULL
  dd2$`UCSF ID`<- NULL
  dd2$`Rnd 1 Result` <- NULL
  dd2$`Rnd 2 Result` <- NULL
  dd2$`Rnd 3 Result` <- NULL
  dd2$`Rnd 4 Result` <- NULL
  dd2$`Rnd 5 Result` <- NULL
  dd2$`Rnd 6 Result` <- NULL
  dd2$`Rnd 7 Result` <- NULL
  dd2$`Rnd 1 Color` <- NULL
  dd2$`Rnd 2 Color` <- NULL
  dd2$`Rnd 3 Color` <- NULL
  dd2$`Rnd 4 Color` <- NULL
  dd2$`Rnd 5 Color` <- NULL
  dd2$`Rnd 6 Color` <- NULL
  dd2$`Rnd 7 Color` <- NULL

  dd2
}

#Run function on text file 

data_chess <- readTournamentFile(filename)

head(data_chess)
##   Player Number         Player Name Total Player State Pre Rating
## 1             1            GARY HUA   6.0           ON       1794
## 2             2     DAKSHESH DARURI   6.0           MI       1553
## 3             3        ADITYA BAJAJ   6.0           MI       1384
## 4             4 PATRICK H SCHILLING   5.5           MI       1716
## 5             5          HANSHI ZUO   5.5           MI       1655
## 6             6         HANSEN SONG   5.0           OH       1686
##   Round 1 Opponent Round 2 Opponent Round 3 Opponent Round 4 Opponent
## 1               39               21               18               14
## 2               63               58                4               17
## 3                8               61               25               21
## 4               23               28                2               26
## 5               45               37               12               13
## 6               34               29               11               35
##   Round 5 Opponent Round 6 Opponent Round 7 Opponent
## 1                7               12                4
## 2               16               20                7
## 3               11               13               12
## 4                5               19                1
## 5                4               14               17
## 6               10               27               21
#Collect pre-rating scores and take the average

preavgscores <- c()
for(i in data_chess$`Player Number`) {
    row <- data_chess[ data_chess$`Player Number` == i,  ]
    opp1pre <- data_chess[ data_chess$`Player Number` == row$`Round 1 Opponent`, ]$`Pre Rating`
    opp2pre <- data_chess[ data_chess$`Player Number` == row$`Round 2 Opponent`, ]$`Pre Rating`
    opp3pre <- data_chess[ data_chess$`Player Number` == row$`Round 3 Opponent`, ]$`Pre Rating`
    opp4pre <- data_chess[ data_chess$`Player Number` == row$`Round 4 Opponent`, ]$`Pre Rating`
    opp5pre <- data_chess[ data_chess$`Player Number` == row$`Round 5 Opponent`, ]$`Pre Rating`
    opp6pre <- data_chess[ data_chess$`Player Number` == row$`Round 6 Opponent`, ]$`Pre Rating`
    opp7pre <- data_chess[ data_chess$`Player Number` == row$`Round 7 Opponent`, ]$`Pre Rating`
    avgpre <- round(mean(c(opp1pre,opp2pre,opp3pre,opp4pre,opp5pre,opp6pre,opp7pre),na.rm = TRUE),0)
    preavgscores <- c(preavgscores,avgpre)
}

data_chess <- cbind(data_chess,`Avg Opp Rating`=preavgscores)

#Remove unneeded columns

data_chess$`Round 1 Opponent` <- NULL
data_chess$`Round 2 Opponent` <- NULL
data_chess$`Round 3 Opponent` <- NULL
data_chess$`Round 4 Opponent` <- NULL
data_chess$`Round 5 Opponent` <- NULL
data_chess$`Round 6 Opponent` <- NULL
data_chess$`Round 7 Opponent` <- NULL
data_chess$`Player Number` <- NULL

#Final result

data_chess
##                   Player Name Total Player State Pre Rating Avg Opp Rating
## 1                    GARY HUA   6.0           ON       1794           1605
## 2             DAKSHESH DARURI   6.0           MI       1553           1469
## 3                ADITYA BAJAJ   6.0           MI       1384           1564
## 4         PATRICK H SCHILLING   5.5           MI       1716           1574
## 5                  HANSHI ZUO   5.5           MI       1655           1501
## 6                 HANSEN SONG   5.0           OH       1686           1519
## 7           GARY DEE SWATHELL   5.0           MI       1649           1372
## 8            EZEKIEL HOUGHTON   5.0           MI       1641           1468
## 9                 STEFANO LEE   5.0           ON       1411           1523
## 10                  ANVIT RAO   5.0           MI       1365           1554
## 11   CAMERON WILLIAM MC LEMAN   4.5           MI       1712           1468
## 12             KENNETH J TACK   4.5           MI       1663           1506
## 13          TORRANCE HENRY JR   4.5           MI       1666           1498
## 14               BRADLEY SHAW   4.5           MI       1610           1515
## 15     ZACHARY JAMES HOUGHTON   4.5           MI       1220           1484
## 16               MIKE NIKITIN   4.0           MI       1604           1386
## 17         RONALD GRZEGORCZYK   4.0           MI       1629           1499
## 18              DAVID SUNDEEN   4.0           MI       1600           1480
## 19               DIPANKAR ROY   4.0           MI       1564           1426
## 20                JASON ZHENG   4.0           MI       1595           1411
## 21              DINH DANG BUI   4.0           ON       1563           1470
## 22           EUGENE L MCCLURE   4.0           MI       1555           1300
## 23                   ALAN BUI   4.0           ON       1363           1214
## 24          MICHAEL R ALDRICH   4.0           MI       1229           1357
## 25           LOREN SCHWIEBERT   3.5           MI       1745           1363
## 26                    MAX ZHU   3.5           ON       1579           1507
## 27             GAURAV GIDWANI   3.5           MI       1552           1222
## 28 SOFIA ADINA STANESCU-BELLU   3.5           MI       1507           1522
## 29           CHIEDOZIE OKORIE   3.5           MI       1602           1314
## 30         GEORGE AVERY JONES   3.5           ON       1522           1144
## 31               RISHI SHETTY   3.5           MI       1494           1260
## 32      JOSHUA PHILIP MATHEWS   3.5           ON       1441           1379
## 33                    JADE GE   3.5           MI       1449           1277
## 34     MICHAEL JEFFERY THOMAS   3.5           MI       1399           1375
## 35           JOSHUA DAVID LEE   3.5           MI       1438           1150
## 36              SIDDHARTH JHA   3.5           MI       1355           1388
## 37       AMIYATOSH PWNANANDAM   3.5           MI        980           1385
## 38                  BRIAN LIU   3.0           MI       1423           1539
## 39              JOEL R HENDON   3.0           MI       1436           1430
## 40               FOREST ZHANG   3.0           MI       1348           1391
## 41        KYLE WILLIAM MURPHY   3.0           MI       1403           1248
## 42                   JARED GE   3.0           MI       1332           1150
## 43          ROBERT GLEN VASEY   3.0           MI       1283           1107
## 44         JUSTIN D SCHILLING   3.0           MI       1199           1327
## 45                  DEREK YAN   3.0           MI       1242           1152
## 46   JACOB ALEXANDER LAVALLEY   3.0           MI        377           1358
## 47                ERIC WRIGHT   2.5           MI       1362           1392
## 48               DANIEL KHAIN   2.5           MI       1382           1356
## 49           MICHAEL J MARTIN   2.5           MI       1291           1286
## 50                 SHIVAM JHA   2.5           MI       1056           1296
## 51             TEJAS AYYAGARI   2.5           MI       1011           1356
## 52                  ETHAN GUO   2.5           MI        935           1495
## 53              JOSE C YBARRA   2.0           MI       1393           1345
## 54                LARRY HODGE   2.0           MI       1270           1206
## 55                  ALEX KONG   2.0           MI       1186           1406
## 56               MARISA RICCI   2.0           MI       1153           1414
## 57                 MICHAEL LU   2.0           MI       1092           1363
## 58               VIRAJ MOHILE   2.0           MI        917           1391
## 59          SEAN M MC CORMICK   2.0           MI        853           1319
## 60                 JULIA SHEN   1.5           MI        967           1330
## 61              JEZZEL FARKAS   1.5           ON        955           1327
## 62              ASHWIN BALAJI   1.0           MI       1530           1186
## 63       THOMAS JOSEPH HOSMER   1.0           MI       1175           1350
## 64                     BEN LI   1.0           MI       1163           1263
# Create a .csv file

write.csv(data_chess, "ChessPlayerData.csv", row.names=FALSE)