Given a pipe-delimited text file with chess tournament results, generate a CSV file with each Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and the Average Pre-Tournament Chess Rating of the player’s Opponents.
Step 1: Load the required libraries. We need the stringr library to trim text and extract values with regular expressions. We also need the dplyr library to filter rows of the data frame.
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Step 2: Load the contents of tournamentinfo.txt as a |-separated file. Skip the first four rows because they make up the header, so reading starts at the fifth row. Create a list of column names and attach it to the data when it is read as a table from the text file. Finally, drop every row whose Name field is empty.
# Column names attached to the raw table. State, Ratings and Opponent are
# populated by later steps of this script.
columnNames <- c(
  "Number", "Name", "Points",
  "Round1", "Round2", "Round3", "Round4", "Round5", "Round6", "Round7",
  "State", "Ratings", "Opponent"
)

# Read the pipe-delimited file, skipping the first four (header) lines.
# `quote = ""` and `comment.char = ""` make read.table treat apostrophes,
# double quotes and `#` as ordinary text; with the defaults a name such as
# O'BRIEN would open a quoted string and swallow the separators after it.
# `fill = TRUE` pads lines that have fewer fields than `columnNames`.
tournamentInfo <- read.table(
  "tournamentinfo.txt",
  header = FALSE,
  skip = 4,
  sep = "|",
  fill = TRUE,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE,
  col.names = columnNames
)

# Drop rows whose Name field is empty (presumably the separator lines
# between player records -- confirm against the input file).
tournamentInfo <- filter(tournamentInfo, Name != "")
Step 3: Remove leading and trailing spaces from the number, name and points of each player. Then use regular expressions to extract meaningful text from the data frame. Each player occupies two consecutive rows, so work on the first row of every pair. Within that row, go through the seven round columns, eliminate the result letter and keep only the opponent's number. The three extra columns (State, Ratings and Opponent) were added when the file was read; copy the state and the pre-rating from the next line over to the State and Ratings columns.
# Each player occupies two consecutive rows of `tournamentInfo`: the first
# holds number, name, points and the seven round results; the second holds
# the state (in the Number column) and the rating text (in the Name column).
# Only the first row of each pair is cleaned here; the second rows are read
# but left unchanged.

# Odd-numbered rows that still have a following row.
row_ids <- seq_len(nrow(tournamentInfo))
first_rows <- row_ids[row_ids %% 2 == 1 & row_ids < nrow(tournamentInfo)]
second_rows <- first_rows + 1

# Strip the padding around the identifying fields.
tournamentInfo$Number[first_rows] <- str_trim(tournamentInfo$Number[first_rows])
tournamentInfo$Name[first_rows] <- str_trim(tournamentInfo$Name[first_rows])
tournamentInfo$Points[first_rows] <- str_trim(tournamentInfo$Points[first_rows])

# Keep only the opponent number from each round cell: whitespace followed by
# digits. Cells without a number become NA. The digit run is unbounded so
# opponent numbers of 100 and above are not cut off after two digits.
for (round_col in paste0("Round", 1:7)) {
  tournamentInfo[first_rows, round_col] <- str_trim(
    str_extract(tournamentInfo[first_rows, round_col], "\\s+[0-9]+")
  )
}

# The state sits in the Number column of the record's second row.
tournamentInfo$State[first_rows] <- str_trim(tournamentInfo$Number[second_rows])

# The pre-rating is the first whitespace-preceded run of digits in the
# trimmed Name field of the second row. Trimming first matters: it stops a
# leading space from making the number at the start of the field match.
rating_text <- str_trim(tournamentInfo$Name[second_rows])
tournamentInfo$Ratings[first_rows] <- str_trim(
  str_extract(rating_text, "\\s[0-9]+")
)
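Step 4: Remove the rows that are no longer required from the data frame using the dplyr filter command. Only the odd-numbered rows are kept, because the state and rating from each even-numbered row have already been copied into the row above it.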
# Keep the odd-numbered rows only: one row per player. The data from each
# even-numbered row was already copied into the row above it.
tournamentInfo_df <- tournamentInfo %>%
  filter(row_number() %% 2 == 1)
Step 5: Go through the data frame again to populate each player’s average opponent pre-rating
# Fill the Opponent column with the rounded mean pre-rating of the opponents
# each player faced.
#
# Opponents are looked up by matching their number against the Number column
# rather than using the number as a row position, so the result does not
# depend on the rows being in number order. Round cells that are NA are
# skipped. A player with no opponents gets NA instead of the NaN that 0 / 0
# would produce. An opponent number that is not in the table, or an opponent
# with a missing rating, makes that player's average NA.
round_cols <- paste0("Round", 1:7)
player_numbers <- as.numeric(tournamentInfo_df$Number)
pre_ratings <- as.numeric(tournamentInfo_df$Ratings)

tournamentInfo_df$Opponent <- vapply(
  seq_len(nrow(tournamentInfo_df)),
  function(player_row) {
    opponent_numbers <- as.numeric(unlist(tournamentInfo_df[player_row, round_cols]))
    opponent_numbers <- opponent_numbers[!is.na(opponent_numbers)]
    if (length(opponent_numbers) == 0) {
      return(NA_real_)
    }
    opponent_ratings <- pre_ratings[match(opponent_numbers, player_numbers)]
    round(sum(opponent_ratings) / length(opponent_ratings))
  },
  numeric(1)
)
Step 6: Print the data frame to verify the contents
# Number of player rows (first element of the data frame's dimensions).
print(dim(tournamentInfo_df)[1L])
## [1] 64
# Show the cleaned table; the print generic dispatches to the data frame method.
print(tournamentInfo_df)
##    Number                       Name Points Round1 Round2 Round3 Round4 Round5 Round6 Round7 State Ratings Opponent
## 1       1                   GARY HUA    6.0     39     21     18     14      7     12      4    ON    1794     1605
## 2       2            DAKSHESH DARURI    6.0     63     58      4     17     16     20      7    MI    1553     1469
## 3       3               ADITYA BAJAJ    6.0      8     61     25     21     11     13     12    MI    1384     1564
## 4       4        PATRICK H SCHILLING    5.5     23     28      2     26      5     19      1    MI    1716     1574
## 5       5                 HANSHI ZUO    5.5     45     37     12     13      4     14     17    MI    1655     1501
## 6       6                HANSEN SONG    5.0     34     29     11     35     10     27     21    OH    1686     1519
## 7       7          GARY DEE SWATHELL    5.0     57     46     13     11      1      9      2    MI    1649     1372
## 8       8           EZEKIEL HOUGHTON    5.0      3     32     14      9     47     28     19    MI    1641     1468
## 9       9                STEFANO LEE    5.0     25     18     59      8     26      7     20    ON    1411     1523
## 10     10                  ANVIT RAO    5.0     16     19     55     31      6     25     18    MI    1365     1554
## 11     11   CAMERON WILLIAM MC LEMAN    4.5     38     56      6      7      3     34     26    MI    1712     1468
## 12     12             KENNETH J TACK    4.5     42     33      5     38   <NA>      1      3    MI    1663     1506
## 13     13          TORRANCE HENRY JR    4.5     36     27      7      5     33      3     32    MI    1666     1498
## 14     14               BRADLEY SHAW    4.5     54     44      8      1     27      5     31    MI    1610     1515
## 15     15     ZACHARY JAMES HOUGHTON    4.5     19     16     30     22     54     33     38    MI    1220     1484
## 16     16               MIKE NIKITIN    4.0     10     15   <NA>     39      2     36   <NA>    MI    1604     1386
## 17     17         RONALD GRZEGORCZYK    4.0     48     41     26      2     23     22      5    MI    1629     1499
## 18     18              DAVID SUNDEEN    4.0     47      9      1     32     19     38     10    MI    1600     1480
## 19     19               DIPANKAR ROY    4.0     15     10     52     28     18      4      8    MI    1564     1426
## 20     20                JASON ZHENG    4.0     40     49     23     41     28      2      9    MI    1595     1411
## 21     21              DINH DANG BUI    4.0     43      1     47      3     40     39      6    ON    1563     1470
## 22     22           EUGENE L MCCLURE    4.0     64     52     28     15   <NA>     17     40    MI    1555     1300
## 23     23                   ALAN BUI    4.0      4     43     20     58     17     37     46    ON    1363     1214
## 24     24          MICHAEL R ALDRICH    4.0     28     47     43     25     60     44     39    MI    1229     1357
## 25     25           LOREN SCHWIEBERT    3.5      9     53      3     24     34     10     47    MI    1745     1363
## 26     26                    MAX ZHU    3.5     49     40     17      4      9     32     11    ON    1579     1507
## 27     27             GAURAV GIDWANI    3.5     51     13     46     37     14      6   <NA>    MI    1552     1222
## 28     28 SOFIA ADINA STANESCU-BELLU    3.5     24      4     22     19     20      8     36    MI    1507     1522
## 29     29           CHIEDOZIE OKORIE    3.5     50      6     38     34     52     48   <NA>    MI    1602     1314
## 30     30         GEORGE AVERY JONES    3.5     52     64     15     55     31     61     50    ON    1522     1144
## 31     31               RISHI SHETTY    3.5     58     55     64     10     30     50     14    MI    1494     1260
## 32     32      JOSHUA PHILIP MATHEWS    3.5     61      8     44     18     51     26     13    ON    1441     1379
## 33     33                    JADE GE    3.5     60     12     50     36     13     15     51    MI    1449     1277
## 34     34     MICHAEL JEFFERY THOMAS    3.5      6     60     37     29     25     11     52    MI    1399     1375
## 35     35           JOSHUA DAVID LEE    3.5     46     38     56      6     57     52     48    MI    1438     1150
## 36     36              SIDDHARTH JHA    3.5     13     57     51     33   <NA>     16     28    MI    1355     1388
## 37     37       AMIYATOSH PWNANANDAM    3.5   <NA>      5     34     27   <NA>     23     61    MI     980     1385
## 38     38                  BRIAN LIU    3.0     11     35     29     12   <NA>     18     15    MI    1423     1539
## 39     39              JOEL R HENDON    3.0      1     54     40     16     44     21     24    MI    1436     1430
## 40     40               FOREST ZHANG    3.0     20     26     39     59     21     56     22    MI    1348     1391
## 41     41        KYLE WILLIAM MURPHY    3.0     59     17     58     20   <NA>   <NA>   <NA>    MI    1403     1248
## 42     42                   JARED GE    3.0     12     50     57     60     61     64     56    MI    1332     1150
## 43     43          ROBERT GLEN VASEY    3.0     21     23     24     63     59     46     55    MI    1283     1107
## 44     44         JUSTIN D SCHILLING    3.0   <NA>     14     32     53     39     24     59    MI    1199     1327
## 45     45                  DEREK YAN    3.0      5     51     60     56     63     55     58    MI    1242     1152
## 46     46   JACOB ALEXANDER LAVALLEY    3.0     35      7     27     50     64     43     23    MI     377     1358
## 47     47                ERIC WRIGHT    2.5     18     24     21     61      8     51     25    MI    1362     1392
## 48     48               DANIEL KHAIN    2.5     17     63   <NA>     52   <NA>     29     35    MI    1382     1356
## 49     49           MICHAEL J MARTIN    2.5     26     20     63     64     58   <NA>   <NA>    MI    1291     1286
## 50     50                 SHIVAM JHA    2.5     29     42     33     46   <NA>     31     30    MI    1056     1296
## 51     51             TEJAS AYYAGARI    2.5     27     45     36     57     32     47     33    MI    1011     1356
## 52     52                  ETHAN GUO    2.5     30     22     19     48     29     35     34    MI     935     1495
## 53     53              JOSE C YBARRA    2.0   <NA>     25   <NA>     44   <NA>     57   <NA>    MI    1393     1345
## 54     54                LARRY HODGE    2.0     14     39     61   <NA>     15     59     64    MI    1270     1206
## 55     55                  ALEX KONG    2.0     62     31     10     30   <NA>     45     43    MI    1186     1406
## 56     56               MARISA RICCI    2.0   <NA>     11     35     45   <NA>     40     42    MI    1153     1414
## 57     57                 MICHAEL LU    2.0      7     36     42     51     35     53   <NA>    MI    1092     1363
## 58     58               VIRAJ MOHILE    2.0     31      2     41     23     49   <NA>     45    MI     917     1391
## 59     59          SEAN M MC CORMICK    2.0     41   <NA>      9     40     43     54     44    MI     853     1319
## 60     60                 JULIA SHEN    1.5     33     34     45     42     24   <NA>   <NA>    MI     967     1330
## 61     61              JEZZEL FARKAS    1.5     32      3     54     47     42     30     37    ON     955     1327
## 62     62              ASHWIN BALAJI    1.0     55   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    MI    1530     1186
## 63     63       THOMAS JOSEPH HOSMER    1.0      2     48     49     43     45   <NA>   <NA>    MI    1175     1350
## 64     64                     BEN LI    1.0     22     30     31     49     46     42     54    MI    1163     1263
Step 7: Construct the result data frame. The result needs only 5 columns, i.e. Player’s Name, Player’s State, Total Points, Pre-Rating and the Opponents’ average Pre-Rating. Write the data frame to a CSV file.
# Assemble the five report columns from the cleaned table and give them
# their report headings in one step.
resultInfo_df <- setNames(
  data.frame(
    tournamentInfo_df$Name,
    tournamentInfo_df$State,
    tournamentInfo_df$Points,
    tournamentInfo_df$Ratings,
    tournamentInfo_df$Opponent
  ),
  c("Name", "State", "Total Points", "Pre-Rating", "Avg Pre-Rating Opponents")
)

# Write the report as a comma-separated file without row names.
write.table(
  resultInfo_df,
  file = "tournamentResults.csv",
  sep = ",",
  row.names = FALSE
)
Step 8: Print the data frame to verify the contents
# Number of rows in the report (first element of the data frame's dimensions).
print(dim(resultInfo_df)[1L])
## [1] 64
# Show the report; the print generic dispatches to the data frame method.
print(resultInfo_df)
##                          Name State Total Points Pre-Rating Avg Pre-Rating Opponents
## 1                    GARY HUA    ON          6.0       1794                     1605
## 2             DAKSHESH DARURI    MI          6.0       1553                     1469
## 3                ADITYA BAJAJ    MI          6.0       1384                     1564
## 4         PATRICK H SCHILLING    MI          5.5       1716                     1574
## 5                  HANSHI ZUO    MI          5.5       1655                     1501
## 6                 HANSEN SONG    OH          5.0       1686                     1519
## 7           GARY DEE SWATHELL    MI          5.0       1649                     1372
## 8            EZEKIEL HOUGHTON    MI          5.0       1641                     1468
## 9                 STEFANO LEE    ON          5.0       1411                     1523
## 10                  ANVIT RAO    MI          5.0       1365                     1554
## 11   CAMERON WILLIAM MC LEMAN    MI          4.5       1712                     1468
## 12             KENNETH J TACK    MI          4.5       1663                     1506
## 13          TORRANCE HENRY JR    MI          4.5       1666                     1498
## 14               BRADLEY SHAW    MI          4.5       1610                     1515
## 15     ZACHARY JAMES HOUGHTON    MI          4.5       1220                     1484
## 16               MIKE NIKITIN    MI          4.0       1604                     1386
## 17         RONALD GRZEGORCZYK    MI          4.0       1629                     1499
## 18              DAVID SUNDEEN    MI          4.0       1600                     1480
## 19               DIPANKAR ROY    MI          4.0       1564                     1426
## 20                JASON ZHENG    MI          4.0       1595                     1411
## 21              DINH DANG BUI    ON          4.0       1563                     1470
## 22           EUGENE L MCCLURE    MI          4.0       1555                     1300
## 23                   ALAN BUI    ON          4.0       1363                     1214
## 24          MICHAEL R ALDRICH    MI          4.0       1229                     1357
## 25           LOREN SCHWIEBERT    MI          3.5       1745                     1363
## 26                    MAX ZHU    ON          3.5       1579                     1507
## 27             GAURAV GIDWANI    MI          3.5       1552                     1222
## 28 SOFIA ADINA STANESCU-BELLU    MI          3.5       1507                     1522
## 29           CHIEDOZIE OKORIE    MI          3.5       1602                     1314
## 30         GEORGE AVERY JONES    ON          3.5       1522                     1144
## 31               RISHI SHETTY    MI          3.5       1494                     1260
## 32      JOSHUA PHILIP MATHEWS    ON          3.5       1441                     1379
## 33                    JADE GE    MI          3.5       1449                     1277
## 34     MICHAEL JEFFERY THOMAS    MI          3.5       1399                     1375
## 35           JOSHUA DAVID LEE    MI          3.5       1438                     1150
## 36              SIDDHARTH JHA    MI          3.5       1355                     1388
## 37       AMIYATOSH PWNANANDAM    MI          3.5        980                     1385
## 38                  BRIAN LIU    MI          3.0       1423                     1539
## 39              JOEL R HENDON    MI          3.0       1436                     1430
## 40               FOREST ZHANG    MI          3.0       1348                     1391
## 41        KYLE WILLIAM MURPHY    MI          3.0       1403                     1248
## 42                   JARED GE    MI          3.0       1332                     1150
## 43          ROBERT GLEN VASEY    MI          3.0       1283                     1107
## 44         JUSTIN D SCHILLING    MI          3.0       1199                     1327
## 45                  DEREK YAN    MI          3.0       1242                     1152
## 46   JACOB ALEXANDER LAVALLEY    MI          3.0        377                     1358
## 47                ERIC WRIGHT    MI          2.5       1362                     1392
## 48               DANIEL KHAIN    MI          2.5       1382                     1356
## 49           MICHAEL J MARTIN    MI          2.5       1291                     1286
## 50                 SHIVAM JHA    MI          2.5       1056                     1296
## 51             TEJAS AYYAGARI    MI          2.5       1011                     1356
## 52                  ETHAN GUO    MI          2.5        935                     1495
## 53              JOSE C YBARRA    MI          2.0       1393                     1345
## 54                LARRY HODGE    MI          2.0       1270                     1206
## 55                  ALEX KONG    MI          2.0       1186                     1406
## 56               MARISA RICCI    MI          2.0       1153                     1414
## 57                 MICHAEL LU    MI          2.0       1092                     1363
## 58               VIRAJ MOHILE    MI          2.0        917                     1391
## 59          SEAN M MC CORMICK    MI          2.0        853                     1319
## 60                 JULIA SHEN    MI          1.5        967                     1330
## 61              JEZZEL FARKAS    ON          1.5        955                     1327
## 62              ASHWIN BALAJI    MI          1.0       1530                     1186
## 63       THOMAS JOSEPH HOSMER    MI          1.0       1175                     1350
## 64                     BEN LI    MI          1.0       1163                     1263