Read Txt File in R

# Show the current working directory. This is informational only: the input
# file below is read from an absolute path, not relative to this directory.
getwd()
## [1] "/Users/aaronzalki/Documents/finalcode/data607"
# Read the raw tournament table into a character vector, one element per line
# of the text file. NOTE(review): the path is specific to one machine; adjust
# it before running elsewhere.
Chess_RawData <- readLines("/Users/aaronzalki/Desktop/tournamentinfo.txt")
## Warning in readLines("/Users/aaronzalki/Desktop/tournamentinfo.txt"):
## incomplete final line found on '/Users/aaronzalki/Desktop/
## tournamentinfo.txt'
# Preview the first 10 raw lines to see the repeating three-line record layout.
head(Chess_RawData, 10)
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

As seen above, starting at row 5, every third row holds a player's pair number, name, total points, and round results; starting at row 6, every third row holds that player's state, USCF ID, and pre/post ratings.

Player

# Line indices of the player rows. They are located by their shape (a numeric
# pair number in the first "|"-delimited column) instead of a hard-coded last
# line number, so the selection still works for a different number of players.
player_names <- grep("^\\s*[[:digit:]]+\\s*[|]", Chess_RawData)
# Raw text of each player row: pair number, name, total points, round results.
names_and_rounds <- Chess_RawData[player_names]
head(names_and_rounds)
## [1] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|"
## [2] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|"
## [3] "    3 | ADITYA BAJAJ                    |6.0  |L   8|W  61|W  25|W  21|W  11|W  13|W  12|"
## [4] "    4 | PATRICK H SCHILLING             |5.5  |W  23|D  28|W   2|W  26|D   5|W  19|D   1|"
## [5] "    5 | HANSHI ZUO                      |5.5  |W  45|W  37|D  12|D  13|D   4|W  14|W  17|"
## [6] "    6 | HANSEN SONG                     |5.0  |W  34|D  29|L  11|W  35|D  10|W  27|W  21|"

State & Ratings

# Line indices of the state/rating rows. They are located by their shape (a
# two-letter upper-case code in the first "|"-delimited column) instead of a
# hard-coded last line number, so the selection adapts to the file length.
st_rate <- grep("^\\s*[[:upper:]]{2}\\s*[|]", Chess_RawData)
# Raw text of each state row: state code, USCF ID, and pre/post ratings.
state_and_ratings <- Chess_RawData[st_rate]
head(state_and_ratings)
## [1] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [2] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [3] "   MI | 14959604 / R: 1384   ->1640     |N:2  |W    |B    |W    |B    |W    |B    |W    |"
## [4] "   MI | 12616049 / R: 1716   ->1744     |N:2  |W    |B    |W    |B    |W    |B    |B    |"
## [5] "   MI | 14601533 / R: 1655   ->1690     |N:2  |B    |W    |B    |W    |B    |W    |B    |"
## [6] "   OH | 15055204 / R: 1686   ->1687     |N:3  |W    |B    |W    |B    |B    |W    |B    |"

Extraction (Names, States, Ratings, Rounds)

library(stringr)
# The player name is the second "|"-delimited field of each player row.
# Splitting on the delimiter and trimming yields exactly one name per row,
# whatever the number of words in the name, and leaves no trailing whitespace
# (the previous word-count regex kept a trailing space on four-word names).
name_fields <- str_split_fixed(names_and_rounds, fixed("|"), 3)[, 2]
extracted_names <- str_trim(name_fields)
head(extracted_names)
## [1] "GARY HUA"            "DAKSHESH DARURI"     "ADITYA BAJAJ"       
## [4] "PATRICK H SCHILLING" "HANSHI ZUO"          "HANSEN SONG"
# The state code is the first run of two upper-case letters on each state row.
# str_extract() returns exactly one value per row (NA when absent), and the
# pattern no longer captures the surrounding whitespace.
extracted_states <- str_extract(state_and_ratings, "[[:upper:]]{2}")
head(extracted_states)
## [1] "ON" "MI" "MI" "MI" "MI" "OH"
# Pre-tournament rating: the digits that follow "R:" on each state row.
# str_match() returns one row per input line (NA when there is no match), so
# the result stays aligned with the players; column 2 is the capture group.
preratings <- str_match(state_and_ratings, "R:\\s*([[:digit:]]+)")[, 2]
head(preratings)
## [1] "1794" "1553" "1384" "1716" "1655" "1686"
# Post-tournament rating: the digits that follow "->" on each state row.
# str_match() returns one row per input line (NA when there is no match), so
# the result stays aligned with the players; column 2 is the capture group.
postratings <- str_match(state_and_ratings, "->\\s*([[:digit:]]+)")[, 2]
head(postratings)
## [1] "1817" "1663" "1640" "1744" "1690" "1687"
# Each player row is "|"-delimited: pair number, name, total points, and then
# one field per round (for example "W  39"). Fields 4 to 10 are rounds 1 to 7.
round_fields <- str_split_fixed(
  names_and_rounds, fixed("|"), 11
)[, 4:10, drop = FALSE]

# Opponent pair number (as a character string) for one round, one value per
# player. Fields that contain no digits yield NA, which as.numeric() keeps as
# NA downstream.
extract_opponent <- function(round_number) {
  str_extract(round_fields[, round_number], "[[:digit:]]+")
}

round1 <- extract_opponent(1)
round2 <- extract_opponent(2)
round3 <- extract_opponent(3)
round4 <- extract_opponent(4)
round5 <- extract_opponent(5)
round6 <- extract_opponent(6)
round7 <- extract_opponent(7)

head(round1)
## [1] "39" "63" "8"  "23" "45" "34"

Data Frame Without the Opponents' Average Pre-Rating

# One id per extracted player rather than a hard-coded count of 64.
id <- seq_along(extracted_names)

# Total points are the first decimal number on each player row (e.g. "6.0").
# They are parsed here because no `TableChess` object is created anywhere in
# the visible script.
total_points <- as.numeric(
  str_extract(names_and_rounds, "[[:digit:]]+[.][[:digit:]]+")
)

# Assemble one row per player. check.names = FALSE keeps the column names
# containing spaces exactly as written, so no separate renaming step is needed.
Chess_No_Avg <- data.frame(
  "Id" = id,
  "Player Name" = extracted_names,
  "State" = extracted_states,
  "Total Points" = total_points,
  "Pre Rating" = as.numeric(preratings),
  "Post Rating" = as.numeric(postratings),
  "Round 1" = as.numeric(round1),
  "Round 2" = as.numeric(round2),
  "Round 3" = as.numeric(round3),
  "Round 4" = as.numeric(round4),
  "Round 5" = as.numeric(round5),
  "Round 6" = as.numeric(round6),
  "Round 7" = as.numeric(round7),
  check.names = FALSE
)

head(Chess_No_Avg)
##   Id         Player Name State Total Points Pre Rating Post Rating Round 1
## 1  1            GARY HUA   ON           6.0       1794        1817      39
## 2  2     DAKSHESH DARURI   MI           6.0       1553        1663      63
## 3  3        ADITYA BAJAJ   MI           6.0       1384        1640       8
## 4  4 PATRICK H SCHILLING   MI           5.5       1716        1744      23
## 5  5          HANSHI ZUO   MI           5.5       1655        1690      45
## 6  6         HANSEN SONG   OH           5.0       1686        1687      34
##   Round 2 Round 3 Round 4 Round 5 Round 6 Round 7
## 1      21      18      14       7      12       4
## 2      58       4      17      16      20       7
## 3      61      25      21      11      13      12
## 4      28       2      26       5      19       1
## 5      37      12      13       4      14      17
## 6      29      11      35      10      27      21

Calculating the Average Pre-Rating of Each Player's Opponents

# For every player, average the pre-ratings of the opponents faced.
#
# An opponent of `player_id` in a given round is any row whose entry in that
# round's column equals `player_id`. which() skips NA entries in the round
# columns, and na.rm = TRUE skips NA ratings, so neither counts toward the
# mean. Ratings are gathered round by round (Round 1 first), matching the
# order used before. vapply() builds the result in one pass instead of growing
# a vector with c(), and no variable named `c` masks base::c().
round_columns <- paste("Round", 1:7)
pre_ratings <- Chess_No_Avg[["Pre Rating"]]

Avg_PreRating <- vapply(
  Chess_No_Avg$Id,
  function(player_id) {
    faced <- unlist(lapply(round_columns, function(round_column) {
      pre_ratings[which(Chess_No_Avg[[round_column]] == player_id)]
    }))
    mean(faced, na.rm = TRUE)
  },
  numeric(1)
)

Data Frame That Includes the Opponent Average Calculated Above

# Attach the per-player opponent average as a new column, then keep only the
# columns that belong in the exported table and print all 64 rows.
Chess_No_Avg[["Rival Average Pre Rating"]] <- Avg_PreRating
FinalDataCSV <- Chess_No_Avg[
  ,
  c(
    "Id", "Player Name", "State", "Total Points", "Pre Rating",
    "Rival Average Pre Rating"
  ),
  drop = FALSE
]
head(FinalDataCSV, n = 64)
##    Id               Player Name State Total Points Pre Rating
## 1   1                  GARY HUA   ON           6.0       1794
## 2   2           DAKSHESH DARURI   MI           6.0       1553
## 3   3              ADITYA BAJAJ   MI           6.0       1384
## 4   4       PATRICK H SCHILLING   MI           5.5       1716
## 5   5                HANSHI ZUO   MI           5.5       1655
## 6   6               HANSEN SONG   OH           5.0       1686
## 7   7         GARY DEE SWATHELL   MI           5.0       1649
## 8   8          EZEKIEL HOUGHTON   MI           5.0       1641
## 9   9               STEFANO LEE   ON           5.0       1411
## 10 10                 ANVIT RAO   MI           5.0       1365
## 11 11 CAMERON WILLIAM MC LEMAN    MI           4.5       1712
## 12 12            KENNETH J TACK   MI           4.5       1663
## 13 13         TORRANCE HENRY JR   MI           4.5       1666
## 14 14              BRADLEY SHAW   MI           4.5       1610
## 15 15    ZACHARY JAMES HOUGHTON   MI           4.5       1220
## 16 16              MIKE NIKITIN   MI           4.0       1604
## 17 17        RONALD GRZEGORCZYK   MI           4.0       1629
## 18 18             DAVID SUNDEEN   MI           4.0       1600
## 19 19              DIPANKAR ROY   MI           4.0       1564
## 20 20               JASON ZHENG   MI           4.0       1595
## 21 21             DINH DANG BUI   ON           4.0       1563
## 22 22          EUGENE L MCCLURE   MI           4.0       1555
## 23 23                  ALAN BUI   ON           4.0       1363
## 24 24         MICHAEL R ALDRICH   MI           4.0       1229
## 25 25          LOREN SCHWIEBERT   MI           3.5       1745
## 26 26                   MAX ZHU   ON           3.5       1579
## 27 27            GAURAV GIDWANI   MI           3.5       1552
## 28 28      SOFIA ADINA STANESCU   MI           3.5       1507
## 29 29          CHIEDOZIE OKORIE   MI           3.5       1602
## 30 30        GEORGE AVERY JONES   ON           3.5       1522
## 31 31              RISHI SHETTY   MI           3.5       1494
## 32 32     JOSHUA PHILIP MATHEWS   ON           3.5       1441
## 33 33                   JADE GE   MI           3.5       1449
## 34 34    MICHAEL JEFFERY THOMAS   MI           3.5       1399
## 35 35          JOSHUA DAVID LEE   MI           3.5       1438
## 36 36             SIDDHARTH JHA   MI           3.5       1355
## 37 37      AMIYATOSH PWNANANDAM   MI           3.5        980
## 38 38                 BRIAN LIU   MI           3.0       1423
## 39 39             JOEL R HENDON   MI           3.0       1436
## 40 40              FOREST ZHANG   MI           3.0       1348
## 41 41       KYLE WILLIAM MURPHY   MI           3.0       1403
## 42 42                  JARED GE   MI           3.0       1332
## 43 43         ROBERT GLEN VASEY   MI           3.0       1283
## 44 44        JUSTIN D SCHILLING   MI           3.0       1199
## 45 45                 DEREK YAN   MI           3.0       1242
## 46 46  JACOB ALEXANDER LAVALLEY   MI           3.0        377
## 47 47               ERIC WRIGHT   MI           2.5       1362
## 48 48              DANIEL KHAIN   MI           2.5       1382
## 49 49          MICHAEL J MARTIN   MI           2.5       1291
## 50 50                SHIVAM JHA   MI           2.5       1056
## 51 51            TEJAS AYYAGARI   MI           2.5       1011
## 52 52                 ETHAN GUO   MI           2.5        935
## 53 53             JOSE C YBARRA   MI           2.0       1393
## 54 54               LARRY HODGE   MI           2.0       1270
## 55 55                 ALEX KONG   MI           2.0       1186
## 56 56              MARISA RICCI   MI           2.0       1153
## 57 57                MICHAEL LU   MI           2.0       1092
## 58 58              VIRAJ MOHILE   MI           2.0        917
## 59 59        SEAN M MC CORMICK    MI           2.0        853
## 60 60                JULIA SHEN   MI           1.5        967
## 61 61             JEZZEL FARKAS   ON           1.5        955
## 62 62             ASHWIN BALAJI   MI           1.0       1530
## 63 63      THOMAS JOSEPH HOSMER   MI           1.0       1175
## 64 64                    BEN LI   MI           1.0       1163
##    Rival Average Pre Rating
## 1                  1605.286
## 2                  1469.286
## 3                  1563.571
## 4                  1573.571
## 5                  1500.857
## 6                  1518.714
## 7                  1372.143
## 8                  1468.429
## 9                  1523.143
## 10                 1554.143
## 11                 1467.571
## 12                 1506.167
## 13                 1497.857
## 14                 1515.000
## 15                 1483.857
## 16                 1385.800
## 17                 1498.571
## 18                 1480.000
## 19                 1426.286
## 20                 1410.857
## 21                 1470.429
## 22                 1300.333
## 23                 1213.857
## 24                 1357.000
## 25                 1363.286
## 26                 1506.857
## 27                 1221.667
## 28                 1522.143
## 29                 1313.500
## 30                 1144.143
## 31                 1259.857
## 32                 1378.714
## 33                 1276.857
## 34                 1375.286
## 35                 1149.714
## 36                 1388.167
## 37                 1384.800
## 38                 1539.167
## 39                 1429.571
## 40                 1390.571
## 41                 1248.500
## 42                 1149.857
## 43                 1106.571
## 44                 1327.000
## 45                 1152.000
## 46                 1357.714
## 47                 1392.000
## 48                 1355.800
## 49                 1285.800
## 50                 1296.000
## 51                 1356.143
## 52                 1494.571
## 53                 1345.333
## 54                 1206.167
## 55                 1406.000
## 56                 1414.400
## 57                 1363.000
## 58                 1391.000
## 59                 1319.000
## 60                 1330.200
## 61                 1327.286
## 62                 1186.000
## 63                 1350.200
## 64                 1263.000

Generate CSV File

# Write the final table to the working directory. row.names = FALSE omits the
# automatic row-number column, which would otherwise duplicate the Id column
# under an empty header.
write.csv(FinalDataCSV, "Chess Tournament.csv", row.names = FALSE)