Load libraries

library(RCurl)
library(data.table)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
library(readr)
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Read the tournament text file

# Location of the raw tournament text file.
input_path <- "F:\\CUNY masters\\project1_data_607\\7645617.txt"

# Load the file as a character vector (one element per line) and preview the
# first ten entries.
lines <- read_lines(input_path)
lines[seq_len(10)]
##  [1] "-----------------------------------------------------------------------------------------" 
##  [2] " Pair | Player Name                     |Total|Round|Round|Round|Round|Round|Round|Round| "
##  [3] " Num  | USCF ID / Rtg (Pre->Post)       | Pts |  1  |  2  |  3  |  4  |  5  |  6  |  7  | "
##  [4] "-----------------------------------------------------------------------------------------" 
##  [5] "    1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|" 
##  [6] "   ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |" 
##  [7] "-----------------------------------------------------------------------------------------" 
##  [8] "    2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|" 
##  [9] "   MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |" 
## [10] "-----------------------------------------------------------------------------------------"

Create two data frames from the text file: one with player data and one with per-round data

# Parse the tournament table into two objects:
#   chess_player - character matrix, one row per player, with the columns
#                  (pair number, name, state, total points, pre-rating)
#   chess_round  - data frame, one row per player per round, with the raw
#                  round field, the player's pair number and pre-rating
#
# Each player record spans three lines: a result line that starts with the
# pair number, a detail line (state, USCF ID, ratings), and a dashed
# separator. Result lines are located by pattern rather than by a hard-coded
# line range, so the code does not depend on the number of players.
player_rows <- grep("^\\s*[0-9]+\\s*\\|", lines)
# A result line is only usable when its detail line follows it.
player_rows <- player_rows[player_rows < length(lines)]

parsed_players <- lapply(player_rows, function(i) {
  row5 <- unlist(strsplit(lines[i], "\\|"))
  row6 <- unlist(strsplit(lines[i + 1], "\\|"))

  player_num <- as.numeric(gsub(" ", "", row5[1]))
  player_name <- str_trim(row5[2])
  total_point <- str_trim(row5[3])
  player_state <- str_trim(row6[1])
  # In the detail field the first digit run is the USCF ID and the second is
  # the pre-tournament rating.
  player_pre_rating <- unlist(str_extract_all(row6[2], "[[:digit:]]+"))[2]

  # Everything after the first three fields is a round result; taking all of
  # them (instead of fields 4:10) supports any number of rounds. Blank fields
  # are dropped so stray text after the last "|" is not treated as a round.
  round_fields <- row5[-(1:3)]
  round_fields <- round_fields[nzchar(str_trim(round_fields))]

  list(
    # c() coerces the numeric pair number to character, so every player row
    # is a character vector and the stacked result is a character matrix.
    player = c(player_num, player_name, player_state, total_point,
               player_pre_rating),
    rounds = data.frame(
      round_field = round_fields,
      player_num = player_num,
      player_pre_rating = player_pre_rating,
      stringsAsFactors = FALSE
    )
  )
})

# Bind once at the end instead of growing the objects inside a loop.
chess_player <- do.call(rbind, lapply(parsed_players, `[[`, "player"))
chess_round <- do.call(rbind, lapply(parsed_players, `[[`, "rounds"))

Get the player data into a data frame

# Turn the character matrix of parsed players into a data frame with named
# columns.
df2 <- data.frame(chess_player, stringsAsFactors = FALSE)
colnames(df2) <- c("player_num", "player_name", "player_state", "total_point", "player_pre_rating")

# The matrix stores everything as text. Convert the numeric fields so that
# later sorting and joining on player_num is numeric (1, 2, 3, ...) rather
# than lexicographic (1, 10, 11, ...).
numeric_cols <- c("player_num", "total_point", "player_pre_rating")
df2[numeric_cols] <- lapply(df2[numeric_cols], as.numeric)
head(df2)
##   player_num         player_name player_state total_point player_pre_rating
## 1          1            GARY HUA           ON         6.0              1794
## 2          2     DAKSHESH DARURI           MI         6.0              1553
## 3          3        ADITYA BAJAJ           MI         6.0              1384
## 4          4 PATRICK H SCHILLING           MI         5.5              1716
## 5          5          HANSHI ZUO           MI         5.5              1655
## 6          6         HANSEN SONG           OH         5.0              1686

Get the round data into a data frame by doing some string cleaning and data conversion.

colnames(chess_round) <- c("win_loss_opponent_id", "player_num", "player_pre_rating")

# A round field looks like "W  39": a result code, padding, and (when a game
# was played) the opponent's pair number. Split each field once on runs of
# whitespace and reuse the pieces for both derived columns.
round_parts <- strsplit(as.character(chess_round$win_loss_opponent_id), "\\s+")

# vapply() guarantees a character vector regardless of the input; a field
# without a second piece yields NA, which as.numeric() keeps as NA.
chess_round$win_loss <- vapply(round_parts, function(p) p[1], character(1))
chess_round$opponent_id <- as.numeric(
  vapply(round_parts, function(p) p[2], character(1))
)

# The pre-rating was carried through parsing as text.
chess_round$player_pre_rating <- as.numeric(chess_round$player_pre_rating)

# Keep only the cleaned columns, dropping the raw round field.
chess_round <- chess_round[, c("player_num", "win_loss", "opponent_id", "player_pre_rating")]
head(chess_round)
##   player_num win_loss opponent_id player_pre_rating
## 1          1        W          39              1794
## 2          1        W          21              1794
## 3          1        W          18              1794
## 4          1        W          14              1794
## 5          1        W           7              1794
## 6          1        D          12              1794

Calculate each player's average opponent pre-rating and merge it into the player data frame

# Columns kept in the final report, in output order.
col_01 <- c("player_name", "player_state", "total_point", "player_pre_rating", "avg")

# For every pair number that appears as an opponent, average the pre-ratings
# of the players who faced it. NOTE(review): this equals the player's average
# opponent pre-rating only if each played game is listed on both participants'
# rows - confirm against the input file.
# Rows without an opponent number (NA opponent_id) are excluded, and the mean
# is rounded to the nearest integer rather than truncated.
avg_pre <- chess_round %>%
  filter(!is.na(opponent_id)) %>%
  group_by(opponent_id) %>%
  summarise(avg = as.integer(round(mean(player_pre_rating))), .groups = "drop")

# merge() sorts the result by its key. A character key sorts lexicographically
# (1, 10, 11, ..., 2, 20, ...), so make the key numeric first to keep players
# in pair-number order.
df2$player_num <- as.numeric(as.character(df2$player_num))

# all.x = TRUE keeps players who have no played game; their avg is NA.
df2 <- merge(df2, avg_pre, by.x = "player_num", by.y = "opponent_id",
             all.x = TRUE)[, col_01]

# Store points and ratings as numbers rather than text.
df2$total_point <- as.numeric(as.character(df2$total_point))
df2$player_pre_rating <- as.numeric(as.character(df2$player_pre_rating))
head(df2)
##                player_name player_state total_point player_pre_rating  avg
## 1                 GARY HUA           ON         6.0              1794 1605
## 2                ANVIT RAO           MI         5.0              1365 1554
## 3 CAMERON WILLIAM MC LEMAN           MI         4.5              1712 1467
## 4           KENNETH J TACK           MI         4.5              1663 1506
## 5        TORRANCE HENRY JR           MI         4.5              1666 1497
## 6             BRADLEY SHAW           MI         4.5              1610 1515

Save result into a csv file and read the csv file to check

# Write the report without row names; otherwise write.csv() adds an unnamed
# index column that read.csv() brings back as a spurious "X" column.
write.csv(df2, "chess_game_file.csv", row.names = FALSE)

# Read the file back to check what was written.
csv_file <- read.csv("chess_game_file.csv")
csv_file
##     X                player_name player_state total_point player_pre_rating
## 1   1                   GARY HUA           ON         6.0              1794
## 2   2                  ANVIT RAO           MI         5.0              1365
## 3   3   CAMERON WILLIAM MC LEMAN           MI         4.5              1712
## 4   4             KENNETH J TACK           MI         4.5              1663
## 5   5          TORRANCE HENRY JR           MI         4.5              1666
## 6   6               BRADLEY SHAW           MI         4.5              1610
## 7   7     ZACHARY JAMES HOUGHTON           MI         4.5              1220
## 8   8               MIKE NIKITIN           MI         4.0              1604
## 9   9         RONALD GRZEGORCZYK           MI         4.0              1629
## 10 10              DAVID SUNDEEN           MI         4.0              1600
## 11 11               DIPANKAR ROY           MI         4.0              1564
## 12 12            DAKSHESH DARURI           MI         6.0              1553
## 13 13                JASON ZHENG           MI         4.0              1595
## 14 14              DINH DANG BUI           ON         4.0              1563
## 15 15           EUGENE L MCCLURE           MI         4.0              1555
## 16 16                   ALAN BUI           ON         4.0              1363
## 17 17          MICHAEL R ALDRICH           MI         4.0              1229
## 18 18           LOREN SCHWIEBERT           MI         3.5              1745
## 19 19                    MAX ZHU           ON         3.5              1579
## 20 20             GAURAV GIDWANI           MI         3.5              1552
## 21 21 SOFIA ADINA STANESCU-BELLU           MI         3.5              1507
## 22 22           CHIEDOZIE OKORIE           MI         3.5              1602
## 23 23               ADITYA BAJAJ           MI         6.0              1384
## 24 24         GEORGE AVERY JONES           ON         3.5              1522
## 25 25               RISHI SHETTY           MI         3.5              1494
## 26 26      JOSHUA PHILIP MATHEWS           ON         3.5              1441
## 27 27                    JADE GE           MI         3.5              1449
## 28 28     MICHAEL JEFFERY THOMAS           MI         3.5              1399
## 29 29           JOSHUA DAVID LEE           MI         3.5              1438
## 30 30              SIDDHARTH JHA           MI         3.5              1355
## 31 31       AMIYATOSH PWNANANDAM           MI         3.5               980
## 32 32                  BRIAN LIU           MI         3.0              1423
## 33 33              JOEL R HENDON           MI         3.0              1436
## 34 34        PATRICK H SCHILLING           MI         5.5              1716
## 35 35               FOREST ZHANG           MI         3.0              1348
## 36 36        KYLE WILLIAM MURPHY           MI         3.0              1403
## 37 37                   JARED GE           MI         3.0              1332
## 38 38          ROBERT GLEN VASEY           MI         3.0              1283
## 39 39         JUSTIN D SCHILLING           MI         3.0              1199
## 40 40                  DEREK YAN           MI         3.0              1242
## 41 41   JACOB ALEXANDER LAVALLEY           MI         3.0               377
## 42 42                ERIC WRIGHT           MI         2.5              1362
## 43 43               DANIEL KHAIN           MI         2.5              1382
## 44 44           MICHAEL J MARTIN           MI         2.5              1291
## 45 45                 HANSHI ZUO           MI         5.5              1655
## 46 46                 SHIVAM JHA           MI         2.5              1056
## 47 47             TEJAS AYYAGARI           MI         2.5              1011
## 48 48                  ETHAN GUO           MI         2.5               935
## 49 49              JOSE C YBARRA           MI         2.0              1393
## 50 50                LARRY HODGE           MI         2.0              1270
## 51 51                  ALEX KONG           MI         2.0              1186
## 52 52               MARISA RICCI           MI         2.0              1153
## 53 53                 MICHAEL LU           MI         2.0              1092
## 54 54               VIRAJ MOHILE           MI         2.0               917
## 55 55          SEAN M MC CORMICK           MI         2.0               853
## 56 56                HANSEN SONG           OH         5.0              1686
## 57 57                 JULIA SHEN           MI         1.5               967
## 58 58              JEZZEL FARKAS           ON         1.5               955
## 59 59              ASHWIN BALAJI           MI         1.0              1530
## 60 60       THOMAS JOSEPH HOSMER           MI         1.0              1175
## 61 61                     BEN LI           MI         1.0              1163
## 62 62          GARY DEE SWATHELL           MI         5.0              1649
## 63 63           EZEKIEL HOUGHTON           MI         5.0              1641
## 64 64                STEFANO LEE           ON         5.0              1411
##     avg
## 1  1605
## 2  1554
## 3  1467
## 4  1506
## 5  1497
## 6  1515
## 7  1483
## 8  1385
## 9  1498
## 10 1480
## 11 1426
## 12 1469
## 13 1410
## 14 1470
## 15 1300
## 16 1213
## 17 1357
## 18 1363
## 19 1506
## 20 1221
## 21 1522
## 22 1313
## 23 1563
## 24 1144
## 25 1259
## 26 1378
## 27 1276
## 28 1375
## 29 1149
## 30 1388
## 31 1384
## 32 1539
## 33 1429
## 34 1573
## 35 1390
## 36 1248
## 37 1149
## 38 1106
## 39 1327
## 40 1152
## 41 1357
## 42 1392
## 43 1355
## 44 1285
## 45 1500
## 46 1296
## 47 1356
## 48 1494
## 49 1345
## 50 1206
## 51 1406
## 52 1414
## 53 1363
## 54 1391
## 55 1319
## 56 1518
## 57 1330
## 58 1327
## 59 1186
## 60 1350
## 61 1263
## 62 1372
## 63 1468
## 64 1523