Setup

library(dplyr)
library(stringr)

Load tournament data

# Read the raw tournament report as text, one file line per row in column V1.
# sep = "\t" is used so that a whole line lands in a single field, and
# skip = 4 drops the first four lines of the file (presumably the report
# header -- confirm against the file).
# quote = "" and comment.char = "" stop read.table from treating apostrophes,
# double quotes or "#" inside a line as quoting/comment syntax, which would
# otherwise merge or truncate player records.
data <- read.table(
  "tournamentinfo.txt",
  sep = "\t",
  skip = 4,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)
head(data, 4)
##                                                                                          V1
## 1     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 2    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 3 -----------------------------------------------------------------------------------------
## 4     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
# Drop the dashed separator lines so that only player record lines remain.
data <- data %>%
  filter(!grepl("----------------", V1, fixed = TRUE))
head(data, n = 4)
##                                                                                          V1
## 1     1 | GARY HUA                        |6.0  |W  39|W  21|W  18|W  14|W   7|D  12|D   4|
## 2    ON | 15445895 / R: 1794   ->1817     |N:2  |W    |B    |W    |B    |W    |B    |W    |
## 3     2 | DAKSHESH DARURI                 |6.0  |W  63|W  58|L   4|W  17|W  16|W  20|W   7|
## 4    MI | 14598900 / R: 1553   ->1663     |N:2  |B    |W    |B    |W    |B    |W    |B    |

Create a dataframe from the first line of each player’s record (pair number, name, total points, round results)

# Odd-numbered lines hold the first line of each player's record.
data1 <- data[seq(from = 1, to = nrow(data), by = 2), ]

# Split every line on the literal "|" delimiter, stack the pieces into one row
# per player, and prepend a running ID used later as the join key.
data1_df <- tibble::rowid_to_column(
  data.frame(
    do.call("rbind", strsplit(as.character(data1), split = "|", fixed = TRUE))
  ),
  var = "ID"
)
head(data1_df, n = 3)
##   ID     X1                                X2    X3    X4    X5    X6    X7
## 1  1     1   GARY HUA                         6.0   W  39 W  21 W  18 W  14
## 2  2     2   DAKSHESH DARURI                  6.0   W  63 W  58 L   4 W  17
## 3  3     3   ADITYA BAJAJ                     6.0   L   8 W  61 W  25 W  21
##      X8    X9   X10
## 1 W   7 D  12 D   4
## 2 W  16 W  20 W   7
## 3 W  11 W  13 W  12

Create a dataframe from the second line of each player’s record (state, USCF ID and ratings, colours played)

# Even-numbered lines hold the second line of each player's record.
data2 <- data[seq(from = 2, to = nrow(data), by = 2), ]

# Split every line on the literal "|" delimiter, stack the pieces into one row
# per player, and prepend a running ID used later as the join key.
data2_df <- tibble::rowid_to_column(
  data.frame(
    do.call("rbind", strsplit(as.character(data2), split = "|", fixed = TRUE))
  ),
  var = "ID"
)
head(data2_df, n = 3)
##   ID     X1                                X2    X3    X4    X5    X6    X7
## 1  1    ON   15445895 / R: 1794   ->1817      N:2   W     B     W     B    
## 2  2    MI   14598900 / R: 1553   ->1663      N:2   B     W     B     W    
## 3  3    MI   14959604 / R: 1384   ->1640      N:2   W     B     W     B    
##      X8    X9   X10
## 1 W     B     W    
## 2 B     W     B    
## 3 W     B     W

Merge to one dataframe with a row for each tournament player

# Join the two half-records on the running ID so each player is a single row.
data3_df <- merge(x = data1_df, y = data2_df, by = "ID")

# Rename the 20 split fields to v1..v20: v1-v10 come from the first record
# line, v11-v20 from the second.
colnames(data3_df) <- c("ID", paste0("v", 1:20))

Extract each player’s pre-rating from the field containing this data

# The rating field (v12) looks like " 15445895 / R: 1794   ->1817". Capture the
# run of digits that follows "R:" instead of relying on fixed character
# positions, and store it as an integer so no padded text (" 1794") leaks into
# later steps. A field without an "R: <digits>" part yields NA (with a
# coercion warning).
data3_df$pre_rating <- as.integer(
  sub("^.*R:\\s*([0-9]+).*$", "\\1", as.character(data3_df$v12))
)

Calculate the average pre-tournament rating of each player’s opponents

# Round-result fields v4..v10 look like "W  39" (result letter + opponent pair
# number). Extract the full run of digits rather than the last two characters
# so that pair numbers of 100 or more are not truncated. Fields with no
# opponent number give NA.
# unname() keeps the new columns named opp1..opp7 regardless of the source
# column names.
data3_df[paste0("opp", 1:7)] <- unname(lapply(
  data3_df[paste0("v", 4:10)],
  function(result) as.integer(str_extract(result, "[0-9]+"))
))
# Lookup table from pair number to pre-tournament rating.
# Both columns go through as.character() before as.integer() so that a factor
# column is converted by its label, not by its internal level code; trimws()
# removes the padding left over from the fixed-width source fields.
player_prerating_df <- data.frame(
  pair_num = as.integer(trimws(as.character(data3_df$v1))),
  pre_rating = as.integer(trimws(as.character(data3_df$pre_rating)))
)

head(player_prerating_df, 1)
##   pair_num pre_rating
## 1        1       1794
# Average pre-tournament rating of each player's opponents.
# The opponent columns are selected by name (opp1..opp7) rather than by
# position, and the lookup is vectorised: every opponent pair number is
# replaced by that opponent's pre-rating via match(), then averaged per row.
opp_numbers <- as.matrix(data3_df[paste0("opp", 1:7)])

# Same shape as opp_numbers; filled with the looked-up ratings.
# incomparables = NA makes sure a missing opponent never matches anything.
opp_ratings <- opp_numbers
opp_ratings[] <- player_prerating_df$pre_rating[
  match(opp_numbers, player_prerating_df$pair_num, incomparables = NA)
]

# Rounds without an opponent (NA) are left out of the average; a player with
# no opponents at all gets NaN, as mean() of an empty vector would.
avg_list <- unname(rowMeans(opp_ratings, na.rm = TRUE))

data3_df$avg_pre_chess_opponents_rating <- round(avg_list)

Create the output dataframe and write to csv

# Assemble the final per-player table.
# The name, state and points fields still carry the padding of the fixed-width
# source file, so they are trimmed before export; the pre-rating is stored as
# an integer so the CSV holds a number rather than padded text.
output_df <- data.frame(
  players_name = trimws(as.character(data3_df$v2)),
  players_state = trimws(as.character(data3_df$v11)),
  total_number_of_points = trimws(as.character(data3_df$v3)),
  players_pre_rating = as.integer(trimws(as.character(data3_df$pre_rating))),
  avg_pre_chess_rating_of_opponents = data3_df$avg_pre_chess_opponents_rating,
  stringsAsFactors = FALSE
)

head(output_df, 3)
##      players_name players_state total_number_of_points players_pre_rating
## 1        GARY HUA            ON                    6.0               1794
## 2 DAKSHESH DARURI            MI                    6.0               1553
## 3    ADITYA BAJAJ            MI                    6.0               1384
##   avg_pre_chess_rating_of_opponents
## 1                              1605
## 2                              1469
## 3                              1564
# Write the table without R's row names so the CSV holds only the five fields.
write.csv(output_df, "chess_tournament_results.csv", row.names = FALSE)