Setup
library(dplyr)
library(stringr)
Load tournament data
# Read the raw report into a single text column (V1), skipping the first four
# lines of the file (presumably the report header -- confirm against the file).
# Quote and comment processing are switched off so that an apostrophe or a "#"
# inside a line cannot merge or truncate records, and the column is kept as
# character regardless of the R version's stringsAsFactors default.
data <- read.table(
  "tournamentinfo.txt",
  sep = "\t",
  skip = 4,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)
head(data, 4)
## V1
## 1 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 2 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 3 -----------------------------------------------------------------------------------------
## 4 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
# Remove the dashed separator lines so that only the player lines remain.
data <- data %>%
  filter(!grepl("----------------", V1))
head(data, n = 4)
## V1
## 1 1 | GARY HUA |6.0 |W 39|W 21|W 18|W 14|W 7|D 12|D 4|
## 2 ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |
## 3 2 | DAKSHESH DARURI |6.0 |W 63|W 58|L 4|W 17|W 16|W 20|W 7|
## 4 MI | 14598900 / R: 1553 ->1663 |N:2 |B |W |B |W |B |W |B |
Create a data frame from the first line of each player record (pair number, name, total points, round results)
# Odd-numbered lines hold the first half of each player record.
first_line_rows <- seq(from = 1, to = nrow(data), by = 2)
data1 <- data[first_line_rows, ]

# Split each line on the literal "|" and stack the pieces into one row per
# player; the ID column records the row order so the halves can be re-joined.
first_line_fields <- strsplit(as.character(data1), split = "|", fixed = TRUE)
data1_df <- data.frame(do.call(rbind, first_line_fields))
data1_df <- tibble::rowid_to_column(data1_df, var = "ID")
head(data1_df, n = 3)
## ID X1 X2 X3 X4 X5 X6 X7
## 1 1 1 GARY HUA 6.0 W 39 W 21 W 18 W 14
## 2 2 2 DAKSHESH DARURI 6.0 W 63 W 58 L 4 W 17
## 3 3 3 ADITYA BAJAJ 6.0 L 8 W 61 W 25 W 21
## X8 X9 X10
## 1 W 7 D 12 D 4
## 2 W 16 W 20 W 7
## 3 W 11 W 13 W 12
Create a data frame from the second line of each player record (state, ID and rating field, per-round W/B codes)
# Even-numbered lines hold the second half of each player record.
second_line_rows <- seq(from = 2, to = nrow(data), by = 2)
data2 <- data[second_line_rows, ]

# Same treatment as the first-line table: split on the literal "|", bind the
# pieces row-wise, and add a sequential ID used later as the join key.
second_line_fields <- strsplit(as.character(data2), split = "|", fixed = TRUE)
data2_df <- data.frame(do.call(rbind, second_line_fields))
data2_df <- tibble::rowid_to_column(data2_df, var = "ID")
head(data2_df, n = 3)
## ID X1 X2 X3 X4 X5 X6 X7
## 1 1 ON 15445895 / R: 1794 ->1817 N:2 W B W B
## 2 2 MI 14598900 / R: 1553 ->1663 N:2 B W B W
## 3 3 MI 14959604 / R: 1384 ->1640 N:2 W B W B
## X8 X9 X10
## 1 W B W
## 2 B W B
## 3 W B W
Merge to one dataframe with a row for each tournament player
# Join the two half-records on the shared ID, then give the columns uniform
# names: v1..v10 come from the first line, v11..v20 from the second line.
data3_df <- merge(x = data1_df, y = data2_df, by = "ID")
names(data3_df) <- c("ID", paste0("v", 1:20))
Calculate the average pre-tournament rating of each player's opponents
# Pre-tournament rating: the digits that follow "R:" in the second-line field
# v12 (e.g. "15445895 / R: 1794 ->1817" gives 1794). Only the leading digit run
# is captured, so anything after the number is ignored; a field without a
# numeric rating yields NA. The column is read below and by later steps, and it
# is created before the opp* columns so those stay at positions 23:29.
data3_df$pre_rating <- as.integer(
  str_match(as.character(data3_df$v12), "R:\\s*(\\d+)")[, 2]
)

# Opponent pair numbers for the seven rounds are the digits in v4..v10
# (e.g. "W 39" gives 39). Taking the whole digit run, rather than the last two
# characters, also handles pair numbers with more than two digits. Fields that
# contain no number become NA.
opp_cols <- paste0("opp", 1:7)
data3_df[opp_cols] <- unname(lapply(
  data3_df[paste0("v", 4:10)],
  function(round_result) {
    as.integer(str_extract(as.character(round_result), "\\d+"))
  }
))

# Lookup table from pair number to pre-tournament rating. Going through
# as.character() keeps the conversion correct even if v1 is a factor.
player_prerating_df <- data.frame(
  pair_num = as.integer(trimws(as.character(data3_df$v1))),
  pre_rating = as.integer(as.character(data3_df$pre_rating))
)
head(player_prerating_df, 1)
## pair_num pre_rating
## 1 1 1794
# For every player, average the pre-tournament ratings of the opponents faced.
# The opponent columns are selected by name instead of by position (23:29), so
# this step does not depend on how many columns precede them. seq_len() keeps
# the iteration safe for an empty table, and vapply() builds the result in one
# pass instead of growing a vector inside a loop.
opp_cols <- paste0("opp", 1:7)
avg_list <- vapply(
  seq_len(nrow(data3_df)),
  function(r1) {
    opponent_num <- unlist(data3_df[r1, opp_cols], use.names = FALSE)
    # Rounds without an opponent are NA and are left out of the lookup.
    opponent_num <- opponent_num[!is.na(opponent_num)]
    faced <- player_prerating_df$pair_num %in% opponent_num
    # mean() of an empty selection is NaN, i.e. a player with no opponents.
    mean(player_prerating_df$pre_rating[faced])
  },
  numeric(1)
)
data3_df$avg_pre_chess_opponents_rating <- round(avg_list)
Create the output dataframe and write to csv
# Assemble the export table. The text fields come straight from strsplit() and
# are not trimmed anywhere else in the script, so surrounding padding is
# stripped here to keep it out of the CSV (a no-op when there is none).
output_df <- data.frame(
  players_name = trimws(data3_df$v2),
  players_state = trimws(data3_df$v11),
  total_number_of_points = trimws(data3_df$v3),
  players_pre_rating = data3_df$pre_rating,
  avg_pre_chess_rating_of_opponents = data3_df$avg_pre_chess_opponents_rating
)
head(output_df, 3)
## players_name players_state total_number_of_points
## 1 GARY HUA ON 6.0
## 2 DAKSHESH DARURI MI 6.0
## 3 ADITYA BAJAJ MI 6.0
## players_pre_rating avg_pre_chess_rating_of_opponents
## 1 1794 1605
## 2 1553 1469
## 3 1384 1564
# Export the summary table; row.names = FALSE leaves R's row index out of the
# file.
write.csv(
  x = output_df,
  file = "chess_tournament_results.csv",
  row.names = FALSE
)