Assignment 4 Project 1

Load the libraries to use:

library(readr)
library(stringr)

# Read the whole tournament cross-table into a single string:
tournament_str <- read_file("tournamentinfo.txt")

# Split into lines and trim surrounding white space (this also strips any
# "\r" left behind by Windows line endings):
tournament_split <- strsplit(tournament_str, "\n")[[1]] |> trimws()

# Row separators are lines made up solely of dashes. Matching them by pattern
# (rather than by equality with one fixed-width literal) keeps the parse
# working if the separator width ever changes:
sep_pattern <- "^-+$"

# Keep only non-blank lines that are not row separators:
without_sep <- tournament_split[
  nzchar(tournament_split) & !grepl(sep_pattern, tournament_split)
]

# Split every remaining line at the column separator "|":
tournament_df <- without_sep |>
  strsplit(split = "\\|") |>
  # bind the per-line field vectors into a character matrix, one row per line:
  do.call(what = rbind) |>
  # convert to a data frame (all columns are character):
  as.data.frame()

# Add column names:
colnames(tournament_df) <- c(
  "pair_num", "player_name", "total_pts", paste0("round", 1:7)
)

# The first 2 rows are the two-line table header, not player data:
tournament_df <- tournament_df[-c(1:2), ]

# Reset the row names so rows are numbered from 1 again:
rownames(tournament_df) <- NULL

# Take a look at the first 10 entries:
head(tournament_df, 10)

##    pair_num                       player_name total_pts round1 round2 round3
## 1        1   GARY HUA                             6.0    W  39  W  21  W  18
## 2       ON   15445895 / R: 1794   ->1817          N:2    W      B      W    
## 3        2   DAKSHESH DARURI                      6.0    W  63  W  58  L   4
## 4       MI   14598900 / R: 1553   ->1663          N:2    B      W      B    
## 5        3   ADITYA BAJAJ                         6.0    L   8  W  61  W  25
## 6       MI   14959604 / R: 1384   ->1640          N:2    W      B      W    
## 7        4   PATRICK H SCHILLING                  5.5    W  23  D  28  W   2
## 8       MI   12616049 / R: 1716   ->1744          N:2    W      B      W    
## 9        5   HANSHI ZUO                           5.5    W  45  W  37  D  12
## 10      MI   14601533 / R: 1655   ->1690          N:2    B      W      B    
##    round4 round5 round6 round7
## 1   W  14  W   7  D  12  D   4
## 2   B      W      B      W    
## 3   W  17  W  16  W  20  W   7
## 4   W      B      W      B    
## 5   W  21  W  11  W  13  W  12
## 6   B      W      B      W    
## 7   W  26  D   5  W  19  D   1
## 8   B      W      B      B    
## 9   D  13  D   4  W  14  W  17
## 10  W      B      W      B

# note: player names are on odd numbered rows, additional info is on even rows

Create a skeleton of the required dataframe:

# Empty result table: zero rows, with the column names and types that the
# per-player rows appended later must match.
required_info <- data.frame(
  player_name                      = character(0),
  player_state                     = character(0),
  total_pts                        = numeric(0),
  player_prerating                 = numeric(0),
  avg_prechess_rating_of_opponents = numeric(0)
)

Loop through odd row numbers of tournament_df and fill required_info as needed:

# Each player occupies two consecutive rows of `tournament_df`: an odd
# "summary" row (pair number, name, total points, round results) followed by
# an even "detail" row (state, USCF id and ratings). Building the sequence
# from a length keeps it valid (empty) when the table has no rows.
odd_rows <- seq(from = 1, by = 2, length.out = nrow(tournament_df) %/% 2)
detail_rows <- odd_rows + 1

# Regex capturing the pre-tournament rating from a detail row.
# eg. string <- "15445895 / R: 1794 ->1817"
# the regex pattern would match 1794
# string2 <- "15495066 / R: 1563P22->1562"
# the regex pattern would match 1563
pattern <- "\\bR:\\s*(\\d+)(?:\\D|$)"

# Loop invariants, computed once for all players:
pair_nums <- trimws(tournament_df$pair_num)
player_ids <- pair_nums[odd_rows]
round_cols <- 4:ncol(tournament_df)

# Pre-tournament rating of every player (column 2 of the match matrix is the
# first capture group); NA when the detail row does not match the pattern:
pre_ratings <- tournament_df$player_name[detail_rows] |>
  str_match(pattern) |>
  _[, 2] |>
  as.numeric()

# Rounded mean pre-tournament rating of the opponents each player faced:
avg_opponent_ratings <- vapply(
  odd_rows,
  \(row) {
    # all round cells of the player's summary row, e.g. "W  39":
    round_cells <- as.character(unlist(tournament_df[row, round_cols]))

    # the digits in a round cell are the opponent's pair number; rounds
    # without an opponent contain no digits and contribute nothing:
    opponent_ids <- unlist(str_extract_all(round_cells, "\\d+"))

    # match() keeps one entry per game played, so an opponent met more than
    # once is counted once per game; unknown opponents are skipped:
    opponent_ratings <- pre_ratings[match(opponent_ids, player_ids)]
    opponent_ratings <- opponent_ratings[!is.na(opponent_ratings)]

    # a player with no rated opponents has no average:
    if (length(opponent_ratings) == 0) {
      return(NA_real_)
    }

    round(mean(opponent_ratings))
  },
  numeric(1)
)

# Assemble all player rows at once and append them to 'required_info'
# (avoids growing the data frame row by row):
required_info <- rbind(
  required_info,
  data.frame(
    player_name = trimws(tournament_df$player_name[odd_rows]),
    player_state = pair_nums[detail_rows],
    total_pts = as.numeric(trimws(tournament_df$total_pts[odd_rows])),
    player_prerating = pre_ratings,
    avg_prechess_rating_of_opponents = avg_opponent_ratings
  )
)

Take a look at first 10 rows of required_info:

# Preview the first ten players of the final table:
required_info |> head(n = 10)

##            player_name player_state total_pts player_prerating
## 1             GARY HUA           ON       6.0             1794
## 2      DAKSHESH DARURI           MI       6.0             1553
## 3         ADITYA BAJAJ           MI       6.0             1384
## 4  PATRICK H SCHILLING           MI       5.5             1716
## 5           HANSHI ZUO           MI       5.5             1655
## 6          HANSEN SONG           OH       5.0             1686
## 7    GARY DEE SWATHELL           MI       5.0             1649
## 8     EZEKIEL HOUGHTON           MI       5.0             1641
## 9          STEFANO LEE           ON       5.0             1411
## 10           ANVIT RAO           MI       5.0             1365
##    avg_prechess_rating_of_opponents
## 1                              1605
## 2                              1469
## 3                              1564
## 4                              1574
## 5                              1501
## 6                              1519
## 7                              1372
## 8                              1468
## 9                              1523
## 10                             1554

Generate a csv file:

# Export the final table to "required_info.csv", without a row-name column:
required_info |>
  write.csv(file = "required_info.csv", row.names = FALSE)

Assignment 4 Project 1

Mohammed Rahman

2023-03-06