DATA607 Project 1

Introduction

The goal of this project is to write an R Markdown file that reads the chess tournament data from a text file and generates a .CSV file. The output should show each Player’s Name, Player’s State, Total Number of Points, Player’s Pre-Rating, and the Average Pre Chess Rating of Opponents.

# Load necessary packages
library(tidyverse)

# Read the raw tournament text, one character element per line of the file.
# NOTE: this step needs network access to GitHub.
chess_tournament <- readLines(
  "https://raw.githubusercontent.com/mirajpatel289/Data607/main/tournamentinfo.txt",
  warn = FALSE
)

# Drop the header lines at the top of the file (negative indexing is safe
# even if the file turns out to be shorter than three lines).
chess_tournament <- chess_tournament[-(1:3)]

# Keep only lines that carry data: discard empty lines and separator lines
# that consist solely of dashes.
is_blank <- chess_tournament == ""
is_separator <- grepl("^-+$", chess_tournament)
chess_tournament <- chess_tournament[!is_blank & !is_separator]

# Split every remaining line into its "|"-delimited fields.
split_lines <- str_split(chess_tournament, "\\|")

# Stack the per-line field vectors into a data frame with one row per line;
# columns get the default names V1, V2, ...
df <- as.data.frame(
  do.call(rbind, split_lines),
  stringsAsFactors = FALSE,
  row.names = NULL
)

# Strip the padding whitespace from every field.
# across() replaces the superseded scoped verb mutate_all().
df <- df %>%
  mutate(across(everything(), str_trim))

# Build the per-player summary from `df`.
#
# Each player occupies two consecutive rows of `df`:
#   * first row:  V1 = player (pair) number, V2 = name, V3 = total points,
#                 V4 onward = one field per round containing the opponent's
#                 player number (fields without a number mean no opponent);
#   * second row: V1 = state, V2 = text that contains "R: <pre-rating>".
# A trailing unpaired row, if any, is ignored.

if (ncol(df) < 4) {
  stop(
    "Expected at least 4 '|'-separated fields per line in the tournament data.",
    call. = FALSE
  )
}

n_players <- nrow(df) %/% 2
first_rows <- df[seq(1, by = 2, length.out = n_players), , drop = FALSE]
second_rows <- df[seq(2, by = 2, length.out = n_players), , drop = FALSE]

player_ids <- as.numeric(first_rows[[1]])

# Pre-rating: the run of digits that follows "R:". Capturing only the digits
# drops the trailing padding and any provisional suffix (e.g. "1641P17"
# becomes 1641), so the exported column is numeric rather than raw text.
# Players whose second row has no "R:" entry get NA.
pre_ratings <- as.integer(str_match(second_rows[[2]], "R:\\s*(\\d+)")[, 2])

# Opponent player numbers: keep only the digits of every round field.
# Fields without digits become "" and therefore NA after conversion.
round_fields <- as.matrix(first_rows[, seq(4, ncol(df)), drop = FALSE])
opponent_ids <- as.numeric(gsub("[^0-9]", "", round_fields))

# Replace each opponent number with that opponent's pre-rating in a single
# vectorised lookup. `incomparables = NA` keeps empty round fields from ever
# matching a player. The result is reshaped to one row per player and one
# column per round.
opponent_ratings <- pre_ratings[
  match(opponent_ids, player_ids, incomparables = NA)
]
dim(opponent_ratings) <- dim(round_fields)

# Average opponent pre-rating per player, rounded to the nearest whole number.
# Missing opponents are skipped; a player with no rated opponent gets NaN.
average_opponent_rating <- round(rowMeans(opponent_ratings, na.rm = TRUE))

# Pre-rating lookup table (one row per player with a known pre-rating).
player_pre_chess_rating_df <- tibble(
  Player_ID = player_ids,
  Pre_Rating = pre_ratings
) %>%
  filter(!is.na(Pre_Rating))

# Full per-player table with explicit column names.
player_data_df <- tibble(
  Player_ID = player_ids,
  Player_Name = first_rows[[2]],
  Player_State = second_rows[[1]],
  Total_Points = as.numeric(first_rows[[3]]),
  Pre_Rating = pre_ratings,
  Average_Opponent_Rating = average_opponent_rating
)

# Select the columns for the report and give them their display names.
chess_tournament_info <- player_data_df %>%
  select(
    `Player Name` = Player_Name,
    State = Player_State,
    `Total Points` = Total_Points,
    `Player Pre-Rating` = Pre_Rating,
    `Average Opponent Rating` = Average_Opponent_Rating
  )

# Save the summary as CSV in the working directory and show it.
write.csv(chess_tournament_info, "chess_tournament_info.csv", row.names = FALSE)
print(chess_tournament_info)

## # A tibble: 64 × 5
##    `Player Name` State `Total Points` `Player Pre-Rating` Average Opponent Rat…¹
##    <chr>         <chr>          <dbl> <chr>                                <dbl>
##  1 GARY HUA      ON               6   "1794   "                             1605
##  2 DAKSHESH DAR… MI               6   "1553   "                             1469
##  3 ADITYA BAJAJ  MI               6   "1384   "                             1564
##  4 PATRICK H SC… MI               5.5 "1716   "                             1574
##  5 HANSHI ZUO    MI               5.5 "1655   "                             1501
##  6 HANSEN SONG   OH               5   "1686   "                             1519
##  7 GARY DEE SWA… MI               5   "1649   "                             1372
##  8 EZEKIEL HOUG… MI               5   "1641P17"                             1468
##  9 STEFANO LEE   ON               5   "1411   "                             1523
## 10 ANVIT RAO     MI               5   "1365   "                             1554
## # ℹ 54 more rows
## # ℹ abbreviated name: ¹`Average Opponent Rating`

…

DATA607 Project 1

Miraj Patel

2025-02-23

Introduction