# Load Required Libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)  # Load this to use str_trim()
library(tidyr)
library(readr)

# Read the file
txt_lines <- readLines("tournamentinfo.txt")
## Warning in readLines("tournamentinfo.txt"): incomplete final line found on
## 'tournamentinfo.txt'
# Initialize an empty list to store player data
players <- list()
i <- 1  # Line index for iterating through the text file

while (i <= length(txt_lines)) {
  line <- str_trim(txt_lines[i])  # Trim whitespace from the current line
  
  # Check if the line contains a player's record (starts with a number followed by "|")
  if (grepl("^\\d+ \\|", line)) {
    
    # Extract the player's name from the structured format
    player_name <- str_trim(str_extract(line, "(?<=\\| ).*?(?= \\|)"))
    
    # Extract the total number of points the player scored in the tournament
    total_points <- as.numeric(str_extract(line, "\\d+\\.\\d"))
    
    # Extract state, USCF ID, and pre-rating from the next line
    next_line <- ifelse(i + 1 <= length(txt_lines), str_trim(txt_lines[i + 1]), NA)
    
    # Extract player’s state (first two uppercase letters in the next line)
    player_state <- ifelse(!is.na(next_line), str_extract(next_line, "^[A-Z]{2}"), NA)
    
    # Extract player's pre-tournament rating (number following "R: ")
    pre_rating <- as.numeric(str_extract(next_line, "(?<=R: )\\d+"))
    
    # Extract opponent numbers (players the current player competed against in each round)
    opponent_nums <- as.numeric(unlist(str_extract_all(line, "(?<=W |L |D )\\d+")))
    
    # Ensure opponent_nums is not empty; set to NA if no opponents found
    if (length(opponent_nums) == 0) {
      opponent_nums <- NA
    }
    
    # Only store player data if required fields (name, rating, and total points) exist
    if (!is.na(player_name) && !is.na(pre_rating) && !is.na(total_points)) {
      players[[length(players) + 1]] <- data.frame(
        Name = player_name,        # Player's name
        State = player_state,      # Player's state
        TotalPoints = total_points, # Total points scored
        PreRating = pre_rating,    # Player's pre-tournament rating
        Opponents = list(opponent_nums)  # List of opponent IDs for each round
      )
    }
    
    i <- i + 2  # Move to the next player's record (skip one extra line for state/rating)
  } else {
    i <- i + 1  # Continue to the next line if no match is found
  }
  
  
  
  
  
}

# Convert the list of players into a data frame
players_df <- bind_rows(players)

# Display the first few rows of the extracted data
head(players_df)
##                  Name State TotalPoints PreRating NA.
## 1            GARY HUA    ON         6.0      1794  NA
## 2     DAKSHESH DARURI    MI         6.0      1553  NA
## 3        ADITYA BAJAJ    MI         6.0      1384  NA
## 4 PATRICK H SCHILLING    MI         5.5      1716  NA
## 5          HANSHI ZUO    MI         5.5      1655  NA
## 6         HANSEN SONG    OH         5.0      1686  NA
#Professor I really had trouble getting the ratings for the opponents, here is my code showcasing my attempt


#opponent_nums <- as.numeric(unlist(str_extract_all(line, "(?<=W |L |D )\\d+")))

# Create a lookup table for Pre-Ratings
#rating_lookup <- players_df %>%
#  select(PlayerID, PreRating)

# Function to compute average opponent pre-rating
#calculate_avg_opponent_rating <- function(rounds) {
  # Convert round data to numeric
#  opponents <- as.numeric(rounds)
#  opponents <- opponents[!is.na(opponents)]  # Remove NA values
  
  # Get the pre-ratings of the opponents using PlayerID
#  valid_ratings <- rating_lookup$PreRating[rating_lookup$PlayerID %in% opponents]
  
#  # Compute and return the mean of opponent pre-ratings
 # if (length(valid_ratings) == 0) {
  #  return(NA)
#  }
#  return(mean(valid_ratings, na.rm = TRUE))
#}

# Apply function to calculate the average opponent pre-rating
#players_df <- players_df %>%
#  rowwise() %>%
#  mutate(AvgOpponentPreRating = calculate_avg_opponent_rating(c_across(Round1:Round7))) %>%
#  ungroup()
print(players_df)
##                          Name State TotalPoints PreRating NA.
## 1                    GARY HUA    ON         6.0      1794  NA
## 2             DAKSHESH DARURI    MI         6.0      1553  NA
## 3                ADITYA BAJAJ    MI         6.0      1384  NA
## 4         PATRICK H SCHILLING    MI         5.5      1716  NA
## 5                  HANSHI ZUO    MI         5.5      1655  NA
## 6                 HANSEN SONG    OH         5.0      1686  NA
## 7           GARY DEE SWATHELL    MI         5.0      1649  NA
## 8            EZEKIEL HOUGHTON    MI         5.0      1641  NA
## 9                 STEFANO LEE    ON         5.0      1411  NA
## 10                  ANVIT RAO    MI         5.0      1365  NA
## 11   CAMERON WILLIAM MC LEMAN    MI         4.5      1712  NA
## 12             KENNETH J TACK    MI         4.5      1663  NA
## 13          TORRANCE HENRY JR    MI         4.5      1666  NA
## 14               BRADLEY SHAW    MI         4.5      1610  NA
## 15     ZACHARY JAMES HOUGHTON    MI         4.5      1220  NA
## 16               MIKE NIKITIN    MI         4.0      1604  NA
## 17         RONALD GRZEGORCZYK    MI         4.0      1629  NA
## 18              DAVID SUNDEEN    MI         4.0      1600  NA
## 19               DIPANKAR ROY    MI         4.0      1564  NA
## 20                JASON ZHENG    MI         4.0      1595  NA
## 21              DINH DANG BUI    ON         4.0      1563  NA
## 22           EUGENE L MCCLURE    MI         4.0      1555  NA
## 23                   ALAN BUI    ON         4.0      1363  NA
## 24          MICHAEL R ALDRICH    MI         4.0      1229  NA
## 25           LOREN SCHWIEBERT    MI         3.5      1745  NA
## 26                    MAX ZHU    ON         3.5      1579  NA
## 27             GAURAV GIDWANI    MI         3.5      1552  NA
## 28 SOFIA ADINA STANESCU-BELLU    MI         3.5      1507  NA
## 29           CHIEDOZIE OKORIE    MI         3.5      1602  NA
## 30         GEORGE AVERY JONES    ON         3.5      1522  NA
## 31               RISHI SHETTY    MI         3.5      1494  NA
## 32      JOSHUA PHILIP MATHEWS    ON         3.5      1441  NA
## 33                    JADE GE    MI         3.5      1449  NA
## 34     MICHAEL JEFFERY THOMAS    MI         3.5      1399  NA
## 35           JOSHUA DAVID LEE    MI         3.5      1438  NA
## 36              SIDDHARTH JHA    MI         3.5      1355  NA
## 37                  BRIAN LIU    MI         3.0      1423  NA
## 38              JOEL R HENDON    MI         3.0      1436  NA
## 39               FOREST ZHANG    MI         3.0      1348  NA
## 40        KYLE WILLIAM MURPHY    MI         3.0      1403  NA
## 41                   JARED GE    MI         3.0      1332  NA
## 42          ROBERT GLEN VASEY    MI         3.0      1283  NA
## 43         JUSTIN D SCHILLING    MI         3.0      1199  NA
## 44                  DEREK YAN    MI         3.0      1242  NA
## 45                ERIC WRIGHT    MI         2.5      1362  NA
## 46               DANIEL KHAIN    MI         2.5      1382  NA
## 47           MICHAEL J MARTIN    MI         2.5      1291  NA
## 48                 SHIVAM JHA    MI         2.5      1056  NA
## 49             TEJAS AYYAGARI    MI         2.5      1011  NA
## 50              JOSE C YBARRA    MI         2.0      1393  NA
## 51                LARRY HODGE    MI         2.0      1270  NA
## 52                  ALEX KONG    MI         2.0      1186  NA
## 53               MARISA RICCI    MI         2.0      1153  NA
## 54                 MICHAEL LU    MI         2.0      1092  NA
## 55              ASHWIN BALAJI    MI         1.0      1530  NA
## 56       THOMAS JOSEPH HOSMER    MI         1.0      1175  NA
## 57                     BEN LI    MI         1.0      1163  NA