#setup
# Chunk options for the knitted report: echo = TRUE, with messages and
# warnings switched off.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

# Attaches the tidyverse packages whose functions are used unqualified below.
library(tidyverse)   
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Directory that receives the generated CSV. It is created once, through the
# variable, so the path is spelled out in a single place.
output_dir <- "output"
dir.create(output_dir, showWarnings = FALSE)

# ---- step1: target output ----
# Columns, in order, that the final player table must contain.
TARGET_COLS <- c(
  "Name", "State", "TotalPoints", "PreRating", "AvgOppPreRating"
)

# Check that `df` is a data frame containing every column in TARGET_COLS,
# then coerce those columns to their expected types.
# Stops with an error when `df` is not a data frame or a column is missing.
# Returns the coerced data frame invisibly.
assert_output_schema <- function(df) {
  stopifnot(is.data.frame(df))

  absent <- setdiff(TARGET_COLS, names(df))
  if (length(absent) > 0) {
    stop("Missing columns: ", paste(absent, collapse = ", "))
  }

  coerced <- dplyr::mutate(
    df,
    Name            = as.character(Name),
    State           = as.character(State),
    TotalPoints     = as.numeric(TotalPoints),
    PreRating       = as.integer(PreRating),
    AvgOppPreRating = as.integer(AvgOppPreRating)
  )
  invisible(coerced)
}

# Reference values for the first player in the listing.
expected_first_user <- list(
  Name            = "Gary Hua",
  State           = "ON",
  TotalPoints     = 6.0,
  PreRating       = 1794,
  AvgOppPreRating = 1605
)

# ---- step2: input path ----
# Location of the raw tournament text file.
url <- "https://raw.githubusercontent.com/lher96/MSDS-Assignments/main/data.tournament.info.txt"

# Read a text file (local path or URL) as lines and normalise its whitespace:
# non-breaking spaces and tabs become plain spaces, every "|" is padded to
# " | ", and finally runs of whitespace are collapsed and the ends trimmed.
# Returns a character vector with one element per input line.
read_and_normalize <- function(path_or_url) {
  readr::read_lines(path_or_url, progress = FALSE) |>
    stringr::str_replace_all(stringr::fixed("\u00A0"), " ") |>
    stringr::str_replace_all("\t", " ") |>
    stringr::str_replace_all("\\s*\\|\\s*", " | ") |>
    stringr::str_squish()
}

# Read the tournament file and normalise the whitespace on every line.
lines_raw <- read_and_normalize(url)

# A "meta" line is the second line of a player entry; it is recognised by
# the "R: <digits>" pre-rating marker.
is_meta_line <- \(s) stringr::str_detect(s, "R:\\s*\\d+")
idx_meta <- which(is_meta_line(lines_raw))
stopifnot(length(idx_meta) > 0)

# Pair each meta line with the nearest preceding non-empty line. Using a plain
# `idx_meta - 1` would index position 0 for a meta line at the top of the file
# (silently shortening Line1) and would pick up a blank line if one sits
# between the two lines of an entry.
idx_nonempty <- which(!is.na(lines_raw) & nzchar(lines_raw))
idx_line1 <- vapply(
  idx_meta,
  \(i) {
    earlier <- idx_nonempty[idx_nonempty < i]
    if (length(earlier) > 0) max(earlier) else NA_integer_
  },
  integer(1)
)

# Meta lines with nothing before them cannot form an entry and are dropped.
has_line1 <- !is.na(idx_line1)
stopifnot(any(has_line1))

blocks_tbl <- tibble::tibble(
  Line1 = lines_raw[idx_line1[has_line1]],
  Line2 = lines_raw[idx_meta[has_line1]]
)


utils::head(blocks_tbl, 2)
## # A tibble: 2 × 2
##   Line1                                                                    Line2
##   <chr>                                                                    <chr>
## 1 1 | GARY HUA | 6.0 | W 39 | W 21 | W 18 | W 14 | W 7 | D 12 | D 4 |      ON |…
## 2 2 | DAKSHESH DARURI | 6.0 | W 63 | W 58 | L 4 | W 17 | W 16 | W 20 | W … MI |…
# ---- step3_parsers ----
# First line of a player entry, for example:
#   "1 | GARY HUA |6.0 |W 39|W 21|... "
# Capture groups: (1) pair number, (2) player name, (3) total points.
LINE1_RE <- "^\\s*(\\d+)\\s*\\|\\s*([^|]+?)\\s*\\|\\s*(\\d+(?:\\.\\d)?)\\s*\\|"

# Extract the pair number, the title-cased name, and the total points from
# the first line of a player entry.
# Emits a warning and returns a one-row tibble of NAs when the line does not
# match LINE1_RE.
parse_line1_meta <- function(line1) {
  hit <- stringr::str_match(line1, LINE1_RE)

  if (!all(is.na(hit))) {
    # Title case matches the expected output ("GARY HUA" -> "Gary Hua").
    return(tibble::tibble(
      PlayerNum   = as.integer(hit[, 2]),
      Name        = stringr::str_to_title(stringr::str_squish(hit[, 3])),
      TotalPoints = as.numeric(hit[, 4])
    ))
  }

  warning("Could not parse Line1: ", line1)
  tibble::tibble(
    PlayerNum   = NA_integer_,
    Name        = NA_character_,
    TotalPoints = NA_real_
  )
}

# From Line 1, get opponent player numbers.
#
# line1:     first line of a player entry, "Pair | Name | Total | rounds...".
# owner_num: pair number of the player the line belongs to.
#
# Returns a tibble with one row per round field that names an opponent:
#   PlayerNum    - the owner's pair number
#   Round        - position of the round field on the line
#   OppPlayerNum - the opponent's pair number
# Round fields without a number (for example byes) produce no row. A line with
# no round fields, or no opponents at all, yields a zero-row tibble.
parse_opponents_from_line1 <- function(line1, owner_num) {
  no_games <- tibble::tibble(
    PlayerNum = integer(), Round = integer(), OppPlayerNum = integer()
  )

  parts <- stringr::str_split(line1, "\\|")[[1]]
  if (length(parts) <= 3) {
    return(no_games)
  }

  rounds  <- parts[-(1:3)]                         # fields after "Pair|Name|Total"
  opp_ids <- stringr::str_extract(rounds, "\\d+")  # one number per field, if present
  played  <- which(!is.na(opp_ids))
  if (length(played) == 0) {
    return(no_games)
  }

  # Number rounds by field position, not by position among the played games,
  # so an earlier bye does not shift the round numbers that follow it.
  tibble::tibble(
    PlayerNum    = as.integer(owner_num),
    Round        = played,
    OppPlayerNum = as.integer(opp_ids[played])
  )
}


# Extract the state code and the pre-tournament rating from the second line
# of a player entry (a line that starts like "ON | ..." and contains
# "R: <digits>").
#
# Returns a tibble with:
#   State     - character; NA when the line does not start with exactly two
#               uppercase letters followed by "|"
#   PreRating - integer; NA when no "R: <digits>" is present
parse_line2_meta <- function(line2) {
  # The pattern already enforces exactly two uppercase letters, so the capture
  # is either a valid code or NA_character_. Returning it directly keeps State
  # a character column even when nothing matches (ifelse() on an NA test
  # would hand back a logical NA instead).
  state <- stringr::str_match(line2, "^\\s*([A-Z]{2})\\s*\\|")[, 2]
  pre   <- stringr::str_match(line2, "R:\\s*(\\d+)")[, 2]
  tibble::tibble(
    State     = state,
    PreRating = as.integer(pre)
  )
}

# Parse one two-line player entry.
# Returns a list with two elements:
#   row - tibble combining the fields parsed from Line1 and Line2
#   opp - tibble of that player's opponents, from the round fields of Line1
parse_block <- function(Line1, Line2) {
  line1_fields <- parse_line1_meta(Line1)
  owner <- line1_fields$PlayerNum[1]

  list(
    row = dplyr::bind_cols(line1_fields, parse_line2_meta(Line2)),
    opp = parse_opponents_from_line1(Line1, owner_num = owner)
  )
}
# Parse every (Line1, Line2) pair; each element holds a `row` and an `opp`.
parsed <- purrr::pmap(blocks_tbl, parse_block)

# Stack the per-player rows and fix the column order.
players_meta <- dplyr::select(
  dplyr::bind_rows(purrr::map(parsed, "row")),
  PlayerNum, Name, State, TotalPoints, PreRating
)

# Stack the per-player opponent tables into one long table.
opponents_long <- dplyr::bind_rows(purrr::map(parsed, "opp"))

# Both tables must contain data before the later steps run.
stopifnot(nrow(players_meta) > 0, nrow(opponents_long) > 0)
dplyr::slice(players_meta, 1)
## # A tibble: 1 × 5
##   PlayerNum Name     State TotalPoints PreRating
##       <int> <chr>    <chr>       <dbl>     <int>
## 1         1 Gary Hua ON              6      1794
dplyr::slice(opponents_long, 1:6)
## # A tibble: 6 × 3
##   PlayerNum Round OppPlayerNum
##       <int> <int>        <int>
## 1         1     1           39
## 2         1     2           21
## 3         1     3           18
## 4         1     4           14
## 5         1     5            7
## 6         1     6           12
# ---- step4_join_opp_preratings ----
stopifnot(exists("players_meta"), exists("opponents_long"))

# Keep only the first row for any pair number that appears more than once.
players_meta <- dplyr::distinct(players_meta, PlayerNum, .keep_all = TRUE)

# For each (PlayerNum, Round), look up the opponent's pre-rating.
opp_with_ratings <- dplyr::left_join(
  dplyr::filter(
    opponents_long,
    !is.na(OppPlayerNum),
    OppPlayerNum > 0,
    OppPlayerNum != PlayerNum   # safety: ignore accidental self matches
  ),
  dplyr::select(
    players_meta,
    OppPlayerNum = PlayerNum, OppPreRating = PreRating
  ),
  by = "OppPlayerNum"
)

# Preview
print(utils::head(opp_with_ratings), row.names = FALSE)
## # A tibble: 6 × 4
##   PlayerNum Round OppPlayerNum OppPreRating
##       <int> <int>        <int>        <int>
## 1         1     1           39         1436
## 2         1     2           21         1563
## 3         1     3           18         1600
## 4         1     4           14         1610
## 5         1     5            7         1649
## 6         1     6           12         1663
# ---- step5: average opponent pre-rating ----
# Per player: mean of the opponents' pre-ratings, ignoring missing ratings,
# rounded and stored as an integer. When every rating is missing the mean is
# NaN and the result is NA.
avg_opp_pr <- dplyr::summarise(
  dplyr::group_by(opp_with_ratings, PlayerNum),
  AvgOppPreRating = (\(ratings) {
    avg <- mean(ratings, na.rm = TRUE)
    if (is.nan(avg)) NA_integer_ else as.integer(round(avg))
  })(OppPreRating),
  .groups = "drop"
)

utils::head(avg_opp_pr)
## # A tibble: 6 × 2
##   PlayerNum AvgOppPreRating
##       <int>           <int>
## 1         1            1605
## 2         2            1469
## 3         3            1564
## 4         4            1574
## 5         5            1501
## 6         6            1519
# ---- step6_assemble_output ----
# Attach each player's average opponent pre-rating and keep only the output
# columns, ordered by PlayerNum to preserve the original listing order.
final_players <- players_meta %>%
  left_join(avg_opp_pr, by = "PlayerNum") %>%
  arrange(PlayerNum) %>%
  # Players with no valid opponents have no row in avg_opp_pr, so the left
  # join already leaves their AvgOppPreRating as NA; no defaulting is needed.
  select(Name, State, TotalPoints, PreRating, AvgOppPreRating)

# Preview
print(utils::head(final_players), row.names = FALSE)
## # A tibble: 6 × 5
##   Name                State TotalPoints PreRating AvgOppPreRating
##   <chr>               <chr>       <dbl>     <int>           <int>
## 1 Gary Hua            ON            6        1794            1605
## 2 Dakshesh Daruri     MI            6        1553            1469
## 3 Aditya Bajaj        MI            6        1384            1564
## 4 Patrick H Schilling MI            5.5      1716            1574
## 5 Hanshi Zuo          MI            5.5      1655            1501
## 6 Hansen Song         OH            5        1686            1519
# Keep schema honest: verify the columns and coerce them to the target types.
final_players <- assert_output_schema(final_players)
# ---- step8_elo (final ratings) ----

# Stop early unless objects named blocks_tbl and players_meta already exist.
stopifnot(exists("blocks_tbl"), exists("players_meta"))

# 1) Parse results and opponent IDs from Line1 (W/L/D/H/B/U and opponent number)
#
# line1:     first line of a player entry, "Pair | Name | Total | rounds...".
# owner_num: pair number of the player the line belongs to.
#
# Returns a tibble with one row per round field:
#   PlayerNum    - the owner's pair number
#   Round        - position of the round field on the line
#   Result       - result letter, NA when the field holds none of W/L/D/H/B/U
#   Score        - W = 1, L = 0, D = 0.5, H = 0.5 (half-point bye),
#                  B = 1 (full bye), U = 0 (unplayed); NA for an NA Result
#   OppPlayerNum - opponent's pair number, NA when the field has no number
# A line with no round fields yields a zero-row tibble.
parse_opponents_and_results_from_line1 <- function(line1, owner_num) {
  no_rounds <- tibble::tibble(
    PlayerNum = integer(), Round = integer(), Result = character(),
    Score = numeric(), OppPlayerNum = integer()
  )

  parts <- stringr::str_split(line1, "\\|")[[1]]
  if (length(parts) <= 3) {
    return(no_rounds)
  }
  rounds <- parts[-(1:3)]                     # after "Pair | Name | Total"

  # A line that ends in "|" splits into one extra, empty trailing field. It is
  # not a round, so drop it rather than reporting a phantom final round.
  last <- length(rounds)
  if (!nzchar(trimws(rounds[last]))) {
    rounds <- rounds[-last]
  }
  if (length(rounds) == 0) {
    return(no_rounds)
  }

  # Result letter plus an optional opponent id.
  m   <- stringr::str_match(rounds, "\\b([WLDHBU])\\s*(\\d+)?")
  res <- m[, 2]
  opp <- as.integer(m[, 3])                   # digits or NA, so no coercion warning

  # Scoring conventions as a lookup; an NA result indexes to an NA score.
  score_by_result <- c(W = 1, L = 0, D = 0.5, H = 0.5, B = 1, U = 0)
  scr <- unname(score_by_result[res])

  tibble::tibble(
    PlayerNum    = as.integer(owner_num),
    Round        = seq_along(rounds),
    Result       = res,
    Score        = scr,
    OppPlayerNum = opp
  )
}

# 2) Rebuild rounds with results: parse every entry again, this time keeping
#    the result and score of each round alongside the opponent number.
parsed_results <- purrr::pmap(blocks_tbl, function(Line1, Line2) {
  line1_fields <- parse_line1_meta(Line1)
  line2_fields <- parse_line2_meta(Line2)
  list(
    row = dplyr::bind_cols(line1_fields, line2_fields),
    opp = parse_opponents_and_results_from_line1(
      Line1, line1_fields$PlayerNum[1]
    )
  )
})

# Player table: the first row wins when a pair number repeats.
players_meta <- dplyr::distinct(
  dplyr::bind_rows(purrr::map(parsed_results, "row")),
  PlayerNum, .keep_all = TRUE
)
# Long table with one row per player and round field.
rounds_long <- dplyr::bind_rows(purrr::map(parsed_results, "opp"))

# 3) Attach opponent pre-ratings (by PlayerNum)
rounds_with_ratings <- dplyr::left_join(
  rounds_long,
  dplyr::select(
    players_meta,
    OppPlayerNum = PlayerNum, OppPreRating = PreRating
  ),
  by = "OppPlayerNum"
)

# 4) Elo update. Rounds with a missing score or a missing opponent rating
#    (byes, unknown opponents) leave the rating unchanged.
#
# pre_rating: rating before the first round.
# df_rounds:  data frame with columns Round, Score and OppPreRating.
# K:          K-factor applied to every round.
#
# Returns the unrounded rating after processing the rounds in Round order.
# Each update compares against the opponent's pre-rating.
elo_one_player <- function(pre_rating, df_rounds, K = 20) {
  if (nrow(df_rounds) == 0) {
    return(pre_rating)
  }

  ordered <- dplyr::arrange(df_rounds, Round)
  rating <- pre_rating

  for (k in seq_len(nrow(ordered))) {
    score <- ordered$Score[k]
    opp_rating <- ordered$OppPreRating[k]

    if (!is.na(score) && !is.na(opp_rating)) {
      expected <- 1 / (1 + 10^((opp_rating - rating) / 400))  # expected score
      rating <- rating + K * (score - expected)
    }
  }

  rating
}


# 5) Compute player's final rating and assemble output table

# Per-player tallies: games with both a score and a rated opponent, and the
# points recomputed from the parsed results.
aggregated <- rounds_with_ratings |>
  dplyr::group_by(PlayerNum) |>
  dplyr::summarise(
    GamesWithOpp = sum(!is.na(OppPreRating) & !is.na(Score)),
    PointsCalc   = sum(Score, na.rm = TRUE),
    .groups = "drop"
  )

# Post-event rating per player, rounded to a whole number.
elo_updates <- rounds_with_ratings |>
  dplyr::group_by(PlayerNum) |>
  dplyr::group_modify(function(d, key) {
    meta_row <- match(key$PlayerNum, players_meta$PlayerNum)
    start_rating <- players_meta$PreRating[meta_row]
    tibble::tibble(PostElo = round(elo_one_player(start_rating, d, K = 20)))
  }) |>
  dplyr::ungroup()

# Combine player details with both summaries. A player without a computed
# PostElo falls back to the pre-rating.
final_elo <- players_meta |>
  dplyr::select(PlayerNum, Name, State, PreRating, TotalPoints) |>
  dplyr::left_join(elo_updates, by = "PlayerNum") |>
  dplyr::left_join(aggregated, by = "PlayerNum") |>
  dplyr::mutate(PostElo = dplyr::coalesce(PostElo, PreRating)) |>
  dplyr::arrange(PlayerNum)

# Show
final_elo_ratings <- dplyr::select(
  final_elo,
  PlayerNum,
  Name,
  PreRating_Before  = PreRating,
  FinalRating_After = PostElo
)

utils::head(final_elo_ratings)
## # A tibble: 6 × 4
##   PlayerNum Name                PreRating_Before FinalRating_After
##       <int> <chr>                          <int>             <dbl>
## 1         1 Gary Hua                        1794              1809
## 2         2 Dakshesh Daruri                 1553              1596
## 3         3 Aditya Bajaj                    1384              1462
## 4         4 Patrick H Schilling             1716              1730
## 5         5 Hanshi Zuo                      1655              1677
## 6         6 Hansen Song                     1686              1688
# ---- step7: write output ----
output_file <- file.path(output_dir, "tournament_players.csv")
readr::write_csv(final_players, output_file)

# Report the path that was actually written instead of a hard-coded copy.
cat("Wrote CSV to: ", output_file, "\n", sep = "")
## Wrote CSV to: output/tournament_players.csv
# Acceptance test: the first row should match the Gary Hua example.
first_row <- final_players[1, , drop = FALSE]


expected_fst <- list(Name = "Gary Hua", State = "ON",
                       TotalPoints = 6.0, PreRating = 1794, AvgOppPreRating = 1605)

# Validate field by field. Comparing each column against its own expected
# value keeps text comparisons exact and numeric comparisons numeric, rather
# than collapsing all five values into one character vector first.
if (nrow(first_row) == 1) {
  field_ok <- vapply(
    names(expected_fst),
    \(field) isTRUE(all.equal(first_row[[field]], expected_fst[[field]])),
    logical(1)
  )
  same_first <- all(field_ok)
  if (!same_first) {
    warning("First row does not match the Gary Hua example")
  } else {
    message("First-row check passed ")
  }
}


# github: "https://github.com/lher96/MSDS-Assignments/blob/main/Chess%20Elo%20Project%201"
# rpubs : "https://rpubs.com/loudata/1344576"