# Minimal, friendly helper stack
library(tidyverse)
library(lubridate)

# We'll use the sample file we created earlier.
INPUT_CSV <- "bonds_raw_sample.csv"

bonds_raw <- read_csv(INPUT_CSV, show_col_types = FALSE)
bonds_raw %>% slice(1:8)
## # A tibble: 8 × 11
##   id    issuer    isin  ccy   coupon freq  maturity issue_date clean_px ytm_hint
##   <chr> <chr>     <chr> <chr> <chr>  <chr> <chr>    <chr>         <dbl>    <dbl>
## 1 B1    Acme Corp US03… usd   5%     SA    2029-09… 2019/09/15    101.    NA    
## 2 B2    Globex I… XS09… USD   4.75   SA    15-12-2… 2018-12-15     98.8    0.052
## 3 B3    Initech   INE0… Usd   6.250% Q     2031.06… 2016.06.30    103.    NA    
## 4 B4    Umbrella… GB00… GBP   3,50%  A     2032/03… 2022/03/31     95.7    0.04 
## 5 B5    Soylent … FR00… eur   0      SA    2030-11… 2020-11-20     87     NA    
## 6 B6    Wayne Ho… USN5… USD   7%     SA    2034-01… 2014-01-01    110.    NA    
## 7 B7    Stark In… US12… USD   5.5%   S/A   2031/12… 2016/12/15    102.    NA    
## 8 B8    Oscorp    XS05… EUR   2,25%  Annu… 2030.03… 2015.03.30     96.2   NA    
## # ℹ 1 more variable: day_count <chr>
# Coupon cleaner: "6,25%" -> 0.0625, "5" -> 0.05, "-" -> 0
# ---------- CLEANING HELPERS ----------
clean_coupon <- function(x) {
  s <- as.character(x) |>
    stringr::str_replace(",", ".") |>
    stringr::str_remove("%") |>
    stringr::str_trim()
  if (s %in% c("", "-", "NA", "NaN")) return(0)
  v <- suppressWarnings(as.numeric(s))
  ifelse(v > 1, v/100, v) %||% 0
}

# Normalize frequency to A / SA / Q / M, then to m (periods per year)
normalize_freq <- function(f) {
  f <- stringr::str_to_upper(stringr::str_trim(as.character(f)))
  dplyr::case_when(
    f %in% c("A","ANNUAL","ANNUALLY","Y") ~ "A",
    f %in% c("SA","S/A","SEMI","SEMI-ANNUAL","SEMIANNUAL","S") ~ "SA",
    f %in% c("Q","QUARTERLY") ~ "Q",
    f %in% c("M","MONTHLY") ~ "M",
    TRUE ~ "SA"  # safe default
  )
}

`%||%` <- function(a,b) if (is.null(a) || (length(a)==0)) b else a
freq_to_m <- function(freq) c(A=1L, SA=2L, Q=4L, M=12L)[[freq]] %||% 2L

# Parse a few common date styles
parse_any <- function(x) {
  # NOTE: include "Y/m/d" (NOT "Y/m-d")
  lubridate::parse_date_time(
    x,
    orders = c("Y-m-d","Y/m/d","d-m-Y","Y.m.d","d.m.Y","d/m/Y"),
    tz = "UTC"
  ) |> as.Date()
}

bonds <- bonds_raw |>
  dplyr::mutate(
    coupon = purrr::map_dbl(coupon, clean_coupon),
    freq   = purrr::map_chr(freq,   normalize_freq),
    m      = purrr::map_int(freq,   freq_to_m),
    issue_date = parse_any(issue_date),
    maturity   = parse_any(maturity),
    clean_px   = suppressWarnings(as.numeric(clean_px))
  )

# Early sanity (fail fast with a useful message if something is wrong)
stopifnot(all(!is.na(bonds$issue_date)))
stopifnot(all(!is.na(bonds$maturity)))
stopifnot(all(bonds$m %in% c(1L,2L,4L,12L)))

bonds %>% select(id, issuer, coupon, freq, m, issue_date, maturity, clean_px) %>% slice(1:8)
## # A tibble: 8 × 8
##   id    issuer           coupon freq      m issue_date maturity   clean_px
##   <chr> <chr>             <dbl> <chr> <int> <date>     <date>        <dbl>
## 1 B1    Acme Corp        0.05   SA        2 2019-09-15 2029-09-15    101. 
## 2 B2    Globex Intl      0.0475 SA        2 2018-12-15 2030-12-15     98.8
## 3 B3    Initech          0.0625 Q         4 2016-06-30 2031-06-30    103. 
## 4 B4    Umbrella plc     0.035  A         1 2022-03-31 2032-03-31     95.7
## 5 B5    Soylent SA       0      SA        2 2020-11-20 2030-11-20     87  
## 6 B6    Wayne Holdings   0.07   SA        2 2014-01-01 2034-01-01    110. 
## 7 B7    Stark Industries 0.055  SA        2 2016-12-15 2031-12-15    102. 
## 8 B8    Oscorp           0.0225 A         1 2015-03-30 2030-03-30     96.2
make_cashflows <- function(issue, maturity, m, coupon, face = 100) {
  if (is.na(issue) || is.na(maturity) || is.na(m) || m <= 0) {
    return(tibble::tibble(cf_date = as.Date(character()), cf = numeric()))
  }
  dates <- seq.Date(issue, maturity, by = paste0(12/m, " months"))
  if (length(dates) == 0L || tail(dates,1) != maturity) dates <- c(dates, maturity)
  cpn <- face * coupon / m
  tibble::tibble(
    cf_date = dates,
    cf      = c(rep(cpn, length(dates)-1), face + cpn)
  )
}

# Show one example cashflow table
ex_cf <- make_cashflows(bonds$issue_date[1], bonds$maturity[1], bonds$m[1], bonds$coupon[1])
ex_cf
## # A tibble: 21 × 2
##    cf_date       cf
##    <date>     <dbl>
##  1 2019-09-15   2.5
##  2 2020-03-15   2.5
##  3 2020-09-15   2.5
##  4 2021-03-15   2.5
##  5 2021-09-15   2.5
##  6 2022-03-15   2.5
##  7 2022-09-15   2.5
##  8 2023-03-15   2.5
##  9 2023-09-15   2.5
## 10 2024-03-15   2.5
## # ℹ 11 more rows
price_from_yield <- function(cf_tbl, y, m) {
  if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(m) || is.na(y)) return(NA_real_)
  t <- seq_len(nrow(cf_tbl))
  sum(cf_tbl$cf / (1 + y/m)^t)
}


yield_from_price <- function(cf_tbl, price_target, m) {
  if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(price_target) || is.na(m)) return(NA_real_)
  f <- function(y) price_from_yield(cf_tbl, y, m) - price_target
  # Try to bracket; if not possible, return NA
  out <- try(stats::uniroot(f, lower = -0.5, upper = 1.0)$root, silent = TRUE)
  if (inherits(out, "try-error")) NA_real_ else out
}

# Try it on a single bond
i <- 1
cf_i <- make_cashflows(bonds$issue_date[i], bonds$maturity[i], bonds$m[i], bonds$coupon[i])
y_i  <- yield_from_price(cf_i, bonds$clean_px[i], bonds$m[i])
list(example_id = bonds$id[i], ytm_decimal = round(y_i, 6),
     repro_price = round(price_from_yield(cf_i, y_i, bonds$m[i]), 6))
## $example_id
## [1] "B1"
## 
## $ytm_decimal
## [1] 0.048461
## 
## $repro_price
## [1] 101.2553
grid <- tibble(y = seq(-0.02, 0.12, by = 0.001)) |>
  mutate(price = map_dbl(y, ~ price_from_yield(cf_i, .x, bonds$m[i])))

ggplot(grid, aes(y, price)) +
  geom_line() +
  labs(title = paste("Price–Yield curve for", bonds$id[i]),
       x = "Yield (decimal)", y = "Price (per 100)")

macaulay_duration <- function(cf_tbl, y, m) {
  if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(y) || is.na(m)) return(NA_real_)
  t <- seq_len(nrow(cf_tbl))
  P <- price_from_yield(cf_tbl, y, m)
  if (is.na(P) || P == 0) return(NA_real_)
  sum(t * cf_tbl$cf / (1 + y/m)^t) / P / m
}

modified_duration <- function(cf_tbl, y, m) {
  Dmac <- macaulay_duration(cf_tbl, y, m)
  if (is.na(Dmac) || is.na(m) || is.na(y)) return(NA_real_)
  Dmac / (1 + y/m)
}

convexity_years <- function(cf_tbl, y, m) {
  if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(y) || is.na(m)) return(NA_real_)
  t <- seq_len(nrow(cf_tbl))
  P <- price_from_yield(cf_tbl, y, m)
  if (is.na(P) || P == 0) return(NA_real_)
  sum(t*(t+1) * cf_tbl$cf / (1 + y/m)^(t+2)) / P / (m^2)
}

list(D_mac = round(macaulay_duration(cf_i, y_i, bonds$m[i]), 6),
     D_mod = round(modified_duration(cf_i, y_i, bonds$m[i]), 6),
     Convexity = round(convexity_years(cf_i, y_i, bonds$m[i]), 6))
## $D_mac
## [1] 8.310787
## 
## $D_mod
## [1] 8.114178
## 
## $Convexity
## [1] 80.05306
xnpv <- function(cashflows, dates, r) {
  t0 <- min(dates)
  sum(cashflows / (1 + r)^((as.numeric(dates - t0))/365))
}
xirr <- function(cashflows, dates) {
  f <- function(r) xnpv(cashflows, dates, r)
  uniroot(f, lower = -0.5, upper = 1)$root
}

# Demo: buy -100 today, then a few irregular inflows
cf_demo <- c(-100, 3.2, 3.1, 3.3, 103.2)
dt_demo <- as.Date(c("2024-09-15","2025-03-17","2025-08-01","2026-01-10","2026-09-15"))
round(xirr(cf_demo, dt_demo), 6)
## [1] 0.065325
results <- bonds |>
  dplyr::mutate(
    cf_tbl = purrr::pmap(
      list(issue_date, maturity, m, coupon),
      function(issue_date, maturity, m, coupon) make_cashflows(issue_date, maturity, m, coupon)
    ),
    ytm = purrr::pmap_dbl(
      list(cf_tbl, clean_px, m),
      function(cf, px, m) yield_from_price(cf, px, m)
    ),
    dur_mac = purrr::pmap_dbl(
      list(cf_tbl, ytm, m),
      function(cf, y, m) macaulay_duration(cf, y, m)
    ),
    dur_mod = purrr::pmap_dbl(
      list(cf_tbl, ytm, m),
      function(cf, y, m) modified_duration(cf, y, m)
    ),
    convex  = purrr::pmap_dbl(
      list(cf_tbl, ytm, m),
      function(cf, y, m) convexity_years(cf, y, m)
    )
  )

write_csv(results, "bond_results_basic.csv")
"Saved → bond_results_basic.csv"
## [1] "Saved → bond_results_basic.csv"