# Minimal, friendly helper stack
library(tidyverse)
library(lubridate)
# We'll use the sample file we created earlier.
INPUT_CSV <- "bonds_raw_sample.csv"
bonds_raw <- read_csv(INPUT_CSV, show_col_types = FALSE)
bonds_raw %>% slice(1:8)
## # A tibble: 8 × 11
## id issuer isin ccy coupon freq maturity issue_date clean_px ytm_hint
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 B1 Acme Corp US03… usd 5% SA 2029-09… 2019/09/15 101. NA
## 2 B2 Globex I… XS09… USD 4.75 SA 15-12-2… 2018-12-15 98.8 0.052
## 3 B3 Initech INE0… Usd 6.250% Q 2031.06… 2016.06.30 103. NA
## 4 B4 Umbrella… GB00… GBP 3,50% A 2032/03… 2022/03/31 95.7 0.04
## 5 B5 Soylent … FR00… eur 0 SA 2030-11… 2020-11-20 87 NA
## 6 B6 Wayne Ho… USN5… USD 7% SA 2034-01… 2014-01-01 110. NA
## 7 B7 Stark In… US12… USD 5.5% S/A 2031/12… 2016/12/15 102. NA
## 8 B8 Oscorp XS05… EUR 2,25% Annu… 2030.03… 2015.03.30 96.2 NA
## # ℹ 1 more variable: day_count <chr>
# Coupon cleaner: "6,25%" -> 0.0625, "5" -> 0.05, "-" -> 0
# ---------- CLEANING HELPERS ----------
clean_coupon <- function(x) {
s <- as.character(x) |>
stringr::str_replace(",", ".") |>
stringr::str_remove("%") |>
stringr::str_trim()
if (s %in% c("", "-", "NA", "NaN")) return(0)
v <- suppressWarnings(as.numeric(s))
ifelse(v > 1, v/100, v) %||% 0
}
# Normalize frequency to A / SA / Q / M, then to m (periods per year)
normalize_freq <- function(f) {
f <- stringr::str_to_upper(stringr::str_trim(as.character(f)))
dplyr::case_when(
f %in% c("A","ANNUAL","ANNUALLY","Y") ~ "A",
f %in% c("SA","S/A","SEMI","SEMI-ANNUAL","SEMIANNUAL","S") ~ "SA",
f %in% c("Q","QUARTERLY") ~ "Q",
f %in% c("M","MONTHLY") ~ "M",
TRUE ~ "SA" # safe default
)
}
`%||%` <- function(a,b) if (is.null(a) || (length(a)==0)) b else a
freq_to_m <- function(freq) c(A=1L, SA=2L, Q=4L, M=12L)[[freq]] %||% 2L
# Parse a few common date styles
parse_any <- function(x) {
# NOTE: include "Y/m/d" (NOT "Y/m-d")
lubridate::parse_date_time(
x,
orders = c("Y-m-d","Y/m/d","d-m-Y","Y.m.d","d.m.Y","d/m/Y"),
tz = "UTC"
) |> as.Date()
}
bonds <- bonds_raw |>
dplyr::mutate(
coupon = purrr::map_dbl(coupon, clean_coupon),
freq = purrr::map_chr(freq, normalize_freq),
m = purrr::map_int(freq, freq_to_m),
issue_date = parse_any(issue_date),
maturity = parse_any(maturity),
clean_px = suppressWarnings(as.numeric(clean_px))
)
# Early sanity (fail fast with a useful message if something is wrong)
stopifnot(all(!is.na(bonds$issue_date)))
stopifnot(all(!is.na(bonds$maturity)))
stopifnot(all(bonds$m %in% c(1L,2L,4L,12L)))
bonds %>% select(id, issuer, coupon, freq, m, issue_date, maturity, clean_px) %>% slice(1:8)
## # A tibble: 8 × 8
## id issuer coupon freq m issue_date maturity clean_px
## <chr> <chr> <dbl> <chr> <int> <date> <date> <dbl>
## 1 B1 Acme Corp 0.05 SA 2 2019-09-15 2029-09-15 101.
## 2 B2 Globex Intl 0.0475 SA 2 2018-12-15 2030-12-15 98.8
## 3 B3 Initech 0.0625 Q 4 2016-06-30 2031-06-30 103.
## 4 B4 Umbrella plc 0.035 A 1 2022-03-31 2032-03-31 95.7
## 5 B5 Soylent SA 0 SA 2 2020-11-20 2030-11-20 87
## 6 B6 Wayne Holdings 0.07 SA 2 2014-01-01 2034-01-01 110.
## 7 B7 Stark Industries 0.055 SA 2 2016-12-15 2031-12-15 102.
## 8 B8 Oscorp 0.0225 A 1 2015-03-30 2030-03-30 96.2
make_cashflows <- function(issue, maturity, m, coupon, face = 100) {
if (is.na(issue) || is.na(maturity) || is.na(m) || m <= 0) {
return(tibble::tibble(cf_date = as.Date(character()), cf = numeric()))
}
dates <- seq.Date(issue, maturity, by = paste0(12/m, " months"))
if (length(dates) == 0L || tail(dates,1) != maturity) dates <- c(dates, maturity)
cpn <- face * coupon / m
tibble::tibble(
cf_date = dates,
cf = c(rep(cpn, length(dates)-1), face + cpn)
)
}
# Show one example cashflow table
ex_cf <- make_cashflows(bonds$issue_date[1], bonds$maturity[1], bonds$m[1], bonds$coupon[1])
ex_cf
## # A tibble: 21 × 2
## cf_date cf
## <date> <dbl>
## 1 2019-09-15 2.5
## 2 2020-03-15 2.5
## 3 2020-09-15 2.5
## 4 2021-03-15 2.5
## 5 2021-09-15 2.5
## 6 2022-03-15 2.5
## 7 2022-09-15 2.5
## 8 2023-03-15 2.5
## 9 2023-09-15 2.5
## 10 2024-03-15 2.5
## # ℹ 11 more rows
price_from_yield <- function(cf_tbl, y, m) {
if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(m) || is.na(y)) return(NA_real_)
t <- seq_len(nrow(cf_tbl))
sum(cf_tbl$cf / (1 + y/m)^t)
}
yield_from_price <- function(cf_tbl, price_target, m) {
if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(price_target) || is.na(m)) return(NA_real_)
f <- function(y) price_from_yield(cf_tbl, y, m) - price_target
# Try to bracket; if not possible, return NA
out <- try(stats::uniroot(f, lower = -0.5, upper = 1.0)$root, silent = TRUE)
if (inherits(out, "try-error")) NA_real_ else out
}
# Try it on a single bond
i <- 1
cf_i <- make_cashflows(bonds$issue_date[i], bonds$maturity[i], bonds$m[i], bonds$coupon[i])
y_i <- yield_from_price(cf_i, bonds$clean_px[i], bonds$m[i])
list(example_id = bonds$id[i], ytm_decimal = round(y_i, 6),
repro_price = round(price_from_yield(cf_i, y_i, bonds$m[i]), 6))
## $example_id
## [1] "B1"
##
## $ytm_decimal
## [1] 0.048461
##
## $repro_price
## [1] 101.2553
grid <- tibble(y = seq(-0.02, 0.12, by = 0.001)) |>
mutate(price = map_dbl(y, ~ price_from_yield(cf_i, .x, bonds$m[i])))
ggplot(grid, aes(y, price)) +
geom_line() +
labs(title = paste("Price–Yield curve for", bonds$id[i]),
x = "Yield (decimal)", y = "Price (per 100)")

macaulay_duration <- function(cf_tbl, y, m) {
if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(y) || is.na(m)) return(NA_real_)
t <- seq_len(nrow(cf_tbl))
P <- price_from_yield(cf_tbl, y, m)
if (is.na(P) || P == 0) return(NA_real_)
sum(t * cf_tbl$cf / (1 + y/m)^t) / P / m
}
modified_duration <- function(cf_tbl, y, m) {
Dmac <- macaulay_duration(cf_tbl, y, m)
if (is.na(Dmac) || is.na(m) || is.na(y)) return(NA_real_)
Dmac / (1 + y/m)
}
convexity_years <- function(cf_tbl, y, m) {
if (!is.data.frame(cf_tbl) || nrow(cf_tbl) == 0L || is.na(y) || is.na(m)) return(NA_real_)
t <- seq_len(nrow(cf_tbl))
P <- price_from_yield(cf_tbl, y, m)
if (is.na(P) || P == 0) return(NA_real_)
sum(t*(t+1) * cf_tbl$cf / (1 + y/m)^(t+2)) / P / (m^2)
}
list(D_mac = round(macaulay_duration(cf_i, y_i, bonds$m[i]), 6),
D_mod = round(modified_duration(cf_i, y_i, bonds$m[i]), 6),
Convexity = round(convexity_years(cf_i, y_i, bonds$m[i]), 6))
## $D_mac
## [1] 8.310787
##
## $D_mod
## [1] 8.114178
##
## $Convexity
## [1] 80.05306
xnpv <- function(cashflows, dates, r) {
t0 <- min(dates)
sum(cashflows / (1 + r)^((as.numeric(dates - t0))/365))
}
xirr <- function(cashflows, dates) {
f <- function(r) xnpv(cashflows, dates, r)
uniroot(f, lower = -0.5, upper = 1)$root
}
# Demo: buy -100 today, then a few irregular inflows
cf_demo <- c(-100, 3.2, 3.1, 3.3, 103.2)
dt_demo <- as.Date(c("2024-09-15","2025-03-17","2025-08-01","2026-01-10","2026-09-15"))
round(xirr(cf_demo, dt_demo), 6)
## [1] 0.065325
results <- bonds |>
dplyr::mutate(
cf_tbl = purrr::pmap(
list(issue_date, maturity, m, coupon),
function(issue_date, maturity, m, coupon) make_cashflows(issue_date, maturity, m, coupon)
),
ytm = purrr::pmap_dbl(
list(cf_tbl, clean_px, m),
function(cf, px, m) yield_from_price(cf, px, m)
),
dur_mac = purrr::pmap_dbl(
list(cf_tbl, ytm, m),
function(cf, y, m) macaulay_duration(cf, y, m)
),
dur_mod = purrr::pmap_dbl(
list(cf_tbl, ytm, m),
function(cf, y, m) modified_duration(cf, y, m)
),
convex = purrr::pmap_dbl(
list(cf_tbl, ytm, m),
function(cf, y, m) convexity_years(cf, y, m)
)
)
write_csv(results, "bond_results_basic.csv")
"Saved → bond_results_basic.csv"
## [1] "Saved → bond_results_basic.csv"