Code
library(tidyverse)
library(janitor)
library(gghdx)
library(gt)
gghdx()library(tidyverse)
library(janitor)
library(gghdx)
library(gt)
gghdx()set.seed(123) # just so % values are always the same
df <- readr::read_csv(
file.path(
# reminder (from readme) - run data-raw/01_input_data.R to initiate data.
"../dashboard/trig_probs_202406_CERF.csv"
)
)
# some wrangling
df_clean <- df |>
clean_names() |>
select(
country,
shock,
amount,
activation_probability_per_year,
status
) |>
mutate(
country_trig = snakecase::to_snake_case(country),
shock_clean = snakecase::to_snake_case(shock)
) |>
unite(
event_id,
c("country_trig", "shock_clean"),
na.rm = T,
remove = T
) |>
select(event_id, amount, prob = activation_probability_per_year, status)
df_active <- df_clean |>
filter(
status == "Active"
)Status of ActiveActivation probability per year column. If the simulated trigger is activated we assign a payout of the full amount in the Amount column, If the trigger is not activated $0 is assigned.df_sample_all <- df_clean %>%
split(.$event_id) %>%
map(\(dfe){
event_sampled <- sample(
# sample either the full amount OR 0
x = c(dfe$amount, 0),
# creat 1M samples
size = 1e6,
replace = T,
# Probability defined in spreadsheet, therefore
# probability of non-activation is just 1- prob
prob = c(dfe$prob, 1 - dfe$prob)
)
# create df/tibble w/ results
tibble(
event = dfe$event_id,
payout = event_sampled,
status = dfe$status # keep in so we can filter later
)
}) %>%
list_rbind() |>
group_by(event) %>%
mutate(
year = row_number()
) |>
ungroup()Unless mentioned as a condition below, we assume there are no conditions on the trigger and payout amount. Below I’ve copied what I thought was the key piece of conditional logic from latest email communications (in italic) and then summarized my understanding of it in a way that would be easy to code below.
df_sample_all_wide <- df_sample_all |>
pivot_wider(
id_cols = c("year","status"), # this does create some rows where values are NA,but seems cleaner to deal w/ later in filter
names_from = "event",
values_from = "payout"
)
df_payouts_all_conditioned_wide <- df_sample_all_wide |>
mutate(
across(c("bangladesh_padma_floods", "bangladesh_jamuna_floods"), ~ ifelse(bangladesh_storms > 0, 0, .x)),
burkina_faso_observational_drought = ifelse(burkina_faso_predictive_1_drought > 0, 0, burkina_faso_observational_drought),
niger_observational_drought = ifelse(niger_predictive_2_drought > 0, 0, niger_observational_drought),
guatemala_window_b_drought = ifelse(guatemala_window_a_drought > 0, 0, guatemala_window_b_drought),
nicaragua_window_b_drought = ifelse(nicaragua_window_a_drought > 0, 0, nicaragua_window_b_drought),
el_salvador_window_b_drought = ifelse(el_salvador_window_a_drought > 0, 0, el_salvador_window_b_drought),
honduras_window_b_drought = ifelse(honduras_window_a_drought > 0, 0, honduras_window_b_drought),
) |>
rowwise() |>
mutate(
philippines_max = max(philippines_scenario_1_storms, philippines_scenario_2_storms)
) |>
ungroup()
df_payouts_all_conditioned_long <- df_payouts_all_conditioned_wide |>
# since we take max for Philippines get rid of others
select(-philippines_scenario_1_storms, -philippines_scenario_2_storms) |>
pivot_longer(-c("year","status")) |>
# filter to rm NA -
filter(!is.na(value))
# Quick check to make sure we are back to 1M samples:
# df_payouts_all_conditioned_long |>
# pivot_wider(
# id_cols = "year",
# names_from = name, values_from = value
# )$594,935,462$59,493,546What would be the expected/average yearly CERF allocation and what is the % chance of exceeding 10 % of CERF budget on AA based on current active triggers:
cerf_10_pct <- 59493546
df_yearly_payouts_active <- df_payouts_all_conditioned_long |>
filter(status == "Active") |>
group_by(year) %>%
summarise(
payout = sum(value, na.rm = T)
)
df_yearly_payouts_all <- df_payouts_all_conditioned_long |>
group_by(year) %>%
summarise(
payout = sum(value, na.rm = T)
)df_yearly_payouts_active <- df_yearly_payouts_active %>%
mutate(
exceed = payout > cerf_10_pct
)
pct_exceed <- round(mean(df_yearly_payouts_active$exceed) * 100, 2)
mean_payout <- mean(df_yearly_payouts_active$payout)
max_payout_active <- gghdx:::format_number_hdx(max(df_yearly_payouts_active$payout),additional_prefix = "$")
mean_payout_rounded <- scales::dollar(mean_payout, accuracy = 10000)
df_yearly_payouts_active |>
ggplot(
aes(x = payout, fill = exceed)
) +
geom_histogram(bins = 60) +
scale_x_continuous(labels = scales::label_currency()) +
scale_y_continuous_hdx() +
geom_vline(xintercept = cerf_10_pct, color = "tomato") +
geom_vline(xintercept = mean_payout, color = "white", linetype = "dashed") +
annotate(
x = 45000000, y = 45000, geom = "text",
label = paste0(
pct_exceed,
"% chance of exceeding 10% of average\nCERF budget ($59,493,546) on AA\nbased on current active triggers"
),
angle = 0,
hjust = 0,
size = 3
) +
labs(
title = "Distribution of yearly CERF allocations",
subtitle = paste0("Average/expected yearly payout of ", mean_payout_rounded, " (active portfolio)"),
x = "Yearly Allocation",
caption = "Calculated from 1M simualted years based on current active CERF AA portfolio"
) +
theme(
legend.position = "none",
axis.text.y = element_text(angle = 90),
axis.title.y = element_blank(),
plot.caption = element_text(hjust = 0, size = 8)
)The maximum payout simulated for just active triggers was $72M.
df_quants_active <- df_yearly_payouts_active |>
reframe(
qs = c(0.5,0.67,0.8,0.9,0.95),
q_val = quantile(.data[["payout"]], probs = qs),
qs_inv = 1-qs,
value_label = gghdx:::format_number_hdx(q_val,additional_prefix ="$")
)
df_quants_active |>
gt() |>
gt::cols_hide(
c("q_val","qs")
) |>
fmt_percent(columns = c("qs","qs_inv"),decimals = 0) |>
cols_label(
qs_inv = "% years exceeding",
value_label = "Amount"
) |>
tab_header(
title = "Chance of exceeding spending levels",
) |>
tab_footnote(
footnote = "Based on only 'active' triggers in the portfolio"
) |>
tab_options(
table.width = "40%"
)| Chance of exceeding spending levels | |
|---|---|
| % years exceeding | Amount |
| 50% | $31M |
| 33% | $35M |
| 20% | $39M |
| 10% | $43M |
| 5% | $47M |
| Based on only 'active' triggers in the portfolio | |
# just copy paste above
df_yearly_payouts <- df_yearly_payouts_all %>%
mutate(
exceed = payout > cerf_10_pct
)
pct_exceed <- round(mean(df_yearly_payouts$exceed) * 100, 2)
mean_payout <- mean(df_yearly_payouts$payout)
mean_payout_rounded <- scales::dollar(mean_payout, accuracy = 10000)
df_yearly_payouts |>
ggplot(
aes(x = payout, fill = exceed)
) +
geom_histogram(bins = 60) +
scale_x_continuous(labels = scales::label_currency()) +
scale_y_continuous_hdx() +
geom_vline(xintercept = cerf_10_pct, color = "tomato") +
geom_vline(xintercept = mean_payout, color = "white", linetype = "dashed") +
annotate(
x = 60000000, y = 45000, geom = "text",
label = paste0(
pct_exceed,
"% chance of exceeding 10% of average\nCERF budget ($59,493,546) on AA\nbased on on entire portfolio of triggers\n(active, under development and prep work)"
),
angle = 0,
hjust = 0,
size = 3
) +
labs(
title = "Distribution of yearly CERF allocations",
subtitle = paste0("Average/expected yearly payout of ", mean_payout_rounded, " (entire portfolio)"),
x = "Yearly Allocation",
caption = "Calculated from 1M simualted years based on entire CERF AA portfolio (active, under development, closed, and prep work)"
) +
theme(
legend.position = "none",
axis.text.y = element_text(angle = 90),
axis.title.y = element_blank(),
plot.caption = element_text(hjust = 0, size = 8)
)df_quants <- df_yearly_payouts |>
reframe(
qs = c(0.5,0.67,0.8,0.9,0.95),
q_val = quantile(.data[["payout"]], probs = qs),
qs_inv = 1-qs,
value_label = gghdx:::format_number_hdx(q_val,additional_prefix ="$")
)
df_quants |>
gt() |>
gt::cols_hide(
c("q_val","qs")
) |>
fmt_percent(columns = c("qs","qs_inv"),decimals = 0) |>
cols_label(
qs_inv = "% years exceeding",
value_label = "Amount"
) |>
tab_header(
title = "Chance of exceeding spending levels",
) |>
tab_footnote(
footnote = "Based on entire portfolio of triggers (active, under development, closed, and prep work)"
) |>
tab_options(
table.width = "40%"
)| Chance of exceeding spending levels | |
|---|---|
| % years exceeding | Amount |
| 50% | $50M |
| 33% | $57M |
| 20% | $63M |
| 10% | $69M |
| 5% | $75M |
| Based on entire portfolio of triggers (active, under development, closed, and prep work) | |
max_payout <- gghdx:::format_number_hdx(max(df_yearly_payouts$payout),additional_prefix = "$")The maximum payout simulated for entire portfolio of trigger (active, under development,closed, and prep work) was $114M.