Code
# 1. Set start date for spreading over whole year
fy_start_date <- as.Date("2025-07-01")
# 2. Set start date for historical data from which seasonality is calculated
seasonality_start_date <- as.Date("2021-07-01")To do list
Goal: Distribute the annual consensus forecast across months and perform a variance analysis against the official monthly consensus forecast.
Methodology: Annual Forecast Monthly Distribution Strategy
Data Sources
# 1. Set start date for spreading over whole year
fy_start_date <- as.Date("2025-07-01")
# 2. Set start date for historical data from which seasonality is calculated
seasonality_start_date <- as.Date("2021-07-01")library(tidyverse)
library(lubridate)
library(timetk)
library(tidyquant)
library(plotly)
library(readxl)
# Annual forecast
forecast_consensus <- read_excel("../00_Data/Data_raw/Copy of FY 2026_FINAL CashFlows_JANUARY 2026 Consensus Forecast Update_01.27.26_ToTEK.xlsx", range = "A11:N65") %>% # Row76 is CCC
rename(Tax_abb = `TARGETS SUMMARY`) %>%
# Convert the unit: raw data are in $1,000
mutate(across(-Tax_abb, ~.x %>% as.numeric() * 1000))
# Monthly historical
historical_data_tbl <- read_rds("../00_Data/Data_wrangled/calm_clean_tbl.rds") %>%
# Remove actual data within the forecast year
filter(Date < fy_start_date) %>%
# Select years in calculating seasonality
filter(Date >= seasonality_start_date)
# Select the distribution functions: See 01_example_qmd
# with or without months ending on weekends
source("../00_Scripts/spread_annual_forecast_to_months")# Define G-Fund monthly target
forecast_GF_consensus <- forecast_consensus %>%
# Select G-Fund
slice(1:30) %>%
# Create Tax_abb
mutate(Tax_abb = case_when(
Tax_abb == "Total Net G-Fund" ~ "TOTAL_G",
Tax_abb == "Personal Income Tax" ~ "PINCOME",
Tax_abb == "Sales & Use Tax (GROSS)" ~ "S&U",
Tax_abb == "Corporate Tax" ~ "CORP",
Tax_abb == "G-Fund Meals & Rooms Tax" ~ "M&R",
Tax_abb == "Property Transfer Tax [Net]" ~ "PROPT",
Tax_abb == "Insurance Tax" ~ "INSUR",
Tax_abb == "Estate Tax" ~ "ESTATE",
Tax_abb == "Other Taxes/Fees" ~ "OTHER_G"
)) %>%
filter(!Tax_abb %>% str_detect("S&U")) %>%
mutate(Fund = "G-Fund")
# Define T-Fund monthly target
forecast_TF_consensus <- forecast_consensus %>%
# Select G-Fund
slice(32:35,37,38,47,48) %>%
# Create Tax_abb
mutate(Tax_abb = case_when(
Tax_abb == "Total Net T-Fund" ~ "TOTAL_T",
Tax_abb == "Gasoline Tax (Excluding TIB)" ~ "GAS",
Tax_abb == "Diesel Tax (Excluding TIB)" ~ "DIESEL",
Tax_abb == "Motor Vehicle P&U Tax" ~ "MVP&U",
Tax_abb == "Motor Vehicle Fees" ~ "MVFEES",
Tax_abb == "Other Fees" ~ "OTHTR",
Tax_abb == "Gasoline Tax TIB" ~ "TIBGAS",
Tax_abb == "Diesel Tax TIB" ~ "TIBDIESEL"
)) %>%
mutate(Fund = "T-Fund")
# Define T-Fund monthly target
forecast_EF_consensus <- forecast_consensus %>%
# Select G-Fund
slice(40:45) %>%
# Create Tax_abb
mutate(Tax_abb = case_when(
Tax_abb == "Total Education Fund [Partial]" ~ "TOTAL_E",
Tax_abb == "Sales & Use Tax" ~ "S&U",
Tax_abb == "Meals & Rooms Tax" ~ "M&R",
Tax_abb == "Purchase & Use Tax" ~ "MVP&U",
Tax_abb == "Interest, Premiums" ~ "INT",
Tax_abb == "Lottery Transfer" ~ "LOT"
)) %>%
mutate(Fund = "E-Fund")
# Combine all targets into one tibble first
forecast_Consensus_tbl <- bind_rows(forecast_GF_consensus, forecast_TF_consensus, forecast_EF_consensus)# 2. Reshape History into a Nested Tibble
# This replaces: hist_pincome_tbl, hist_corporate_tbl, etc.
Fund_type_T_vec <- c(historical_data_tbl %>% select(GAS:TIBDIESEL) %>% names(),"TOTAL_T")
Fund_type_E_vec <- c("S&U","M&R","INT","LOT","TOTAL_E")
historical_clean_tbl <- historical_data_tbl %>%
# Add TOTAL_G and OTHER_G
# Note TOTAL_G doesn't exactly match Total Net G-Fund in Jeff's but should closely follow
mutate(OTHER_G = OTHTAX + OTHREV,
TOTAL_G = OTHER_G + PINCOME + CORP + `M&R` + PROPT + INSUR + ESTATE) %>%
# Note TOTAL_G doesn't exactly match Total Net T-Fund in Jeff's but should closely follow
mutate(TOTAL_T = GAS + DIESEL + `MVP&U` + MVFEES + OTHTR) %>%
mutate(TOTAL_E = `S&U` + `M&R` + `MVP&U` + INT + LOT) %>%
pivot_longer(cols = -Date, names_to = "category", values_to = "Monthly") %>%
mutate(Fund = case_when(
category %in% Fund_type_T_vec ~ "T-Fund",
category %in% Fund_type_E_vec ~ "E-Fund",
TRUE ~ "G-Fund"
))
nested_history <- historical_clean_tbl %>%
# 1. Isolate and modify the specific rows you want to duplicate
bind_rows(
historical_clean_tbl %>% filter(category == "M&R") %>% mutate(Fund = "G-Fund"),
historical_clean_tbl %>% filter(category == "MVP&U") %>% mutate(Fund = "E-Fund")
) %>%
group_by(Fund, category) %>%
nest(history = c(Date, Monthly))# Run the entire pipeline in one go
forecast_DSL_whole_year_tbl <- forecast_Consensus_tbl %>%
select(Fund, category = Tax_abb, target = Total) %>%
inner_join(nested_history) %>%
group_by(category) %>% # This ensures each fund is processed independently
mutate(
fy_start = fy_start_date,
annual_spread = pmap(list(target, fy_start, history), distribute_annual_forecast)
) %>%
ungroup() %>%
select(Fund, category, annual_spread) %>%
unnest(annual_spread) %>%
mutate(
month_label = month(Date, label = TRUE),
year = year(Date)
)Plot
# 1. Prepare the ggplot object for the 12-month spread
p_annual <- forecast_DSL_whole_year_tbl %>%
filter(Fund == "G-Fund") %>%
mutate(month_label = month(Date, label = TRUE, abbr = FALSE)) %>%
ggplot(aes(x = Date, y = Revenue,
fill = category, # Color by category since it's all one 'type'
text = paste("Category:", category,
"<br>Month:", month_label, year(Date),
"<br>Forecast: $", format(round(Revenue, 0), big.mark = ","),
"<br>Share of Year:", scales::percent(final_share, accuracy = 0.1)))) +
# Using geom_col for a clean bar representation
geom_col(alpha = 0.8, color = "white", size = 0.2) +
# Facet by category with independent Y-axes
facet_wrap(~category, scales = "free_y", ncol = 2) +
# Professional financial color palette
scale_fill_tq() +
theme_tq() +
labs(title = "FY2026 Monthly Revenue Forecast: Whole Year",
y = NULL,
x = NULL,
fill = NULL) +
scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
scale_x_date(date_labels = "%b %y", date_breaks = "2 months") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none", # Hide legend since facets are labeled
strip.background = element_rect(fill = "#2c3e50"),
strip.text = element_text(face = "bold", color = "white",
size = 8,
margin = margin(t = 2, r = 2, b = 2, l = 2) ))
# 2. Convert to Plotly for interactivity
ggplotly(p_annual, tooltip = "text") %>%
layout(margin = list(t = 80))master_all_funds <- forecast_Consensus_tbl %>%
pivot_longer(
cols = -c(Tax_abb, Total, Fund),
names_to = "month_label",
values_to = "Revenue"
) %>%
mutate(
# Convert month labels (e.g., "Jul") to Dates for the FY2026 cycle
month_num = month(parse_date_time(month_label, "b")),
year_adj = if_else(month_num >= 7, 2025, 2026),
Date = as.Date(paste(year_adj, month_num, "01", sep = "-"))
) %>%
filter(Date <= as.Date("2025-12-01")) %>% # Only keep H1 Actuals
select(Fund, Tax_abb, Total, Date, Revenue) %>%
group_by(Fund, category = Tax_abb, target = Total) %>%
nest(actuals = c(Date, Revenue)) %>%
ungroup()
# 2. Join and Run Batch Forecast (Processed once for all funds)
master_forecast_tbl <- master_all_funds %>%
inner_join(nested_history) %>%
mutate(
results = pmap(list(target, actuals, history), function(t, a, h) {
distribute_h2_forecast(annual_target = t, actuals_tbl = a, hist_data_tbl = h)
})
)
# 3. Combine Actuals and Forecasts
forecast_DSL_half_year_tbl <- master_forecast_tbl %>%
transmute(
Fund, category,
combined = pmap(list(actuals, results), function(act, res) {
bind_rows(
act %>% mutate(type = "actual"),
res %>% select(Date, Revenue = Revenue) %>% mutate(type = "forecast")
)
})
) %>%
unnest(combined) %>%
mutate(month_label = month(Date, label = TRUE))Plot
library(tidyverse)
library(plotly)
library(tidyquant) # Optional: for professional financial themes
# 1. Prepare the ggplot object
p <- forecast_DSL_half_year_tbl %>%
filter(Fund == "G-Fund") %>%
ggplot(aes(x = Date, y = Revenue, fill = type,
text = paste("Category:", category,
"<br>Month:", month_label, year(Date),
"<br>Status:", str_to_title(type),
"<br>Revenue: $", format(round(Revenue, 0), big.mark = ",")))) +
# Create the bar chart
geom_col(alpha = 0.8, color = "white", size = 0.2) +
# Split by Tax Category with independent Y-axes
facet_wrap(~category, scales = "free_y", ncol = 2) +
# Custom colors: Dark Blue for Actuals, High-Viz Red for Forecast
scale_fill_manual(values = c("actual" = "#2c3e50", "forecast" = "#e74c3c")) +
# Professional theme
theme_tq() +
labs(title = "FY2026 Monthly Revenue Forecast: Half Year",
y = NULL,
x = NULL,
fill = NULL) +
# Format Y-axis with dollar signs
scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
# Rotate x-axis labels for readability
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom",
strip.background = element_rect(fill = "#2c3e50"),
strip.text = element_text(face = "bold", color = "white",
size = 8,
margin = margin(t = 2, r = 2, b = 2, l = 2) ))
# 2. Convert to Plotly
# The 'tooltip' argument refers to the 'text' aesthetic defined in ggplot(aes(...))
ggplotly(p, tooltip = "text") %>%
layout(
legend = list(orientation = "h", x = 0.35, y = -0.1),
margin = list(t = 80) # Adjust top margin for the title
)run_variance_analysis <- function(dsl_data, consensus_data, scenario_label = "Analysis") {
# 1. Pivot targets to long format
consensus_long <- consensus_data %>%
select(-Total) %>%
pivot_longer(cols = -c(Tax_abb,Fund), names_to = "month_label", values_to = "consensus_revenue") %>%
rename(category = Tax_abb)
# 2. Join and calculate variances
comparison_tbl <- dsl_data %>%
mutate(month_label = month(Date, label = TRUE, abbr = FALSE)) %>%
left_join(consensus_long) %>%
mutate(
diff_dollars = Revenue - consensus_revenue,
diff_percent = if_else(consensus_revenue == 0, 0, diff_dollars / abs(consensus_revenue))
) %>%
select(Fund, category, Date, month_label, type, consensus_revenue, DSL_revenue = Revenue, diff_dollars, diff_percent)
# 3. Create Summary
summary_tbl <- comparison_tbl %>%
filter(type == "forecast") %>%
group_by(Fund, category) %>%
summarise(
avg_abs_variance_dollar = mean(abs(diff_dollars)),
avg_abs_variance_pct = mean(abs(diff_percent), na.rm = TRUE),
scenario = scenario_label
)
return(list(details = comparison_tbl, summary = summary_tbl))
}plot_variance_comparison <- function(comp_tbl, fund_name, plot_type = "line") {
# Filter the data for the specific Fund provided in the argument
dat_filtered <- comp_tbl %>%
filter(Fund == fund_name)
# 1. Shared Tooltip Preparation
processed_data <- dat_filtered %>%
mutate(
hover_text = paste0(
"Category: ", category,
"<br>Month: ", month_label, " ", year(Date),
"<br>----------------------------",
"<br>Consensus ($): $", format(round(consensus_revenue, 0), big.mark = ","),
"<br>DSL ($): $", format(round(DSL_revenue, 0), big.mark = ","),
"<br>Diff ($): $", format(round(diff_dollars, 0), big.mark = ","),
"<br>Diff (%): ", scales::percent(diff_percent, accuracy = 0.1)
)
)
# 2. Conditional Plot Generation
if (plot_type == "line") {
# --- PERCENT DIFFERENCE LINE PLOT ---
p <- processed_data %>%
ggplot(aes(x = Date, y = diff_percent, group = category, text = hover_text)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
geom_line(aes(color = category), linewidth = 1, alpha = 0.7) +
geom_point(aes(color = category), size = 2) +
facet_wrap(~category, scales = "free_y", ncol = 2) +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = paste("Variance Comparison (%):", fund_name), y = "Percent Difference (%)")
} else if (plot_type == "bar") {
# --- SIDE-BY-SIDE REVENUE BAR CHART ---
p <- processed_data %>%
pivot_longer(
cols = c(consensus_revenue, DSL_revenue),
names_to = "revenue_source",
values_to = "amount"
) %>%
mutate(revenue_source = recode(revenue_source,
"consensus_revenue" = "Consensus (Actual/Forecast)",
"DSL_revenue" = "DSL")) %>%
ggplot(aes(x = Date, y = amount, fill = revenue_source, text = hover_text)) +
geom_col(position = position_dodge(width = 20), alpha = 0.85) +
facet_wrap(~category, scales = "free_y", ncol = 2) +
scale_fill_manual(values = c("Consensus (Actual/Forecast)" = "#95a5a6",
"DSL" = "#2c3e50")) +
scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
labs(title = paste("Variance Comparison ($):", fund_name),
x = NULL, fill = NULL, y = "Revenue ($)", fill = "Source")
}
# 3. Apply Universal Styling
p <- p +
theme_tq() +
scale_x_date(date_labels = "%b %y", date_breaks = "2 months") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom",
strip.background = element_rect(fill = "#2c3e50"),
strip.text = element_text(face = "bold", color = "white",
size = 8,
margin = margin(t = 2, r = 2, b = 2, l = 2) ))
# 4. Convert to Plotly
ggplotly(p, tooltip = "text") %>%
layout(margin = list(t = 80, b = 80), # Increased bottom margin to make room for legend
legend = list(
orientation = "h", # Sets legend to horizontal
xanchor = "center", # Anchors the legend at its center
x = 0.5, # Centers horizontally
y = -0.2 # Places it below the plot
)
)
}library(RColorBrewer)
plot_summary_variance <- function(summary_tbl, fund_name = NULL) {
# 1. Filter for specific fund if requested
if (!is.null(fund_name)) {
summary_tbl <- summary_tbl %>% filter(Fund == fund_name)
}
# 2. Prepare Tooltip and Factor levels for sorting
processed_data <- summary_tbl %>%
mutate(
category = reorder(category, avg_abs_variance_pct),
hover_text = paste0(
"Fund: ", Fund,
"<br>Category: ", category,
"<br>Avg. Abs. Monthly Variance ($): $", format(round(avg_abs_variance_dollar, 0), big.mark = ","),
"<br>Avg. Abs. Monthly Variance (%): ", scales::percent(avg_abs_variance_pct, accuracy = 0.1)
)
)
# 3. Create the ggplot object
p <- processed_data %>%
ggplot(aes(x = avg_abs_variance_pct, y = category, text = hover_text)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_col(fill = "midnightblue") +
# direction = 1: Red is low, Green is high. Use -1 to reverse if needed.
# scale_fill_distiller(palette = "RdYlGn", direction = 1) +
scale_x_continuous(labels = scales::percent_format()) +
theme_tq() +
labs(
title = paste("Summary Variance Analysis:", fund_name %||% "All Funds"),
subtitle = "Aggregate Percent Difference for the Forecast Period",
x = "Avg. Absolute Monthly Pct Variance (%)",
y = ""
) +
theme(
legend.position = "none", # Removes legend from ggplot
panel.grid.major.y = element_blank()
)
# 4. Convert to Plotly and hide legend/colorbar
ggplotly(p, tooltip = "text") %>%
layout(
showlegend = FALSE, # Removes discrete legend entries
margin = list(t = 80, b = 50)
) %>%
hide_colorbar() # Specifically hides the Spectral color scale bar
}# Analyze Full Year
full_year_analysis <- run_variance_analysis(forecast_DSL_whole_year_tbl, forecast_Consensus_tbl, "Full Year")plot_variance_comparison(full_year_analysis$details, fund_name = "G-Fund", "bar")plot_summary_variance(full_year_analysis$summary, fund_name = "G-Fund")plot_variance_comparison(full_year_analysis$details, fund_name = "T-Fund", "bar")plot_summary_variance(full_year_analysis$summary, fund_name = "T-Fund")plot_variance_comparison(full_year_analysis$details, fund_name = "E-Fund", "bar")plot_summary_variance(full_year_analysis$summary, fund_name = "E-Fund")# Analyze H2 Spread
half_year_analysis <- run_variance_analysis(forecast_DSL_half_year_tbl, forecast_Consensus_tbl, "H2 Spread")plot_variance_comparison(half_year_analysis$details, fund_name = "G-Fund", "bar")plot_summary_variance(half_year_analysis$summary, fund_name = "G-Fund")plot_variance_comparison(half_year_analysis$details, fund_name = "T-Fund", "bar")plot_summary_variance(half_year_analysis$summary, fund_name = "T-Fund")plot_variance_comparison(half_year_analysis$details, fund_name = "E-Fund", "bar")plot_summary_variance(half_year_analysis$summary, fund_name = "E-Fund")