Spread Annual Forecast to Months

To do list

Goal: Distribute the annual consensus forecast across months and perform a variance analysis against the official monthly consensus forecast.

Methodology: Annual Forecast Monthly Distribution Strategy

Data Sources

1 Set dates

Code
# 1. Set start date for spreading over whole year
fy_start_date <- as.Date("2025-07-01")

# 2. Set start date for historical data from which seasonality is calculated
seasonality_start_date <- as.Date("2021-07-01")

2 Import data

Code
library(tidyverse)
library(lubridate)
library(timetk)
library(tidyquant)
library(plotly)
library(readxl)

# Annual forecast
forecast_consensus <- read_excel("../00_Data/Data_raw/Copy of FY 2026_FINAL CashFlows_JANUARY 2026 Consensus Forecast Update_01.27.26_ToTEK.xlsx", range = "A11:N65") %>% # Row76 is CCC
    rename(Tax_abb = `TARGETS SUMMARY`) %>%
    
    # Convert the unit: raw data are in $1,000
    mutate(across(-Tax_abb, ~.x %>% as.numeric() * 1000))

# Monthly historical
historical_data_tbl <- read_rds("../00_Data/Data_wrangled/calm_clean_tbl.rds") %>%
    
    # Remove actual data within the forecast year 
    filter(Date < fy_start_date) %>%
    
    # Select years in calculating seasonality
    filter(Date >= seasonality_start_date)

# Select the distribution functions: See 01_example_qmd
# with or without months ending on weekends
source("../00_Scripts/spread_annual_forecast_to_months")

3 Clean data

3.1 Annual Consensus Forecast

Code
# Define G-Fund monthly target
forecast_GF_consensus <- forecast_consensus %>%
    
    # Select G-Fund
    slice(1:30) %>%
    
    # Create Tax_abb
    mutate(Tax_abb = case_when(
        Tax_abb == "Total Net G-Fund"            ~ "TOTAL_G",
        Tax_abb == "Personal Income Tax"         ~ "PINCOME",
        Tax_abb == "Sales & Use Tax (GROSS)"     ~ "S&U",
        Tax_abb == "Corporate Tax"               ~ "CORP",
        Tax_abb == "G-Fund Meals & Rooms Tax"    ~ "M&R",
        Tax_abb == "Property Transfer Tax [Net]" ~ "PROPT",
        Tax_abb == "Insurance Tax"               ~ "INSUR",
        Tax_abb == "Estate Tax"                  ~ "ESTATE",
        Tax_abb == "Other Taxes/Fees"            ~ "OTHER_G"
    )) %>%
    filter(!Tax_abb %>% str_detect("S&U")) %>%
    mutate(Fund = "G-Fund")
    
# Define T-Fund monthly target
forecast_TF_consensus <- forecast_consensus %>%
    
    # Select G-Fund
    slice(32:35,37,38,47,48) %>%
    
    # Create Tax_abb
    mutate(Tax_abb = case_when(
        Tax_abb == "Total Net T-Fund"             ~ "TOTAL_T",
        Tax_abb == "Gasoline Tax (Excluding TIB)" ~ "GAS",
        Tax_abb == "Diesel Tax (Excluding TIB)"   ~ "DIESEL",
        Tax_abb == "Motor Vehicle P&U Tax"        ~ "MVP&U",
        Tax_abb == "Motor Vehicle Fees"           ~ "MVFEES",
        Tax_abb == "Other Fees"                   ~ "OTHTR",
        Tax_abb == "Gasoline Tax TIB"             ~ "TIBGAS",
        Tax_abb == "Diesel Tax TIB"               ~ "TIBDIESEL"
    )) %>%
    mutate(Fund = "T-Fund") 
    
# Define T-Fund monthly target
forecast_EF_consensus <- forecast_consensus %>%
    
    # Select G-Fund
    slice(40:45) %>%
    
    # Create Tax_abb
    mutate(Tax_abb = case_when(
        Tax_abb == "Total Education Fund [Partial]" ~ "TOTAL_E",
        Tax_abb == "Sales & Use Tax"                ~ "S&U",
        Tax_abb == "Meals & Rooms Tax"              ~ "M&R",
        Tax_abb == "Purchase & Use Tax"             ~ "MVP&U",
        Tax_abb == "Interest, Premiums"             ~ "INT",
        Tax_abb == "Lottery Transfer"               ~ "LOT"
    )) %>%
    mutate(Fund = "E-Fund") 

# Combine all targets into one tibble first
forecast_Consensus_tbl <- bind_rows(forecast_GF_consensus, forecast_TF_consensus, forecast_EF_consensus)

3.2 Monthly Historical Data

Code
# 2. Reshape History into a Nested Tibble
# This replaces: hist_pincome_tbl, hist_corporate_tbl, etc.
Fund_type_T_vec <- c(historical_data_tbl %>% select(GAS:TIBDIESEL) %>% names(),"TOTAL_T")
Fund_type_E_vec <- c("S&U","M&R","INT","LOT","TOTAL_E")

historical_clean_tbl <- historical_data_tbl %>%
    
    # Add TOTAL_G and OTHER_G
    # Note TOTAL_G doesn't exactly match Total Net G-Fund in Jeff's but should closely follow
    mutate(OTHER_G = OTHTAX + OTHREV, 
           TOTAL_G = OTHER_G + PINCOME + CORP + `M&R` + PROPT + INSUR + ESTATE) %>%
    
    # Note TOTAL_G doesn't exactly match Total Net T-Fund in Jeff's but should closely follow
    mutate(TOTAL_T = GAS + DIESEL + `MVP&U` + MVFEES + OTHTR) %>%
    
    mutate(TOTAL_E = `S&U` + `M&R` + `MVP&U` + INT + LOT) %>%
    
    pivot_longer(cols = -Date, names_to = "category", values_to = "Monthly") %>%
    
    mutate(Fund = case_when(
        category %in% Fund_type_T_vec ~ "T-Fund",
        category %in% Fund_type_E_vec ~ "E-Fund",
        TRUE                          ~ "G-Fund"
    )) 

nested_history <- historical_clean_tbl %>%
    
    # 1. Isolate and modify the specific rows you want to duplicate
    bind_rows(
      historical_clean_tbl %>% filter(category == "M&R")   %>% mutate(Fund = "G-Fund"),
      historical_clean_tbl %>% filter(category == "MVP&U") %>% mutate(Fund = "E-Fund")
    ) %>%
    
    group_by(Fund, category) %>%
    nest(history = c(Date, Monthly))

4 Spread

4.1 Whole year

Code
# Run the entire pipeline in one go
forecast_DSL_whole_year_tbl <- forecast_Consensus_tbl %>%
  select(Fund, category = Tax_abb, target = Total) %>%
  inner_join(nested_history) %>%
  group_by(category) %>% # This ensures each fund is processed independently
  mutate(
    fy_start = fy_start_date,
    annual_spread = pmap(list(target, fy_start, history), distribute_annual_forecast)
  ) %>%
  ungroup() %>%
  select(Fund, category, annual_spread) %>%
  unnest(annual_spread) %>%
  mutate(
    month_label = month(Date, label = TRUE),
    year = year(Date)
  )

Plot

Code
# 1. Prepare the ggplot object for the 12-month spread
p_annual <- forecast_DSL_whole_year_tbl %>%
    filter(Fund == "G-Fund") %>%
  mutate(month_label = month(Date, label = TRUE, abbr = FALSE)) %>%
  ggplot(aes(x = Date, y = Revenue, 
             fill = category, # Color by category since it's all one 'type'
             text = paste("Category:", category,
                          "<br>Month:", month_label, year(Date),
                          "<br>Forecast: $", format(round(Revenue, 0), big.mark = ","),
                          "<br>Share of Year:", scales::percent(final_share, accuracy = 0.1)))) +
  # Using geom_col for a clean bar representation
  geom_col(alpha = 0.8, color = "white", size = 0.2) +
  # Facet by category with independent Y-axes
  facet_wrap(~category, scales = "free_y", ncol = 2) +
  # Professional financial color palette
  scale_fill_tq() + 
  theme_tq() +
  labs(title = "FY2026 Monthly Revenue Forecast: Whole Year",
       y = NULL, 
       x = NULL,
       fill = NULL) +
  scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
  scale_x_date(date_labels = "%b %y", date_breaks = "2 months") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none", # Hide legend since facets are labeled
        strip.background = element_rect(fill = "#2c3e50"),
        strip.text = element_text(face = "bold", color = "white", 
                                  size = 8, 
                                  margin = margin(t = 2, r = 2, b = 2, l = 2) ))

# 2. Convert to Plotly for interactivity
ggplotly(p_annual, tooltip = "text") %>%
  layout(margin = list(t = 80))

4.2 Half year

Code
master_all_funds <- forecast_Consensus_tbl %>%
  pivot_longer(
    cols = -c(Tax_abb, Total, Fund), 
    names_to = "month_label", 
    values_to = "Revenue"
  ) %>%
  mutate(
    # Convert month labels (e.g., "Jul") to Dates for the FY2026 cycle
    month_num = month(parse_date_time(month_label, "b")),
    year_adj = if_else(month_num >= 7, 2025, 2026),
    Date = as.Date(paste(year_adj, month_num, "01", sep = "-"))
  ) %>%
  filter(Date <= as.Date("2025-12-01")) %>% # Only keep H1 Actuals
  select(Fund, Tax_abb, Total, Date, Revenue) %>%
  group_by(Fund, category = Tax_abb, target = Total) %>%
  nest(actuals = c(Date, Revenue)) %>%
  ungroup()

# 2. Join and Run Batch Forecast (Processed once for all funds)
master_forecast_tbl <- master_all_funds %>%
  inner_join(nested_history) %>%
  mutate(
    results = pmap(list(target, actuals, history), function(t, a, h) {
      distribute_h2_forecast(annual_target = t, actuals_tbl = a, hist_data_tbl = h)
    })
  )

# 3. Combine Actuals and Forecasts
forecast_DSL_half_year_tbl <- master_forecast_tbl %>%
  transmute(
    Fund, category,
    combined = pmap(list(actuals, results), function(act, res) {
      bind_rows(
        act %>% mutate(type = "actual"),
        res %>% select(Date, Revenue = Revenue) %>% mutate(type = "forecast")
      )
    })
  ) %>%
  unnest(combined) %>%
  mutate(month_label = month(Date, label = TRUE))

Plot

Code
library(tidyverse)
library(plotly)
library(tidyquant) # Optional: for professional financial themes

# 1. Prepare the ggplot object
p <- forecast_DSL_half_year_tbl %>%
    filter(Fund == "G-Fund") %>%
  ggplot(aes(x = Date, y = Revenue, fill = type,
             text = paste("Category:", category,
                          "<br>Month:", month_label, year(Date),
                          "<br>Status:", str_to_title(type),
                          "<br>Revenue: $", format(round(Revenue, 0), big.mark = ",")))) +
  # Create the bar chart
  geom_col(alpha = 0.8, color = "white", size = 0.2) +
  # Split by Tax Category with independent Y-axes
  facet_wrap(~category, scales = "free_y", ncol = 2) +
  # Custom colors: Dark Blue for Actuals, High-Viz Red for Forecast
  scale_fill_manual(values = c("actual" = "#2c3e50", "forecast" = "#e74c3c")) +
  # Professional theme
  theme_tq() +
  labs(title = "FY2026 Monthly Revenue Forecast: Half Year",
       y = NULL, 
       x = NULL,
       fill = NULL) +
  # Format Y-axis with dollar signs
  scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
  # Rotate x-axis labels for readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom",
        strip.background = element_rect(fill = "#2c3e50"),
        strip.text = element_text(face = "bold", color = "white", 
                                  size = 8, 
                                  margin = margin(t = 2, r = 2, b = 2, l = 2) ))

# 2. Convert to Plotly
# The 'tooltip' argument refers to the 'text' aesthetic defined in ggplot(aes(...))
ggplotly(p, tooltip = "text") %>%
  layout(
    legend = list(orientation = "h", x = 0.35, y = -0.1),
    margin = list(t = 80) # Adjust top margin for the title
  )

5 Compare

5.1 Functions

5.1.0.1 comparison

Code
run_variance_analysis <- function(dsl_data, consensus_data, scenario_label = "Analysis") {
  
  # 1. Pivot targets to long format
  consensus_long <- consensus_data %>%
    select(-Total) %>%
    pivot_longer(cols = -c(Tax_abb,Fund), names_to = "month_label", values_to = "consensus_revenue") %>%
    rename(category = Tax_abb)

  # 2. Join and calculate variances
  comparison_tbl <- dsl_data %>%
    mutate(month_label = month(Date, label = TRUE, abbr = FALSE)) %>%
    left_join(consensus_long) %>%
    mutate(
      diff_dollars = Revenue - consensus_revenue,
      diff_percent = if_else(consensus_revenue == 0, 0, diff_dollars / abs(consensus_revenue))
    ) %>%
    select(Fund, category, Date, month_label, type, consensus_revenue, DSL_revenue = Revenue, diff_dollars, diff_percent)

  # 3. Create Summary
  summary_tbl <- comparison_tbl %>%
    filter(type == "forecast") %>%
    group_by(Fund, category) %>%
    summarise(
      avg_abs_variance_dollar     = mean(abs(diff_dollars)),
      avg_abs_variance_pct = mean(abs(diff_percent), na.rm = TRUE),
      scenario       = scenario_label
    )

  return(list(details = comparison_tbl, summary = summary_tbl))
}

5.1.0.2 Plot details

Code
plot_variance_comparison <- function(comp_tbl, fund_name, plot_type = "line") {
  
    # Filter the data for the specific Fund provided in the argument
  dat_filtered <- comp_tbl %>%
    filter(Fund == fund_name)
  
  
  # 1. Shared Tooltip Preparation
  processed_data <- dat_filtered %>%
    mutate(
      hover_text = paste0(
        "Category: ", category,
        "<br>Month: ", month_label, " ", year(Date),
        "<br>----------------------------",
        "<br>Consensus ($): $", format(round(consensus_revenue, 0), big.mark = ","),
        "<br>DSL ($): $", format(round(DSL_revenue, 0), big.mark = ","),
        "<br>Diff ($): $", format(round(diff_dollars, 0), big.mark = ","),
        "<br>Diff (%): ", scales::percent(diff_percent, accuracy = 0.1)
      )
    )

  # 2. Conditional Plot Generation
  if (plot_type == "line") {
    # --- PERCENT DIFFERENCE LINE PLOT ---
    p <- processed_data %>%
      ggplot(aes(x = Date, y = diff_percent, group = category, text = hover_text)) +
      geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
      geom_line(aes(color = category), linewidth = 1, alpha = 0.7) +
      geom_point(aes(color = category), size = 2) +
      facet_wrap(~category, scales = "free_y", ncol = 2) +
      scale_y_continuous(labels = scales::percent_format()) +
      labs(title = paste("Variance Comparison (%):", fund_name), y = "Percent Difference (%)")

  } else if (plot_type == "bar") {
    # --- SIDE-BY-SIDE REVENUE BAR CHART ---
    p <- processed_data %>%
      pivot_longer(
        cols = c(consensus_revenue, DSL_revenue),
        names_to = "revenue_source",
        values_to = "amount"
      ) %>%
      mutate(revenue_source = recode(revenue_source, 
                                     "consensus_revenue" = "Consensus (Actual/Forecast)", 
                                     "DSL_revenue" = "DSL")) %>%
      ggplot(aes(x = Date, y = amount, fill = revenue_source, text = hover_text)) +
      geom_col(position = position_dodge(width = 20), alpha = 0.85) +
      facet_wrap(~category, scales = "free_y", ncol = 2) +
      scale_fill_manual(values = c("Consensus (Actual/Forecast)" = "#95a5a6", 
                                   "DSL" = "#2c3e50")) +
      scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
      labs(title = paste("Variance Comparison ($):", fund_name), 
           x = NULL, fill = NULL, y = "Revenue ($)", fill = "Source")
  }

  # 3. Apply Universal Styling
  p <- p + 
    theme_tq() +
    scale_x_date(date_labels = "%b %y", date_breaks = "2 months") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom",
          strip.background = element_rect(fill = "#2c3e50"),
          strip.text = element_text(face = "bold", color = "white", 
                                    size = 8, 
                                    margin = margin(t = 2, r = 2, b = 2, l = 2) ))

  # 4. Convert to Plotly
  ggplotly(p, tooltip = "text") %>%
    layout(margin = list(t = 80, b = 80), # Increased bottom margin to make room for legend
        legend = list(
          orientation = "h",  # Sets legend to horizontal
          xanchor = "center", # Anchors the legend at its center
          x = 0.5,            # Centers horizontally
          y = -0.2            # Places it below the plot
    )
           )
}

5.1.0.3 Plot summary

Code
library(RColorBrewer)

plot_summary_variance <- function(summary_tbl, fund_name = NULL) {
  
  # 1. Filter for specific fund if requested
  if (!is.null(fund_name)) {
    summary_tbl <- summary_tbl %>% filter(Fund == fund_name)
  }
  
  # 2. Prepare Tooltip and Factor levels for sorting
  processed_data <- summary_tbl %>%
    mutate(
      category = reorder(category, avg_abs_variance_pct),
      hover_text = paste0(
        "Fund: ", Fund,
        "<br>Category: ", category,
        "<br>Avg. Abs. Monthly Variance ($): $", format(round(avg_abs_variance_dollar, 0), big.mark = ","),
        "<br>Avg. Abs. Monthly Variance (%): ", scales::percent(avg_abs_variance_pct, accuracy = 0.1)
      )
    )

  # 3. Create the ggplot object
  p <- processed_data %>%
    ggplot(aes(x = avg_abs_variance_pct, y = category, text = hover_text)) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
    geom_col(fill = "midnightblue") +
    # direction = 1: Red is low, Green is high. Use -1 to reverse if needed.
    # scale_fill_distiller(palette = "RdYlGn", direction = 1) +
    scale_x_continuous(labels = scales::percent_format()) +
    theme_tq() +
    labs(
      title = paste("Summary Variance Analysis:", fund_name %||% "All Funds"),
      subtitle = "Aggregate Percent Difference for the Forecast Period",
      x = "Avg. Absolute Monthly Pct Variance (%)",
      y = ""
    ) +
    theme(
      legend.position = "none",           # Removes legend from ggplot
      panel.grid.major.y = element_blank()
    )

  # 4. Convert to Plotly and hide legend/colorbar
  ggplotly(p, tooltip = "text") %>%
    layout(
      showlegend = FALSE,                 # Removes discrete legend entries
      margin = list(t = 80, b = 50)
    ) %>%
    hide_colorbar()                       # Specifically hides the Spectral color scale bar
}

5.2 Whole year

Code
# Analyze Full Year
full_year_analysis <- run_variance_analysis(forecast_DSL_whole_year_tbl, forecast_Consensus_tbl, "Full Year")

5.2.1 G-Fund

Code
plot_variance_comparison(full_year_analysis$details, fund_name = "G-Fund", "bar")
Code
plot_summary_variance(full_year_analysis$summary, fund_name = "G-Fund")

5.2.2 T-Fund

Code
plot_variance_comparison(full_year_analysis$details, fund_name = "T-Fund", "bar")
Code
plot_summary_variance(full_year_analysis$summary, fund_name = "T-Fund")

5.2.3 E-Fund

Code
plot_variance_comparison(full_year_analysis$details, fund_name = "E-Fund", "bar")
Code
plot_summary_variance(full_year_analysis$summary, fund_name = "E-Fund")

5.3 Half year

Code
# Analyze H2 Spread
half_year_analysis <- run_variance_analysis(forecast_DSL_half_year_tbl, forecast_Consensus_tbl, "H2 Spread")

5.3.1 G-Fund

Code
plot_variance_comparison(half_year_analysis$details, fund_name = "G-Fund", "bar")
Code
plot_summary_variance(half_year_analysis$summary, fund_name = "G-Fund")

5.3.2 T-Fund

Code
plot_variance_comparison(half_year_analysis$details, fund_name = "T-Fund", "bar")
Code
plot_summary_variance(half_year_analysis$summary, fund_name = "T-Fund")

5.3.3 E-Fund

Code
plot_variance_comparison(half_year_analysis$details, fund_name = "E-Fund", "bar")
Code
plot_summary_variance(half_year_analysis$summary, fund_name = "E-Fund")