Spread Annual Forecast to Months

To do list

Improve the spreading functions:
- Add trend
Add items not part of three major finds
Build a dashboard?

Goal: Distribute the annual consensus forecast across months and perform a variance analysis against the official monthly consensus forecast.

Methodology: Annual Forecast Monthly Distribution Strategy

Seasonality: Utilize the timetk package to extract and decompose historical seasonal components.
Trading Days: Calculate the number of business days per month using lubridate to account for monthly variations in economic activity.
Optimization (Reconciliation): Scale the resulting monthly values so that their sum matches the consensus annual forecast exactly.

Data Sources

Monthly Historical Data: Sourced from CALM F0126, specifically the “CALM Data Entry” tab. *Preprocessing: Exclude actual data from the current forecast year when calculating seasonality. For example, when forecasting FY 2026, remove the actual data from July 2025 through December 2025 to ensure the seasonal baseline remains consistent.
Monthly and Anuual Consensus Forecast: Sourced from the consensus update file: Copy of FY 2026_FINAL CashFlows_JANUARY 2026 Consensus Forecast Update_01.27.26_ToTEK.xlsx.

1 Set dates

Code

# 1. Set start date for spreading over whole year
fy_start_date <- as.Date("2025-07-01")

# 2. Set start date for historical data from which seasonality is calculated
seasonality_start_date <- as.Date("2021-07-01")

2 Import data

Code

library(tidyverse)
library(lubridate)
library(timetk)
library(tidyquant)
library(plotly)
library(readxl)

# Annual forecast
forecast_consensus <- read_excel("../00_Data/Data_raw/Copy of FY 2026_FINAL CashFlows_JANUARY 2026 Consensus Forecast Update_01.27.26_ToTEK.xlsx", range = "A11:N65") %>% # Row76 is CCC
    rename(Tax_abb = `TARGETS SUMMARY`) %>%
    
    # Convert the unit: raw data are in $1,000
    mutate(across(-Tax_abb, ~.x %>% as.numeric() * 1000))

# Monthly historical
historical_data_tbl <- read_rds("../00_Data/Data_wrangled/calm_clean_tbl.rds") %>%
    
    # Remove actual data within the forecast year 
    filter(Date < fy_start_date) %>%
    
    # Select years in calculating seasonality
    filter(Date >= seasonality_start_date)

# Select the distribution functions: See 01_example_qmd
# with or without months ending on weekends
source("../00_Scripts/spread_annual_forecast_to_months")

3 Clean data

3.1 Annual Consensus Forecast

Code

# Define G-Fund monthly target
forecast_GF_consensus <- forecast_consensus %>%
    
    # Select G-Fund
    slice(1:30) %>%
    
    # Create Tax_abb
    mutate(Tax_abb = case_when(
        Tax_abb == "Total Net G-Fund"            ~ "TOTAL_G",
        Tax_abb == "Personal Income Tax"         ~ "PINCOME",
        Tax_abb == "Sales & Use Tax (GROSS)"     ~ "S&U",
        Tax_abb == "Corporate Tax"               ~ "CORP",
        Tax_abb == "G-Fund Meals & Rooms Tax"    ~ "M&R",
        Tax_abb == "Property Transfer Tax [Net]" ~ "PROPT",
        Tax_abb == "Insurance Tax"               ~ "INSUR",
        Tax_abb == "Estate Tax"                  ~ "ESTATE",
        Tax_abb == "Other Taxes/Fees"            ~ "OTHER_G"
    )) %>%
    filter(!Tax_abb %>% str_detect("S&U")) %>%
    mutate(Fund = "G-Fund")
    
# Define T-Fund monthly target
forecast_TF_consensus <- forecast_consensus %>%
    
    # Select G-Fund
    slice(32:35,37,38,47,48) %>%
    
    # Create Tax_abb
    mutate(Tax_abb = case_when(
        Tax_abb == "Total Net T-Fund"             ~ "TOTAL_T",
        Tax_abb == "Gasoline Tax (Excluding TIB)" ~ "GAS",
        Tax_abb == "Diesel Tax (Excluding TIB)"   ~ "DIESEL",
        Tax_abb == "Motor Vehicle P&U Tax"        ~ "MVP&U",
        Tax_abb == "Motor Vehicle Fees"           ~ "MVFEES",
        Tax_abb == "Other Fees"                   ~ "OTHTR",
        Tax_abb == "Gasoline Tax TIB"             ~ "TIBGAS",
        Tax_abb == "Diesel Tax TIB"               ~ "TIBDIESEL"
    )) %>%
    mutate(Fund = "T-Fund") 
    
# Define T-Fund monthly target
forecast_EF_consensus <- forecast_consensus %>%
    
    # Select G-Fund
    slice(40:45) %>%
    
    # Create Tax_abb
    mutate(Tax_abb = case_when(
        Tax_abb == "Total Education Fund [Partial]" ~ "TOTAL_E",
        Tax_abb == "Sales & Use Tax"                ~ "S&U",
        Tax_abb == "Meals & Rooms Tax"              ~ "M&R",
        Tax_abb == "Purchase & Use Tax"             ~ "MVP&U",
        Tax_abb == "Interest, Premiums"             ~ "INT",
        Tax_abb == "Lottery Transfer"               ~ "LOT"
    )) %>%
    mutate(Fund = "E-Fund") 

# Combine all targets into one tibble first
forecast_Consensus_tbl <- bind_rows(forecast_GF_consensus, forecast_TF_consensus, forecast_EF_consensus)

3.2 Monthly Historical Data

Code

# 2. Reshape History into a Nested Tibble
# This replaces: hist_pincome_tbl, hist_corporate_tbl, etc.
Fund_type_T_vec <- c(historical_data_tbl %>% select(GAS:TIBDIESEL) %>% names(),"TOTAL_T")
Fund_type_E_vec <- c("S&U","M&R","INT","LOT","TOTAL_E")

historical_clean_tbl <- historical_data_tbl %>%
    
    # Add TOTAL_G and OTHER_G
    # Note TOTAL_G doesn't exactly match Total Net G-Fund in Jeff's but should closely follow
    mutate(OTHER_G = OTHTAX + OTHREV, 
           TOTAL_G = OTHER_G + PINCOME + CORP + `M&R` + PROPT + INSUR + ESTATE) %>%
    
    # Note TOTAL_G doesn't exactly match Total Net T-Fund in Jeff's but should closely follow
    mutate(TOTAL_T = GAS + DIESEL + `MVP&U` + MVFEES + OTHTR) %>%
    
    mutate(TOTAL_E = `S&U` + `M&R` + `MVP&U` + INT + LOT) %>%
    
    pivot_longer(cols = -Date, names_to = "category", values_to = "Monthly") %>%
    
    mutate(Fund = case_when(
        category %in% Fund_type_T_vec ~ "T-Fund",
        category %in% Fund_type_E_vec ~ "E-Fund",
        TRUE                          ~ "G-Fund"
    )) 

nested_history <- historical_clean_tbl %>%
    
    # 1. Isolate and modify the specific rows you want to duplicate
    bind_rows(
      historical_clean_tbl %>% filter(category == "M&R")   %>% mutate(Fund = "G-Fund"),
      historical_clean_tbl %>% filter(category == "MVP&U") %>% mutate(Fund = "E-Fund")
    ) %>%
    
    group_by(Fund, category) %>%
    nest(history = c(Date, Monthly))

4 Spread

4.1 Whole year

Code

# Run the entire pipeline in one go
forecast_DSL_whole_year_tbl <- forecast_Consensus_tbl %>%
  select(Fund, category = Tax_abb, target = Total) %>%
  inner_join(nested_history) %>%
  group_by(category) %>% # This ensures each fund is processed independently
  mutate(
    fy_start = fy_start_date,
    annual_spread = pmap(list(target, fy_start, history), distribute_annual_forecast)
  ) %>%
  ungroup() %>%
  select(Fund, category, annual_spread) %>%
  unnest(annual_spread) %>%
  mutate(
    month_label = month(Date, label = TRUE),
    year = year(Date)
  )

Plot

Code

# 1. Prepare the ggplot object for the 12-month spread
p_annual <- forecast_DSL_whole_year_tbl %>%
    filter(Fund == "G-Fund") %>%
  mutate(month_label = month(Date, label = TRUE, abbr = FALSE)) %>%
  ggplot(aes(x = Date, y = Revenue, 
             fill = category, # Color by category since it's all one 'type'
             text = paste("Category:", category,
                          "<br>Month:", month_label, year(Date),
                          "<br>Forecast: $", format(round(Revenue, 0), big.mark = ","),
                          "<br>Share of Year:", scales::percent(final_share, accuracy = 0.1)))) +
  # Using geom_col for a clean bar representation
  geom_col(alpha = 0.8, color = "white", size = 0.2) +
  # Facet by category with independent Y-axes
  facet_wrap(~category, scales = "free_y", ncol = 2) +
  # Professional financial color palette
  scale_fill_tq() + 
  theme_tq() +
  labs(title = "FY2026 Monthly Revenue Forecast: Whole Year",
       y = NULL, 
       x = NULL,
       fill = NULL) +
  scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
  scale_x_date(date_labels = "%b %y", date_breaks = "2 months") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none", # Hide legend since facets are labeled
        strip.background = element_rect(fill = "#2c3e50"),
        strip.text = element_text(face = "bold", color = "white", 
                                  size = 8, 
                                  margin = margin(t = 2, r = 2, b = 2, l = 2) ))

# 2. Convert to Plotly for interactivity
ggplotly(p_annual, tooltip = "text") %>%
  layout(margin = list(t = 80))

4.2 Half year

Code

master_all_funds <- forecast_Consensus_tbl %>%
  pivot_longer(
    cols = -c(Tax_abb, Total, Fund), 
    names_to = "month_label", 
    values_to = "Revenue"
  ) %>%
  mutate(
    # Convert month labels (e.g., "Jul") to Dates for the FY2026 cycle
    month_num = month(parse_date_time(month_label, "b")),
    year_adj = if_else(month_num >= 7, 2025, 2026),
    Date = as.Date(paste(year_adj, month_num, "01", sep = "-"))
  ) %>%
  filter(Date <= as.Date("2025-12-01")) %>% # Only keep H1 Actuals
  select(Fund, Tax_abb, Total, Date, Revenue) %>%
  group_by(Fund, category = Tax_abb, target = Total) %>%
  nest(actuals = c(Date, Revenue)) %>%
  ungroup()

# 2. Join and Run Batch Forecast (Processed once for all funds)
master_forecast_tbl <- master_all_funds %>%
  inner_join(nested_history) %>%
  mutate(
    results = pmap(list(target, actuals, history), function(t, a, h) {
      distribute_h2_forecast(annual_target = t, actuals_tbl = a, hist_data_tbl = h)
    })
  )

# 3. Combine Actuals and Forecasts
forecast_DSL_half_year_tbl <- master_forecast_tbl %>%
  transmute(
    Fund, category,
    combined = pmap(list(actuals, results), function(act, res) {
      bind_rows(
        act %>% mutate(type = "actual"),
        res %>% select(Date, Revenue = Revenue) %>% mutate(type = "forecast")
      )
    })
  ) %>%
  unnest(combined) %>%
  mutate(month_label = month(Date, label = TRUE))

Plot

Code

library(tidyverse)
library(plotly)
library(tidyquant) # Optional: for professional financial themes

# 1. Prepare the ggplot object
p <- forecast_DSL_half_year_tbl %>%
    filter(Fund == "G-Fund") %>%
  ggplot(aes(x = Date, y = Revenue, fill = type,
             text = paste("Category:", category,
                          "<br>Month:", month_label, year(Date),
                          "<br>Status:", str_to_title(type),
                          "<br>Revenue: $", format(round(Revenue, 0), big.mark = ",")))) +
  # Create the bar chart
  geom_col(alpha = 0.8, color = "white", size = 0.2) +
  # Split by Tax Category with independent Y-axes
  facet_wrap(~category, scales = "free_y", ncol = 2) +
  # Custom colors: Dark Blue for Actuals, High-Viz Red for Forecast
  scale_fill_manual(values = c("actual" = "#2c3e50", "forecast" = "#e74c3c")) +
  # Professional theme
  theme_tq() +
  labs(title = "FY2026 Monthly Revenue Forecast: Half Year",
       y = NULL, 
       x = NULL,
       fill = NULL) +
  # Format Y-axis with dollar signs
  scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
  # Rotate x-axis labels for readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom",
        strip.background = element_rect(fill = "#2c3e50"),
        strip.text = element_text(face = "bold", color = "white", 
                                  size = 8, 
                                  margin = margin(t = 2, r = 2, b = 2, l = 2) ))

# 2. Convert to Plotly
# The 'tooltip' argument refers to the 'text' aesthetic defined in ggplot(aes(...))
ggplotly(p, tooltip = "text") %>%
  layout(
    legend = list(orientation = "h", x = 0.35, y = -0.1),
    margin = list(t = 80) # Adjust top margin for the title
  )

5 Compare

5.1 Functions

5.1.0.1 comparison

Code

run_variance_analysis <- function(dsl_data, consensus_data, scenario_label = "Analysis") {
  
  # 1. Pivot targets to long format
  consensus_long <- consensus_data %>%
    select(-Total) %>%
    pivot_longer(cols = -c(Tax_abb,Fund), names_to = "month_label", values_to = "consensus_revenue") %>%
    rename(category = Tax_abb)

  # 2. Join and calculate variances
  comparison_tbl <- dsl_data %>%
    mutate(month_label = month(Date, label = TRUE, abbr = FALSE)) %>%
    left_join(consensus_long) %>%
    mutate(
      diff_dollars = Revenue - consensus_revenue,
      diff_percent = if_else(consensus_revenue == 0, 0, diff_dollars / abs(consensus_revenue))
    ) %>%
    select(Fund, category, Date, month_label, type, consensus_revenue, DSL_revenue = Revenue, diff_dollars, diff_percent)

  # 3. Create Summary
  summary_tbl <- comparison_tbl %>%
    filter(type == "forecast") %>%
    group_by(Fund, category) %>%
    summarise(
      avg_abs_variance_dollar     = mean(abs(diff_dollars)),
      avg_abs_variance_pct = mean(abs(diff_percent), na.rm = TRUE),
      scenario       = scenario_label
    )

  return(list(details = comparison_tbl, summary = summary_tbl))
}

5.1.0.2 Plot details

Code

plot_variance_comparison <- function(comp_tbl, fund_name, plot_type = "line") {
  
    # Filter the data for the specific Fund provided in the argument
  dat_filtered <- comp_tbl %>%
    filter(Fund == fund_name)
  
  
  # 1. Shared Tooltip Preparation
  processed_data <- dat_filtered %>%
    mutate(
      hover_text = paste0(
        "Category: ", category,
        "<br>Month: ", month_label, " ", year(Date),
        "<br>----------------------------",
        "<br>Consensus ($): $", format(round(consensus_revenue, 0), big.mark = ","),
        "<br>DSL ($): $", format(round(DSL_revenue, 0), big.mark = ","),
        "<br>Diff ($): $", format(round(diff_dollars, 0), big.mark = ","),
        "<br>Diff (%): ", scales::percent(diff_percent, accuracy = 0.1)
      )
    )

  # 2. Conditional Plot Generation
  if (plot_type == "line") {
    # --- PERCENT DIFFERENCE LINE PLOT ---
    p <- processed_data %>%
      ggplot(aes(x = Date, y = diff_percent, group = category, text = hover_text)) +
      geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
      geom_line(aes(color = category), linewidth = 1, alpha = 0.7) +
      geom_point(aes(color = category), size = 2) +
      facet_wrap(~category, scales = "free_y", ncol = 2) +
      scale_y_continuous(labels = scales::percent_format()) +
      labs(title = paste("Variance Comparison (%):", fund_name), y = "Percent Difference (%)")

  } else if (plot_type == "bar") {
    # --- SIDE-BY-SIDE REVENUE BAR CHART ---
    p <- processed_data %>%
      pivot_longer(
        cols = c(consensus_revenue, DSL_revenue),
        names_to = "revenue_source",
        values_to = "amount"
      ) %>%
      mutate(revenue_source = recode(revenue_source, 
                                     "consensus_revenue" = "Consensus (Actual/Forecast)", 
                                     "DSL_revenue" = "DSL")) %>%
      ggplot(aes(x = Date, y = amount, fill = revenue_source, text = hover_text)) +
      geom_col(position = position_dodge(width = 20), alpha = 0.85) +
      facet_wrap(~category, scales = "free_y", ncol = 2) +
      scale_fill_manual(values = c("Consensus (Actual/Forecast)" = "#95a5a6", 
                                   "DSL" = "#2c3e50")) +
      scale_y_continuous(labels = scales::label_dollar(scale_cut = scales::cut_short_scale())) +
      labs(title = paste("Variance Comparison ($):", fund_name), 
           x = NULL, fill = NULL, y = "Revenue ($)", fill = "Source")
  }

  # 3. Apply Universal Styling
  p <- p + 
    theme_tq() +
    scale_x_date(date_labels = "%b %y", date_breaks = "2 months") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom",
          strip.background = element_rect(fill = "#2c3e50"),
          strip.text = element_text(face = "bold", color = "white", 
                                    size = 8, 
                                    margin = margin(t = 2, r = 2, b = 2, l = 2) ))

  # 4. Convert to Plotly
  ggplotly(p, tooltip = "text") %>%
    layout(margin = list(t = 80, b = 80), # Increased bottom margin to make room for legend
        legend = list(
          orientation = "h",  # Sets legend to horizontal
          xanchor = "center", # Anchors the legend at its center
          x = 0.5,            # Centers horizontally
          y = -0.2            # Places it below the plot
    )
           )
}

5.1.0.3 Plot summary

Code

library(RColorBrewer)

plot_summary_variance <- function(summary_tbl, fund_name = NULL) {
  
  # 1. Filter for specific fund if requested
  if (!is.null(fund_name)) {
    summary_tbl <- summary_tbl %>% filter(Fund == fund_name)
  }
  
  # 2. Prepare Tooltip and Factor levels for sorting
  processed_data <- summary_tbl %>%
    mutate(
      category = reorder(category, avg_abs_variance_pct),
      hover_text = paste0(
        "Fund: ", Fund,
        "<br>Category: ", category,
        "<br>Avg. Abs. Monthly Variance ($): $", format(round(avg_abs_variance_dollar, 0), big.mark = ","),
        "<br>Avg. Abs. Monthly Variance (%): ", scales::percent(avg_abs_variance_pct, accuracy = 0.1)
      )
    )

  # 3. Create the ggplot object
  p <- processed_data %>%
    ggplot(aes(x = avg_abs_variance_pct, y = category, text = hover_text)) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
    geom_col(fill = "midnightblue") +
    # direction = 1: Red is low, Green is high. Use -1 to reverse if needed.
    # scale_fill_distiller(palette = "RdYlGn", direction = 1) +
    scale_x_continuous(labels = scales::percent_format()) +
    theme_tq() +
    labs(
      title = paste("Summary Variance Analysis:", fund_name %||% "All Funds"),
      subtitle = "Aggregate Percent Difference for the Forecast Period",
      x = "Avg. Absolute Monthly Pct Variance (%)",
      y = ""
    ) +
    theme(
      legend.position = "none",           # Removes legend from ggplot
      panel.grid.major.y = element_blank()
    )

  # 4. Convert to Plotly and hide legend/colorbar
  ggplotly(p, tooltip = "text") %>%
    layout(
      showlegend = FALSE,                 # Removes discrete legend entries
      margin = list(t = 80, b = 50)
    ) %>%
    hide_colorbar()                       # Specifically hides the Spectral color scale bar
}

5.2 Whole year

Code

# Analyze Full Year
full_year_analysis <- run_variance_analysis(forecast_DSL_whole_year_tbl, forecast_Consensus_tbl, "Full Year")

5.2.1 G-Fund

Code

plot_variance_comparison(full_year_analysis$details, fund_name = "G-Fund", "bar")

Code

plot_summary_variance(full_year_analysis$summary, fund_name = "G-Fund")

5.2.2 T-Fund

Code

plot_variance_comparison(full_year_analysis$details, fund_name = "T-Fund", "bar")

Code

plot_summary_variance(full_year_analysis$summary, fund_name = "T-Fund")

5.2.3 E-Fund

Code

plot_variance_comparison(full_year_analysis$details, fund_name = "E-Fund", "bar")

Code

plot_summary_variance(full_year_analysis$summary, fund_name = "E-Fund")

5.3 Half year

Code

# Analyze H2 Spread
half_year_analysis <- run_variance_analysis(forecast_DSL_half_year_tbl, forecast_Consensus_tbl, "H2 Spread")

5.3.1 G-Fund

Code

plot_variance_comparison(half_year_analysis$details, fund_name = "G-Fund", "bar")

Code

plot_summary_variance(half_year_analysis$summary, fund_name = "G-Fund")

5.3.2 T-Fund

Code

plot_variance_comparison(half_year_analysis$details, fund_name = "T-Fund", "bar")

Code

plot_summary_variance(half_year_analysis$summary, fund_name = "T-Fund")

5.3.3 E-Fund

Code

plot_variance_comparison(half_year_analysis$details, fund_name = "E-Fund", "bar")

Code

plot_summary_variance(half_year_analysis$summary, fund_name = "E-Fund")