library(tidyverse)
library(skimr)
library(knitr)
library(kableExtra)
library(glue)
library(scales)
library(patchwork)
cats <- c("food","gas","school_fees","fuel","medication",
"personal_care","family_support","data_airtime",
"investment","project","loan_repayment","others")
# ── Annual dataset — percentage / index form only ─────────────────────────────
df_ann <- tibble(
year = 2021:2025,
# Funding structure (%)
savings_rate = c(26.1, 14.1, 14.3, 13.6, 2.2),
loan_dep = c(35.0, 12.0, 33.3, 7.6, 4.5),
income_share = c(65.0, 88.0, 66.7, 92.4, 95.5),
# Nominal growth indices (2021 = 100)
income_idx = c(100.0, 84.6, 107.7, 102.6, 163.7),
spend_idx = c(100.0, 72.6, 121.7, 84.3, 147.4),
# Category shares of annual total (%)
food_pct = c( 5.6, 10.7, 11.4, 29.8, 16.9),
gas_pct = c( 0.3, 0.7, 0.4, 1.0, 0.1),
school_pct = c( 2.7, 6.3, 4.0, 8.5, 8.1),
fuel_pct = c( 1.5, 2.2, 2.0, 9.2, 6.8),
med_pct = c( 0.3, 0.3, 0.2, 0.7, 0.3),
pcare_pct = c( 4.2, 4.5, 5.6, 9.9, 8.3),
famsup_pct = c( 8.2, 12.9, 7.7, 16.3, 7.4),
data_pct = c( 0.7, 0.6, 0.5, 0.7, 0.5),
invest_pct = c(21.2, 10.8, 0.3, 0.0, 4.6),
project_pct = c(31.8, 23.5, 59.1, 8.4, 28.9),
loanrep_pct = c(14.4, 15.0, 0.5, 3.9, 0.0),
others_pct = c( 8.9, 12.6, 8.2, 11.7, 17.4),
# Nominal category growth indices (2021 = 100)
food_idx = c(100.0, 137.3, 246.3, 444.9, 441.3),
fuel_idx = c(100.0, 106.1, 160.8, 524.6, 676.0),
school_idx = c(100.0, 171.6, 181.6, 269.6, 446.9),
pcare_idx = c(100.0, 78.8, 164.4, 200.5, 292.6),
invest_idx = c(100.0, 37.1, 1.4, 0.0, 31.8),
project_idx = c(100.0, 53.7, 226.1, 22.1, 133.8),
# Food and investment as % of income
food_pct_inc = c( 6.4, 10.4, 14.7, 27.8, 17.3),
invest_pct_inc = c(24.2, 10.6, 0.3, 0.0, 4.7),
# YoY growth rates
income_growth = c(NA, -15.4, 27.3, -4.8, 59.6),
spend_growth = c(NA, -27.4, 67.5,-30.7, 74.8)
) |>
mutate(
year_f = factor(year),
essentials_pct = food_pct + school_pct + fuel_pct + med_pct + gas_pct,
discret_pct = pcare_pct + data_pct
)
# ── Monthly dataset (n = 60) — income as % of annual average ─────────────────
# 2025 actual monthly income indexed to annual average = 100
m2025_idx <- c(56.2, 102.2, 51.2, 96.6, 119.8, 89.3,
90.4, 102.3, 84.4, 74.3, 179.2, 154.0)
# 2024 actual monthly income (10 observed, 2 interpolated), indexed
m2024_idx <- c(89.8, 163.4, 81.8, 67.0, 89.8, 112.5,
97.5, 82.5, 87.3, 68.6, 213.7, 156.5)
# 2021–2023: seasonal pattern (% of annual average)
seasonal_idx <- c(75.0, 75.0, 80.0, 80.0, 85.0, 85.0,
85.0, 85.0, 90.0, 85.0, 90.0, 90.0)
df_monthly <- df_ann |>
select(year, savings_rate, loan_dep, income_idx, spend_idx) |>
mutate(month_list = list(1:12)) |>
unnest(month_list) |>
rename(month = month_list) |>
mutate(
monthly_income_idx = case_when(
year == 2025 ~ m2025_idx[month],
year == 2024 ~ m2024_idx[month],
TRUE ~ seasonal_idx[month]
),
# Monthly spend as fraction of annual (uniform)
monthly_spend_share = 100 / 12, # = 8.33% per month
ym = as.Date(paste(year, month, "01", sep = "-")),
t = (year - 2021)*12 + month
)
cat(glue(
"Annual dataset: n = {nrow(df_ann)} years\\n",
"Monthly dataset: n = {nrow(df_monthly)} months\\n",
" Actual monthly income: 2024–2025 (24 months)\\n",
" Estimated monthly income: 2021–2023 (36 months, seasonal pattern)\\n"
))