# --- Python session: generate data + helper plotting functions (runs once) ---
reticulate::py_run_string("
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import statsmodels.api as sm
from matplotlib.ticker import FuncFormatter
np.random.seed(42)
lowess = sm.nonparametric.lowess
# ---------- Formatters to match the PDF style ($K / $M) ----------
def money_k(x, pos):
    '''Tick formatter: show a value as whole thousands, e.g. 250000 -> $250K.

    x is the tick value. pos is the tick position that FuncFormatter
    callbacks receive; it is not used here.
    '''
    thousands = int(round(x / 1000.0))
    return '$' + str(thousands) + 'K'
def money_m(x, pos):
    '''Tick formatter: full dollar amount from one million up, $K below that.

    x is the tick value. pos is the tick position that FuncFormatter
    callbacks receive; it is not used here.

    Values of at least 1,000,000 are printed with thousands separators
    (3000000 -> $3,000,000). The previous version rounded to whole millions
    and appended a literal ,000,000 which mislabelled any tick that was not
    an exact multiple of one million (2500000 was shown as $2,000,000).
    Smaller values are printed as whole thousands (250000 -> $250K).
    '''
    if x >= 1_000_000:
        return f'${x:,.0f}'
    return f'${int(round(x/1000.0))}K'
# Tick formatter instances shared by the plots.
# fmt_k prints whole thousands through money_k; fmt_full and money_full both
# print the full dollar amount with thousands separators (e.g. $1,250,000).
fmt_k = FuncFormatter(money_k)
fmt_full = FuncFormatter(lambda value, _pos: '${:,.0f}'.format(value))
money_full = FuncFormatter(lambda value, _pos: '${:,.0f}'.format(value))
# Shared styling constants for the plotting helpers.
STRIP_COLOR = '#2c3e50'    # background colour of each facet title strip
POINT_COLOR = '#1f2d3a'    # scatter marker colour
RIBBON_ALPHA = 0.25        # opacity of the shaded band around a smooth

# Line colour for each Road subcategory, keyed by subcategory name.
ROAD_COLORS = {
    'Elite Road':     '#2c3e50',
    'Endurance Road': '#e41a1c',
    'Triathalon':     '#1abc9c',
    'Cyclocross':     '#c8b37a',
}

# Line colour for each Mountain subcategory, keyed by subcategory name.
MTN_COLORS = {
    'Cross Country Race': '#2c3e50',
    'Trail':              '#e41a1c',
    'Over Mountain':      '#1abc9c',
    'Sport':              '#c8b37a',
    'Fat Bike':           '#7fb3d5',
}
# ---------- Base weekly total sales ----------
# Synthetic weekly revenue: base level x linear trend x seasonality x noise.
# Weekly timestamps anchored on Mondays between the two dates.
dates_w = pd.date_range('2011-01-03', '2015-12-28', freq='W-MON')
# Week index 0..N-1; drives both the seasonal terms and the trend.
t = np.arange(len(dates_w))
# Sum of two sine terms with periods of 52 and 26 weeks.
season = 0.20*np.sin(2*np.pi*t/52) + 0.10*np.sin(2*np.pi*t/26)
# Multiplier that grows linearly by 0.0015 per week.
trend = 1.0 + 0.0015*t
# Multiplicative Gaussian noise (sd 0.18). Drawn from the global NumPy RNG, so
# the values depend on the seed and on every np.random call made before this.
noise = np.random.normal(0, 0.18, size=len(t))
# Build weekly totals then SCALE to match the PDF axis (~0 to 800K; curve around 200-550K)
# Values are floored at 5,000 by the clip.
weekly_revenue = (320_000 * trend * (1 + season) * (1 + noise)).clip(min=5_000)
# One row per week: columns date and revenue.
df_w = pd.DataFrame({'date': dates_w, 'revenue': weekly_revenue})
# ---------- Category/subcategory weekly sales (then scaled to match PDF K-level ranges) ----------
# Subcategory names per category; the share arrays below are index-aligned
# with these lists.
subcats_road = ['Elite Road', 'Endurance Road', 'Triathalon', 'Cyclocross']
subcats_mtn = ['Cross Country Race', 'Trail', 'Over Mountain', 'Sport', 'Fat Bike']
# Fraction of the weekly total assigned to each subcategory before its own
# seasonality and noise are applied.
road_shares = np.array([0.22, 0.16, 0.08, 0.04])
mtn_shares = np.array([0.25, 0.14, 0.10, 0.06, 0.03])
def make_subseries(base, share, phase, noise_sd=0.25):
    '''Derive one subcategory series from a base revenue series.

    base     : array of base values, one per period.
    share    : multiplier giving this subcategory its slice of base.
    phase    : phase shift (radians) of the seasonal terms; the 26-period
               term uses half of it.
    noise_sd : standard deviation of the multiplicative Gaussian noise.

    Returns base * share * (1 + seasonality) * (1 + noise), floored at 1000.
    The noise comes from the global NumPy RNG (one draw of len(base) values).
    '''
    n_periods = len(base)
    step = np.arange(n_periods)
    slow_wave = 0.18*np.sin(2*np.pi*(step/52) + phase)
    fast_wave = 0.06*np.sin(2*np.pi*(step/26) + phase/2)
    seasonal = slow_wave + fast_wave
    jitter = np.random.normal(0, noise_sd, size=n_periods)
    series = base * share * (1 + seasonal) * (1 + jitter)
    return series.clip(min=1000)
# Build the long-format weekly frame: one block of rows per subcategory.
# Road is generated before Mountain, and subcategories in list order, because
# make_subseries draws from the global RNG; the order fixes the random stream.
rows = []
for i, sc in enumerate(subcats_road):
    # Each Road subcategory gets its own seasonal phase (0.7 per index step).
    s = make_subseries(df_w['revenue'].values, road_shares[i],
                       phase=0.7*i, noise_sd=0.20)
    rows.append(pd.DataFrame(dict(date=df_w['date'], category='Road',
                                  subcategory=sc, revenue=s)))
for i, sc in enumerate(subcats_mtn):
    # Mountain phases are offset by 0.2 and advance 0.5 per index step.
    s = make_subseries(df_w['revenue'].values, mtn_shares[i],
                       phase=0.5*i+0.2, noise_sd=0.22)
    rows.append(pd.DataFrame(dict(date=df_w['date'], category='Mountain',
                                  subcategory=sc, revenue=s)))
# Columns: date, category, subcategory, revenue.
df_cat_w = pd.concat(rows, ignore_index=True)
# ---------- Rescale each subcategory to match the PDF weekly y-axis magnitudes ----------
# Target axis top per (category, subcategory); values approximated from the PDF axes.
targets_weekly = {
    # Road weekly
    ('Road', 'Elite Road'): 250_000,
    ('Road', 'Endurance Road'): 150_000,
    ('Road', 'Triathalon'): 80_000,
    ('Road', 'Cyclocross'): 40_000,
    # Mountain weekly
    ('Mountain', 'Cross Country Race'): 200_000,
    ('Mountain', 'Over Mountain'): 100_000,
    ('Mountain', 'Trail'): 100_000,
    ('Mountain', 'Sport'): 20_000,
    ('Mountain', 'Fat Bike'): 40_000,
}
for (cat, sub), target_max in targets_weekly.items():
    mask = (df_cat_w['category'] == cat) & (df_cat_w['subcategory'] == sub)
    cur_max = df_cat_w.loc[mask, 'revenue'].max()
    if not cur_max > 0:
        # Nothing to scale (no rows, or a non-positive peak).
        continue
    # Scale so the largest weekly value lands at 92% of the target.
    df_cat_w.loc[mask, 'revenue'] = df_cat_w.loc[mask, 'revenue'] * (0.92*target_max / cur_max)
# ---------- Helpers ----------
def _smooth_with_ci(x, y, frac=0.25, B=120, seed=123):
    '''LOWESS smooth of y on x with a pointwise 95% bootstrap band.

    Parameters
        x, y : 1-D numeric sequences of equal length.
        frac : LOWESS span, forwarded to lowess.
        B    : number of bootstrap resamples.
        seed : seed of the local NumPy Generator used for resampling.

    Returns
        (yhat, lo, hi): the fitted values and the 2.5 / 97.5 percentile
        bounds of the bootstrap fits, each aligned with the input order of x.

    Every bootstrap fit is evaluated on the ORIGINAL x grid before the
    percentiles are taken. The previous version stored each fit at the
    resampled positions (column j held the fit at x[idx[j]], not at x[j]),
    so the percentiles mixed values from unrelated x locations and the band
    did not follow the curve.
    '''
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(y)
    yhat = lowess(y, x, frac=frac, return_sorted=False)
    rng = np.random.default_rng(seed)
    boot = np.empty((B, n))
    for b in range(B):
        idx = rng.integers(0, n, n)
        xb, yb = x[idx], y[idx]
        # Fitted values come back in the same order as xb.
        fitted = lowess(yb, xb, frac=frac, return_sorted=False)
        # Collapse repeated x values so the interpolation grid is strictly
        # increasing, then read the bootstrap curve at every original x.
        # Points outside the resampled range take the nearest end value.
        ux, first = np.unique(xb, return_index=True)
        boot[b] = np.interp(x, ux, fitted[first])
    lo = np.percentile(boot, 2.5, axis=0)
    hi = np.percentile(boot, 97.5, axis=0)
    return yhat, lo, hi
def _style_axes(ax, yfmt=fmt_full):
    '''Apply the shared look to an Axes: y tick formatter and a light grid.

    ax   : the matplotlib Axes to style.
    yfmt : formatter installed on the y axis (full dollar amounts by default).
    '''
    ax.yaxis.set_major_formatter(yfmt)
    ax.grid(True, alpha=0.35)
# ---------- Total Sales plots (match PDF axes) ----------
def plot_total_sales_quarterly(outfile):
    '''Save a line chart of quarterly total revenue to outfile.

    The weekly totals in df_w are summed per calendar quarter (keyed by the
    quarter start date) and rescaled so the largest quarter equals 6,800,000.
    '''
    df_q = (df_w.assign(date=lambda d: d['date'].dt.to_period('Q').dt.start_time)
                .groupby('date', as_index=False)['revenue'].sum())

    # Rescale quarterly totals so peak ~6.8M (PDF peak near 2015 ~6-7M)
    peak = df_q['revenue'].max()
    df_q['revenue'] = df_q['revenue'] * (6_800_000 / peak)

    fig, ax = plt.subplots(figsize=(7.6, 4.6), dpi=150)
    ax.plot(df_q['date'], df_q['revenue'],
            marker='o', linewidth=2, color='#2b6cff')
    ax.set_title('Total Sales', loc='left')
    ax.set_xlabel('')
    ax.set_ylabel('Revenue (USD)')
    ax.set_ylim(0, 7_000_000)
    _style_axes(ax, yfmt=fmt_full)

    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)
def plot_total_sales_smooth(freq, outfile, frac):
    '''Save a scatter of total revenue with a LOWESS trend and bootstrap band.

    freq    : 'M' sums the weekly totals per calendar month (keyed by month
              start); 'W' uses the weekly totals as they are.
    outfile : path handed to fig.savefig.
    frac    : LOWESS span forwarded to _smooth_with_ci.

    Raises ValueError when freq is neither 'M' nor 'W'.

    Changes from the earlier version, with no change to the output: the
    per-frequency settings live in one place, the no-op column rename on the
    weekly branch is gone, and the band opacity comes from RIBBON_ALPHA like
    the category plots instead of a repeated literal.
    '''
    if freq == 'M':
        df = (df_w.assign(date=lambda d: d['date'].dt.to_period('M').dt.start_time)
                  .groupby('date', as_index=False)['revenue'].sum())
        # scale monthly to top ~2.5M
        peak, ylim, point_s, seed = 2_500_000, 2_600_000, 14, 42
    elif freq == 'W':
        df = df_w.copy()
        # scale weekly to top ~800K
        peak, ylim, point_s, seed = 800_000, 850_000, 10, 7
    else:
        raise ValueError('freq must be M or W')
    df['revenue'] *= (peak / df['revenue'].max())

    # Days since the first observation serve as the numeric x for the smoother.
    x = (df['date'] - df['date'].min()).dt.days.values.astype(float)
    y = df['revenue'].values.astype(float)
    yhat, lo, hi = _smooth_with_ci(x, y, frac=frac, B=120, seed=seed)

    fig, ax = plt.subplots(figsize=(7.6, 4.6), dpi=150)
    ax.scatter(df['date'], y, s=point_s, alpha=0.85, color=POINT_COLOR)
    ax.fill_between(df['date'], lo, hi, alpha=RIBBON_ALPHA, color='grey')
    ax.plot(df['date'], yhat, linewidth=2, color='#2b6cff')
    ax.set_title('Total Sales', loc='left')
    ax.set_ylabel('Revenue (USD)')
    ax.set_xlabel('')
    ax.set_ylim(0, ylim)
    _style_axes(ax, yfmt=fmt_full)
    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)
# ---------- Category plots (use PDF K scales by fixing ylim per facet) ----------
# Upper y-axis limit per subcategory, one table per category and frequency.
# The plotting helpers use these both as the axis top and as the rescale target.
ylim_quarterly_road = {
    'Elite Road':     1_500_000,
    'Endurance Road': 1_000_000,
    'Triathalon':       400_000,
    'Cyclocross':       200_000,
}
ylim_quarterly_mtn = {
    'Cross Country Race': 2_000_000,
    'Trail':                800_000,
    'Over Mountain':        800_000,
    'Sport':                150_000,
    'Fat Bike':              90_000,
}
ylim_monthly_road = {
    'Elite Road':     600_000,
    'Endurance Road': 400_000,
    'Triathalon':     150_000,
    'Cyclocross':      75_000,
}
ylim_weekly_road = {
    'Elite Road':     250_000,
    'Endurance Road': 150_000,
    'Triathalon':      80_000,
    'Cyclocross':      40_000,
}
ylim_monthly_mtn = {
    'Cross Country Race': 600_000,
    'Trail':              300_000,
    'Over Mountain':      200_000,
    'Sport':               60_000,
    'Fat Bike':            75_000,
}
ylim_weekly_mtn = {
    'Cross Country Race': 200_000,
    'Over Mountain':      100_000,
    'Trail':              100_000,
    'Sport':               20_000,
    'Fat Bike':            40_000,
}
def plot_category_quarterly(category, outfile, order, colors, title_left='Sales By Category 2'):
    '''Save a stacked-facet line chart of quarterly revenue per subcategory.

    category   : value matched against the category column of df_cat_w;
                 'Road' selects the Road axis limits, anything else the
                 Mountain limits.
    outfile    : path handed to fig.savefig.
    order      : subcategory names, top facet first.
    colors     : mapping subcategory -> line colour; '#2c3e50' when missing.
    title_left : left-aligned title placed on the top facet.
    '''
    is_road = category == 'Road'
    limits = ylim_quarterly_road if is_road else ylim_quarterly_mtn

    subset = df_cat_w[df_cat_w['category'] == category].copy()
    d = (subset.assign(date=lambda z: z['date'].dt.to_period('Q').dt.start_time)
               .groupby(['subcategory', 'date'], as_index=False)['revenue'].sum())

    # rescale each subcat quarterly to fit its PDF axis top (nice fit)
    for sc in order:
        in_sc = d['subcategory'] == sc
        peak = d.loc[in_sc, 'revenue'].max()
        target = limits[sc]
        if peak > 0:
            d.loc[in_sc, 'revenue'] *= (0.92*target/peak)

    # squeeze=False always yields a 2-D grid, so one facet needs no special case.
    fig, grid = plt.subplots(len(order), 1,
                             figsize=(7.8, 6.8 if is_road else 7.3),
                             dpi=150, sharex=True, squeeze=False)
    axes = grid[:, 0]
    for ax, sc in zip(axes, order):
        dd = d[d['subcategory'] == sc].sort_values('date')
        line_color = colors.get(sc, '#2c3e50')
        ax.plot(dd['date'], dd['revenue'], marker='o', linewidth=2, color=line_color)
        ax.set_ylim(0, limits[sc])
        ax.set_title(sc, fontsize=10, color='white', pad=6, backgroundcolor=STRIP_COLOR)
        _style_axes(ax, yfmt=fmt_k)
    axes[0].set_title(title_left, loc='left', fontsize=12, pad=20)
    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)
def plot_category_smooth(category, freq, outfile, frac, order, colors, title_left='Sales By Category 2'):
    '''Save stacked facets of revenue points with a LOWESS trend and band.

    category   : value matched against the category column of df_cat_w;
                 'Road' selects the Road axis limits, anything else the
                 Mountain limits.
    freq       : 'M' sums weekly rows per calendar month; 'W' keeps weekly rows.
    outfile    : path handed to fig.savefig.
    frac       : LOWESS span forwarded to _smooth_with_ci.
    order      : subcategory names, top facet first.
    colors     : mapping subcategory -> line colour; '#2c3e50' when missing.
    title_left : left-aligned title placed on the top facet.

    Raises ValueError when freq is neither 'M' nor 'W'.
    '''
    is_road = category == 'Road'
    d = df_cat_w[df_cat_w['category'] == category].copy()
    if freq == 'M':
        d = (d.assign(date=lambda z: z['date'].dt.to_period('M').dt.start_time)
              .groupby(['subcategory', 'date'], as_index=False)['revenue'].sum())
        ylims = ylim_monthly_road if is_road else ylim_monthly_mtn
        fig_h, point_s = 7.0, 14
    elif freq == 'W':
        ylims = ylim_weekly_road if is_road else ylim_weekly_mtn
        fig_h, point_s = (7.0 if is_road else 8.0), 10
    else:
        raise ValueError('freq must be M or W')

    # rescale each subcat to fit its PDF axis top
    for sc in order:
        in_sc = d['subcategory'] == sc
        peak = d.loc[in_sc, 'revenue'].max()
        target = ylims[sc]
        if peak > 0:
            d.loc[in_sc, 'revenue'] *= (0.92*target/peak)

    # squeeze=False always yields a 2-D grid, so one facet needs no special case.
    fig, grid = plt.subplots(len(order), 1, figsize=(7.8, fig_h),
                             dpi=150, sharex=True, squeeze=False)
    axes = grid[:, 0]
    for i, (ax, sc) in enumerate(zip(axes, order)):
        dd = d[d['subcategory'] == sc].sort_values('date')
        # Days since the facet's first observation serve as the smoother's x.
        x = (dd['date'] - dd['date'].min()).dt.days.values.astype(float)
        y = dd['revenue'].values.astype(float)
        # A distinct seed per facet keeps each band reproducible.
        yhat, lo, hi = _smooth_with_ci(x, y, frac=frac, B=120, seed=100+i)
        line_color = colors.get(sc, '#2c3e50')
        ax.scatter(dd['date'], y, s=point_s, alpha=0.85, color=POINT_COLOR)
        ax.fill_between(dd['date'], lo, hi, alpha=RIBBON_ALPHA, color='grey')
        ax.plot(dd['date'], yhat, linewidth=2, color=line_color)
        ax.set_ylim(0, ylims[sc])
        ax.set_title(sc, fontsize=10, color='white', pad=6, backgroundcolor=STRIP_COLOR)
        _style_axes(ax, yfmt=fmt_k)
    axes[0].set_title(title_left, loc='left', fontsize=12, pad=20)
    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)
")
# Total Sales Charts
## Quarterly Sales Trends
# Quarterly total-sales line chart, written to 01_total_sales_quarterly.png.
# run_py_plot is defined outside this section; presumably it replaces the
# '__OUTFILE__' token with the real output path before running the Python
# snippet -- confirm against its definition.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_total_sales_quarterly(OUTFILE)
", "01_total_sales_quarterly.png")

## Monthly Sales Trends
# Monthly total-sales scatter with a LOWESS smooth (span 0.25), written to
# 02_total_sales_monthly.png. The '__OUTFILE__' token is presumably filled in
# by run_py_plot (defined elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_total_sales_smooth('M', OUTFILE, frac=0.25)
", "02_total_sales_monthly.png")

## Weekly Sales Trends
# Weekly total-sales scatter with a LOWESS smooth (span 0.15), written to
# 03_total_sales_weekly.png. The '__OUTFILE__' token is presumably filled in
# by run_py_plot (defined elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_total_sales_smooth('W', OUTFILE, frac=0.15)
", "03_total_sales_weekly.png")

# Category Sales Charts
## Road
### Quarterly
# Road subcategories, quarterly line facets, written to 04_road_quarterly.png.
# `order` sets the facet order from top to bottom. The '__OUTFILE__' token is
# presumably filled in by run_py_plot (defined elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_quarterly(
'Road', OUTFILE,
order=['Elite Road','Endurance Road','Triathalon','Cyclocross'],
colors=ROAD_COLORS
)
", "04_road_quarterly.png")

### Monthly
# Road subcategories, monthly points with a LOWESS smooth (span 0.28), written
# to 05_road_monthly.png. The '__OUTFILE__' token is presumably filled in by
# run_py_plot (defined elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
'Road', 'M', OUTFILE, frac=0.28,
order=['Elite Road','Endurance Road','Triathalon','Cyclocross'],
colors=ROAD_COLORS
)
", "05_road_monthly.png")

### Weekly
# Road subcategories, weekly points with a LOWESS smooth (span 0.15), written
# to 06_road_weekly.png. The '__OUTFILE__' token is presumably filled in by
# run_py_plot (defined elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
'Road', 'W', OUTFILE, frac=0.15,
order=['Elite Road','Endurance Road','Triathalon','Cyclocross'],
colors=ROAD_COLORS
)
", "06_road_weekly.png")

## Mountain
### Quarterly
# Mountain subcategories, quarterly line facets, written to
# 07_mountain_quarterly.png. `order` sets the facet order from top to bottom.
# The '__OUTFILE__' token is presumably filled in by run_py_plot (defined
# elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_quarterly(
'Mountain', OUTFILE,
order=['Cross Country Race','Trail','Over Mountain','Sport','Fat Bike'],
colors=MTN_COLORS
)
", "07_mountain_quarterly.png")

### Monthly
# Mountain subcategories, monthly points with a LOWESS smooth (span 0.28),
# written to 08_mountain_monthly.png. The '__OUTFILE__' token is presumably
# filled in by run_py_plot (defined elsewhere) -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
'Mountain', 'M', OUTFILE, frac=0.28,
order=['Cross Country Race','Trail','Over Mountain','Sport','Fat Bike'],
colors=MTN_COLORS
)
", "08_mountain_monthly.png")

### Weekly
# Mountain subcategories, weekly points with a LOWESS smooth (span 0.15),
# written to 09_mountain_weekly.png. The '__OUTFILE__' token is presumably
# filled in by run_py_plot (defined elsewhere) -- confirm.
# NOTE(review): the facet order here puts 'Over Mountain' before 'Trail',
# unlike the quarterly and monthly Mountain calls -- confirm this is intended.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
'Mountain', 'W', OUTFILE, frac=0.15,
order=['Cross Country Race','Over Mountain','Trail','Sport','Fat Bike'],
colors=MTN_COLORS
)
", "09_mountain_weekly.png")
