# --- Python session: generate data + helper plotting functions (runs once) ---
reticulate::py_run_string("
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import statsmodels.api as sm
from matplotlib.ticker import FuncFormatter

# Seed the legacy global NumPy RNG so every np.random.* draw below is reproducible.
np.random.seed(42)
# Short alias for the statsmodels LOWESS smoother used by _smooth_with_ci.
lowess = sm.nonparametric.lowess

# ---------- Formatters to match the PDF style ($K / $M) ----------
def money_k(x, pos):
    '''Format a tick value as whole thousands of dollars, e.g. 250000 -> $250K.

    The (value, position) signature is the one FuncFormatter calls; pos is ignored.
    '''
    thousands = int(round(x / 1000.0))
    return f'${thousands}K'
def money_m(x, pos):
    '''Format a tick value in dollars: full amount from one million up, $K below.

    Values of at least 1,000,000 are shown as comma-grouped whole dollars
    (2000000 -> $2,000,000). The previous version rounded to whole millions and
    appended a literal ,000,000, which turned 1,500,000 into $2,000,000 and
    dropped a separator for values of a billion or more. Output for exact
    multiples of one million is unchanged.

    Smaller values fall back to the rounded-thousands form used by money_k.
    The (value, position) signature is the one FuncFormatter calls; pos is ignored.
    '''
    if x >= 1_000_000:
        return f'${x:,.0f}'
    return f'${int(round(x / 1000.0))}K'
# We'll use a general formatter that shows commas and $:
# Full-dollar tick formatter: leading $, thousands separators, no decimals.
money_full = FuncFormatter(lambda x, pos: f'${x:,.0f}')

# Thousands-of-dollars tick formatter backed by money_k.
fmt_k = FuncFormatter(money_k)
# Same formatter under the name the plotting helpers use. It is aliased rather
# than rebuilt from a second copy of the lambda so the two cannot drift apart.
fmt_full = money_full

# Background colour behind each facet title (passed as backgroundcolor to set_title).
STRIP_COLOR = '#2c3e50'
# Colour of the scatter points in the smoothed charts.
POINT_COLOR = '#1f2d3a'
# Opacity of the grey band drawn between the lower and upper bootstrap bounds.
RIBBON_ALPHA = 0.25

# Line colour per Road subcategory, looked up by subcategory name in the
# category plots; names missing from the map fall back to '#2c3e50' there.
ROAD_COLORS = {
  'Elite Road': '#2c3e50',
  'Endurance Road': '#e41a1c',
  'Triathalon': '#1abc9c',
  'Cyclocross': '#c8b37a'
}
# Line colour per Mountain subcategory, used the same way as ROAD_COLORS.
MTN_COLORS = {
  'Cross Country Race': '#2c3e50',
  'Trail': '#e41a1c',
  'Over Mountain': '#1abc9c',
  'Sport': '#c8b37a',
  'Fat Bike': '#7fb3d5'
}

# ---------- Base weekly total sales ----------
# Weekly dates anchored on Mondays (freq W-MON) between the two endpoints.
dates_w = pd.date_range('2011-01-03', '2015-12-28', freq='W-MON')
# Week index 0..n-1; drives both the seasonal terms and the trend.
t = np.arange(len(dates_w))
# Seasonal component: two sine terms with periods of 52 and 26 weeks.
season = 0.20*np.sin(2*np.pi*t/52) + 0.10*np.sin(2*np.pi*t/26)
# Linear growth factor: starts at 1.0 and rises by 0.0015 per week.
trend  = 1.0 + 0.0015*t
# Gaussian noise (mean 0, sd 0.18) drawn from the seeded global RNG; applied multiplicatively below.
noise  = np.random.normal(0, 0.18, size=len(t))

# Build weekly totals then SCALE to match the PDF axis (~0 to 800K; curve around 200-550K)
# The clip floors every week at 5,000 so a large negative noise draw cannot yield a non-positive revenue.
weekly_revenue = (320_000 * trend * (1 + season) * (1 + noise)).clip(min=5_000)
# Weekly totals as a two-column frame (date, revenue); the source for all Total Sales plots.
df_w = pd.DataFrame({'date': dates_w, 'revenue': weekly_revenue})

# ---------- Category/subcategory weekly sales (then scaled to match PDF K-level ranges) ----------
# Subcategory labels per category. The same strings are used as keys of the
# colour maps, the weekly targets and the per-facet y-limit tables.
subcats_road = ['Elite Road', 'Endurance Road', 'Triathalon', 'Cyclocross']
subcats_mtn  = ['Cross Country Race', 'Trail', 'Over Mountain', 'Sport', 'Fat Bike']

# Fraction of the base weekly revenue assigned to each subcategory,
# positionally aligned with the label lists above.
road_shares = np.array([0.22, 0.16, 0.08, 0.04])
mtn_shares  = np.array([0.25, 0.14, 0.10, 0.06, 0.03])

def make_subseries(base, share, phase, noise_sd=0.25):
    '''Derive one noisy, seasonally shifted subcategory series from a base series.

    base is multiplied in turn by share, by a seasonal factor built from two
    sine terms (periods of 52 and 26 steps, offset by phase and phase/2), and
    by a Gaussian noise factor with standard deviation noise_sd drawn from the
    global NumPy RNG. The result is floored at 1000.

    base is expected to be a NumPy array; the return value has the same length.
    '''
    n_steps = len(base)
    step = np.arange(n_steps)

    yearly = 0.18*np.sin(2*np.pi*(step/52) + phase)
    half_yearly = 0.06*np.sin(2*np.pi*(step/26) + phase/2)
    seasonal_factor = 1 + (yearly + half_yearly)

    # Exactly one draw of n_steps values, so the global RNG stream advances as before.
    noise_factor = 1 + np.random.normal(0, noise_sd, size=n_steps)

    series = base * share * seasonal_factor * noise_factor
    return series.clip(min=1000)

# One long-format DataFrame per subcategory is collected here and stacked below.
# Each make_subseries call consumes draws from the global NumPy RNG, so the
# order of these loops determines the generated numbers.
rows = []
# Road: phase advances by 0.7 per subcategory, noise sd 0.20.
for i, sc in enumerate(subcats_road):
    s = make_subseries(df_w['revenue'].values, road_shares[i], phase=0.7*i, noise_sd=0.20)
    rows.append(pd.DataFrame({'date': df_w['date'], 'category':'Road', 'subcategory':sc, 'revenue':s}))
# Mountain: phase starts at 0.2 and advances by 0.5 per subcategory, noise sd 0.22.
for i, sc in enumerate(subcats_mtn):
    s = make_subseries(df_w['revenue'].values, mtn_shares[i], phase=0.5*i+0.2, noise_sd=0.22)
    rows.append(pd.DataFrame({'date': df_w['date'], 'category':'Mountain', 'subcategory':sc, 'revenue':s}))

# Single weekly frame with columns date, category, subcategory, revenue.
df_cat_w = pd.concat(rows, ignore_index=True)

# ---------- Rescale each subcategory to match the PDF weekly y-axis magnitudes ----------
# Desired weekly axis top for each (category, subcategory) pair.
targets_weekly = {
  # Road weekly (approx from PDF axes)
  ('Road','Elite Road'): 250_000,
  ('Road','Endurance Road'): 150_000,
  ('Road','Triathalon'): 80_000,
  ('Road','Cyclocross'): 40_000,
  # Mountain weekly (approx from PDF axes)
  ('Mountain','Cross Country Race'): 200_000,
  ('Mountain','Over Mountain'): 100_000,
  ('Mountain','Trail'): 100_000,
  ('Mountain','Sport'): 20_000,
  ('Mountain','Fat Bike'): 40_000
}
# Scale each series in place so that its largest week lands at 92% of its target.
for pair, peak_target in targets_weekly.items():
    in_category = df_cat_w['category'].eq(pair[0])
    in_subcategory = df_cat_w['subcategory'].eq(pair[1])
    selected = in_category & in_subcategory
    observed_peak = df_cat_w.loc[selected, 'revenue'].max()
    # Skip pairs with no rows (max is NaN) or a non-positive peak.
    if not observed_peak > 0:
        continue
    df_cat_w.loc[selected, 'revenue'] = (
        df_cat_w.loc[selected, 'revenue'] * (0.92*peak_target / observed_peak)
    )

# ---------- Helpers ----------
def _smooth_with_ci(x, y, frac=0.25, B=120, seed=123):
    '''Return the LOWESS fit of y on x plus a pointwise 95% bootstrap band.

    Parameters
    ----------
    x, y : 1-D array-likes of equal length (converted to float arrays).
    frac : fraction of the points used for each local fit, passed to lowess.
    B    : number of bootstrap resamples.
    seed : seed of the local generator that draws the resamples; the global
           np.random state is not touched.

    Returns
    -------
    (yhat, lo, hi) : three arrays aligned with x. yhat is the fit on the full
    data; lo and hi are the 2.5th and 97.5th percentiles, at each original x,
    of the fits obtained on the resampled data.

    Each bootstrap curve is evaluated on the original x grid before the
    percentiles are taken. Previously every curve was stored in resample
    order, so column j held fits at randomly chosen x positions and the band
    was not aligned with the curve it was drawn around.
    '''
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(y)

    yhat = lowess(y, x, frac=frac, return_sorted=False)

    rng = np.random.default_rng(seed)
    boot = np.empty((B, n))
    for b in range(B):
        # Resample (x, y) pairs with replacement, then sort by x for the fit.
        idx = rng.integers(0, n, n)
        order = np.argsort(x[idx], kind='stable')
        xb = x[idx][order]
        yb = y[idx][order]
        fitted = lowess(yb, xb, frac=frac, return_sorted=False)

        # A resample repeats some x values and omits others. Keep one fitted
        # value per distinct x so the knots are strictly increasing, then
        # interpolate back onto the original grid. Points outside the
        # resampled range take the nearest end value (np.interp default).
        knots, first = np.unique(xb, return_index=True)
        boot[b] = np.interp(x, knots, fitted[first])

    lo = np.percentile(boot, 2.5, axis=0)
    hi = np.percentile(boot, 97.5, axis=0)
    return yhat, lo, hi

def _style_axes(ax, yfmt=fmt_full):
    '''Apply the shared look to one axes: y tick formatter plus a faint grid.

    ax   : the matplotlib axes to style.
    yfmt : formatter installed as the major formatter of the y axis.
    '''
    ax.yaxis.set_major_formatter(yfmt)
    ax.grid(True, alpha=0.35)

# ---------- Total Sales plots (match PDF axes) ----------
def plot_total_sales_quarterly(outfile):
    '''Draw the quarterly Total Sales line chart and save it to outfile.

    Weekly revenue from df_w is summed per calendar quarter, rescaled so the
    largest quarter equals 6.8M, and plotted on a fixed 0 to 7M axis.
    '''
    # Key every week by the start date of its quarter, then total per quarter.
    quarter_start = df_w['date'].dt.to_period('Q').dt.start_time
    df_q = df_w.assign(q=quarter_start).groupby('q', as_index=False)['revenue'].sum()
    df_q = df_q.rename(columns={'q': 'date'})

    # Rescale quarterly totals so peak ~6.8M (PDF peak near 2015 ~6-7M)
    peak = df_q['revenue'].max()
    df_q['revenue'] = df_q['revenue'] * (6_800_000 / peak)

    fig, ax = plt.subplots(figsize=(7.6, 4.6), dpi=150)
    ax.plot(df_q['date'], df_q['revenue'], marker='o', linewidth=2, color='#2b6cff')
    ax.set_title('Total Sales', loc='left')
    ax.set(ylabel='Revenue (USD)', xlabel='', ylim=(0, 7_000_000))
    _style_axes(ax, yfmt=fmt_full)

    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)

def plot_total_sales_smooth(freq, outfile, frac):
    '''Draw Total Sales as points with a LOWESS curve and bootstrap band.

    Parameters
    ----------
    freq    : 'M' to sum the weekly data per calendar month, 'W' to use it as is.
    outfile : path the figure is saved to.
    frac    : LOWESS span, forwarded to _smooth_with_ci.

    Raises
    ------
    ValueError : if freq is neither 'M' nor 'W'.

    The series is rescaled so its peak equals the per-frequency target
    (2.5M monthly, 800K weekly) and plotted on a fixed axis slightly above it.
    '''
    if freq == 'M':
        df = (df_w.assign(date=lambda d: d['date'].dt.to_period('M').dt.start_time)
                .groupby('date', as_index=False)['revenue'].sum())
        # peak target, axis top, marker size, bootstrap seed
        peak_target, ylim, point_size, seed = 2_500_000, 2_600_000, 14, 42
    elif freq == 'W':
        # Work on a copy so the rescale below never touches the shared df_w.
        df = df_w.copy()
        peak_target, ylim, point_size, seed = 800_000, 850_000, 10, 7
    else:
        raise ValueError('freq must be M or W')

    # Same guard as the category plots: skip the rescale rather than divide by
    # a zero or NaN peak.
    peak = df['revenue'].max()
    if peak > 0:
        df['revenue'] *= (peak_target / peak)

    # LOWESS needs a numeric x: days elapsed since the first observation.
    x = (df['date'] - df['date'].min()).dt.days.values.astype(float)
    y = df['revenue'].values.astype(float)

    yhat, lo, hi = _smooth_with_ci(x, y, frac=frac, B=120, seed=seed)

    fig, ax = plt.subplots(figsize=(7.6, 4.6), dpi=150)
    ax.scatter(df['date'], y, s=point_size, alpha=0.85, color=POINT_COLOR)
    # Shared constant (0.25), so this band matches the one in the category plots.
    ax.fill_between(df['date'], lo, hi, alpha=RIBBON_ALPHA, color='grey')
    ax.plot(df['date'], yhat, linewidth=2, color='#2b6cff')
    ax.set_title('Total Sales', loc='left')
    ax.set_ylabel('Revenue (USD)')
    ax.set_xlabel('')
    ax.set_ylim(0, ylim)
    _style_axes(ax, yfmt=fmt_full)
    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)

# ---------- Category plots (use PDF K scales by fixing ylim per facet) ----------
# Per-facet y-axis tops, keyed by subcategory name. The category plot functions
# use each value twice: as the upper axis limit and, times 0.92, as the level
# the peak of the series is rescaled to.

# Quarterly totals, Road facets.
ylim_quarterly_road = {
  'Elite Road': 1_500_000,
  'Endurance Road': 1_000_000,
  'Triathalon': 400_000,
  'Cyclocross': 200_000
}
# Quarterly totals, Mountain facets.
ylim_quarterly_mtn = {
  'Cross Country Race': 2_000_000,
  'Trail': 800_000,
  'Over Mountain': 800_000,
  'Sport': 150_000,
  'Fat Bike': 90_000
}
# Monthly totals, Road facets.
ylim_monthly_road = {
  'Elite Road': 600_000,
  'Endurance Road': 400_000,
  'Triathalon': 150_000,
  'Cyclocross': 75_000
}
# Weekly values, Road facets (same numbers as the Road entries of targets_weekly).
ylim_weekly_road = {
  'Elite Road': 250_000,
  'Endurance Road': 150_000,
  'Triathalon': 80_000,
  'Cyclocross': 40_000
}
# Monthly totals, Mountain facets.
ylim_monthly_mtn = {
  'Cross Country Race': 600_000,
  'Trail': 300_000,
  'Over Mountain': 200_000,
  'Sport': 60_000,
  'Fat Bike': 75_000
}
# Weekly values, Mountain facets (same numbers as the Mountain entries of targets_weekly).
ylim_weekly_mtn = {
  'Cross Country Race': 200_000,
  'Over Mountain': 100_000,
  'Trail': 100_000,
  'Sport': 20_000,
  'Fat Bike': 40_000
}

def plot_category_quarterly(category, outfile, order, colors, title_left='Sales By Category 2'):
    '''Draw one quarterly line chart per subcategory, stacked vertically.

    Parameters
    ----------
    category   : value of the category column to plot; 'Road' selects the Road
                 y-limits and figure height, anything else the Mountain ones.
    outfile    : path the figure is saved to.
    order      : subcategory names, top to bottom; each must be a key of the
                 selected y-limit table.
    colors     : mapping from subcategory name to line colour; missing names
                 fall back to '#2c3e50'.
    title_left : left-aligned title placed on the first facet.
    '''
    is_road = category == 'Road'
    ylims = ylim_quarterly_road if is_road else ylim_quarterly_mtn
    fig_height = 6.8 if is_road else 7.3

    # Sum weekly revenue per subcategory and quarter (keyed by quarter start).
    weekly = df_cat_w[df_cat_w['category'] == category].copy()
    weekly['date'] = weekly['date'].dt.to_period('Q').dt.start_time
    d = weekly.groupby(['subcategory', 'date'], as_index=False)['revenue'].sum()

    # rescale each subcat quarterly to fit its PDF axis top (nice fit)
    for sc in order:
        rows_sc = d['subcategory'] == sc
        peak = d.loc[rows_sc, 'revenue'].max()
        target = ylims[sc]
        if peak > 0:
            d.loc[rows_sc, 'revenue'] *= (0.92*target/peak)

    # squeeze=False always yields a 2-D grid, so a single facet needs no special case.
    fig, grid = plt.subplots(len(order), 1, figsize=(7.8, fig_height),
                             dpi=150, sharex=True, squeeze=False)
    axes = grid[:, 0]

    for ax, sc in zip(axes, order):
        dd = d[d['subcategory'] == sc].sort_values('date')
        line_color = colors.get(sc, '#2c3e50')
        ax.plot(dd['date'], dd['revenue'], marker='o', linewidth=2, color=line_color)
        ax.set_ylim(0, ylims[sc])
        ax.set_title(sc, fontsize=10, color='white', pad=6, backgroundcolor=STRIP_COLOR)
        _style_axes(ax, yfmt=fmt_k)

    # NOTE(review): the pad given here may also move the centred strip title
    # of the first facet -- confirm against the rendered figure.
    axes[0].set_title(title_left, loc='left', fontsize=12, pad=20)
    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)

def plot_category_smooth(category, freq, outfile, frac, order, colors, title_left='Sales By Category 2'):
    '''Draw one smoothed scatter chart per subcategory, stacked vertically.

    Parameters
    ----------
    category   : value of the category column to plot; 'Road' selects the Road
                 y-limits, anything else the Mountain ones.
    freq       : 'M' to sum the weekly data per calendar month, 'W' to use it as is.
    outfile    : path the figure is saved to.
    frac       : LOWESS span, forwarded to _smooth_with_ci.
    order      : subcategory names, top to bottom; each must be a key of the
                 selected y-limit table.
    colors     : mapping from subcategory name to curve colour; missing names
                 fall back to '#2c3e50'.
    title_left : left-aligned title placed on the first facet.

    Raises
    ------
    ValueError : if freq is neither 'M' nor 'W'.
    '''
    is_road = category == 'Road'
    d = df_cat_w[df_cat_w['category'] == category].copy()

    if freq == 'M':
        # Collapse weeks into calendar months, keyed by the month start date.
        d['date'] = d['date'].dt.to_period('M').dt.start_time
        d = d.groupby(['subcategory', 'date'], as_index=False)['revenue'].sum()
        ylims = ylim_monthly_road if is_road else ylim_monthly_mtn
        fig_h, point_s = 7.0, 14
    elif freq == 'W':
        ylims = ylim_weekly_road if is_road else ylim_weekly_mtn
        fig_h = 7.0 if is_road else 8.0
        point_s = 10
    else:
        raise ValueError('freq must be M or W')

    # rescale each subcat to fit its PDF axis top
    for sc in order:
        rows_sc = d['subcategory'] == sc
        peak = d.loc[rows_sc, 'revenue'].max()
        target = ylims[sc]
        if peak > 0:
            d.loc[rows_sc, 'revenue'] *= (0.92*target/peak)

    # squeeze=False always yields a 2-D grid, so a single facet needs no special case.
    fig, grid = plt.subplots(len(order), 1, figsize=(7.8, fig_h), dpi=150,
                             sharex=True, squeeze=False)
    axes = grid[:, 0]

    for i, (ax, sc) in enumerate(zip(axes, order)):
        dd = d[d['subcategory'] == sc].sort_values('date')
        dates = dd['date']
        # LOWESS needs a numeric x: days elapsed since the first observation.
        x = (dates - dates.min()).dt.days.values.astype(float)
        y = dd['revenue'].values.astype(float)
        # The seed depends on the facet position, so reordering changes the bands.
        yhat, lo, hi = _smooth_with_ci(x, y, frac=frac, B=120, seed=100+i)

        ax.scatter(dates, y, s=point_s, alpha=0.85, color=POINT_COLOR)
        ax.fill_between(dates, lo, hi, alpha=RIBBON_ALPHA, color='grey')
        ax.plot(dates, yhat, linewidth=2, color=colors.get(sc, '#2c3e50'))

        ax.set_ylim(0, ylims[sc])
        ax.set_title(sc, fontsize=10, color='white', pad=6, backgroundcolor=STRIP_COLOR)
        _style_axes(ax, yfmt=fmt_k)

    # NOTE(review): the pad given here may also move the centred strip title
    # of the first facet -- confirm against the rendered figure.
    axes[0].set_title(title_left, loc='left', fontsize=12, pad=20)
    fig.tight_layout()
    fig.savefig(outfile)
    plt.close(fig)
")

Total Sales Charts

Quarterly Sales Trends

# Quarterly total-sales chart: the Python snippet calls plot_total_sales_quarterly with OUTFILE.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the
# path of the PNG named in the second argument -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_total_sales_quarterly(OUTFILE)
", "01_total_sales_quarterly.png")

Monthly Sales Trends

# Monthly total-sales chart: smoothed scatter with LOWESS span 0.25.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_total_sales_smooth('M', OUTFILE, frac=0.25)
", "02_total_sales_monthly.png")

Weekly Sales Trends

# Weekly total-sales chart: smoothed scatter with a narrower LOWESS span (0.15) than the monthly chart.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_total_sales_smooth('W', OUTFILE, frac=0.15)
", "03_total_sales_weekly.png")

Category Sales Charts

Road

Quarterly

# Road, quarterly: one line-chart facet per subcategory, in the order listed.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_quarterly(
  'Road', OUTFILE,
  order=['Elite Road','Endurance Road','Triathalon','Cyclocross'],
  colors=ROAD_COLORS
)
", "04_road_quarterly.png")

Monthly

# Road, monthly: smoothed scatter facets with LOWESS span 0.28.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
  'Road', 'M', OUTFILE, frac=0.28,
  order=['Elite Road','Endurance Road','Triathalon','Cyclocross'],
  colors=ROAD_COLORS
)
", "05_road_monthly.png")

Weekly

# Road, weekly: smoothed scatter facets with LOWESS span 0.15.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
  'Road', 'W', OUTFILE, frac=0.15,
  order=['Elite Road','Endurance Road','Triathalon','Cyclocross'],
  colors=ROAD_COLORS
)
", "06_road_weekly.png")

Mountain

Quarterly

# Mountain, quarterly: one line-chart facet per subcategory, in the order listed.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_quarterly(
  'Mountain', OUTFILE,
  order=['Cross Country Race','Trail','Over Mountain','Sport','Fat Bike'],
  colors=MTN_COLORS
)
", "07_mountain_quarterly.png")

Monthly

# Mountain, monthly: smoothed scatter facets with LOWESS span 0.28.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
  'Mountain', 'M', OUTFILE, frac=0.28,
  order=['Cross Country Race','Trail','Over Mountain','Sport','Fat Bike'],
  colors=MTN_COLORS
)
", "08_mountain_monthly.png")

Weekly

# Mountain, weekly: smoothed scatter facets with LOWESS span 0.15.
# Facet order differs from the quarterly and monthly Mountain charts: 'Over Mountain'
# comes before 'Trail' here. NOTE(review): looks deliberate (ylim_weekly_mtn lists them
# the same way) -- confirm.
# run_py_plot is defined elsewhere; presumably it replaces '__OUTFILE__' with the PNG path -- confirm.
run_py_plot("
OUTFILE = '__OUTFILE__'
plot_category_smooth(
  'Mountain', 'W', OUTFILE, frac=0.15,
  order=['Cross Country Race','Over Mountain','Trail','Sport','Fat Bike'],
  colors=MTN_COLORS
)
", "09_mountain_weekly.png")

Final Homework

Total Sales Charts

Quarterly Sales Trends

Monthly Sales Trends

Weekly Sales Trends

Category Sales Charts

Road

Quarterly

Monthly

Weekly

Mountain

Quarterly

Monthly

Weekly