DATOS SIMULADOS

ESIG FIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

# pip install tqdm tqdm_joblib imbalanced-learn matplotlib seaborn
from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")          # backend sin pantalla; cambiar a "TkAgg" si se quiere ventana
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, cross_val_score
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS
# =========================
x = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\ESIG\path_signature_esig_M9.csv')
y = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid','survey_class_mapped','survey_class','survey_class_cat','id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'C:\Users\Gamer\Downloads'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# AUC OVR: decision_function → softmax → roc_auc_score
def _auc_ovr(y_true, y_score):
    proba = softmax(y_score, axis=1)   # convierte scores crudos a distribución [0,1] que suma 1
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

auc_scorer = make_scorer(_auc_ovr, response_method="decision_function")

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

param_dist = {
    "kbest__k":          [128, 256, 384, 512, 768, 1023],
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):   return v.item()
    if isinstance(v, dict):         return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) EVALUACIÓN COMPLETA + FIGURAS
# =========================
def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0


def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    """Guarda figura con CM de train y test — mismo estilo que el resto de tus CMs."""
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4,
                           width_ratios=[1, gap_width, 1, 0.08],
                           wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]);  ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real",     fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%",
                        ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})",
                        ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")
def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    # AUC en CV (5 folds, calcula sobreajuste respecto a validación)
    auc_cv_scores = cross_val_score(
        make_pipe(params), X_train, y_train_enc,
        cv=cv, scoring=auc_scorer, n_jobs=-1
    )
    auc_cv = float(np.mean(auc_cv_scores))
    sd_cv  = float(np.std(auc_cv_scores))

    # Reentrenar sobre todo el train
    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)
    df_tr   = softmax(model.decision_function(X_train), axis=1)
    df_te   = softmax(model.decision_function(X_test),  axis=1)

    auc_train = roc_auc_score(y_train_enc, df_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  df_te, multi_class="ovr", average="macro")
    gap_cv    = round(auc_train - auc_cv,   4)
    gap_rep   = round(auc_train - auc_test, 4)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro",    zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc={acc_test:.4f} | F1w={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={accuracy_score(y_train_enc, yhat_tr):.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":      tag,
        "AUC_CV":   round(auc_cv,    4),
        "SD_CV":    round(sd_cv,     4),
        "Gap_CV":   gap_cv,
        "AUC_rep":  round(auc_test,  4),
        "SD_rep":   round(sd_cv,     4),
        "Gap_rep":  gap_rep,
        "Acc_test": round(acc_test,  4),
        "F1w_test": round(f1w_test,  4),
        "macroF1":  round(mf1_test,  4),
        "balacc":   round(bacc_test, 4),
    }


results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 7) TABLA RESUMEN + LATEX AUTO-GENERADO
# =========================
summary_df = pd.DataFrame(results_summary)
print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]
print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

# Generar snippet LaTeX
latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}" + "\n"
    r"\label{tab:svm_top5_auc}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 8) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 1023) (377, 1023)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {np.int64(0): np.int64(422), np.int64(1): np.int64(189), np.int64(2): np.int64(899)}

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [00:00<?, ?fit/s]

  0%|                                                                                          | 0/400 [00:00<?, ?it/s]

  0%|▏                                                                               | 1/400 [00:27<3:01:40, 27.32s/it]

  0%|▍                                                                               | 2/400 [00:29<1:23:08, 12.53s/it]

  1%|▌                                                                                 | 3/400 [00:29<45:47,  6.92s/it]

  1%|▊                                                                                 | 4/400 [00:30<29:42,  4.50s/it]

  2%|█▍                                                                                | 7/400 [00:30<11:09,  1.70s/it]

  2%|█▋                                                                                | 8/400 [00:42<26:08,  4.00s/it]

  2%|█▊                                                                                | 9/400 [00:44<22:56,  3.52s/it]

  2%|██                                                                               | 10/400 [00:45<18:06,  2.79s/it]

  3%|██▏                                                                              | 11/400 [00:45<14:09,  2.18s/it]

  3%|██▍                                                                              | 12/400 [00:48<15:35,  2.41s/it]

  3%|██▋                                                                              | 13/400 [00:49<13:12,  2.05s/it]

  4%|██▊                                                                              | 14/400 [00:50<09:41,  1.51s/it]

  4%|███▏                                                                             | 16/400 [00:50<06:02,  1.06it/s]

  4%|███▍                                                                             | 17/400 [00:50<04:43,  1.35it/s]

  4%|███▋                                                                             | 18/400 [00:50<03:47,  1.68it/s]

  5%|███▊                                                                             | 19/400 [00:51<03:24,  1.87it/s]

  5%|████                                                                             | 20/400 [01:05<27:28,  4.34s/it]

  5%|████▎                                                                            | 21/400 [01:09<27:40,  4.38s/it]

  6%|████▍                                                                            | 22/400 [01:10<21:35,  3.43s/it]

  6%|████▋                                                                            | 23/400 [01:11<16:48,  2.67s/it]

  6%|████▊                                                                            | 24/400 [01:14<17:06,  2.73s/it]

  6%|█████                                                                            | 25/400 [01:14<12:23,  1.98s/it]

  6%|█████▎                                                                           | 26/400 [01:16<11:38,  1.87s/it]

  7%|█████▍                                                                           | 27/400 [01:16<09:00,  1.45s/it]

  7%|█████▊                                                                           | 29/400 [01:17<05:28,  1.13it/s]

  8%|██████▎                                                                          | 31/400 [01:17<03:24,  1.80it/s]

  8%|██████▍                                                                          | 32/400 [01:28<17:13,  2.81s/it]

  8%|██████▋                                                                          | 33/400 [01:32<19:49,  3.24s/it]

  8%|██████▉                                                                          | 34/400 [01:34<16:40,  2.73s/it]

  9%|███████                                                                          | 35/400 [01:34<13:32,  2.23s/it]

  9%|███████▎                                                                         | 36/400 [01:39<17:05,  2.82s/it]

  9%|███████▍                                                                         | 37/400 [01:39<12:40,  2.09s/it]

 10%|███████▋                                                                         | 38/400 [01:41<12:10,  2.02s/it]

 10%|███████▉                                                                         | 39/400 [01:42<10:07,  1.68s/it]

 10%|████████                                                                         | 40/400 [01:42<07:33,  1.26s/it]

 10%|████████▎                                                                        | 41/400 [01:45<09:58,  1.67s/it]

 10%|████████▌                                                                        | 42/400 [01:45<07:55,  1.33s/it]

 11%|████████▋                                                                        | 43/400 [01:45<05:48,  1.02it/s]

 11%|████████▉                                                                        | 44/400 [01:53<17:55,  3.02s/it]

 11%|█████████                                                                        | 45/400 [01:58<20:44,  3.51s/it]

 12%|█████████▎                                                                       | 46/400 [02:01<20:28,  3.47s/it]

 12%|█████████▌                                                                       | 47/400 [02:02<16:03,  2.73s/it]

 12%|█████████▋                                                                       | 48/400 [02:06<18:10,  3.10s/it]

 12%|█████████▉                                                                       | 49/400 [02:08<15:07,  2.59s/it]

 12%|██████████▏                                                                      | 50/400 [02:09<13:26,  2.31s/it]

 13%|██████████▎                                                                      | 51/400 [02:10<09:54,  1.70s/it]

 13%|██████████▋                                                                      | 53/400 [02:11<07:40,  1.33s/it]

 14%|██████████▉                                                                      | 54/400 [02:12<07:10,  1.24s/it]

 14%|███████████▏                                                                     | 55/400 [02:13<06:51,  1.19s/it]

 14%|███████████▎                                                                     | 56/400 [02:15<06:54,  1.21s/it]

 14%|███████████▌                                                                     | 57/400 [02:19<12:28,  2.18s/it]

 14%|███████████▋                                                                     | 58/400 [02:23<14:54,  2.62s/it]

 15%|███████████▉                                                                     | 59/400 [02:24<12:03,  2.12s/it]

 15%|████████████▏                                                                    | 60/400 [02:29<16:38,  2.94s/it]

 15%|████████████▎                                                                    | 61/400 [02:31<16:12,  2.87s/it]

 16%|████████████▌                                                                    | 62/400 [02:34<16:07,  2.86s/it]

 16%|████████████▊                                                                    | 63/400 [02:35<12:14,  2.18s/it]

 16%|█████████████▏                                                                   | 65/400 [02:37<08:53,  1.59s/it]

 16%|█████████████▎                                                                   | 66/400 [02:37<07:45,  1.39s/it]

 17%|█████████████▌                                                                   | 67/400 [02:39<07:43,  1.39s/it]

 17%|█████████████▊                                                                   | 68/400 [02:39<06:24,  1.16s/it]

 17%|█████████████▉                                                                   | 69/400 [02:45<12:36,  2.28s/it]

 18%|██████████████▏                                                                  | 70/400 [02:48<14:02,  2.55s/it]

 18%|██████████████▍                                                                  | 71/400 [02:48<10:50,  1.98s/it]

 18%|██████████████▌                                                                  | 72/400 [02:53<15:14,  2.79s/it]

 18%|██████████████▊                                                                  | 73/400 [02:56<15:04,  2.77s/it]

 18%|██████████████▉                                                                  | 74/400 [02:59<15:40,  2.89s/it]

 19%|███████████████▏                                                                 | 75/400 [03:00<11:59,  2.21s/it]

 19%|███████████████▍                                                                 | 76/400 [03:00<08:48,  1.63s/it]

 19%|███████████████▌                                                                 | 77/400 [03:02<09:39,  1.79s/it]

 20%|███████████████▊                                                                 | 78/400 [03:03<08:25,  1.57s/it]

 20%|███████████████▉                                                                 | 79/400 [03:05<08:13,  1.54s/it]

 20%|████████████████▏                                                                | 80/400 [03:05<06:16,  1.18s/it]

 20%|████████████████▍                                                                | 81/400 [03:09<10:52,  2.05s/it]

 20%|████████████████▌                                                                | 82/400 [03:12<12:51,  2.43s/it]

 21%|████████████████▊                                                                | 83/400 [03:13<09:51,  1.87s/it]

 21%|█████████████████                                                                | 84/400 [03:17<13:53,  2.64s/it]

 21%|█████████████████▏                                                               | 85/400 [03:20<14:17,  2.72s/it]

 22%|█████████████████▍                                                               | 86/400 [03:27<20:56,  4.00s/it]

 22%|█████████████████▌                                                               | 87/400 [03:29<17:50,  3.42s/it]

 22%|█████████████████▊                                                               | 88/400 [03:31<15:21,  2.95s/it]

 22%|██████████████████                                                               | 89/400 [03:32<11:37,  2.24s/it]

 22%|██████████████████▏                                                              | 90/400 [03:32<08:43,  1.69s/it]

 23%|██████████████████▍                                                              | 91/400 [03:33<07:18,  1.42s/it]

 23%|██████████████████▋                                                              | 92/400 [03:33<05:16,  1.03s/it]

 23%|██████████████████▊                                                              | 93/400 [03:34<05:04,  1.01it/s]

 24%|███████████████████                                                              | 94/400 [03:38<09:15,  1.82s/it]

 24%|███████████████████▏                                                             | 95/400 [03:39<07:54,  1.56s/it]

 24%|███████████████████▍                                                             | 96/400 [03:44<13:57,  2.75s/it]

 24%|███████████████████▋                                                             | 97/400 [03:47<13:50,  2.74s/it]

 24%|███████████████████▊                                                             | 98/400 [03:54<20:04,  3.99s/it]

 25%|████████████████████                                                             | 99/400 [03:56<16:39,  3.32s/it]

 25%|████████████████████                                                            | 100/400 [03:56<11:56,  2.39s/it]

 25%|████████████████████▏                                                           | 101/400 [03:57<09:29,  1.91s/it]

 26%|████████████████████▌                                                           | 103/400 [03:57<05:41,  1.15s/it]

 26%|████████████████████▊                                                           | 104/400 [03:58<05:19,  1.08s/it]

 26%|█████████████████████                                                           | 105/400 [03:59<05:14,  1.07s/it]

 26%|█████████████████████▏                                                          | 106/400 [04:01<06:52,  1.40s/it]

 27%|█████████████████████▍                                                          | 107/400 [04:02<06:16,  1.29s/it]

 27%|█████████████████████▌                                                          | 108/400 [04:08<12:41,  2.61s/it]

 27%|█████████████████████▊                                                          | 109/400 [04:11<12:46,  2.63s/it]

 28%|██████████████████████                                                          | 110/400 [04:18<18:37,  3.85s/it]

 28%|██████████████████████▏                                                         | 111/400 [04:21<17:07,  3.56s/it]

 28%|██████████████████████▍                                                         | 112/400 [04:21<12:22,  2.58s/it]

 28%|██████████████████████▌                                                         | 113/400 [04:21<09:26,  1.97s/it]

 28%|██████████████████████▊                                                         | 114/400 [04:22<07:18,  1.53s/it]

 29%|███████████████████████▏                                                        | 116/400 [04:22<04:35,  1.03it/s]

 29%|███████████████████████▍                                                        | 117/400 [04:23<03:56,  1.20it/s]

 30%|███████████████████████▌                                                        | 118/400 [04:25<05:40,  1.21s/it]

 30%|███████████████████████▊                                                        | 119/400 [04:26<05:49,  1.24s/it]

 30%|████████████████████████                                                        | 120/400 [04:32<11:44,  2.52s/it]

 30%|████████████████████████▏                                                       | 121/400 [04:35<11:35,  2.49s/it]

 30%|████████████████████████▍                                                       | 122/400 [04:42<17:29,  3.77s/it]

 31%|████████████████████████▌                                                       | 123/400 [04:45<16:31,  3.58s/it]

 31%|████████████████████████▊                                                       | 124/400 [04:45<12:08,  2.64s/it]

 31%|█████████████████████████                                                       | 125/400 [04:45<09:02,  1.97s/it]

 32%|█████████████████████████▏                                                      | 126/400 [04:52<15:01,  3.29s/it]

 32%|█████████████████████████▌                                                      | 128/400 [04:52<08:28,  1.87s/it]

 32%|█████████████████████████▊                                                      | 129/400 [04:53<06:58,  1.55s/it]

 32%|██████████████████████████                                                      | 130/400 [04:55<07:11,  1.60s/it]

 33%|██████████████████████████▏                                                     | 131/400 [04:57<08:32,  1.91s/it]

 33%|██████████████████████████▍                                                     | 132/400 [05:01<11:15,  2.52s/it]

 33%|██████████████████████████▌                                                     | 133/400 [05:02<09:04,  2.04s/it]

 34%|██████████████████████████▊                                                     | 134/400 [05:08<14:11,  3.20s/it]

 34%|███████████████████████████                                                     | 135/400 [05:14<17:27,  3.95s/it]

 34%|███████████████████████████▏                                                    | 136/400 [05:25<26:53,  6.11s/it]

 34%|███████████████████████████▍                                                    | 137/400 [05:28<21:55,  5.00s/it]

 34%|███████████████████████████▌                                                    | 138/400 [05:32<21:19,  4.88s/it]

 35%|███████████████████████████▊                                                    | 139/400 [05:34<16:45,  3.85s/it]

 35%|████████████████████████████                                                    | 140/400 [05:35<12:44,  2.94s/it]

 35%|████████████████████████████▏                                                   | 141/400 [05:36<10:49,  2.51s/it]

 36%|████████████████████████████▍                                                   | 142/400 [05:37<08:12,  1.91s/it]

 36%|████████████████████████████▌                                                   | 143/400 [05:39<08:19,  1.94s/it]

 36%|████████████████████████████▊                                                   | 144/400 [05:43<11:25,  2.68s/it]

 36%|█████████████████████████████                                                   | 145/400 [05:45<10:10,  2.39s/it]

 36%|█████████████████████████████▏                                                  | 146/400 [05:45<07:18,  1.73s/it]

 37%|█████████████████████████████▍                                                  | 147/400 [05:50<11:37,  2.76s/it]

 37%|█████████████████████████████▌                                                  | 148/400 [05:51<09:29,  2.26s/it]

 37%|█████████████████████████████▊                                                  | 149/400 [05:52<07:33,  1.81s/it]

 38%|██████████████████████████████                                                  | 150/400 [05:52<05:25,  1.30s/it]

 38%|██████████████████████████████▏                                                 | 151/400 [05:55<06:58,  1.68s/it]

 38%|██████████████████████████████▍                                                 | 152/400 [05:56<06:43,  1.63s/it]

 38%|██████████████████████████████▌                                                 | 153/400 [05:58<07:24,  1.80s/it]

 38%|██████████████████████████████▊                                                 | 154/400 [05:59<05:54,  1.44s/it]

 39%|███████████████████████████████                                                 | 155/400 [06:01<06:34,  1.61s/it]

 39%|███████████████████████████████▏                                                | 156/400 [06:07<12:19,  3.03s/it]

 39%|███████████████████████████████▍                                                | 157/400 [06:10<12:21,  3.05s/it]

 40%|███████████████████████████████▌                                                | 158/400 [06:10<08:45,  2.17s/it]

 40%|███████████████████████████████▊                                                | 159/400 [06:15<12:02,  3.00s/it]

 40%|████████████████████████████████                                                | 160/400 [06:17<10:15,  2.56s/it]

 40%|████████████████████████████████▏                                               | 161/400 [06:20<10:51,  2.73s/it]

 40%|████████████████████████████████▍                                               | 162/400 [06:20<07:48,  1.97s/it]

 41%|████████████████████████████████▌                                               | 163/400 [06:23<08:15,  2.09s/it]

 41%|████████████████████████████████▊                                               | 164/400 [06:25<08:25,  2.14s/it]

 41%|█████████████████████████████████                                               | 165/400 [06:26<06:37,  1.69s/it]

 42%|█████████████████████████████████▏                                              | 166/400 [06:27<06:07,  1.57s/it]

 42%|█████████████████████████████████▍                                              | 167/400 [06:27<04:50,  1.25s/it]

 42%|█████████████████████████████████▌                                              | 168/400 [06:32<08:25,  2.18s/it]

 42%|█████████████████████████████████▊                                              | 169/400 [06:36<11:25,  2.97s/it]

 42%|██████████████████████████████████                                              | 170/400 [06:37<08:24,  2.19s/it]

 43%|██████████████████████████████████▏                                             | 171/400 [06:39<07:59,  2.09s/it]

 43%|██████████████████████████████████▍                                             | 172/400 [06:41<07:40,  2.02s/it]

 43%|██████████████████████████████████▌                                             | 173/400 [06:45<09:53,  2.62s/it]

 44%|██████████████████████████████████▊                                             | 174/400 [06:45<07:11,  1.91s/it]

 44%|███████████████████████████████████                                             | 175/400 [06:47<07:30,  2.00s/it]

 44%|███████████████████████████████████▏                                            | 176/400 [06:49<07:32,  2.02s/it]

 44%|███████████████████████████████████▍                                            | 177/400 [06:50<05:47,  1.56s/it]

 44%|███████████████████████████████████▌                                            | 178/400 [06:51<05:44,  1.55s/it]

 45%|███████████████████████████████████▊                                            | 179/400 [06:52<04:31,  1.23s/it]

 45%|████████████████████████████████████                                            | 180/400 [06:56<07:39,  2.09s/it]

 45%|████████████████████████████████████▏                                           | 181/400 [07:03<12:56,  3.55s/it]

 46%|████████████████████████████████████▍                                           | 182/400 [07:03<09:31,  2.62s/it]

 46%|████████████████████████████████████▌                                           | 183/400 [07:04<07:45,  2.15s/it]

 46%|████████████████████████████████████▊                                           | 184/400 [07:06<07:24,  2.06s/it]

 46%|█████████████████████████████████████                                           | 185/400 [07:09<08:30,  2.37s/it]

 46%|█████████████████████████████████████▏                                          | 186/400 [07:10<06:51,  1.92s/it]

 47%|█████████████████████████████████████▍                                          | 187/400 [07:11<05:53,  1.66s/it]

 47%|█████████████████████████████████████▌                                          | 188/400 [07:13<06:25,  1.82s/it]

 47%|█████████████████████████████████████▊                                          | 189/400 [07:14<04:49,  1.37s/it]

 48%|██████████████████████████████████████                                          | 190/400 [07:15<05:04,  1.45s/it]

 48%|██████████████████████████████████████▏                                         | 191/400 [07:17<05:43,  1.65s/it]

 48%|██████████████████████████████████████▍                                         | 192/400 [07:21<08:13,  2.37s/it]

 48%|██████████████████████████████████████▌                                         | 193/400 [07:29<14:01,  4.06s/it]

 48%|██████████████████████████████████████▊                                         | 194/400 [07:30<10:35,  3.08s/it]

 49%|███████████████████████████████████████                                         | 195/400 [07:30<07:43,  2.26s/it]

 49%|███████████████████████████████████████▏                                        | 196/400 [07:31<05:53,  1.73s/it]

 49%|███████████████████████████████████████▍                                        | 197/400 [07:33<06:27,  1.91s/it]

 50%|███████████████████████████████████████▌                                        | 198/400 [07:34<05:21,  1.59s/it]

 50%|███████████████████████████████████████▊                                        | 199/400 [07:35<04:53,  1.46s/it]

 50%|████████████████████████████████████████                                        | 200/400 [07:38<05:38,  1.69s/it]

 50%|████████████████████████████████████████▏                                       | 201/400 [07:38<04:18,  1.30s/it]

 50%|████████████████████████████████████████▍                                       | 202/400 [07:40<04:41,  1.42s/it]

 51%|████████████████████████████████████████▌                                       | 203/400 [07:42<05:17,  1.61s/it]

 51%|████████████████████████████████████████▊                                       | 204/400 [07:46<07:40,  2.35s/it]

 51%|█████████████████████████████████████████                                       | 205/400 [07:54<13:09,  4.05s/it]

 52%|█████████████████████████████████████████▏                                      | 206/400 [08:00<15:20,  4.75s/it]

 52%|█████████████████████████████████████████▍                                      | 207/400 [08:02<12:45,  3.97s/it]

 52%|█████████████████████████████████████████▌                                      | 208/400 [08:02<08:59,  2.81s/it]

 52%|█████████████████████████████████████████▊                                      | 209/400 [08:04<07:19,  2.30s/it]

 52%|██████████████████████████████████████████                                      | 210/400 [08:05<06:42,  2.12s/it]

 53%|██████████████████████████████████████████▏                                     | 211/400 [08:05<04:48,  1.53s/it]

 53%|██████████████████████████████████████████▍                                     | 212/400 [08:06<04:23,  1.40s/it]

 53%|██████████████████████████████████████████▌                                     | 213/400 [08:07<03:36,  1.16s/it]

 54%|██████████████████████████████████████████▊                                     | 214/400 [08:07<02:55,  1.06it/s]

 54%|███████████████████████████████████████████                                     | 215/400 [08:10<04:05,  1.33s/it]

 54%|███████████████████████████████████████████▏                                    | 216/400 [08:10<03:13,  1.05s/it]

 54%|███████████████████████████████████████████▍                                    | 217/400 [08:15<06:58,  2.29s/it]

 55%|███████████████████████████████████████████▌                                    | 218/400 [08:22<11:21,  3.75s/it]

 55%|███████████████████████████████████████████▊                                    | 219/400 [08:26<10:50,  3.59s/it]

 55%|████████████████████████████████████████████                                    | 220/400 [08:26<07:41,  2.56s/it]

 55%|████████████████████████████████████████████▏                                   | 221/400 [08:28<07:10,  2.40s/it]

 56%|████████████████████████████████████████████▍                                   | 222/400 [08:31<07:45,  2.62s/it]

 56%|████████████████████████████████████████████▊                                   | 224/400 [08:32<05:02,  1.72s/it]

 56%|█████████████████████████████████████████████                                   | 225/400 [08:33<04:17,  1.47s/it]

 56%|█████████████████████████████████████████████▏                                  | 226/400 [08:34<04:05,  1.41s/it]

 57%|█████████████████████████████████████████████▍                                  | 227/400 [08:37<04:56,  1.71s/it]

 57%|█████████████████████████████████████████████▌                                  | 228/400 [08:37<03:39,  1.27s/it]

 57%|█████████████████████████████████████████████▊                                  | 229/400 [08:41<05:43,  2.01s/it]

 57%|██████████████████████████████████████████████                                  | 230/400 [08:48<10:08,  3.58s/it]

 58%|██████████████████████████████████████████████▏                                 | 231/400 [08:52<10:09,  3.61s/it]

 58%|██████████████████████████████████████████████▍                                 | 232/400 [08:52<07:19,  2.61s/it]

 58%|██████████████████████████████████████████████▌                                 | 233/400 [08:53<06:10,  2.22s/it]

 58%|██████████████████████████████████████████████▊                                 | 234/400 [08:57<07:26,  2.69s/it]

 59%|███████████████████████████████████████████████▏                                | 236/400 [08:57<04:05,  1.50s/it]

 59%|███████████████████████████████████████████████▍                                | 237/400 [08:58<03:19,  1.22s/it]

 60%|███████████████████████████████████████████████▌                                | 238/400 [08:59<03:07,  1.16s/it]

 60%|███████████████████████████████████████████████▊                                | 239/400 [09:02<04:19,  1.61s/it]

 60%|████████████████████████████████████████████████▏                               | 241/400 [09:10<07:22,  2.78s/it]

 60%|████████████████████████████████████████████████▍                               | 242/400 [09:19<10:50,  4.12s/it]

 61%|████████████████████████████████████████████████▌                               | 243/400 [09:21<09:28,  3.62s/it]

 61%|████████████████████████████████████████████████▊                               | 244/400 [09:21<06:59,  2.69s/it]

 62%|█████████████████████████████████████████████████▏                              | 246/400 [09:21<04:03,  1.58s/it]

 62%|█████████████████████████████████████████████████▍                              | 247/400 [09:22<03:31,  1.38s/it]

 62%|█████████████████████████████████████████████████▌                              | 248/400 [09:23<03:14,  1.28s/it]

 62%|█████████████████████████████████████████████████▊                              | 249/400 [09:23<02:38,  1.05s/it]

 62%|██████████████████████████████████████████████████                              | 250/400 [09:26<03:44,  1.49s/it]

 63%|██████████████████████████████████████████████████▏                             | 251/400 [09:36<09:45,  3.93s/it]

 63%|██████████████████████████████████████████████████▍                             | 252/400 [09:37<07:56,  3.22s/it]

 63%|██████████████████████████████████████████████████▌                             | 253/400 [09:42<09:08,  3.73s/it]

 64%|██████████████████████████████████████████████████▊                             | 254/400 [09:45<08:10,  3.36s/it]

 64%|███████████████████████████████████████████████████                             | 255/400 [09:45<05:49,  2.41s/it]

 64%|███████████████████████████████████████████████████▏                            | 256/400 [09:45<04:08,  1.73s/it]

 64%|███████████████████████████████████████████████████▍                            | 257/400 [09:46<03:37,  1.52s/it]

 64%|███████████████████████████████████████████████████▌                            | 258/400 [09:47<03:11,  1.35s/it]

 65%|███████████████████████████████████████████████████▊                            | 259/400 [09:56<08:35,  3.66s/it]

 65%|████████████████████████████████████████████████████                            | 260/400 [09:58<06:55,  2.97s/it]

 65%|████████████████████████████████████████████████████▏                           | 261/400 [10:01<07:06,  3.07s/it]

 66%|████████████████████████████████████████████████████▍                           | 262/400 [10:04<06:52,  2.99s/it]

 66%|████████████████████████████████████████████████████▌                           | 263/400 [10:05<05:50,  2.56s/it]

 66%|████████████████████████████████████████████████████▊                           | 264/400 [10:05<04:10,  1.84s/it]

 66%|█████████████████████████████████████████████████████                           | 265/400 [10:06<03:05,  1.37s/it]

 66%|█████████████████████████████████████████████████████▏                          | 266/400 [10:07<02:52,  1.29s/it]

 67%|█████████████████████████████████████████████████████▍                          | 267/400 [10:08<02:38,  1.19s/it]

 67%|█████████████████████████████████████████████████████▌                          | 268/400 [10:09<02:27,  1.12s/it]

 67%|█████████████████████████████████████████████████████▊                          | 269/400 [10:15<05:57,  2.73s/it]

 68%|██████████████████████████████████████████████████████                          | 270/400 [10:18<06:04,  2.80s/it]

 68%|██████████████████████████████████████████████████████▏                         | 271/400 [10:20<05:23,  2.50s/it]

 68%|██████████████████████████████████████████████████████▍                         | 272/400 [10:22<04:55,  2.31s/it]

 68%|██████████████████████████████████████████████████████▌                         | 273/400 [10:25<05:25,  2.56s/it]

 68%|██████████████████████████████████████████████████████▊                         | 274/400 [10:29<05:59,  2.85s/it]

 69%|███████████████████████████████████████████████████████                         | 275/400 [10:29<04:20,  2.09s/it]

 69%|███████████████████████████████████████████████████████▏                        | 276/400 [10:29<03:14,  1.57s/it]

 69%|███████████████████████████████████████████████████████▍                        | 277/400 [10:30<02:42,  1.33s/it]

 70%|███████████████████████████████████████████████████████▌                        | 278/400 [10:31<02:27,  1.21s/it]

 70%|███████████████████████████████████████████████████████▊                        | 279/400 [10:31<01:47,  1.12it/s]

 70%|████████████████████████████████████████████████████████                        | 280/400 [10:32<01:51,  1.07it/s]

 70%|████████████████████████████████████████████████████████▏                       | 281/400 [10:52<13:15,  6.68s/it]

 70%|████████████████████████████████████████████████████████▍                       | 282/400 [10:53<09:26,  4.80s/it]

 71%|████████████████████████████████████████████████████████▌                       | 283/400 [10:53<06:40,  3.43s/it]

 71%|████████████████████████████████████████████████████████▊                       | 284/400 [10:53<04:52,  2.52s/it]

 71%|█████████████████████████████████████████████████████████                       | 285/400 [10:54<03:54,  2.04s/it]

 72%|█████████████████████████████████████████████████████████▏                      | 286/400 [11:01<06:31,  3.44s/it]

 72%|█████████████████████████████████████████████████████████▍                      | 287/400 [11:01<04:48,  2.55s/it]

 72%|█████████████████████████████████████████████████████████▌                      | 288/400 [11:18<12:25,  6.66s/it]

 72%|█████████████████████████████████████████████████████████▊                      | 289/400 [11:18<08:55,  4.83s/it]

 72%|██████████████████████████████████████████████████████████                      | 290/400 [11:20<07:11,  3.92s/it]

 73%|██████████████████████████████████████████████████████████▏                     | 291/400 [11:22<05:57,  3.28s/it]

 73%|██████████████████████████████████████████████████████████▍                     | 292/400 [11:22<04:12,  2.34s/it]

 73%|██████████████████████████████████████████████████████████▌                     | 293/400 [11:23<03:24,  1.91s/it]

 74%|███████████████████████████████████████████████████████████                     | 295/400 [11:38<07:59,  4.57s/it]

 74%|███████████████████████████████████████████████████████████▏                    | 296/400 [11:43<07:55,  4.57s/it]

 74%|███████████████████████████████████████████████████████████▍                    | 297/400 [11:44<06:18,  3.68s/it]

 74%|███████████████████████████████████████████████████████████▌                    | 298/400 [11:47<06:04,  3.58s/it]

 75%|███████████████████████████████████████████████████████████▊                    | 299/400 [11:51<05:59,  3.56s/it]

 75%|████████████████████████████████████████████████████████████▏                   | 301/400 [11:52<03:39,  2.21s/it]

 76%|████████████████████████████████████████████████████████████▍                   | 302/400 [11:53<03:19,  2.04s/it]

 76%|████████████████████████████████████████████████████████████▌                   | 303/400 [11:56<03:26,  2.13s/it]

 76%|████████████████████████████████████████████████████████████▊                   | 304/400 [12:00<04:15,  2.66s/it]

 76%|█████████████████████████████████████████████████████████████                   | 305/400 [12:02<03:59,  2.52s/it]

 76%|█████████████████████████████████████████████████████████████▏                  | 306/400 [12:07<05:11,  3.31s/it]

 77%|█████████████████████████████████████████████████████████████▍                  | 307/400 [12:12<05:45,  3.71s/it]

 77%|█████████████████████████████████████████████████████████████▌                  | 308/400 [12:16<05:40,  3.70s/it]

 77%|█████████████████████████████████████████████████████████████▊                  | 309/400 [12:17<04:35,  3.02s/it]

 78%|██████████████████████████████████████████████████████████████                  | 310/400 [12:17<03:20,  2.23s/it]

 78%|██████████████████████████████████████████████████████████████▏                 | 311/400 [12:18<02:35,  1.74s/it]

 78%|██████████████████████████████████████████████████████████████▍                 | 312/400 [12:18<01:57,  1.33s/it]

 78%|██████████████████████████████████████████████████████████████▌                 | 313/400 [12:19<01:39,  1.14s/it]

 78%|██████████████████████████████████████████████████████████████▊                 | 314/400 [12:20<01:29,  1.04s/it]

 79%|███████████████████████████████████████████████████████████████                 | 315/400 [12:21<01:23,  1.02it/s]

 79%|███████████████████████████████████████████████████████████████▏                | 316/400 [12:21<01:20,  1.04it/s]

 79%|███████████████████████████████████████████████████████████████▍                | 317/400 [12:24<01:51,  1.34s/it]

 80%|███████████████████████████████████████████████████████████████▌                | 318/400 [12:29<03:34,  2.61s/it]

 80%|███████████████████████████████████████████████████████████████▊                | 319/400 [12:35<04:40,  3.47s/it]

 80%|████████████████████████████████████████████████████████████████                | 320/400 [12:39<05:05,  3.82s/it]

 80%|████████████████████████████████████████████████████████████████▏               | 321/400 [12:43<04:45,  3.62s/it]

 80%|████████████████████████████████████████████████████████████████▍               | 322/400 [12:43<03:35,  2.76s/it]

 81%|████████████████████████████████████████████████████████████████▌               | 323/400 [12:43<02:31,  1.97s/it]

 81%|████████████████████████████████████████████████████████████████▊               | 324/400 [12:44<01:51,  1.47s/it]

 81%|█████████████████████████████████████████████████████████████████               | 325/400 [12:44<01:22,  1.10s/it]

 82%|█████████████████████████████████████████████████████████████████▏              | 326/400 [12:44<01:07,  1.09it/s]

 82%|█████████████████████████████████████████████████████████████████▍              | 327/400 [12:45<00:50,  1.46it/s]

 82%|█████████████████████████████████████████████████████████████████▌              | 328/400 [12:45<00:50,  1.42it/s]

 82%|█████████████████████████████████████████████████████████████████▊              | 329/400 [12:48<01:26,  1.22s/it]

 82%|██████████████████████████████████████████████████████████████████              | 330/400 [12:53<02:56,  2.52s/it]

 83%|██████████████████████████████████████████████████████████████████▏             | 331/400 [13:02<05:07,  4.45s/it]

 83%|██████████████████████████████████████████████████████████████████▍             | 332/400 [13:06<04:55,  4.34s/it]

 83%|██████████████████████████████████████████████████████████████████▌             | 333/400 [13:14<06:02,  5.41s/it]

 84%|██████████████████████████████████████████████████████████████████▊             | 334/400 [13:15<04:16,  3.89s/it]

 84%|███████████████████████████████████████████████████████████████████             | 335/400 [13:15<03:02,  2.81s/it]

 84%|███████████████████████████████████████████████████████████████████▏            | 336/400 [13:16<02:35,  2.44s/it]

 84%|███████████████████████████████████████████████████████████████████▍            | 337/400 [13:17<01:49,  1.74s/it]

 84%|███████████████████████████████████████████████████████████████████▌            | 338/400 [13:18<01:48,  1.75s/it]

 85%|████████████████████████████████████████████████████████████████████            | 340/400 [13:19<01:04,  1.07s/it]

 85%|████████████████████████████████████████████████████████████████████▏           | 341/400 [13:19<00:55,  1.06it/s]

 86%|████████████████████████████████████████████████████████████████████▍           | 342/400 [13:20<00:53,  1.08it/s]

 86%|████████████████████████████████████████████████████████████████████▌           | 343/400 [13:23<01:15,  1.33s/it]

 86%|████████████████████████████████████████████████████████████████████▊           | 344/400 [13:27<01:56,  2.08s/it]

 86%|█████████████████████████████████████████████████████████████████████           | 345/400 [13:38<04:21,  4.76s/it]

 86%|█████████████████████████████████████████████████████████████████████▏          | 346/400 [13:38<03:06,  3.45s/it]

 87%|█████████████████████████████████████████████████████████████████████▍          | 347/400 [13:40<02:34,  2.92s/it]

 87%|█████████████████████████████████████████████████████████████████████▌          | 348/400 [13:40<01:53,  2.18s/it]

 88%|██████████████████████████████████████████████████████████████████████          | 350/400 [13:42<01:21,  1.62s/it]

 88%|██████████████████████████████████████████████████████████████████████▏         | 351/400 [13:43<01:03,  1.30s/it]

 88%|██████████████████████████████████████████████████████████████████████▍         | 352/400 [13:43<00:54,  1.14s/it]

 88%|██████████████████████████████████████████████████████████████████████▌         | 353/400 [13:44<00:44,  1.05it/s]

 88%|██████████████████████████████████████████████████████████████████████▊         | 354/400 [13:45<00:43,  1.05it/s]

 89%|███████████████████████████████████████████████████████████████████████         | 355/400 [13:47<00:57,  1.27s/it]

 89%|███████████████████████████████████████████████████████████████████████▏        | 356/400 [13:54<02:06,  2.87s/it]

 89%|███████████████████████████████████████████████████████████████████████▍        | 357/400 [14:07<04:08,  5.79s/it]

 90%|███████████████████████████████████████████████████████████████████████▌        | 358/400 [14:07<02:52,  4.12s/it]

 90%|███████████████████████████████████████████████████████████████████████▊        | 359/400 [14:09<02:29,  3.64s/it]

 90%|████████████████████████████████████████████████████████████████████████        | 360/400 [14:11<02:00,  3.01s/it]

 90%|████████████████████████████████████████████████████████████████████████▏       | 361/400 [14:14<02:05,  3.22s/it]

 90%|████████████████████████████████████████████████████████████████████████▍       | 362/400 [14:17<01:55,  3.03s/it]

 91%|████████████████████████████████████████████████████████████████████████▌       | 363/400 [14:18<01:27,  2.36s/it]

 91%|████████████████████████████████████████████████████████████████████████▊       | 364/400 [14:19<01:08,  1.91s/it]

 91%|█████████████████████████████████████████████████████████████████████████       | 365/400 [14:19<00:54,  1.56s/it]

 92%|█████████████████████████████████████████████████████████████████████████▍      | 367/400 [14:20<00:33,  1.03s/it]

 92%|█████████████████████████████████████████████████████████████████████████▌      | 368/400 [14:20<00:25,  1.25it/s]

 92%|█████████████████████████████████████████████████████████████████████████▊      | 369/400 [14:27<01:14,  2.41s/it]

 92%|██████████████████████████████████████████████████████████████████████████      | 370/400 [14:28<00:55,  1.84s/it]

 93%|██████████████████████████████████████████████████████████████████████████▏     | 371/400 [14:32<01:16,  2.65s/it]

 93%|██████████████████████████████████████████████████████████████████████████▍     | 372/400 [14:35<01:14,  2.65s/it]

 93%|██████████████████████████████████████████████████████████████████████████▌     | 373/400 [14:40<01:31,  3.38s/it]

 94%|██████████████████████████████████████████████████████████████████████████▊     | 374/400 [14:43<01:22,  3.17s/it]

 94%|███████████████████████████████████████████████████████████████████████████     | 375/400 [14:43<01:00,  2.43s/it]

 94%|███████████████████████████████████████████████████████████████████████████▏    | 376/400 [14:44<00:42,  1.76s/it]

 94%|███████████████████████████████████████████████████████████████████████████▍    | 377/400 [14:44<00:31,  1.37s/it]

 94%|███████████████████████████████████████████████████████████████████████████▌    | 378/400 [14:44<00:21,  1.01it/s]

 95%|███████████████████████████████████████████████████████████████████████████▊    | 379/400 [14:45<00:17,  1.22it/s]

 95%|████████████████████████████████████████████████████████████████████████████    | 380/400 [14:46<00:21,  1.09s/it]

 95%|████████████████████████████████████████████████████████████████████████████▏   | 381/400 [14:54<00:57,  3.04s/it]

 96%|████████████████████████████████████████████████████████████████████████████▍   | 382/400 [14:54<00:41,  2.30s/it]

 96%|████████████████████████████████████████████████████████████████████████████▌   | 383/400 [14:58<00:44,  2.63s/it]

 96%|████████████████████████████████████████████████████████████████████████████▊   | 384/400 [15:00<00:41,  2.60s/it]

 96%|█████████████████████████████████████████████████████████████████████████████   | 385/400 [15:06<00:50,  3.40s/it]

 96%|█████████████████████████████████████████████████████████████████████████████▏  | 386/400 [15:09<00:46,  3.29s/it]

 97%|█████████████████████████████████████████████████████████████████████████████▍  | 387/400 [15:10<00:36,  2.80s/it]

 97%|█████████████████████████████████████████████████████████████████████████████▌  | 388/400 [15:11<00:26,  2.22s/it]

 97%|█████████████████████████████████████████████████████████████████████████████▊  | 389/400 [15:13<00:21,  1.94s/it]

 98%|██████████████████████████████████████████████████████████████████████████████  | 390/400 [15:13<00:14,  1.42s/it]

 98%|██████████████████████████████████████████████████████████████████████████████▍ | 392/400 [15:13<00:06,  1.21it/s]

 98%|██████████████████████████████████████████████████████████████████████████████▌ | 393/400 [15:16<00:09,  1.41s/it]

 98%|██████████████████████████████████████████████████████████████████████████████▊ | 394/400 [15:17<00:06,  1.16s/it]

 99%|███████████████████████████████████████████████████████████████████████████████ | 395/400 [15:18<00:06,  1.27s/it]

 99%|███████████████████████████████████████████████████████████████████████████████▏| 396/400 [15:21<00:06,  1.66s/it]

 99%|███████████████████████████████████████████████████████████████████████████████▍| 397/400 [15:23<00:05,  1.94s/it]

100%|███████████████████████████████████████████████████████████████████████████████▌| 398/400 [15:25<00:03,  1.88s/it]

100%|███████████████████████████████████████████████████████████████████████████████▊| 399/400 [15:26<00:01,  1.53s/it]

100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [15:51<00:00,  2.38s/it]


Tiempo búsqueda (s): 951
Best robust CV: 0.3053026861636998
Best params: {'kbest__k': 512, 'svc__C': np.float64(1328.923467281017), 'svc__class_weight': {0: 2.5, 1: 5.0, 2: 1.0}, 'svc__gamma': np.float64(0.00791507439765622)}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                              params
        0.305303        0.069117    {'kbest__k': 512, 'svc__C': 1328.923467281017, 'svc__class_weight': {0: 2.5, 1: 5.0, 2: 1.0}, 'svc__gamma': 0.00791507439765622}
        0.287945        0.084995    {'kbest__k': 512, 'svc__C': 1221.212458656701, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.01543656015027835}
        0.194202        0.113900   {'kbest__k': 768, 'svc__C': 617.774408226366, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.0024424282201158257}
        0.188927        0.156790     {'kbest__k': 384, 'svc__C': 159.0841875063149, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.0445131431564921}
        0.120687        0.206468    {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935042}
        0.118068        0.048082               {'kbest__k': 1023, 'svc__C': 784.8081929215668, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842181e-05}
        0.116988        0.140381   {'kbest__k': 384, 'svc__C': 607.4373269037123, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.007931909113857064}
        0.104806        0.157056              {'kbest__k': 1023, 'svc__C': 0.6576892252744615, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0014851035064938566}
        0.103125        0.149126               {'kbest__k': 1023, 'svc__C': 1.2456144526856272, 'svc__class_weight': 'balanced', 'svc__gamma': 0.001731612063445476}
        0.077383        0.072905 {'kbest__k': 1023, 'svc__C': 571.2267368106508, 'svc__class_weight': {0: 3.0, 1: 6.0, 2: 1.0}, 'svc__gamma': 0.0006403036652671174}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.5137±0.0449 | AUC_train=0.7583 | AUC_test=0.8458
  Gap_CV=0.2446 | Gap_rep=-0.0875
  Acc=0.8117 | F1w=0.8161 | macroF1=0.7913 | balacc=0.8122

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.63      0.68      0.65       422
      Blazar       0.29      0.99      0.45       189
         QSO       1.00      0.46      0.63       899

    accuracy                           0.59      1510
   macro avg       0.64      0.71      0.58      1510
weighted avg       0.81      0.59      0.61      1510

  CM TEST
        AGN  Blazar  QSO
AGN      84       1   16
Blazar    2      24    4
QSO      42       6  198

  Report TEST
              precision    recall  f1-score   support

         AGN       0.66      0.83      0.73       101
      Blazar       0.77      0.80      0.79        30
         QSO       0.91      0.80      0.85       246

    accuracy                           0.81       377
   macro avg       0.78      0.81      0.79       377
weighted avg       0.83      0.81      0.82       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.5087±0.0479 | AUC_train=0.7764 | AUC_test=0.8716
  Gap_CV=0.2677 | Gap_rep=-0.0952
  Acc=0.8488 | F1w=0.8506 | macroF1=0.8284 | balacc=0.8292

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.67      0.70      0.69       422
      Blazar       0.31      0.99      0.47       189
         QSO       1.00      0.51      0.67       899

    accuracy                           0.62      1510
   macro avg       0.66      0.73      0.61      1510
weighted avg       0.82      0.62      0.65      1510

  CM TEST
        AGN  Blazar  QSO
AGN      83       0   18
Blazar    2      24    4
QSO      29       4  213

  Report TEST
              precision    recall  f1-score   support

         AGN       0.73      0.82      0.77       101
      Blazar       0.86      0.80      0.83        30
         QSO       0.91      0.87      0.89       246

    accuracy                           0.85       377
   macro avg       0.83      0.83      0.83       377
weighted avg       0.85      0.85      0.85       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.5155±0.0455 | AUC_train=0.7289 | AUC_test=0.7965
  Gap_CV=0.2134 | Gap_rep=-0.0677
  Acc=0.7109 | F1w=0.7206 | macroF1=0.6942 | balacc=0.7568

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.56      0.60      0.58       422
      Blazar       0.26      0.99      0.41       189
         QSO       0.98      0.35      0.52       899

    accuracy                           0.50      1510
   macro avg       0.60      0.65      0.50      1510
weighted avg       0.77      0.50      0.52      1510

  CM TEST
        AGN  Blazar  QSO
AGN      82       3   16
Blazar    3      24    3
QSO      71      13  162

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.81      0.64       101
      Blazar       0.60      0.80      0.69        30
         QSO       0.90      0.66      0.76       246

    accuracy                           0.71       377
   macro avg       0.67      0.76      0.69       377
weighted avg       0.77      0.71      0.72       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM3.png

===============================================================================================
Evaluando SVM4...

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [19:00<?, ?fit/s]

  AUC_CV=0.5120±0.0521 | AUC_train=0.7879 | AUC_test=0.8786
  Gap_CV=0.2758 | Gap_rep=-0.0908
  Acc=0.7984 | F1w=0.8025 | macroF1=0.7666 | balacc=0.8015

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.68      0.63      0.65       422
      Blazar       0.26      0.99      0.42       189
         QSO       1.00      0.45      0.63       899

    accuracy                           0.57      1510
   macro avg       0.65      0.69      0.57      1510
weighted avg       0.82      0.57      0.61      1510

  CM TEST
        AGN  Blazar  QSO
AGN      82       1   18
Blazar    1      24    5
QSO      40      11  195

  Report TEST
              precision    recall  f1-score   support

         AGN       0.67      0.81      0.73       101
      Blazar       0.67      0.80      0.73        30
         QSO       0.89      0.79      0.84       246

    accuracy                           0.80       377
   macro avg       0.74      0.80      0.77       377
weighted avg       0.82      0.80      0.80       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.5204±0.0566 | AUC_train=0.7942 | AUC_test=0.9101
  Gap_CV=0.2738 | Gap_rep=-0.1159
  Acc=0.8462 | F1w=0.8506 | macroF1=0.8059 | balacc=0.8688

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.75      0.59      0.66       422
      Blazar       0.25      1.00      0.41       189
         QSO       1.00      0.48      0.65       899

    accuracy                           0.58      1510
   macro avg       0.67      0.69      0.57      1510
weighted avg       0.84      0.58      0.62      1510

  CM TEST
        AGN  Blazar  QSO
AGN      84       5   12
Blazar    1      28    1
QSO      26      13  207

  Report TEST
              precision    recall  f1-score   support

         AGN       0.76      0.83      0.79       101
      Blazar       0.61      0.93      0.74        30
         QSO       0.94      0.84      0.89       246

    accuracy                           0.85       377
   macro avg       0.77      0.87      0.81       377
weighted avg       0.87      0.85      0.85       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_test  F1w_test  macroF1  balacc
SVM1  0.5137 0.0449  0.2446   0.8458  0.0449  -0.0875    0.8117    0.8161   0.7913  0.8122
SVM2  0.5087 0.0479  0.2677   0.8716  0.0479  -0.0952    0.8488    0.8506   0.8284  0.8292
SVM3  0.5155 0.0455  0.2134   0.7965  0.0455  -0.0677    0.7109    0.7206   0.6942  0.7568
SVM4  0.5120 0.0521  0.2758   0.8786  0.0521  -0.0908    0.7984    0.8025   0.7666  0.8015
SVM5  0.5204 0.0566  0.2738   0.9101  0.0566  -0.1159    0.8462    0.8506   0.8059  0.8688

>>> Mejor modelo: SVM5  AUC_test=0.9101

Tabla LaTeX guardada en: C:\Users\Gamer\Downloads\tabla_svm_top5.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.5137 & 0.0449 & 0.2446 & 0.8458 & 0.0449 & -0.0875 & 0.8117 & 0.8161 \\
        SVM$_{2}$ & 0.5087 & 0.0479 & 0.2677 & 0.8716 & 0.0479 & -0.0952 & 0.8488 & 0.8506 \\
        SVM$_{3}$ & 0.5155 & 0.0455 & 0.2134 & 0.7965 & 0.0455 & -0.0677 & 0.7109 & 0.7206 \\
        SVM$_{4}$ & 0.5120 & 0.0521 & 0.2758 & 0.8786 & 0.0521 & -0.0908 & 0.7984 & 0.8025 \\
        \rowcolor{BlueBest} SVM$_{5}$ & 0.5204 & 0.0566 & 0.2738 & 0.9101 & 0.0566 & -0.1159 & 0.8462 & 0.8506 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}
\label{tab:svm_top5_auc}
\end{table}

Tiempo total: 00:19:22

# =========================
# IMPORTANCIA POR NIVEL PARA SVM (RBF) CON PERMUTATION IMPORTANCE
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.inspection import permutation_importance

# Para evitar estilos raros del notebook
plt.style.use("default")

# -------------------------------------------------
# 1) Tomar el mejor modelo encontrado
# -------------------------------------------------
svm_best = rs.best_estimator_

# -------------------------------------------------
# 2) Calcular permutation importance sobre el pipeline completo
# -------------------------------------------------
perm = permutation_importance(
    estimator=svm_best,
    X=X_test,
    y=y_test_enc,
    n_repeats=20,
    random_state=42,
    scoring="balanced_accuracy",
    n_jobs=-1
)

# Importancia base por feature
importances_mean = perm.importances_mean

# -------------------------------------------------
# 3) DataFrame base con TODAS las variables originales
# -------------------------------------------------
orig_cols = pd.Index(X_test.columns)

df_all = pd.DataFrame({
    "feature": orig_cols.astype(str),
    "importance_mean": importances_mean
})

# -------------------------------------------------
# 4) Identificar cuáles variables realmente usa el SVM final
#    (mapeando VT -> KBest sobre las columnas originales)
# -------------------------------------------------
selected_cols = orig_cols.copy()

if hasattr(svm_best, "named_steps"):
    # VarianceThreshold
    if "vt" in svm_best.named_steps:
        vt_mask = svm_best.named_steps["vt"].get_support()
        selected_cols = selected_cols[vt_mask]

    # SelectKBest
    if "kbest" in svm_best.named_steps:
        kbest_mask = svm_best.named_steps["kbest"].get_support()
        selected_cols = selected_cols[kbest_mask]

print(f"Variables originales en X_test: {len(orig_cols)}")
print(f"Variables que realmente usa el SVM final: {len(selected_cols)}")

# Filtrar solo las que sobrevivieron al pipeline
df_imp_svm = df_all[df_all["feature"].isin(selected_cols.astype(str))].copy()

# -------------------------------------------------
# 5) Convertir nombres de columnas a índices numéricos
# -------------------------------------------------
feat_idx = pd.to_numeric(pd.Index(df_imp_svm["feature"]).astype(str), errors="coerce")

if feat_idx.isna().any():
    raise ValueError(
        "No se pudieron convertir las columnas seleccionadas a números. "
        "Revisa los nombres de df_imp_svm['feature']."
    )

feat_idx = feat_idx.astype(int)
min_idx, max_idx = int(feat_idx.min()), int(feat_idx.max())
print(f"Rango original columnas seleccionadas: {min_idx} .. {max_idx}")

# Si parte en 0, convertir a base 1
if min_idx == 0:
    feat_idx_1based = feat_idx + 1
    print("Detectado 0-based -> usando idx_1based = idx + 1")
else:
    feat_idx_1based = feat_idx

print(f"Rango idx_1based: {int(feat_idx_1based.min())} .. {int(feat_idx_1based.max())}")

# -------------------------------------------------
# 6) Asignar cada feature a un nivel de firma
# -------------------------------------------------
edges = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
lvl_labels = [f"N{i}" for i in range(0, 10)]  # N0..N9

niveles = pd.cut(
    feat_idx_1based,
    bins=edges,
    labels=lvl_labels,
    right=False,
    include_lowest=True
)

if niveles.isna().any():
    bad = np.array(df_imp_svm["feature"])[niveles.isna()][:10]
    raise ValueError(f"Hay features fuera del rango de bins. Ejemplos: {bad}")

df_imp_svm["nivel"] = niveles.astype(str)

# Excluir N0 para mantener estilo del gráfico RF
df_imp_svm = df_imp_svm[df_imp_svm["nivel"] != "N0"].copy()

# Ordenar niveles
df_imp_svm["nivel_num"] = (
    df_imp_svm["nivel"].str.replace("N", "", regex=False).astype(int)
)
df_imp_svm = df_imp_svm.sort_values(["nivel_num", "feature"])

# Verificación opcional
print("\nCantidad de variables por nivel seleccionadas por el modelo:")
print(df_imp_svm["nivel_num"].value_counts().sort_index())

# -------------------------------------------------
# 7) Resumen por nivel
#    mean y median salen ambos de importance_mean
# -------------------------------------------------
res_svm = df_imp_svm.groupby("nivel_num").agg(
    mean=("importance_mean", "mean"),
    median=("importance_mean", "median"),
    count=("importance_mean", "size")
).reset_index()

# Forzar presencia de todos los niveles 1..9
niveles_completos = pd.DataFrame({"nivel_num": np.arange(1, 10)})
res_svm = niveles_completos.merge(res_svm, on="nivel_num", how="left").fillna(0)

print("\nRESUMEN POR NIVEL — SVM")
print(res_svm.to_string(index=False))

# -------------------------------------------------
# 8) Gráfico
# -------------------------------------------------
x = np.arange(len(res_svm))
labels_plot = [f"Nivel {n}" for n in res_svm["nivel_num"]]

fig, ax = plt.subplots(figsize=(9.5, 5.2), facecolor="white")
ax.set_facecolor("white")

# Barras = promedio por nivel
ax.bar(
    x,
    res_svm["mean"],
    color="#8FA8C7",      # azul sobrio
    edgecolor="#355C7D",  # azul más oscuro
    linewidth=1.2,
    alpha=0.9,
    label="Importancia promedio"
)

# Puntos = mediana por nivel
ax.scatter(
    x,
    res_svm["median"],
    s=80,
    marker="o",
    color="#355C7D",
    edgecolors="#1F3A56",
    linewidths=1,
    zorder=3,
    label="Mediana"
)

ax.set_xticks(x)
ax.set_xticklabels(labels_plot)
ax.set_xlabel("Nivel")
ax.set_ylabel("Importancia")
ax.set_title("Importancia promedio y mediana por nivel (SVM)")
ax.grid(axis="y", linestyle="--", alpha=0.35, color="gray")

# Leyenda: Mediana primero
handles, labels = ax.get_legend_handles_labels()
order = [1, 0]
ax.legend([handles[i] for i in order], [labels[i] for i in order])

fig.tight_layout()

# -------------------------------------------------
# 9) Guardar
# -------------------------------------------------
downloads = Path.home() / "Downloads"
downloads.mkdir(parents=True, exist_ok=True)
ruta_guardado = downloads / "importancia_promedio_mediana_por_nivel_SVM.png"

fig.savefig(ruta_guardado, dpi=300, bbox_inches="tight", facecolor="white")
print(f"\nGráfico guardado en: {ruta_guardado}")

plt.show()

Variables originales en X_test: 1023
Variables que realmente usa el SVM final: 512
Rango original columnas seleccionadas: 16 .. 1020
Rango idx_1based: 16 .. 1020

Cantidad de variables por nivel seleccionadas por el modelo:
nivel_num
4      4
5     11
6     18
7     53
8    125
9    301
Name: count, dtype: int64

RESUMEN POR NIVEL — SVM
 nivel_num     mean   median  count
         1 0.000000 0.000000    0.0
         2 0.000000 0.000000    0.0
         3 0.000000 0.000000    0.0
         4 0.095203 0.104233    4.0
         5 0.070577 0.072475   11.0
         6 0.041997 0.035556   18.0
         7 0.026342 0.023606   53.0
         8 0.024279 0.019415  125.0
         9 0.020577 0.012756  301.0

Gráfico guardado en: C:\Users\Gamer\Downloads\importancia_promedio_mediana_por_nivel_SVM.png

# ==========================================================
# TABLA POR NIVELES (SVM5) — criterio AUC_rep
# - Usa el MISMO pipeline del SVM final de la tesis
# - Toma los parámetros del modelo etiquetado como SVM5
# - Evalúa por niveles 1..9: 2, 6, 14, ..., 1022 features
# - Reporta Acc/F1w + AUC (OVR macro) en train y test
# - Filas LaTeX listas para pegar
# ==========================================================

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from scipy.special import softmax

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# ----------------------------------------------------------
# 0) REQUISITOS PREVIOS
# ----------------------------------------------------------
# Este bloque asume que YA corriste tu notebook principal de SVM,
# por lo que ya existen estas variables:
# X_train, X_test, y_train_enc, y_test_enc, top10, make_pipe

y_train_encoded = y_train_enc
y_test_encoded  = y_test_enc

# ----------------------------------------------------------
# 1) TOMAR LOS PARÁMETROS DEL MODELO SVM5 REAL
# ----------------------------------------------------------
# En tu notebook:
# for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
#     r = eval_one(row.params, f"SVM{i}")
#
# Por eso SVM5 corresponde a top10.iloc[4]["params"]

SVM5_PARAMS = top10.iloc[4]["params"].copy()

print("Parámetros de SVM5:")
print(SVM5_PARAMS)

# Guardar k original del modelo
kbest_original = SVM5_PARAMS.get("kbest__k", X_train.shape[1])

# ----------------------------------------------------------
# 2) FUNCIÓN AUC — MISMA DEFINICIÓN QUE EN TU NOTEBOOK PRINCIPAL
# ----------------------------------------------------------
def auc_ovr_macro_from_scores(y_true, decision_scores):
    """
    Usa decision_function del SVM y luego softmax,
    igual que en tu evaluación principal.
    """
    if decision_scores.ndim == 1:
        decision_scores = decision_scores.reshape(-1, 1)
    proba = softmax(decision_scores, axis=1)
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

# ----------------------------------------------------------
# 3) COLUMNAS NUMÉRICAS 0..1022
# ----------------------------------------------------------
col_nums = pd.to_numeric(pd.Index(X_train.columns).astype(str), errors="coerce")
if col_nums.isna().any():
    raise ValueError(
        "X_train.columns no son numéricas (0..1022). "
        "Revisa los nombres de columnas."
    )
col_nums = col_nums.astype(int)

# Orden estable por índice
pairs = sorted(zip(col_nums.values, X_train.columns), key=lambda t: t[0])
nums_sorted = [n for n, _ in pairs]
cols_sorted = [c for _, c in pairs]

# Para que nivel 9 tenga 1022 features, excluimos la columna 0 (N0)
EXCLUDE_N0 = True

# ----------------------------------------------------------
# 4) LOOP POR NIVELES 1..9
# ----------------------------------------------------------
rows = []

for m in tqdm(range(1, 10), desc="Evaluando niveles (SVM5)", unit="nivel"):
    end_idx = (2 ** (m + 1)) - 2   # 2, 6, 14, 30, ..., 1022

    if EXCLUDE_N0:
        selected = [c for c, n in zip(cols_sorted, nums_sorted) if (1 <= n <= end_idx)]
    else:
        selected = [c for c, n in zip(cols_sorted, nums_sorted) if (0 <= n <= end_idx)]

    Xtr_m = X_train[selected].copy()
    Xte_m = X_test[selected].copy()

    # Copia de params del SVM5
    params_m = SVM5_PARAMS.copy()

    # Ajuste clave:
    # si el k del modelo final es mayor que las variables disponibles en ese nivel,
    # se reduce automáticamente para evitar error.
    if isinstance(kbest_original, (int, np.integer, float, np.floating)):
        params_m["kbest__k"] = int(min(int(kbest_original), Xtr_m.shape[1]))
    else:
        params_m["kbest__k"] = "all"

    # Construir EXACTAMENTE el mismo tipo de pipeline de tu notebook principal
    model_m = make_pipe(params_m)

    # Entrenar
    model_m.fit(Xtr_m, y_train_encoded)

    # Predicciones
    pred_tr = model_m.predict(Xtr_m)
    pred_te = model_m.predict(Xte_m)

    # Scores del SVM
    scores_tr = model_m.decision_function(Xtr_m)
    scores_te = model_m.decision_function(Xte_m)

    # Métricas
    acc_tr = accuracy_score(y_train_encoded, pred_tr)
    f1w_tr = f1_score(y_train_encoded, pred_tr, average="weighted", zero_division=0)
    auc_tr = auc_ovr_macro_from_scores(y_train_encoded, scores_tr)

    acc_te = accuracy_score(y_test_encoded, pred_te)
    f1w_te = f1_score(y_test_encoded, pred_te, average="weighted", zero_division=0)
    auc_te = auc_ovr_macro_from_scores(y_test_encoded, scores_te)

    rows.append({
        "NivelFirma": m,
        "N_features": Xtr_m.shape[1],
        "kbest_usado": params_m["kbest__k"],
        "AccTrain": acc_tr,
        "F1wTrain": f1w_tr,
        "AUCTrain": auc_tr,
        "AccTest": acc_te,
        "F1wTest": f1w_te,
        "AUCRep": auc_te
    })

df_levels = pd.DataFrame(rows)

# ----------------------------------------------------------
# 5) ELEGIR EL NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
# ----------------------------------------------------------
best_auc = df_levels["AUCRep"].max()
tol = 1e-4
best_candidates = df_levels[df_levels["AUCRep"] >= best_auc - tol]
best_simple = best_candidates.sort_values(["NivelFirma"]).iloc[0]

print("\n" + "="*100)
print("RESULTADOS POR NIVEL (SVM5) — criterio AUCRep")
print("="*100)
print(df_levels.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

print("\n" + "="*100)
print("NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)")
print("="*100)
print(best_simple.to_string())

# ----------------------------------------------------------
# 6) FILAS LaTeX
# ----------------------------------------------------------
print("\n" + "="*100)
print("FILAS LaTeX")
print("="*100)
for _, r in df_levels.iterrows():
    print(
        f"{int(r['NivelFirma'])} & {int(r['N_features'])} & "
        f"{r['AccTrain']:.4f} & {r['F1wTrain']:.4f} & {r['AUCTrain']:.4f} & "
        f"{r['AccTest']:.4f} & {r['F1wTest']:.4f} & {r['AUCRep']:.4f} \\\\"
    )

Parámetros de SVM5:
{'kbest__k': 512, 'svc__C': np.float64(61.75015483640841), 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': np.float64(0.05019578058935042)}

Evaluando niveles (SVM5): 100%|███████████████████████████████████████████████████████| 9/9 [00:40<00:00,  4.48s/nivel]


====================================================================================================
RESULTADOS POR NIVEL (SVM5) — criterio AUCRep
====================================================================================================
 NivelFirma  N_features  kbest_usado  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2            2    0.1252    0.0278    0.5161   0.0796   0.0117  0.4744
          2           6            6    0.1371    0.0527    0.5448   0.0902   0.0328  0.5288
          3          14           14    0.2854    0.2644    0.6555   0.3979   0.4001  0.7101
          4          30           30    0.4377    0.4560    0.7246   0.6419   0.6568  0.8051
          5          62           62    0.5430    0.5752    0.7682   0.7692   0.7758  0.8614
          6         126          126    0.6106    0.6459    0.8007   0.8568   0.8583  0.8937
          7         254          254    0.6351    0.6723    0.8262   0.8780   0.8780  0.9182
          8         510          510    0.6477    0.6845    0.8373   0.8912   0.8909  0.9278
          9        1022          512    0.5788    0.6242    0.7942   0.8462   0.8506  0.9101

====================================================================================================
NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)
====================================================================================================
NivelFirma       8.000000
N_features     510.000000
kbest_usado    510.000000
AccTrain         0.647682
F1wTrain         0.684537
AUCTrain         0.837325
AccTest          0.891247
F1wTest          0.890900
AUCRep           0.927821

====================================================================================================
FILAS LaTeX
====================================================================================================
1 & 2 & 0.1252 & 0.0278 & 0.5161 & 0.0796 & 0.0117 & 0.4744 \\
2 & 6 & 0.1371 & 0.0527 & 0.5448 & 0.0902 & 0.0328 & 0.5288 \\
3 & 14 & 0.2854 & 0.2644 & 0.6555 & 0.3979 & 0.4001 & 0.7101 \\
4 & 30 & 0.4377 & 0.4560 & 0.7246 & 0.6419 & 0.6568 & 0.8051 \\
5 & 62 & 0.5430 & 0.5752 & 0.7682 & 0.7692 & 0.7758 & 0.8614 \\
6 & 126 & 0.6106 & 0.6459 & 0.8007 & 0.8568 & 0.8583 & 0.8937 \\
7 & 254 & 0.6351 & 0.6723 & 0.8262 & 0.8780 & 0.8780 & 0.9182 \\
8 & 510 & 0.6477 & 0.6845 & 0.8373 & 0.8912 & 0.8909 & 0.9278 \\
9 & 1022 & 0.5788 & 0.6242 & 0.7942 & 0.8462 & 0.8506 & 0.9101 \\

ESIG LOGFIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

# pip install tqdm tqdm_joblib imbalanced-learn matplotlib seaborn
from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")          # backend sin pantalla; cambiar a "TkAgg" si se quiere ventana
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, cross_val_score
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS
# =========================
x = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\ESIG\logsignature_esig_M9.csv')
y = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid','survey_class_mapped','survey_class','survey_class_cat','id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'C:\Users\Gamer\Downloads'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# AUC OVR: decision_function → softmax → roc_auc_score
def _auc_ovr(y_true, y_score):
    proba = softmax(y_score, axis=1)   # convierte scores crudos a distribución [0,1] que suma 1
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

auc_scorer = make_scorer(_auc_ovr, response_method="decision_function")

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

param_dist = {
    "kbest__k":          [128, 256, 384, 512, 768, 1023],
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):   return v.item()
    if isinstance(v, dict):         return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) EVALUACIÓN COMPLETA + FIGURAS
# =========================
def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0


def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    """Guarda figura con CM de train y test — mismo estilo que el resto de tus CMs."""
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4,
                           width_ratios=[1, gap_width, 1, 0.08],
                           wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]);  ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real",     fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%",
                        ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})",
                        ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")
def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    # AUC en CV (5 folds, calcula sobreajuste respecto a validación)
    auc_cv_scores = cross_val_score(
        make_pipe(params), X_train, y_train_enc,
        cv=cv, scoring=auc_scorer, n_jobs=-1
    )
    auc_cv = float(np.mean(auc_cv_scores))
    sd_cv  = float(np.std(auc_cv_scores))

    # Reentrenar sobre todo el train
    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)
    df_tr   = softmax(model.decision_function(X_train), axis=1)
    df_te   = softmax(model.decision_function(X_test),  axis=1)

    auc_train = roc_auc_score(y_train_enc, df_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  df_te, multi_class="ovr", average="macro")
    gap_cv    = round(auc_train - auc_cv,   4)
    gap_rep   = round(auc_train - auc_test, 4)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro",    zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc={acc_test:.4f} | F1w={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={accuracy_score(y_train_enc, yhat_tr):.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":      tag,
        "AUC_CV":   round(auc_cv,    4),
        "SD_CV":    round(sd_cv,     4),
        "Gap_CV":   gap_cv,
        "AUC_rep":  round(auc_test,  4),
        "SD_rep":   round(sd_cv,     4),
        "Gap_rep":  gap_rep,
        "Acc_test": round(acc_test,  4),
        "F1w_test": round(f1w_test,  4),
        "macroF1":  round(mf1_test,  4),
        "balacc":   round(bacc_test, 4),
    }


results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 7) TABLA RESUMEN + LATEX AUTO-GENERADO
# =========================
summary_df = pd.DataFrame(results_summary)
print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]
print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

# Generar snippet LaTeX
latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}" + "\n"
    r"\label{tab:svm_top5_auc}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 8) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 127) (377, 127)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {np.int64(0): np.int64(422), np.int64(1): np.int64(189), np.int64(2): np.int64(899)}

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [00:00<?, ?fit/s]



Buscando hiperparámetros:   0%|                                                          | 0/400 fits [32:02<?, ?fit/s]





  0%|▏                                                                                 | 1/400 [00:03<24:24,  3.67s/it]



  1%|█                                                                                 | 5/400 [00:03<03:49,  1.72it/s]



  2%|█▍                                                                                | 7/400 [00:04<02:56,  2.22it/s]



  2%|█▋                                                                                | 8/400 [00:06<05:21,  1.22it/s]



  2%|██                                                                               | 10/400 [00:06<03:29,  1.86it/s]



  3%|██▍                                                                              | 12/400 [00:06<02:22,  2.73it/s]



  4%|██▊                                                                              | 14/400 [00:07<01:56,  3.31it/s]



  4%|███▏                                                                             | 16/400 [00:09<03:41,  1.73it/s]



  4%|███▋                                                                             | 18/400 [00:09<02:38,  2.41it/s]



  5%|████▎                                                                            | 21/400 [00:10<02:39,  2.38it/s]



  6%|████▍                                                                            | 22/400 [00:12<03:35,  1.75it/s]



  6%|████▊                                                                            | 24/400 [00:12<02:58,  2.10it/s]



  6%|█████                                                                            | 25/400 [00:13<02:50,  2.19it/s]



  7%|█████▍                                                                           | 27/400 [00:13<02:06,  2.96it/s]



  7%|█████▊                                                                           | 29/400 [00:14<02:13,  2.78it/s]



  8%|██████▎                                                                          | 31/400 [00:15<02:25,  2.53it/s]



  8%|██████▍                                                                          | 32/400 [00:15<02:44,  2.23it/s]



  8%|██████▉                                                                          | 34/400 [00:15<02:01,  3.02it/s]



  9%|███████▎                                                                         | 36/400 [00:16<02:02,  2.98it/s]



 10%|███████▋                                                                         | 38/400 [00:17<01:57,  3.09it/s]



 10%|███████▉                                                                         | 39/400 [00:18<02:37,  2.29it/s]



 10%|████████                                                                         | 40/400 [00:18<02:13,  2.70it/s]



 10%|████████▎                                                                        | 41/400 [00:18<01:59,  3.00it/s]



 11%|████████▋                                                                        | 43/400 [00:20<03:46,  1.58it/s]



 11%|████████▉                                                                        | 44/400 [00:21<04:39,  1.27it/s]



 11%|█████████                                                                        | 45/400 [00:22<03:49,  1.55it/s]



 12%|█████████▎                                                                       | 46/400 [00:22<03:10,  1.86it/s]



 12%|█████████▌                                                                       | 47/400 [00:22<02:47,  2.11it/s]



 12%|█████████▋                                                                       | 48/400 [00:23<02:39,  2.21it/s]



 12%|█████████▉                                                                       | 49/400 [00:23<02:11,  2.66it/s]



 13%|██████████▎                                                                      | 51/400 [00:23<01:43,  3.39it/s]



 13%|██████████▌                                                                      | 52/400 [00:24<01:58,  2.94it/s]



 13%|██████████▋                                                                      | 53/400 [00:24<01:52,  3.09it/s]



 14%|███████████▏                                                                     | 55/400 [00:24<01:40,  3.43it/s]



 14%|███████████▎                                                                     | 56/400 [00:25<01:41,  3.40it/s]



 14%|███████████▌                                                                     | 57/400 [00:25<01:27,  3.92it/s]



 14%|███████████▋                                                                     | 58/400 [00:25<01:23,  4.12it/s]



 15%|███████████▉                                                                     | 59/400 [00:25<01:19,  4.27it/s]



 15%|████████████▏                                                                    | 60/400 [00:26<01:30,  3.76it/s]



 16%|████████████▌                                                                    | 62/400 [00:26<01:16,  4.41it/s]



 16%|████████████▊                                                                    | 63/400 [00:26<01:34,  3.57it/s]



 16%|████████████▉                                                                    | 64/400 [00:27<01:50,  3.04it/s]



 16%|█████████████▏                                                                   | 65/400 [00:27<01:57,  2.86it/s]



 16%|█████████████▎                                                                   | 66/400 [00:28<01:41,  3.30it/s]



 17%|█████████████▌                                                                   | 67/400 [00:28<02:11,  2.53it/s]



 17%|█████████████▊                                                                   | 68/400 [00:28<01:52,  2.94it/s]



 17%|█████████████▉                                                                   | 69/400 [00:29<01:40,  3.29it/s]



 18%|██████████████▏                                                                  | 70/400 [00:29<01:30,  3.63it/s]



 18%|██████████████▌                                                                  | 72/400 [00:29<00:58,  5.57it/s]



 18%|██████████████▉                                                                  | 74/400 [00:29<00:47,  6.90it/s]



 19%|███████████████▏                                                                 | 75/400 [00:30<01:07,  4.80it/s]



 19%|███████████████▍                                                                 | 76/400 [00:30<01:47,  3.00it/s]



 19%|███████████████▌                                                                 | 77/400 [00:31<01:58,  2.73it/s]



 20%|███████████████▊                                                                 | 78/400 [00:31<01:35,  3.38it/s]



 20%|███████████████▉                                                                 | 79/400 [00:32<02:12,  2.41it/s]



 20%|████████████████▏                                                                | 80/400 [00:32<02:15,  2.36it/s]



 20%|████████████████▍                                                                | 81/400 [00:32<02:12,  2.41it/s]



 20%|████████████████▌                                                                | 82/400 [00:33<01:50,  2.88it/s]



 21%|████████████████▊                                                                | 83/400 [00:33<01:27,  3.61it/s]



 22%|█████████████████▍                                                               | 86/400 [00:33<00:46,  6.81it/s]



 22%|█████████████████▊                                                               | 88/400 [00:33<00:58,  5.33it/s]



 22%|██████████████████                                                               | 89/400 [00:34<01:22,  3.78it/s]



 22%|██████████████████▏                                                              | 90/400 [00:34<01:11,  4.36it/s]



 23%|██████████████████▍                                                              | 91/400 [00:35<01:52,  2.75it/s]



 23%|██████████████████▋                                                              | 92/400 [00:35<02:08,  2.41it/s]



 23%|██████████████████▊                                                              | 93/400 [00:36<02:20,  2.18it/s]



 24%|███████████████████                                                              | 94/400 [00:36<02:22,  2.15it/s]



 24%|███████████████████▏                                                             | 95/400 [00:37<01:59,  2.55it/s]



 24%|███████████████████▍                                                             | 96/400 [00:37<01:48,  2.81it/s]



 24%|███████████████████▊                                                             | 98/400 [00:37<01:30,  3.35it/s]



 25%|████████████████████                                                             | 99/400 [00:38<01:34,  3.19it/s]



 25%|████████████████████                                                            | 100/400 [00:38<01:48,  2.75it/s]



 25%|████████████████████▏                                                           | 101/400 [00:40<03:22,  1.48it/s]



 26%|████████████████████▍                                                           | 102/400 [00:40<02:50,  1.75it/s]



 26%|█████████████████████▏                                                          | 106/400 [00:40<01:14,  3.93it/s]



 27%|█████████████████████▍                                                          | 107/400 [00:40<01:11,  4.10it/s]



 27%|█████████████████████▌                                                          | 108/400 [00:41<01:05,  4.43it/s]



 27%|█████████████████████▊                                                          | 109/400 [00:41<01:19,  3.66it/s]



 28%|██████████████████████▏                                                         | 111/400 [00:42<01:28,  3.28it/s]



 28%|██████████████████████▌                                                         | 113/400 [00:43<01:35,  3.01it/s]



 28%|██████████████████████▊                                                         | 114/400 [00:43<01:49,  2.62it/s]



 29%|███████████████████████▏                                                        | 116/400 [00:43<01:19,  3.58it/s]



 29%|███████████████████████▍                                                        | 117/400 [00:43<01:11,  3.94it/s]



 30%|████████████████████████                                                        | 120/400 [00:44<00:48,  5.82it/s]



 30%|████████████████████████▏                                                       | 121/400 [00:44<01:15,  3.70it/s]



 31%|████████████████████████▌                                                       | 123/400 [00:45<01:24,  3.28it/s]



 31%|█████████████████████████                                                       | 125/400 [00:46<01:28,  3.11it/s]



 32%|█████████████████████████▏                                                      | 126/400 [00:47<01:59,  2.28it/s]



 32%|█████████████████████████▍                                                      | 127/400 [00:47<01:44,  2.60it/s]



 32%|█████████████████████████▊                                                      | 129/400 [00:47<01:16,  3.55it/s]



 33%|██████████████████████████▏                                                     | 131/400 [00:47<00:55,  4.84it/s]



 33%|██████████████████████████▌                                                     | 133/400 [00:48<00:52,  5.11it/s]



 34%|███████████████████████████                                                     | 135/400 [00:48<01:01,  4.32it/s]



 34%|███████████████████████████▏                                                    | 136/400 [00:51<02:47,  1.58it/s]



 34%|███████████████████████████▌                                                    | 138/400 [00:51<01:52,  2.32it/s]



 35%|████████████████████████████▏                                                   | 141/400 [00:51<01:09,  3.73it/s]



 36%|████████████████████████████▌                                                   | 143/400 [00:53<02:13,  1.93it/s]



 36%|█████████████████████████████                                                   | 145/400 [00:53<01:38,  2.58it/s]



 37%|█████████████████████████████▍                                                  | 147/400 [00:54<01:29,  2.82it/s]



 37%|█████████████████████████████▊                                                  | 149/400 [00:54<01:09,  3.61it/s]



 38%|██████████████████████████████▌                                                 | 153/400 [00:55<01:07,  3.65it/s]



 38%|██████████████████████████████▊                                                 | 154/400 [00:56<01:22,  3.00it/s]



 39%|███████████████████████████████                                                 | 155/400 [00:56<01:18,  3.14it/s]



 39%|███████████████████████████████▍                                                | 157/400 [00:56<00:58,  4.15it/s]



 40%|███████████████████████████████▊                                                | 159/400 [00:57<01:17,  3.10it/s]



 40%|████████████████████████████████▏                                               | 161/400 [01:00<02:25,  1.64it/s]



 41%|████████████████████████████████▌                                               | 163/400 [01:00<01:49,  2.17it/s]



 42%|█████████████████████████████████▌                                              | 168/400 [01:01<01:23,  2.77it/s]



 42%|██████████████████████████████████                                              | 170/400 [01:02<01:09,  3.29it/s]



 43%|██████████████████████████████████▏                                             | 171/400 [01:02<01:22,  2.79it/s]



 43%|██████████████████████████████████▍                                             | 172/400 [01:03<01:22,  2.77it/s]



 44%|██████████████████████████████████▊                                             | 174/400 [01:03<01:04,  3.49it/s]



 44%|███████████████████████████████████                                             | 175/400 [01:03<00:59,  3.81it/s]



 45%|███████████████████████████████████▊                                            | 179/400 [01:03<00:30,  7.13it/s]



 45%|████████████████████████████████████▏                                           | 181/400 [01:04<00:59,  3.70it/s]



 46%|████████████████████████████████████▌                                           | 183/400 [01:05<01:03,  3.39it/s]



 46%|████████████████████████████████████▊                                           | 184/400 [01:06<01:20,  2.69it/s]



 46%|█████████████████████████████████████                                           | 185/400 [01:06<01:09,  3.11it/s]



 46%|█████████████████████████████████████▏                                          | 186/400 [01:06<01:09,  3.09it/s]



 47%|█████████████████████████████████████▍                                          | 187/400 [01:07<01:05,  3.24it/s]



 48%|██████████████████████████████████████▍                                         | 192/400 [01:07<00:47,  4.37it/s]



 48%|██████████████████████████████████████▌                                         | 193/400 [01:08<00:46,  4.41it/s]



 48%|██████████████████████████████████████▊                                         | 194/400 [01:08<00:44,  4.67it/s]



 49%|███████████████████████████████████████                                         | 195/400 [01:08<00:57,  3.58it/s]



 49%|███████████████████████████████████████▏                                        | 196/400 [01:09<01:22,  2.49it/s]



 49%|███████████████████████████████████████▍                                        | 197/400 [01:09<01:08,  2.95it/s]



 50%|███████████████████████████████████████▌                                        | 198/400 [01:10<01:09,  2.92it/s]



 50%|███████████████████████████████████████▊                                        | 199/400 [01:10<01:03,  3.14it/s]



 50%|████████████████████████████████████████▍                                       | 202/400 [01:10<00:33,  5.85it/s]



 51%|████████████████████████████████████████▊                                       | 204/400 [01:11<00:43,  4.49it/s]



 51%|█████████████████████████████████████████                                       | 205/400 [01:11<00:45,  4.24it/s]



 52%|█████████████████████████████████████████▏                                      | 206/400 [01:12<01:31,  2.12it/s]



 52%|█████████████████████████████████████████▍                                      | 207/400 [01:13<01:41,  1.90it/s]



 52%|█████████████████████████████████████████▌                                      | 208/400 [01:14<01:44,  1.84it/s]



 52%|█████████████████████████████████████████▊                                      | 209/400 [01:14<01:28,  2.15it/s]



 53%|██████████████████████████████████████████▏                                     | 211/400 [01:14<00:56,  3.34it/s]



 53%|██████████████████████████████████████████▌                                     | 213/400 [01:14<00:40,  4.56it/s]



 54%|███████████████████████████████████████████                                     | 215/400 [01:14<00:30,  6.05it/s]



 55%|███████████████████████████████████████████▌                                    | 218/400 [01:15<00:20,  8.67it/s]



 55%|████████████████████████████████████████████                                    | 220/400 [01:17<01:12,  2.47it/s]



 55%|████████████████████████████████████████████▏                                   | 221/400 [01:17<01:08,  2.62it/s]



 56%|████████████████████████████████████████████▍                                   | 222/400 [01:17<01:01,  2.90it/s]



 56%|████████████████████████████████████████████▌                                   | 223/400 [01:17<00:54,  3.23it/s]



 56%|█████████████████████████████████████████████                                   | 225/400 [01:18<00:41,  4.23it/s]



 56%|█████████████████████████████████████████████▏                                  | 226/400 [01:18<00:44,  3.94it/s]



 57%|█████████████████████████████████████████████▊                                  | 229/400 [01:18<00:26,  6.57it/s]



 58%|██████████████████████████████████████████████▏                                 | 231/400 [01:18<00:20,  8.21it/s]



 58%|██████████████████████████████████████████████▌                                 | 233/400 [01:20<01:07,  2.47it/s]



 58%|██████████████████████████████████████████████▊                                 | 234/400 [01:21<01:02,  2.64it/s]



 59%|███████████████████████████████████████████████                                 | 235/400 [01:21<00:54,  3.05it/s]



 59%|███████████████████████████████████████████████▍                                | 237/400 [01:21<00:44,  3.67it/s]



 60%|███████████████████████████████████████████████▌                                | 238/400 [01:22<00:52,  3.08it/s]



 60%|████████████████████████████████████████████████▏                               | 241/400 [01:23<01:16,  2.08it/s]



 60%|████████████████████████████████████████████████▍                               | 242/400 [01:24<01:05,  2.42it/s]



 61%|████████████████████████████████████████████████▊                               | 244/400 [01:24<00:47,  3.30it/s]



 61%|█████████████████████████████████████████████████                               | 245/400 [01:24<00:46,  3.35it/s]



 62%|█████████████████████████████████████████████████▏                              | 246/400 [01:24<00:43,  3.53it/s]



 62%|█████████████████████████████████████████████████▍                              | 247/400 [01:25<01:10,  2.17it/s]



 62%|█████████████████████████████████████████████████▌                              | 248/400 [01:25<00:57,  2.62it/s]



 62%|█████████████████████████████████████████████████▊                              | 249/400 [01:27<01:32,  1.63it/s]



 62%|██████████████████████████████████████████████████                              | 250/400 [01:27<01:19,  1.89it/s]



 63%|██████████████████████████████████████████████████▏                             | 251/400 [01:27<01:12,  2.05it/s]



 63%|██████████████████████████████████████████████████▍                             | 252/400 [01:28<00:58,  2.55it/s]



 64%|██████████████████████████████████████████████████▊                             | 254/400 [01:28<00:44,  3.31it/s]



 64%|███████████████████████████████████████████████████                             | 255/400 [01:28<00:43,  3.37it/s]



 64%|███████████████████████████████████████████████████▏                            | 256/400 [01:29<00:48,  2.98it/s]



 64%|███████████████████████████████████████████████████▍                            | 257/400 [01:29<00:40,  3.56it/s]



 64%|███████████████████████████████████████████████████▌                            | 258/400 [01:30<01:07,  2.11it/s]



 65%|███████████████████████████████████████████████████▊                            | 259/400 [01:30<01:05,  2.14it/s]



 65%|████████████████████████████████████████████████████                            | 260/400 [01:30<00:55,  2.51it/s]



 65%|████████████████████████████████████████████████████▏                           | 261/400 [01:31<00:46,  2.97it/s]



 66%|████████████████████████████████████████████████████▌                           | 263/400 [01:31<00:29,  4.65it/s]



 66%|█████████████████████████████████████████████████████                           | 265/400 [01:32<00:41,  3.25it/s]



 66%|█████████████████████████████████████████████████████▏                          | 266/400 [01:33<01:10,  1.91it/s]



 67%|█████████████████████████████████████████████████████▍                          | 267/400 [01:33<01:02,  2.13it/s]



 67%|█████████████████████████████████████████████████████▌                          | 268/400 [01:34<00:56,  2.32it/s]



 68%|██████████████████████████████████████████████████████                          | 270/400 [01:34<00:35,  3.62it/s]



 68%|██████████████████████████████████████████████████████▍                         | 272/400 [01:34<00:26,  4.92it/s]



 68%|██████████████████████████████████████████████████████▌                         | 273/400 [01:35<00:50,  2.52it/s]



 68%|██████████████████████████████████████████████████████▊                         | 274/400 [01:36<00:53,  2.37it/s]



 69%|███████████████████████████████████████████████████████                         | 275/400 [01:36<00:57,  2.18it/s]



 69%|███████████████████████████████████████████████████████▏                        | 276/400 [01:38<01:44,  1.18it/s]



 69%|███████████████████████████████████████████████████████▍                        | 277/400 [01:39<01:34,  1.30it/s]



 70%|███████████████████████████████████████████████████████▌                        | 278/400 [01:40<01:58,  1.03it/s]



 70%|███████████████████████████████████████████████████████▊                        | 279/400 [01:41<01:42,  1.18it/s]



 70%|████████████████████████████████████████████████████████                        | 280/400 [01:42<02:08,  1.07s/it]



 70%|████████████████████████████████████████████████████████▏                       | 281/400 [01:43<02:13,  1.12s/it]



 70%|████████████████████████████████████████████████████████▍                       | 282/400 [01:45<02:34,  1.31s/it]



 71%|████████████████████████████████████████████████████████▌                       | 283/400 [01:46<02:22,  1.22s/it]



 71%|████████████████████████████████████████████████████████▊                       | 284/400 [01:47<02:09,  1.11s/it]



 71%|█████████████████████████████████████████████████████████                       | 285/400 [01:48<02:14,  1.17s/it]



 72%|█████████████████████████████████████████████████████████▏                      | 286/400 [01:49<01:52,  1.02it/s]



 72%|█████████████████████████████████████████████████████████▍                      | 287/400 [01:49<01:27,  1.30it/s]



 72%|█████████████████████████████████████████████████████████▌                      | 288/400 [01:50<01:22,  1.35it/s]



 72%|█████████████████████████████████████████████████████████▊                      | 289/400 [01:50<01:07,  1.64it/s]



 73%|██████████████████████████████████████████████████████████▏                     | 291/400 [01:51<00:51,  2.11it/s]



 73%|██████████████████████████████████████████████████████████▍                     | 292/400 [01:51<00:47,  2.29it/s]



 73%|██████████████████████████████████████████████████████████▌                     | 293/400 [01:52<00:56,  1.91it/s]



 74%|██████████████████████████████████████████████████████████▊                     | 294/400 [01:52<00:56,  1.89it/s]



 74%|███████████████████████████████████████████████████████████                     | 295/400 [01:54<01:26,  1.22it/s]



 74%|███████████████████████████████████████████████████████████▏                    | 296/400 [01:54<01:05,  1.58it/s]



 75%|███████████████████████████████████████████████████████████▊                    | 299/400 [01:55<00:41,  2.46it/s]



 75%|████████████████████████████████████████████████████████████▏                   | 301/400 [01:55<00:29,  3.33it/s]



 76%|████████████████████████████████████████████████████████████▍                   | 302/400 [01:55<00:28,  3.41it/s]



 76%|████████████████████████████████████████████████████████████▌                   | 303/400 [01:55<00:25,  3.82it/s]



 76%|█████████████████████████████████████████████████████████████                   | 305/400 [01:56<00:23,  4.11it/s]



 77%|█████████████████████████████████████████████████████████████▍                  | 307/400 [01:56<00:22,  4.20it/s]



 77%|█████████████████████████████████████████████████████████████▌                  | 308/400 [01:57<00:25,  3.63it/s]



 78%|██████████████████████████████████████████████████████████████▏                 | 311/400 [01:58<00:32,  2.76it/s]



 78%|██████████████████████████████████████████████████████████████▍                 | 312/400 [01:58<00:28,  3.06it/s]



 78%|██████████████████████████████████████████████████████████████▌                 | 313/400 [01:58<00:25,  3.45it/s]



 78%|██████████████████████████████████████████████████████████████▊                 | 314/400 [01:59<00:24,  3.53it/s]



 79%|███████████████████████████████████████████████████████████████                 | 315/400 [01:59<00:22,  3.86it/s]



 79%|███████████████████████████████████████████████████████████████▍                | 317/400 [01:59<00:19,  4.36it/s]



 80%|███████████████████████████████████████████████████████████████▊                | 319/400 [02:00<00:17,  4.71it/s]



 80%|████████████████████████████████████████████████████████████████                | 320/400 [02:00<00:20,  3.85it/s]



 80%|████████████████████████████████████████████████████████████████▏               | 321/400 [02:01<00:24,  3.16it/s]



 81%|████████████████████████████████████████████████████████████████▌               | 323/400 [02:02<00:34,  2.25it/s]



 81%|████████████████████████████████████████████████████████████████▊               | 324/400 [02:02<00:29,  2.55it/s]



 81%|█████████████████████████████████████████████████████████████████               | 325/400 [02:02<00:26,  2.81it/s]



 82%|█████████████████████████████████████████████████████████████████▏              | 326/400 [02:03<00:24,  2.99it/s]



 82%|█████████████████████████████████████████████████████████████████▍              | 327/400 [02:03<00:19,  3.66it/s]



 82%|█████████████████████████████████████████████████████████████████▌              | 328/400 [02:03<00:16,  4.43it/s]



 82%|█████████████████████████████████████████████████████████████████▊              | 329/400 [02:03<00:14,  4.89it/s]



 82%|██████████████████████████████████████████████████████████████████              | 330/400 [02:03<00:14,  4.83it/s]



 83%|██████████████████████████████████████████████████████████████████▍             | 332/400 [02:03<00:12,  5.39it/s]



 83%|██████████████████████████████████████████████████████████████████▌             | 333/400 [02:04<00:16,  4.13it/s]



 84%|███████████████████████████████████████████████████████████████████             | 335/400 [02:05<00:28,  2.27it/s]



 84%|███████████████████████████████████████████████████████████████████▏            | 336/400 [02:07<00:46,  1.38it/s]



 84%|███████████████████████████████████████████████████████████████████▌            | 338/400 [02:07<00:28,  2.16it/s]



 85%|███████████████████████████████████████████████████████████████████▊            | 339/400 [02:07<00:23,  2.57it/s]



 85%|████████████████████████████████████████████████████████████████████            | 340/400 [02:07<00:19,  3.09it/s]



 86%|████████████████████████████████████████████████████████████████████▍           | 342/400 [02:08<00:13,  4.17it/s]



 86%|████████████████████████████████████████████████████████████████████▌           | 343/400 [02:09<00:23,  2.40it/s]



 86%|████████████████████████████████████████████████████████████████████▊           | 344/400 [02:09<00:19,  2.83it/s]



 86%|█████████████████████████████████████████████████████████████████████           | 345/400 [02:09<00:17,  3.23it/s]



 86%|█████████████████████████████████████████████████████████████████████▏          | 346/400 [02:09<00:14,  3.75it/s]



 87%|█████████████████████████████████████████████████████████████████████▍          | 347/400 [02:09<00:14,  3.64it/s]



 87%|█████████████████████████████████████████████████████████████████████▌          | 348/400 [02:10<00:13,  3.78it/s]



 87%|█████████████████████████████████████████████████████████████████████▊          | 349/400 [02:10<00:11,  4.48it/s]



 88%|██████████████████████████████████████████████████████████████████████          | 350/400 [02:10<00:10,  4.89it/s]



 88%|██████████████████████████████████████████████████████████████████████▍         | 352/400 [02:10<00:07,  6.61it/s]



 88%|██████████████████████████████████████████████████████████████████████▌         | 353/400 [02:10<00:06,  6.89it/s]



 88%|██████████████████████████████████████████████████████████████████████▊         | 354/400 [02:11<00:07,  6.12it/s]



 89%|███████████████████████████████████████████████████████████████████████         | 355/400 [02:12<00:22,  2.00it/s]



 89%|███████████████████████████████████████████████████████████████████████▏        | 356/400 [02:14<00:39,  1.13it/s]



 90%|███████████████████████████████████████████████████████████████████████▊        | 359/400 [02:14<00:17,  2.36it/s]



 90%|████████████████████████████████████████████████████████████████████████▏       | 361/400 [02:15<00:14,  2.62it/s]



 90%|████████████████████████████████████████████████████████████████████████▍       | 362/400 [02:15<00:13,  2.87it/s]



 91%|████████████████████████████████████████████████████████████████████████▊       | 364/400 [02:15<00:09,  3.85it/s]



 91%|█████████████████████████████████████████████████████████████████████████       | 365/400 [02:15<00:08,  4.37it/s]



 92%|█████████████████████████████████████████████████████████████████████████▏      | 366/400 [02:15<00:09,  3.71it/s]



 92%|█████████████████████████████████████████████████████████████████████████▍      | 367/400 [02:16<00:10,  3.25it/s]



 92%|█████████████████████████████████████████████████████████████████████████▌      | 368/400 [02:17<00:16,  1.94it/s]



 92%|█████████████████████████████████████████████████████████████████████████▊      | 369/400 [02:17<00:14,  2.21it/s]



 92%|██████████████████████████████████████████████████████████████████████████      | 370/400 [02:17<00:11,  2.65it/s]



 93%|██████████████████████████████████████████████████████████████████████████▏     | 371/400 [02:18<00:15,  1.89it/s]



 93%|██████████████████████████████████████████████████████████████████████████▍     | 372/400 [02:18<00:11,  2.40it/s]



 93%|██████████████████████████████████████████████████████████████████████████▌     | 373/400 [02:19<00:09,  2.95it/s]



 94%|██████████████████████████████████████████████████████████████████████████▊     | 374/400 [02:19<00:08,  3.25it/s]



 94%|███████████████████████████████████████████████████████████████████████████     | 375/400 [02:19<00:08,  2.79it/s]



 94%|███████████████████████████████████████████████████████████████████████████▏    | 376/400 [02:19<00:06,  3.49it/s]



 94%|███████████████████████████████████████████████████████████████████████████▌    | 378/400 [02:20<00:04,  4.95it/s]



 95%|███████████████████████████████████████████████████████████████████████████▊    | 379/400 [02:20<00:04,  5.11it/s]



 95%|████████████████████████████████████████████████████████████████████████████    | 380/400 [02:20<00:03,  5.12it/s]



 96%|████████████████████████████████████████████████████████████████████████████▍   | 382/400 [02:20<00:03,  5.02it/s]



 96%|████████████████████████████████████████████████████████████████████████████▌   | 383/400 [02:21<00:04,  4.16it/s]



 96%|████████████████████████████████████████████████████████████████████████████▊   | 384/400 [02:21<00:03,  4.10it/s]



 96%|█████████████████████████████████████████████████████████████████████████████   | 385/400 [02:21<00:03,  3.83it/s]



 96%|█████████████████████████████████████████████████████████████████████████████▏  | 386/400 [02:22<00:03,  3.54it/s]



 97%|█████████████████████████████████████████████████████████████████████████████▍  | 387/400 [02:23<00:06,  2.02it/s]



 97%|█████████████████████████████████████████████████████████████████████████████▌  | 388/400 [02:23<00:04,  2.44it/s]



 98%|██████████████████████████████████████████████████████████████████████████████  | 390/400 [02:23<00:02,  3.64it/s]



 98%|██████████████████████████████████████████████████████████████████████████████▏ | 391/400 [02:23<00:02,  4.07it/s]



 98%|██████████████████████████████████████████████████████████████████████████████▌ | 393/400 [02:23<00:01,  5.85it/s]



 98%|██████████████████████████████████████████████████████████████████████████████▊ | 394/400 [02:24<00:01,  5.48it/s]



 99%|███████████████████████████████████████████████████████████████████████████████ | 395/400 [02:24<00:00,  5.26it/s]



 99%|███████████████████████████████████████████████████████████████████████████████▏| 396/400 [02:24<00:00,  4.20it/s]



100%|███████████████████████████████████████████████████████████████████████████████▌| 398/400 [02:25<00:00,  5.08it/s]



100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [02:26<00:00,  2.73it/s]


Tiempo búsqueda (s): 146
Best robust CV: 0.3000639401596882
Best params: {'kbest__k': 1023, 'svc__C': np.float64(10.772186132342654), 'svc__class_weight': 'balanced', 'svc__gamma': np.float64(0.05825849149538057)}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                            params
        0.300064        0.088711              {'kbest__k': 1023, 'svc__C': 10.772186132342654, 'svc__class_weight': 'balanced', 'svc__gamma': 0.05825849149538057}
        0.294546        0.094684              {'kbest__k': 1023, 'svc__C': 2.5404580742256915, 'svc__class_weight': 'balanced', 'svc__gamma': 0.06395288218539363}
        0.270580        0.098753  {'kbest__k': 512, 'svc__C': 1221.212458656701, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.01543656015027835}
        0.250370        0.099212 {'kbest__k': 1023, 'svc__C': 852.6974999202718, 'svc__class_weight': {0: 3.0, 1: 6.0, 2: 1.0}, 'svc__gamma': 0.02589296092555009}
        0.241245        0.082423   {'kbest__k': 384, 'svc__C': 159.0841875063149, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.0445131431564921}
        0.199769        0.103924  {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935042}
        0.170291        0.144611             {'kbest__k': 1023, 'svc__C': 784.8081929215668, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842181e-05}
        0.160072        0.117242            {'kbest__k': 1023, 'svc__C': 30.036641140654908, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0035753161317240816}
        0.114299        0.143144            {'kbest__k': 1023, 'svc__C': 34.57103872771002, 'svc__class_weight': 'balanced', 'svc__gamma': 0.00026555217129736324}
        0.109081        0.162544              {'kbest__k': 128, 'svc__C': 442.0875300846289, 'svc__class_weight': 'balanced', 'svc__gamma': 3.341229373953389e-05}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.5562±0.0390 | AUC_train=0.7922 | AUC_test=0.8426
  Gap_CV=0.2359 | Gap_rep=-0.0505
  Acc=0.7480 | F1w=0.7551 | macroF1=0.7146 | balacc=0.7524

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.57      0.54      0.55       422
      Blazar       0.45      0.70      0.55       189
         QSO       0.77      0.70      0.73       899

    accuracy                           0.65      1510
   macro avg       0.60      0.65      0.61      1510
weighted avg       0.67      0.65      0.66      1510

  CM TEST
        AGN  Blazar  QSO
AGN      70       6   25
Blazar    4      24    2
QSO      50       8  188

  Report TEST
              precision    recall  f1-score   support

         AGN       0.56      0.69      0.62       101
      Blazar       0.63      0.80      0.71        30
         QSO       0.87      0.76      0.82       246

    accuracy                           0.75       377
   macro avg       0.69      0.75      0.71       377
weighted avg       0.77      0.75      0.76       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM1.png

===============================================================================================
Evaluando SVM2...

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [02:33<?, ?fit/s]

  AUC_CV=0.5648±0.0316 | AUC_train=0.7650 | AUC_test=0.8159
  Gap_CV=0.2001 | Gap_rep=-0.0509
  Acc=0.6923 | F1w=0.7011 | macroF1=0.6488 | balacc=0.6948

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.56      0.45      0.50       422
      Blazar       0.42      0.67      0.51       189
         QSO       0.73      0.71      0.72       899

    accuracy                           0.63      1510
   macro avg       0.57      0.61      0.58      1510
weighted avg       0.64      0.63      0.63      1510

  CM TEST
        AGN  Blazar  QSO
AGN      60       8   33
Blazar    5      23    2
QSO      56      12  178

  Report TEST
              precision    recall  f1-score   support

         AGN       0.50      0.59      0.54       101
      Blazar       0.53      0.77      0.63        30
         QSO       0.84      0.72      0.78       246

    accuracy                           0.69       377
   macro avg       0.62      0.69      0.65       377
weighted avg       0.72      0.69      0.70       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.5152±0.0506 | AUC_train=0.7458 | AUC_test=0.7931
  Gap_CV=0.2306 | Gap_rep=-0.0473
  Acc=0.7003 | F1w=0.7101 | macroF1=0.6632 | balacc=0.7338

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.55      0.65      0.60       422
      Blazar       0.28      0.98      0.43       189
         QSO       0.97      0.38      0.55       899

    accuracy                           0.53      1510
   macro avg       0.60      0.67      0.53      1510
weighted avg       0.77      0.53      0.55      1510

  CM TEST
        AGN  Blazar  QSO
AGN      78       3   20
Blazar    3      23    4
QSO      62      21  163

  Report TEST
              precision    recall  f1-score   support

         AGN       0.55      0.77      0.64       101
      Blazar       0.49      0.77      0.60        30
         QSO       0.87      0.66      0.75       246

    accuracy                           0.70       377
   macro avg       0.64      0.73      0.66       377
weighted avg       0.75      0.70      0.71       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.5149±0.0550 | AUC_train=0.7589 | AUC_test=0.8110
  Gap_CV=0.2440 | Gap_rep=-0.0521
  Acc=0.7321 | F1w=0.7410 | macroF1=0.6912 | balacc=0.7579

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.58      0.70      0.63       422
      Blazar       0.29      0.98      0.45       189
         QSO       0.99      0.40      0.57       899

    accuracy                           0.56      1510
   macro avg       0.62      0.70      0.55      1510
weighted avg       0.79      0.56      0.57      1510

  CM TEST
        AGN  Blazar  QSO
AGN      82       3   16
Blazar    3      23    4
QSO      56      19  171

  Report TEST
              precision    recall  f1-score   support

         AGN       0.58      0.81      0.68       101
      Blazar       0.51      0.77      0.61        30
         QSO       0.90      0.70      0.78       246

    accuracy                           0.73       377
   macro avg       0.66      0.76      0.69       377
weighted avg       0.78      0.73      0.74       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.5082±0.0507 | AUC_train=0.7523 | AUC_test=0.8215
  Gap_CV=0.2442 | Gap_rep=-0.0692
  Acc=0.7188 | F1w=0.7285 | macroF1=0.6830 | balacc=0.7492

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.58      0.62      0.60       422
      Blazar       0.27      0.99      0.42       189
         QSO       0.97      0.39      0.55       899

    accuracy                           0.53      1510
   macro avg       0.61      0.66      0.52      1510
weighted avg       0.77      0.53      0.55      1510

  CM TEST
        AGN  Blazar  QSO
AGN      81       4   16
Blazar    3      23    4
QSO      62      17  167

  Report TEST
              precision    recall  f1-score   support

         AGN       0.55      0.80      0.66       101
      Blazar       0.52      0.77      0.62        30
         QSO       0.89      0.68      0.77       246

    accuracy                           0.72       377
   macro avg       0.66      0.75      0.68       377
weighted avg       0.77      0.72      0.73       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_test  F1w_test  macroF1  balacc
SVM1  0.5562 0.0390  0.2359   0.8426  0.0390  -0.0505    0.7480    0.7551   0.7146  0.7524
SVM2  0.5648 0.0316  0.2001   0.8159  0.0316  -0.0509    0.6923    0.7011   0.6488  0.6948
SVM3  0.5152 0.0506  0.2306   0.7931  0.0506  -0.0473    0.7003    0.7101   0.6632  0.7338
SVM4  0.5149 0.0550  0.2440   0.8110  0.0550  -0.0521    0.7321    0.7410   0.6912  0.7579
SVM5  0.5082 0.0507  0.2442   0.8215  0.0507  -0.0692    0.7188    0.7285   0.6830  0.7492

>>> Mejor modelo: SVM1  AUC_test=0.8426

Tabla LaTeX guardada en: C:\Users\Gamer\Downloads\tabla_svm_top5.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        \rowcolor{BlueBest} SVM$_{1}$ & 0.5562 & 0.0390 & 0.2359 & 0.8426 & 0.0390 & -0.0505 & 0.7480 & 0.7551 \\
        SVM$_{2}$ & 0.5648 & 0.0316 & 0.2001 & 0.8159 & 0.0316 & -0.0509 & 0.6923 & 0.7011 \\
        SVM$_{3}$ & 0.5152 & 0.0506 & 0.2306 & 0.7931 & 0.0506 & -0.0473 & 0.7003 & 0.7101 \\
        SVM$_{4}$ & 0.5149 & 0.0550 & 0.2440 & 0.8110 & 0.0550 & -0.0521 & 0.7321 & 0.7410 \\
        SVM$_{5}$ & 0.5082 & 0.0507 & 0.2442 & 0.8215 & 0.0507 & -0.0692 & 0.7188 & 0.7285 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}
\label{tab:svm_top5_auc}
\end{table}

Tiempo total: 00:03:51

# =========================
# IMPORTANCIA POR NIVEL PARA SVM (RBF) CON PERMUTATION IMPORTANCE
# LOG-FIRMA (trayectoria 2D: tiempo, magnitud)
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.inspection import permutation_importance

plt.style.use("default")

# -------------------------------------------------
# 1) Mejor modelo SVM ya ajustado
# -------------------------------------------------
svm_best = rs.best_estimator_

# -------------------------------------------------
# 2) Permutation importance sobre el pipeline completo
# -------------------------------------------------
perm = permutation_importance(
    estimator=svm_best,
    X=X_test,
    y=y_test_enc,
    n_repeats=20,
    random_state=42,
    scoring="balanced_accuracy",
    n_jobs=-1
)

importances_mean = perm.importances_mean

# -------------------------------------------------
# 3) DataFrame base con TODAS las variables originales
#    Aquí NO usamos nombres numéricos; usamos el ORDEN de columnas
# -------------------------------------------------
orig_cols = pd.Index(X_test.columns)

df_all = pd.DataFrame({
    "feature": orig_cols.astype(str),
    "importance_mean": importances_mean,
    "pos": np.arange(1, len(orig_cols) + 1)   # posición 1-based
})

# -------------------------------------------------
# 4) Identificar cuáles variables realmente usa el SVM final
#    (mapeando VT -> KBest sobre columnas originales)
# -------------------------------------------------
selected_cols = orig_cols.copy()

if hasattr(svm_best, "named_steps"):
    if "vt" in svm_best.named_steps:
        vt_mask = svm_best.named_steps["vt"].get_support()
        selected_cols = selected_cols[vt_mask]

    if "kbest" in svm_best.named_steps:
        kbest_mask = svm_best.named_steps["kbest"].get_support()
        selected_cols = selected_cols[kbest_mask]

print(f"Variables originales en X_test: {len(orig_cols)}")
print(f"Variables que realmente usa el SVM final: {len(selected_cols)}")

df_imp_svm = df_all[df_all["feature"].isin(selected_cols.astype(str))].copy()

# -------------------------------------------------
# 5) Cortes correctos para LOG-FIRMA en dimensión 2
#    tamaños por nivel: [2,1,2,3,6,9,18,30,56]
#    acumulados:        [2,3,5,8,14,23,41,71,127]
# -------------------------------------------------
edges_logsig = [0, 2, 3, 5, 8, 14, 23, 41, 71, 127]
labels_logsig = [f"N{i}" for i in range(1, 10)]  # N1..N9

df_imp_svm["nivel"] = pd.cut(
    df_imp_svm["pos"],
    bins=edges_logsig,
    labels=labels_logsig,
    right=True,
    include_lowest=True
)

if df_imp_svm["nivel"].isna().any():
    bad = df_imp_svm.loc[df_imp_svm["nivel"].isna(), ["feature", "pos"]].head(10)
    raise ValueError(
        "Hay variables fuera del rango esperado para log-firma M=9. "
        f"Ejemplos:\n{bad}"
    )

df_imp_svm["nivel_num"] = (
    df_imp_svm["nivel"].astype(str).str.replace("N", "", regex=False).astype(int)
)

df_imp_svm = df_imp_svm.sort_values(["nivel_num", "pos"])

print("\nCantidad de variables seleccionadas por nivel:")
print(df_imp_svm["nivel_num"].value_counts().sort_index())

# -------------------------------------------------
# 6) Resumen por nivel
#    mean y median salen ambos de importance_mean
# -------------------------------------------------
res_svm = df_imp_svm.groupby("nivel_num").agg(
    mean=("importance_mean", "mean"),
    median=("importance_mean", "median"),
    count=("importance_mean", "size")
).reset_index()

# Forzar presencia de todos los niveles 1..9
niveles_completos = pd.DataFrame({"nivel_num": np.arange(1, 10)})
res_svm = niveles_completos.merge(res_svm, on="nivel_num", how="left").fillna(0)

print("\nRESUMEN POR NIVEL — SVM LOG-FIRMA")
print(res_svm.to_string(index=False))

# -------------------------------------------------
# 7) Gráfico azul sobrio
# -------------------------------------------------
x = np.arange(len(res_svm))
labels_plot = [f"Nivel {n}" for n in res_svm["nivel_num"]]

fig, ax = plt.subplots(figsize=(9.5, 5.2), facecolor="white")
ax.set_facecolor("white")

ax.bar(
    x,
    res_svm["mean"],
    color="#8FA8C7",
    edgecolor="#355C7D",
    linewidth=1.2,
    alpha=0.9,
    label="Importancia promedio"
)

ax.scatter(
    x,
    res_svm["median"],
    s=80,
    marker="o",
    color="#355C7D",
    edgecolors="#1F3A56",
    linewidths=1,
    zorder=3,
    label="Mediana"
)

ax.set_xticks(x)
ax.set_xticklabels(labels_plot)
ax.set_xlabel("Nivel")
ax.set_ylabel("Importancia")
ax.set_title("Importancia promedio y mediana por nivel (SVM, log-firma)")
ax.grid(axis="y", linestyle="--", alpha=0.35, color="gray")

handles, labels = ax.get_legend_handles_labels()
order = [1, 0]
ax.legend([handles[i] for i in order], [labels[i] for i in order])

fig.tight_layout()

# -------------------------------------------------
# 8) Guardar
# -------------------------------------------------
downloads = Path.home() / "Downloads"
downloads.mkdir(parents=True, exist_ok=True)
ruta_guardado = downloads / "importancia_SVMLOGFIRMAESIG.png"

fig.savefig(ruta_guardado, dpi=300, bbox_inches="tight", facecolor="white")
print(f"\nGráfico guardado en: {ruta_guardado}")

plt.show()

Variables originales en X_test: 127
Variables que realmente usa el SVM final: 127

Cantidad de variables seleccionadas por nivel:
nivel_num
1     2
2     1
3     2
4     3
5     6
6     9
7    18
8    30
9    56
Name: count, dtype: int64

RESUMEN POR NIVEL — SVM LOG-FIRMA
 nivel_num     mean   median  count
         1 0.016538 0.016538      2
         2 0.091708 0.091708      1
         3 0.099197 0.099197      2
         4 0.044668 0.044008      3
         5 0.024496 0.017768      6
         6 0.018242 0.015316      9
         7 0.012732 0.009927     18
         8 0.011348 0.006866     30
         9 0.004597 0.003235     56

Gráfico guardado en: C:\Users\Gamer\Downloads\importancia_SVMLOGFIRMAESIG.png

# ==========================================================
# TABLA POR NIVELES (SVM LOG-FIRMA) — criterio AUC_rep
# - Usa el MISMO pipeline del SVM final
# - Funciona aunque X_train.columns NO sean numéricas
# - Evalúa por niveles acumulados de log-firma 2D hasta M=9
# ==========================================================

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from scipy.special import softmax
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# ----------------------------------------------------------
# 0) REQUISITOS PREVIOS
# ----------------------------------------------------------
# Deben existir:
# X_train, X_test, y_train_enc, y_test_enc, top10, make_pipe

y_train_encoded = y_train_enc
y_test_encoded  = y_test_enc

# ----------------------------------------------------------
# 1) TOMAR LOS PARÁMETROS DEL MODELO FINAL DE LOG-FIRMA
# ----------------------------------------------------------
# Si seleccionaste SVM1 como modelo final de log-firma:
LOGSIG_PARAMS = top10.iloc[0]["params"].copy()

print("Parámetros del SVM final (log-firma):")
print(LOGSIG_PARAMS)

kbest_original = LOGSIG_PARAMS.get("kbest__k", X_train.shape[1])

# ----------------------------------------------------------
# 2) FUNCIÓN AUC — misma lógica que tu notebook principal
# ----------------------------------------------------------
def auc_ovr_macro_from_scores(y_true, decision_scores):
    if decision_scores.ndim == 1:
        decision_scores = decision_scores.reshape(-1, 1)
    proba = softmax(decision_scores, axis=1)
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

# ----------------------------------------------------------
# 3) ORDEN DE COLUMNAS POR POSICIÓN
# ----------------------------------------------------------
# En log-firma NO usamos nombres de columnas, usamos el orden
cols_ordered = list(X_train.columns)

print(f"Número total de columnas en X_train: {len(cols_ordered)}")

# Acumulados correctos para log-firma 2D, M=9
cum_levels = {
    1: 2,
    2: 3,
    3: 5,
    4: 8,
    5: 14,
    6: 23,
    7: 41,
    8: 71,
    9: 127
}

if len(cols_ordered) < 127:
    raise ValueError(
        f"X_train tiene {len(cols_ordered)} columnas, pero para log-firma 2D M=9 "
        f"se esperaban al menos 127."
    )

# ----------------------------------------------------------
# 4) LOOP POR NIVELES 1..9
# ----------------------------------------------------------
rows = []

for m in tqdm(range(1, 10), desc="Evaluando niveles (SVM log-firma)", unit="nivel"):
    n_feats = cum_levels[m]

    selected = cols_ordered[:n_feats]

    Xtr_m = X_train[selected].copy()
    Xte_m = X_test[selected].copy()

    params_m = LOGSIG_PARAMS.copy()

    # Ajustar kbest si el nivel tiene menos features que el k original
    if isinstance(kbest_original, (int, np.integer, float, np.floating)):
        params_m["kbest__k"] = int(min(int(kbest_original), Xtr_m.shape[1]))
    else:
        params_m["kbest__k"] = "all"

    model_m = make_pipe(params_m)
    model_m.fit(Xtr_m, y_train_encoded)

    pred_tr = model_m.predict(Xtr_m)
    pred_te = model_m.predict(Xte_m)

    scores_tr = model_m.decision_function(Xtr_m)
    scores_te = model_m.decision_function(Xte_m)

    acc_tr = accuracy_score(y_train_encoded, pred_tr)
    f1w_tr = f1_score(y_train_encoded, pred_tr, average="weighted", zero_division=0)
    auc_tr = auc_ovr_macro_from_scores(y_train_encoded, scores_tr)

    acc_te = accuracy_score(y_test_encoded, pred_te)
    f1w_te = f1_score(y_test_encoded, pred_te, average="weighted", zero_division=0)
    auc_te = auc_ovr_macro_from_scores(y_test_encoded, scores_te)

    rows.append({
        "NivelFirma": m,
        "N_features": Xtr_m.shape[1],
        "kbest_usado": params_m["kbest__k"],
        "AccTrain": acc_tr,
        "F1wTrain": f1w_tr,
        "AUCTrain": auc_tr,
        "AccTest": acc_te,
        "F1wTest": f1w_te,
        "AUCRep": auc_te
    })

df_levels_logsig = pd.DataFrame(rows)

# ----------------------------------------------------------
# 5) ELEGIR EL NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
# ----------------------------------------------------------
best_auc = df_levels_logsig["AUCRep"].max()
tol = 1e-4
best_candidates = df_levels_logsig[df_levels_logsig["AUCRep"] >= best_auc - tol]
best_simple = best_candidates.sort_values(["NivelFirma"]).iloc[0]

print("\n" + "="*100)
print("RESULTADOS POR NIVEL (SVM LOG-FIRMA) — criterio AUCRep")
print("="*100)
print(df_levels_logsig.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

print("\n" + "="*100)
print("NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)")
print("="*100)
print(best_simple.to_string())

# ----------------------------------------------------------
# 6) FILAS LaTeX
# ----------------------------------------------------------
print("\n" + "="*100)
print("FILAS LaTeX")
print("="*100)
for _, r in df_levels_logsig.iterrows():
    print(
        f"{int(r['NivelFirma'])} & {int(r['N_features'])} & "
        f"{r['AccTrain']:.4f} & {r['F1wTrain']:.4f} & "
        f"{r['AccTest']:.4f} & {r['F1wTest']:.4f} & {r['AUCRep']:.4f} \\\\"
    )

Parámetros del SVM final (log-firma):
{'kbest__k': 1023, 'svc__C': np.float64(10.772186132342654), 'svc__class_weight': 'balanced', 'svc__gamma': np.float64(0.05825849149538057)}
Número total de columnas en X_train: 127

Evaluando niveles (SVM log-firma): 100%|██████████████████████████████████████████████| 9/9 [00:07<00:00,  1.18nivel/s]


====================================================================================================
RESULTADOS POR NIVEL (SVM LOG-FIRMA) — criterio AUCRep
====================================================================================================
 NivelFirma  N_features  kbest_usado  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2            2    0.3616    0.3883    0.5353   0.5385   0.5425  0.5397
          2           3            3    0.4344    0.4498    0.5875   0.5225   0.5265  0.6012
          3           5            5    0.5212    0.5263    0.6463   0.5517   0.5714  0.6989
          4           8            8    0.5457    0.5552    0.6849   0.6021   0.6161  0.7223
          5          14           14    0.5735    0.5808    0.7217   0.6446   0.6577  0.7495
          6          23           23    0.5907    0.5961    0.7458   0.6472   0.6590  0.7782
          7          41           41    0.6205    0.6251    0.7692   0.6870   0.6959  0.8060
          8          71           71    0.6391    0.6436    0.7817   0.7268   0.7322  0.8249
          9         127          127    0.6536    0.6586    0.7922   0.7480   0.7551  0.8426

====================================================================================================
NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)
====================================================================================================
NivelFirma       9.000000
N_features     127.000000
kbest_usado    127.000000
AccTrain         0.653642
F1wTrain         0.658641
AUCTrain         0.792165
AccTest          0.748011
F1wTest          0.755074
AUCRep           0.842649

====================================================================================================
FILAS LaTeX
====================================================================================================
1 & 2 & 0.3616 & 0.3883 & 0.5385 & 0.5425 & 0.5397 \\
2 & 3 & 0.4344 & 0.4498 & 0.5225 & 0.5265 & 0.6012 \\
3 & 5 & 0.5212 & 0.5263 & 0.5517 & 0.5714 & 0.6989 \\
4 & 8 & 0.5457 & 0.5552 & 0.6021 & 0.6161 & 0.7223 \\
5 & 14 & 0.5735 & 0.5808 & 0.6446 & 0.6577 & 0.7495 \\
6 & 23 & 0.5907 & 0.5961 & 0.6472 & 0.6590 & 0.7782 \\
7 & 41 & 0.6205 & 0.6251 & 0.6870 & 0.6959 & 0.8060 \\
8 & 71 & 0.6391 & 0.6436 & 0.7268 & 0.7322 & 0.8249 \\
9 & 127 & 0.6536 & 0.6586 & 0.7480 & 0.7551 & 0.8426 \\

IISIGNATURE FIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

# pip install tqdm tqdm_joblib imbalanced-learn matplotlib seaborn
from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")          # backend sin pantalla; cambiar a "TkAgg" si se quiere ventana
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, cross_val_score
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS
# =========================
x = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\IISIGNATURE\path_signature_iisig_M9.csv')
y = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid','survey_class_mapped','survey_class','survey_class_cat','id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'C:\Users\Gamer\Downloads'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# AUC OVR: decision_function → softmax → roc_auc_score
def _auc_ovr(y_true, y_score):
    proba = softmax(y_score, axis=1)   # convierte scores crudos a distribución [0,1] que suma 1
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

auc_scorer = make_scorer(_auc_ovr, response_method="decision_function")

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

param_dist = {
    "kbest__k":          [128, 256, 384, 512, 768, 1023],
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):   return v.item()
    if isinstance(v, dict):         return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) EVALUACIÓN COMPLETA + FIGURAS
# =========================
def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0


def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    """Guarda figura con CM de train y test — mismo estilo que el resto de tus CMs."""
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4,
                           width_ratios=[1, gap_width, 1, 0.08],
                           wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]);  ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real",     fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%",
                        ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})",
                        ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")
def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    # AUC en CV (5 folds, calcula sobreajuste respecto a validación)
    auc_cv_scores = cross_val_score(
        make_pipe(params), X_train, y_train_enc,
        cv=cv, scoring=auc_scorer, n_jobs=-1
    )
    auc_cv = float(np.mean(auc_cv_scores))
    sd_cv  = float(np.std(auc_cv_scores))

    # Reentrenar sobre todo el train
    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)
    df_tr   = softmax(model.decision_function(X_train), axis=1)
    df_te   = softmax(model.decision_function(X_test),  axis=1)

    auc_train = roc_auc_score(y_train_enc, df_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  df_te, multi_class="ovr", average="macro")
    gap_cv    = round(auc_train - auc_cv,   4)
    gap_rep   = round(auc_train - auc_test, 4)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro",    zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc={acc_test:.4f} | F1w={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={accuracy_score(y_train_enc, yhat_tr):.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":      tag,
        "AUC_CV":   round(auc_cv,    4),
        "SD_CV":    round(sd_cv,     4),
        "Gap_CV":   gap_cv,
        "AUC_rep":  round(auc_test,  4),
        "SD_rep":   round(sd_cv,     4),
        "Gap_rep":  gap_rep,
        "Acc_test": round(acc_test,  4),
        "F1w_test": round(f1w_test,  4),
        "macroF1":  round(mf1_test,  4),
        "balacc":   round(bacc_test, 4),
    }


results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 7) TABLA RESUMEN + LATEX AUTO-GENERADO
# =========================
summary_df = pd.DataFrame(results_summary)
print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]
print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

# Generar snippet LaTeX
latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}" + "\n"
    r"\label{tab:svm_top5_auc}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 8) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 1022) (377, 1022)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {np.int64(0): np.int64(422), np.int64(1): np.int64(189), np.int64(2): np.int64(899)}

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [00:00<?, ?fit/s]

  0%|                                                                                          | 0/400 [00:00<?, ?it/s]

  0%|▏                                                                               | 1/400 [00:28<3:10:37, 28.67s/it]

  0%|▍                                                                               | 2/400 [00:29<1:19:52, 12.04s/it]

  1%|▌                                                                                 | 3/400 [00:29<45:03,  6.81s/it]

  1%|▊                                                                                 | 4/400 [00:30<29:43,  4.50s/it]

  1%|█                                                                                 | 5/400 [00:30<19:34,  2.97s/it]

  2%|█▋                                                                                | 8/400 [00:43<24:26,  3.74s/it]

  2%|█▊                                                                                | 9/400 [00:45<20:56,  3.21s/it]

  2%|██                                                                               | 10/400 [00:45<16:01,  2.46s/it]

  3%|██▏                                                                              | 11/400 [00:45<12:25,  1.92s/it]

  3%|██▍                                                                              | 12/400 [00:46<10:30,  1.63s/it]

  3%|██▋                                                                              | 13/400 [00:49<13:52,  2.15s/it]

  4%|██▊                                                                              | 14/400 [00:50<11:13,  1.75s/it]

  4%|███                                                                              | 15/400 [00:51<09:32,  1.49s/it]

  4%|███▏                                                                             | 16/400 [00:52<08:44,  1.37s/it]

  4%|███▍                                                                             | 17/400 [00:52<07:09,  1.12s/it]

  4%|███▋                                                                             | 18/400 [00:53<05:36,  1.13it/s]

  5%|███▊                                                                             | 19/400 [00:53<04:26,  1.43it/s]

  5%|████                                                                             | 20/400 [01:08<31:43,  5.01s/it]

  5%|████▎                                                                            | 21/400 [01:13<31:09,  4.93s/it]

  6%|████▊                                                                            | 24/400 [01:14<14:20,  2.29s/it]

  6%|█████                                                                            | 25/400 [01:16<13:53,  2.22s/it]

  6%|█████▎                                                                           | 26/400 [01:17<12:11,  1.96s/it]

  7%|█████▍                                                                           | 27/400 [01:18<11:05,  1.78s/it]

  7%|█████▋                                                                           | 28/400 [01:18<08:22,  1.35s/it]

  7%|█████▊                                                                           | 29/400 [01:18<06:43,  1.09s/it]

  8%|██████                                                                           | 30/400 [01:19<06:18,  1.02s/it]

  8%|██████▎                                                                          | 31/400 [01:20<05:05,  1.21it/s]

  8%|██████▍                                                                          | 32/400 [01:33<27:32,  4.49s/it]

  8%|██████▋                                                                          | 33/400 [01:38<27:38,  4.52s/it]

  9%|███████                                                                          | 35/400 [01:38<15:08,  2.49s/it]

  9%|███████▎                                                                         | 36/400 [01:39<12:58,  2.14s/it]

  9%|███████▍                                                                         | 37/400 [01:41<12:36,  2.08s/it]

 10%|███████▋                                                                         | 38/400 [01:42<10:49,  1.79s/it]

 10%|███████▉                                                                         | 39/400 [01:43<10:32,  1.75s/it]

 10%|████████▎                                                                        | 41/400 [01:46<09:16,  1.55s/it]

 10%|████████▌                                                                        | 42/400 [01:47<08:09,  1.37s/it]

 11%|████████▋                                                                        | 43/400 [01:47<06:26,  1.08s/it]

 11%|████████▉                                                                        | 44/400 [02:00<24:37,  4.15s/it]

 11%|█████████                                                                        | 45/400 [02:05<26:20,  4.45s/it]

 12%|█████████▎                                                                       | 46/400 [02:07<22:11,  3.76s/it]

 12%|█████████▋                                                                       | 48/400 [02:08<13:09,  2.24s/it]

 12%|█████████▉                                                                       | 49/400 [02:10<12:52,  2.20s/it]

 12%|██████████▏                                                                      | 50/400 [02:11<11:40,  2.00s/it]

 13%|██████████▎                                                                      | 51/400 [02:12<09:24,  1.62s/it]

 13%|██████████▌                                                                      | 52/400 [02:13<08:33,  1.47s/it]

 13%|██████████▋                                                                      | 53/400 [02:13<06:51,  1.19s/it]

 14%|██████████▉                                                                      | 54/400 [02:14<05:54,  1.02s/it]

 14%|███████████▏                                                                     | 55/400 [02:14<04:54,  1.17it/s]

 14%|███████████▎                                                                     | 56/400 [02:22<17:07,  2.99s/it]

 14%|███████████▌                                                                     | 57/400 [02:28<21:34,  3.78s/it]

 14%|███████████▋                                                                     | 58/400 [02:30<19:03,  3.34s/it]

 15%|████████████▏                                                                    | 60/400 [02:31<11:18,  2.00s/it]

 15%|████████████▎                                                                    | 61/400 [02:35<13:06,  2.32s/it]

 16%|████████████▌                                                                    | 62/400 [02:37<13:13,  2.35s/it]

 16%|████████████▊                                                                    | 63/400 [02:38<10:42,  1.91s/it]

 16%|████████████▉                                                                    | 64/400 [02:39<09:32,  1.71s/it]

 16%|█████████████▏                                                                   | 65/400 [02:39<07:43,  1.38s/it]

 16%|█████████████▎                                                                   | 66/400 [02:40<06:33,  1.18s/it]

 17%|█████████████▌                                                                   | 67/400 [02:41<05:49,  1.05s/it]

 17%|█████████████▊                                                                   | 68/400 [02:48<16:06,  2.91s/it]

 17%|█████████████▉                                                                   | 69/400 [02:54<20:39,  3.74s/it]

 18%|██████████████▏                                                                  | 70/400 [02:56<17:29,  3.18s/it]

 18%|██████████████▍                                                                  | 71/400 [02:56<13:06,  2.39s/it]

 18%|██████████████▌                                                                  | 72/400 [02:57<09:28,  1.73s/it]

 18%|██████████████▊                                                                  | 73/400 [03:00<11:57,  2.19s/it]

 18%|██████████████▉                                                                  | 74/400 [03:02<12:14,  2.25s/it]

 19%|███████████████▏                                                                 | 75/400 [03:03<09:44,  1.80s/it]

 19%|███████████████▍                                                                 | 76/400 [03:04<09:17,  1.72s/it]

 19%|███████████████▌                                                                 | 77/400 [03:05<07:51,  1.46s/it]

 20%|███████████████▊                                                                 | 78/400 [03:06<06:30,  1.21s/it]

 20%|███████████████▉                                                                 | 79/400 [03:07<05:31,  1.03s/it]

 20%|████████████████▏                                                                | 80/400 [03:14<15:27,  2.90s/it]

 20%|████████████████▍                                                                | 81/400 [03:19<19:19,  3.63s/it]

 20%|████████████████▌                                                                | 82/400 [03:21<16:52,  3.18s/it]

 21%|████████████████▊                                                                | 83/400 [03:22<12:10,  2.30s/it]

 21%|█████████████████                                                                | 84/400 [03:22<08:55,  1.70s/it]

 21%|█████████████████▏                                                               | 85/400 [03:25<11:12,  2.14s/it]

 22%|█████████████████▍                                                               | 86/400 [03:30<16:00,  3.06s/it]

 22%|█████████████████▌                                                               | 87/400 [03:33<15:24,  2.95s/it]

 22%|█████████████████▊                                                               | 88/400 [03:34<12:41,  2.44s/it]

 22%|██████████████████                                                               | 89/400 [03:35<10:37,  2.05s/it]

 22%|██████████████████▏                                                              | 90/400 [03:35<07:38,  1.48s/it]

 23%|██████████████████▍                                                              | 91/400 [03:36<05:31,  1.07s/it]

 23%|██████████████████▋                                                              | 92/400 [03:40<11:15,  2.19s/it]

 23%|██████████████████▊                                                              | 93/400 [03:46<16:01,  3.13s/it]

 24%|███████████████████                                                              | 94/400 [03:48<14:45,  2.89s/it]

 24%|███████████████████▏                                                             | 95/400 [03:48<10:30,  2.07s/it]

 24%|███████████████████▍                                                             | 96/400 [03:50<10:08,  2.00s/it]

 24%|███████████████████▋                                                             | 97/400 [03:54<12:42,  2.52s/it]

 24%|███████████████████▊                                                             | 98/400 [03:58<15:24,  3.06s/it]

 25%|████████████████████                                                             | 99/400 [04:00<13:44,  2.74s/it]

 25%|████████████████████▏                                                           | 101/400 [04:00<07:33,  1.52s/it]

 26%|████████████████████▍                                                           | 102/400 [04:02<07:21,  1.48s/it]

 26%|████████████████████▌                                                           | 103/400 [04:02<06:27,  1.30s/it]

 26%|████████████████████▊                                                           | 104/400 [04:05<07:43,  1.57s/it]

 26%|█████████████████████                                                           | 105/400 [04:10<12:57,  2.63s/it]

 26%|█████████████████████▏                                                          | 106/400 [04:13<13:06,  2.67s/it]

 27%|█████████████████████▍                                                          | 107/400 [04:13<09:25,  1.93s/it]

 27%|█████████████████████▌                                                          | 108/400 [04:15<09:24,  1.93s/it]

 27%|█████████████████████▊                                                          | 109/400 [04:19<12:01,  2.48s/it]

 28%|██████████████████████                                                          | 110/400 [04:23<14:04,  2.91s/it]

 28%|██████████████████████▏                                                         | 111/400 [04:26<14:05,  2.92s/it]

 28%|██████████████████████▌                                                         | 113/400 [04:26<07:49,  1.64s/it]

 28%|██████████████████████▊                                                         | 114/400 [04:27<07:25,  1.56s/it]

 29%|███████████████████████                                                         | 115/400 [04:28<06:12,  1.31s/it]

 29%|███████████████████████▏                                                        | 116/400 [04:30<06:51,  1.45s/it]

 29%|███████████████████████▍                                                        | 117/400 [04:35<12:20,  2.62s/it]

 30%|███████████████████████▌                                                        | 118/400 [04:38<12:14,  2.61s/it]

 30%|███████████████████████▊                                                        | 119/400 [04:38<08:52,  1.89s/it]

 30%|████████████████████████                                                        | 120/400 [04:40<08:45,  1.88s/it]

 30%|████████████████████████▏                                                       | 121/400 [04:44<11:18,  2.43s/it]

 30%|████████████████████████▍                                                       | 122/400 [04:48<13:29,  2.91s/it]

 31%|████████████████████████▌                                                       | 123/400 [04:51<13:40,  2.96s/it]

 31%|█████████████████████████                                                       | 125/400 [04:51<07:29,  1.63s/it]

 32%|█████████████████████████▏                                                      | 126/400 [04:55<10:13,  2.24s/it]

 32%|█████████████████████████▍                                                      | 127/400 [04:56<08:30,  1.87s/it]

 32%|█████████████████████████▌                                                      | 128/400 [04:57<07:46,  1.71s/it]

 32%|█████████████████████████▊                                                      | 129/400 [05:02<12:15,  2.71s/it]

 32%|██████████████████████████                                                      | 130/400 [05:06<13:21,  2.97s/it]

 33%|██████████████████████████▏                                                     | 131/400 [05:07<10:41,  2.39s/it]

 33%|██████████████████████████▍                                                     | 132/400 [05:10<11:19,  2.53s/it]

 33%|██████████████████████████▌                                                     | 133/400 [05:12<10:27,  2.35s/it]

 34%|██████████████████████████▊                                                     | 134/400 [05:16<13:01,  2.94s/it]

 34%|███████████████████████████                                                     | 135/400 [05:21<15:30,  3.51s/it]

 34%|███████████████████████████▏                                                    | 136/400 [05:37<31:25,  7.14s/it]

 34%|███████████████████████████▍                                                    | 137/400 [05:39<25:37,  5.85s/it]

 34%|███████████████████████████▌                                                    | 138/400 [05:42<21:13,  4.86s/it]

 35%|███████████████████████████▊                                                    | 139/400 [05:47<21:11,  4.87s/it]

 35%|████████████████████████████▏                                                   | 141/400 [05:47<11:51,  2.75s/it]

 36%|████████████████████████████▍                                                   | 142/400 [05:48<09:44,  2.26s/it]

 36%|████████████████████████████▌                                                   | 143/400 [05:49<08:14,  1.92s/it]

 36%|█████████████████████████████                                                   | 145/400 [05:52<07:11,  1.69s/it]

 36%|█████████████████████████████▏                                                  | 146/400 [05:52<06:03,  1.43s/it]

 37%|█████████████████████████████▍                                                  | 147/400 [05:53<05:34,  1.32s/it]

 37%|█████████████████████████████▌                                                  | 148/400 [05:57<07:33,  1.80s/it]

 37%|█████████████████████████████▊                                                  | 149/400 [05:58<07:17,  1.74s/it]

 38%|██████████████████████████████                                                  | 150/400 [06:03<11:12,  2.69s/it]

 38%|██████████████████████████████▏                                                 | 151/400 [06:12<17:52,  4.31s/it]

 38%|██████████████████████████████▌                                                 | 153/400 [06:12<10:04,  2.45s/it]

 38%|██████████████████████████████▊                                                 | 154/400 [06:13<08:36,  2.10s/it]

 39%|███████████████████████████████                                                 | 155/400 [06:14<07:36,  1.86s/it]

 39%|███████████████████████████████▏                                                | 156/400 [06:15<06:28,  1.59s/it]

 39%|███████████████████████████████▍                                                | 157/400 [06:19<08:44,  2.16s/it]

 40%|███████████████████████████████▊                                                | 159/400 [06:20<05:45,  1.43s/it]

 40%|████████████████████████████████                                                | 160/400 [06:23<07:12,  1.80s/it]

 40%|████████████████████████████████▏                                               | 161/400 [06:26<08:35,  2.16s/it]

 40%|████████████████████████████████▍                                               | 162/400 [06:32<12:55,  3.26s/it]

 41%|████████████████████████████████▌                                               | 163/400 [06:40<18:07,  4.59s/it]

 41%|█████████████████████████████████                                               | 165/400 [06:41<10:49,  2.76s/it]

 42%|█████████████████████████████████▏                                              | 166/400 [06:42<08:50,  2.27s/it]

 42%|█████████████████████████████████▍                                              | 167/400 [06:43<07:41,  1.98s/it]

 42%|█████████████████████████████████▌                                              | 168/400 [06:43<05:52,  1.52s/it]

 42%|█████████████████████████████████▊                                              | 169/400 [06:46<06:51,  1.78s/it]

 42%|██████████████████████████████████                                              | 170/400 [06:46<05:27,  1.43s/it]

 43%|██████████████████████████████████▏                                             | 171/400 [06:46<04:01,  1.05s/it]

 43%|██████████████████████████████████▍                                             | 172/400 [06:48<05:05,  1.34s/it]

 43%|██████████████████████████████████▌                                             | 173/400 [06:51<06:51,  1.81s/it]

 44%|██████████████████████████████████▊                                             | 174/400 [06:57<11:28,  3.05s/it]

 44%|███████████████████████████████████                                             | 175/400 [07:06<17:33,  4.68s/it]

 44%|███████████████████████████████████▏                                            | 176/400 [07:06<12:44,  3.41s/it]

 44%|███████████████████████████████████▍                                            | 177/400 [07:06<09:07,  2.46s/it]

 44%|███████████████████████████████████▌                                            | 178/400 [07:07<07:06,  1.92s/it]

 45%|███████████████████████████████████▊                                            | 179/400 [07:08<06:16,  1.71s/it]

 45%|████████████████████████████████████                                            | 180/400 [07:09<04:40,  1.28s/it]

 45%|████████████████████████████████████▏                                           | 181/400 [07:12<07:21,  2.01s/it]

 46%|████████████████████████████████████▍                                           | 182/400 [07:13<06:07,  1.69s/it]

 46%|████████████████████████████████████▊                                           | 184/400 [07:14<04:11,  1.16s/it]

 46%|█████████████████████████████████████                                           | 185/400 [07:17<05:44,  1.60s/it]

 46%|█████████████████████████████████████▏                                          | 186/400 [07:22<09:03,  2.54s/it]

 47%|█████████████████████████████████████▍                                          | 187/400 [07:31<14:50,  4.18s/it]

 47%|█████████████████████████████████████▌                                          | 188/400 [07:32<11:08,  3.16s/it]

 47%|█████████████████████████████████████▊                                          | 189/400 [07:32<08:08,  2.31s/it]

 48%|██████████████████████████████████████                                          | 190/400 [07:32<06:13,  1.78s/it]

 48%|██████████████████████████████████████▏                                         | 191/400 [07:35<07:28,  2.15s/it]

 48%|██████████████████████████████████████▍                                         | 192/400 [07:36<05:33,  1.60s/it]

 48%|██████████████████████████████████████▌                                         | 193/400 [07:39<07:47,  2.26s/it]

 48%|██████████████████████████████████████▊                                         | 194/400 [07:40<05:37,  1.64s/it]

 49%|███████████████████████████████████████                                         | 195/400 [07:41<04:57,  1.45s/it]

 49%|███████████████████████████████████████▍                                        | 197/400 [07:42<04:05,  1.21s/it]

 50%|███████████████████████████████████████▌                                        | 198/400 [07:48<07:18,  2.17s/it]

 50%|███████████████████████████████████████▊                                        | 199/400 [07:56<13:01,  3.89s/it]

 50%|████████████████████████████████████████                                        | 200/400 [07:57<09:44,  2.92s/it]

 50%|████████████████████████████████████████▏                                       | 201/400 [07:57<07:30,  2.26s/it]

 50%|████████████████████████████████████████▍                                       | 202/400 [07:57<05:33,  1.69s/it]

 51%|████████████████████████████████████████▌                                       | 203/400 [08:00<06:46,  2.06s/it]

 51%|████████████████████████████████████████▊                                       | 204/400 [08:01<05:04,  1.55s/it]

 51%|█████████████████████████████████████████                                       | 205/400 [08:04<07:08,  2.20s/it]

 52%|█████████████████████████████████████████▏                                      | 206/400 [08:13<12:44,  3.94s/it]

 52%|█████████████████████████████████████████▍                                      | 207/400 [08:14<09:48,  3.05s/it]

 52%|█████████████████████████████████████████▌                                      | 208/400 [08:15<07:53,  2.47s/it]

 52%|█████████████████████████████████████████▊                                      | 209/400 [08:17<07:40,  2.41s/it]

 52%|██████████████████████████████████████████                                      | 210/400 [08:19<07:16,  2.30s/it]

 53%|██████████████████████████████████████████▏                                     | 211/400 [08:22<08:04,  2.56s/it]

 53%|██████████████████████████████████████████▍                                     | 212/400 [08:23<06:04,  1.94s/it]

 53%|██████████████████████████████████████████▌                                     | 213/400 [08:23<04:33,  1.46s/it]

 54%|██████████████████████████████████████████▊                                     | 214/400 [08:23<03:31,  1.14s/it]

 54%|███████████████████████████████████████████                                     | 215/400 [08:26<04:40,  1.52s/it]

 54%|███████████████████████████████████████████▏                                    | 216/400 [08:26<03:32,  1.15s/it]

 54%|███████████████████████████████████████████▍                                    | 217/400 [08:30<05:42,  1.87s/it]

 55%|███████████████████████████████████████████▌                                    | 218/400 [08:37<11:03,  3.65s/it]

 55%|███████████████████████████████████████████▊                                    | 219/400 [08:38<08:30,  2.82s/it]

 55%|████████████████████████████████████████████                                    | 220/400 [08:39<06:55,  2.31s/it]

 55%|████████████████████████████████████████████▏                                   | 221/400 [08:42<07:30,  2.51s/it]

 56%|████████████████████████████████████████████▍                                   | 222/400 [08:44<07:08,  2.41s/it]

 56%|████████████████████████████████████████████▌                                   | 223/400 [08:48<08:08,  2.76s/it]

 56%|████████████████████████████████████████████▊                                   | 224/400 [08:49<06:45,  2.31s/it]

 56%|█████████████████████████████████████████████                                   | 225/400 [08:50<05:03,  1.73s/it]

 56%|█████████████████████████████████████████████▏                                  | 226/400 [08:50<04:10,  1.44s/it]

 57%|█████████████████████████████████████████████▍                                  | 227/400 [08:53<04:44,  1.64s/it]

 57%|█████████████████████████████████████████████▌                                  | 228/400 [08:53<03:37,  1.26s/it]

 57%|█████████████████████████████████████████████▊                                  | 229/400 [08:56<05:26,  1.91s/it]

 57%|██████████████████████████████████████████████                                  | 230/400 [09:04<10:34,  3.73s/it]

 58%|██████████████████████████████████████████████▏                                 | 231/400 [09:05<08:02,  2.85s/it]

 58%|██████████████████████████████████████████████▍                                 | 232/400 [09:06<06:33,  2.35s/it]

 58%|██████████████████████████████████████████████▌                                 | 233/400 [09:09<06:53,  2.48s/it]

 58%|██████████████████████████████████████████████▊                                 | 234/400 [09:11<06:38,  2.40s/it]

 59%|███████████████████████████████████████████████                                 | 235/400 [09:15<07:33,  2.75s/it]

 59%|███████████████████████████████████████████████▏                                | 236/400 [09:15<05:35,  2.05s/it]

 59%|███████████████████████████████████████████████▍                                | 237/400 [09:16<04:06,  1.51s/it]

 60%|███████████████████████████████████████████████▌                                | 238/400 [09:16<03:16,  1.21s/it]

 60%|███████████████████████████████████████████████▊                                | 239/400 [09:18<04:01,  1.50s/it]

 60%|████████████████████████████████████████████████                                | 240/400 [09:19<03:01,  1.13s/it]

 60%|████████████████████████████████████████████████▏                               | 241/400 [09:27<09:07,  3.44s/it]

 60%|████████████████████████████████████████████████▍                               | 242/400 [09:36<12:49,  4.87s/it]

 61%|████████████████████████████████████████████████▌                               | 243/400 [09:36<09:01,  3.45s/it]

 61%|████████████████████████████████████████████████▊                               | 244/400 [09:37<07:24,  2.85s/it]

 61%|█████████████████████████████████████████████████                               | 245/400 [09:38<05:30,  2.13s/it]

 62%|█████████████████████████████████████████████████▏                              | 246/400 [09:39<04:53,  1.91s/it]

 62%|█████████████████████████████████████████████████▍                              | 247/400 [09:39<03:42,  1.46s/it]

 62%|█████████████████████████████████████████████████▌                              | 248/400 [09:40<02:44,  1.08s/it]

 62%|█████████████████████████████████████████████████▊                              | 249/400 [09:40<02:16,  1.11it/s]

 63%|██████████████████████████████████████████████████▏                             | 251/400 [09:52<08:04,  3.25s/it]

 63%|██████████████████████████████████████████████████▍                             | 252/400 [09:54<07:07,  2.89s/it]

 63%|██████████████████████████████████████████████████▌                             | 253/400 [09:59<08:16,  3.38s/it]

 64%|██████████████████████████████████████████████████▊                             | 254/400 [10:03<08:40,  3.57s/it]

 64%|███████████████████████████████████████████████████                             | 255/400 [10:04<07:07,  2.94s/it]

 64%|███████████████████████████████████████████████████▏                            | 256/400 [10:05<05:46,  2.41s/it]

 64%|███████████████████████████████████████████████████▍                            | 257/400 [10:06<04:30,  1.89s/it]

 64%|███████████████████████████████████████████████████▌                            | 258/400 [10:06<03:18,  1.40s/it]

 65%|███████████████████████████████████████████████████▊                            | 259/400 [10:12<06:17,  2.68s/it]

 65%|████████████████████████████████████████████████████                            | 260/400 [10:12<04:31,  1.94s/it]

 65%|████████████████████████████████████████████████████▏                           | 261/400 [10:18<07:30,  3.24s/it]

 66%|████████████████████████████████████████████████████▍                           | 262/400 [10:22<07:46,  3.38s/it]

 66%|████████████████████████████████████████████████████▌                           | 263/400 [10:25<07:37,  3.34s/it]

 66%|████████████████████████████████████████████████████▊                           | 264/400 [10:27<06:23,  2.82s/it]

 66%|█████████████████████████████████████████████████████                           | 265/400 [10:28<05:17,  2.35s/it]

 67%|█████████████████████████████████████████████████████▍                          | 267/400 [10:28<03:02,  1.37s/it]

 67%|█████████████████████████████████████████████████████▌                          | 268/400 [10:29<02:26,  1.11s/it]

 67%|█████████████████████████████████████████████████████▊                          | 269/400 [10:29<01:58,  1.10it/s]

 68%|██████████████████████████████████████████████████████                          | 270/400 [10:35<04:48,  2.22s/it]

 68%|██████████████████████████████████████████████████████▏                         | 271/400 [10:37<04:56,  2.29s/it]

 68%|██████████████████████████████████████████████████████▍                         | 272/400 [10:37<03:35,  1.68s/it]

 68%|██████████████████████████████████████████████████████▌                         | 273/400 [10:44<06:41,  3.16s/it]

 68%|██████████████████████████████████████████████████████▊                         | 274/400 [10:48<07:00,  3.34s/it]

 69%|███████████████████████████████████████████████████████                         | 275/400 [10:52<07:05,  3.40s/it]

 69%|███████████████████████████████████████████████████████▏                        | 276/400 [10:52<05:02,  2.44s/it]

 69%|███████████████████████████████████████████████████████▍                        | 277/400 [10:53<04:18,  2.10s/it]

 70%|███████████████████████████████████████████████████████▌                        | 278/400 [10:53<03:07,  1.53s/it]

 70%|███████████████████████████████████████████████████████▊                        | 279/400 [10:54<02:24,  1.19s/it]

 70%|████████████████████████████████████████████████████████▏                       | 281/400 [11:13<10:19,  5.21s/it]

 70%|████████████████████████████████████████████████████████▍                       | 282/400 [11:17<09:24,  4.78s/it]

 71%|████████████████████████████████████████████████████████▊                       | 284/400 [11:18<05:55,  3.06s/it]

 71%|█████████████████████████████████████████████████████████                       | 285/400 [11:19<04:43,  2.47s/it]

 72%|█████████████████████████████████████████████████████████▏                      | 286/400 [11:24<06:11,  3.26s/it]

 72%|█████████████████████████████████████████████████████████▍                      | 287/400 [11:26<05:26,  2.89s/it]

 72%|█████████████████████████████████████████████████████████▌                      | 288/400 [11:43<12:35,  6.74s/it]

 72%|█████████████████████████████████████████████████████████▊                      | 289/400 [11:45<09:43,  5.26s/it]

 72%|██████████████████████████████████████████████████████████                      | 290/400 [11:46<07:21,  4.02s/it]

 73%|██████████████████████████████████████████████████████████▏                     | 291/400 [11:48<06:23,  3.52s/it]

 73%|██████████████████████████████████████████████████████████▍                     | 292/400 [11:48<04:42,  2.62s/it]

 73%|██████████████████████████████████████████████████████████▌                     | 293/400 [11:49<03:28,  1.95s/it]

 74%|██████████████████████████████████████████████████████████▊                     | 294/400 [11:49<02:39,  1.51s/it]

 74%|███████████████████████████████████████████████████████████                     | 295/400 [12:08<11:27,  6.55s/it]

 74%|███████████████████████████████████████████████████████████▏                    | 296/400 [12:13<10:34,  6.10s/it]

 74%|███████████████████████████████████████████████████████████▍                    | 297/400 [12:14<08:08,  4.75s/it]

 74%|███████████████████████████████████████████████████████████▌                    | 298/400 [12:17<06:56,  4.08s/it]

 75%|███████████████████████████████████████████████████████████▊                    | 299/400 [12:17<05:05,  3.03s/it]

 75%|████████████████████████████████████████████████████████████                    | 300/400 [12:19<04:14,  2.55s/it]

 75%|████████████████████████████████████████████████████████████▏                   | 301/400 [12:20<03:21,  2.03s/it]

 76%|████████████████████████████████████████████████████████████▍                   | 302/400 [12:20<02:22,  1.46s/it]

 76%|████████████████████████████████████████████████████████████▌                   | 303/400 [12:24<03:39,  2.26s/it]

 76%|████████████████████████████████████████████████████████████▊                   | 304/400 [12:27<04:11,  2.62s/it]

 76%|█████████████████████████████████████████████████████████████                   | 305/400 [12:32<04:58,  3.14s/it]

 76%|█████████████████████████████████████████████████████████████▏                  | 306/400 [12:34<04:35,  2.93s/it]

 77%|█████████████████████████████████████████████████████████████▍                  | 307/400 [12:36<04:14,  2.74s/it]

 77%|█████████████████████████████████████████████████████████████▌                  | 308/400 [12:37<03:11,  2.08s/it]

 77%|█████████████████████████████████████████████████████████████▊                  | 309/400 [12:38<02:41,  1.78s/it]

 78%|██████████████████████████████████████████████████████████████                  | 310/400 [12:41<03:07,  2.08s/it]

 78%|██████████████████████████████████████████████████████████████▏                 | 311/400 [12:43<03:12,  2.16s/it]

 78%|██████████████████████████████████████████████████████████████▍                 | 312/400 [12:46<03:18,  2.26s/it]

 78%|██████████████████████████████████████████████████████████████▌                 | 313/400 [12:46<02:40,  1.84s/it]

 78%|██████████████████████████████████████████████████████████████▊                 | 314/400 [12:47<01:54,  1.33s/it]

 79%|███████████████████████████████████████████████████████████████                 | 315/400 [12:50<02:58,  2.10s/it]

 79%|███████████████████████████████████████████████████████████████▏                | 316/400 [12:52<02:40,  1.91s/it]

 79%|███████████████████████████████████████████████████████████████▍                | 317/400 [12:57<03:51,  2.78s/it]

 80%|███████████████████████████████████████████████████████████████▌                | 318/400 [12:59<03:36,  2.64s/it]

 80%|███████████████████████████████████████████████████████████████▊                | 319/400 [13:01<03:28,  2.57s/it]

 80%|████████████████████████████████████████████████████████████████                | 320/400 [13:02<02:38,  1.98s/it]

 80%|████████████████████████████████████████████████████████████████▏               | 321/400 [13:05<02:47,  2.12s/it]

 80%|████████████████████████████████████████████████████████████████▍               | 322/400 [13:07<03:01,  2.33s/it]

 81%|████████████████████████████████████████████████████████████████▌               | 323/400 [13:09<02:47,  2.17s/it]

 81%|████████████████████████████████████████████████████████████████▊               | 324/400 [13:12<02:55,  2.31s/it]

 81%|█████████████████████████████████████████████████████████████████               | 325/400 [13:12<02:07,  1.70s/it]

 82%|█████████████████████████████████████████████████████████████████▏              | 326/400 [13:13<01:46,  1.44s/it]

 82%|█████████████████████████████████████████████████████████████████▍              | 327/400 [13:16<02:13,  1.83s/it]

 82%|█████████████████████████████████████████████████████████████████▌              | 328/400 [13:17<02:03,  1.71s/it]

 82%|█████████████████████████████████████████████████████████████████▊              | 329/400 [13:22<03:06,  2.62s/it]

 82%|██████████████████████████████████████████████████████████████████              | 330/400 [13:24<02:57,  2.53s/it]

 83%|██████████████████████████████████████████████████████████████████▏             | 331/400 [13:31<04:15,  3.71s/it]

 83%|██████████████████████████████████████████████████████████████████▍             | 332/400 [13:31<03:07,  2.76s/it]

 83%|██████████████████████████████████████████████████████████████████▌             | 333/400 [13:34<02:59,  2.68s/it]

 84%|██████████████████████████████████████████████████████████████████▊             | 334/400 [13:36<02:48,  2.55s/it]

 84%|███████████████████████████████████████████████████████████████████             | 335/400 [13:38<02:38,  2.43s/it]

 84%|███████████████████████████████████████████████████████████████████▏            | 336/400 [13:41<02:42,  2.54s/it]

 84%|███████████████████████████████████████████████████████████████████▍            | 337/400 [13:41<01:56,  1.84s/it]

 84%|███████████████████████████████████████████████████████████████████▌            | 338/400 [13:42<01:45,  1.70s/it]

 85%|███████████████████████████████████████████████████████████████████▊            | 339/400 [13:44<01:44,  1.72s/it]

 85%|████████████████████████████████████████████████████████████████████            | 340/400 [13:46<01:39,  1.65s/it]

 85%|████████████████████████████████████████████████████████████████████▏           | 341/400 [13:49<02:12,  2.24s/it]

 86%|████████████████████████████████████████████████████████████████████▍           | 342/400 [13:52<02:11,  2.27s/it]

 86%|████████████████████████████████████████████████████████████████████▌           | 343/400 [13:57<03:10,  3.35s/it]

 86%|████████████████████████████████████████████████████████████████████▊           | 344/400 [13:58<02:26,  2.61s/it]

 86%|█████████████████████████████████████████████████████████████████████           | 345/400 [14:00<02:09,  2.36s/it]

 86%|█████████████████████████████████████████████████████████████████████▏          | 346/400 [14:01<01:36,  1.80s/it]

 87%|█████████████████████████████████████████████████████████████████████▍          | 347/400 [14:03<01:36,  1.83s/it]

 87%|█████████████████████████████████████████████████████████████████████▌          | 348/400 [14:06<01:55,  2.23s/it]

 88%|██████████████████████████████████████████████████████████████████████          | 350/400 [14:07<01:18,  1.57s/it]

 88%|██████████████████████████████████████████████████████████████████████▏         | 351/400 [14:09<01:22,  1.69s/it]

 88%|██████████████████████████████████████████████████████████████████████▍         | 352/400 [14:11<01:19,  1.65s/it]

 88%|██████████████████████████████████████████████████████████████████████▌         | 353/400 [14:15<01:43,  2.20s/it]

 88%|██████████████████████████████████████████████████████████████████████▊         | 354/400 [14:17<01:40,  2.18s/it]

 89%|███████████████████████████████████████████████████████████████████████         | 355/400 [14:23<02:26,  3.25s/it]

 89%|███████████████████████████████████████████████████████████████████████▏        | 356/400 [14:27<02:33,  3.48s/it]

 89%|███████████████████████████████████████████████████████████████████████▍        | 357/400 [14:28<02:04,  2.90s/it]

 90%|███████████████████████████████████████████████████████████████████████▌        | 358/400 [14:30<01:43,  2.46s/it]

 90%|███████████████████████████████████████████████████████████████████████▊        | 359/400 [14:31<01:23,  2.05s/it]

 90%|████████████████████████████████████████████████████████████████████████        | 360/400 [14:34<01:35,  2.40s/it]

 90%|████████████████████████████████████████████████████████████████████████▏       | 361/400 [14:35<01:13,  1.89s/it]

 90%|████████████████████████████████████████████████████████████████████████▍       | 362/400 [14:37<01:17,  2.03s/it]

 91%|████████████████████████████████████████████████████████████████████████▌       | 363/400 [14:38<01:05,  1.77s/it]

 91%|████████████████████████████████████████████████████████████████████████▊       | 364/400 [14:39<00:52,  1.45s/it]

 91%|█████████████████████████████████████████████████████████████████████████       | 365/400 [14:42<01:14,  2.13s/it]

 92%|█████████████████████████████████████████████████████████████████████████▏      | 366/400 [14:44<01:02,  1.84s/it]

 92%|█████████████████████████████████████████████████████████████████████████▍      | 367/400 [14:50<01:42,  3.11s/it]

 92%|█████████████████████████████████████████████████████████████████████████▌      | 368/400 [14:53<01:45,  3.31s/it]

 92%|█████████████████████████████████████████████████████████████████████████▊      | 369/400 [14:56<01:33,  3.01s/it]

 92%|██████████████████████████████████████████████████████████████████████████      | 370/400 [14:57<01:14,  2.50s/it]

 93%|██████████████████████████████████████████████████████████████████████████▏     | 371/400 [15:00<01:13,  2.53s/it]

 93%|██████████████████████████████████████████████████████████████████████████▍     | 372/400 [15:03<01:14,  2.65s/it]

 93%|██████████████████████████████████████████████████████████████████████████▌     | 373/400 [15:04<00:57,  2.12s/it]

 94%|██████████████████████████████████████████████████████████████████████████▊     | 374/400 [15:04<00:39,  1.52s/it]

 94%|███████████████████████████████████████████████████████████████████████████     | 375/400 [15:06<00:41,  1.67s/it]

 94%|███████████████████████████████████████████████████████████████████████████▏    | 376/400 [15:07<00:36,  1.51s/it]

 94%|███████████████████████████████████████████████████████████████████████████▍    | 377/400 [15:07<00:27,  1.19s/it]

 94%|███████████████████████████████████████████████████████████████████████████▌    | 378/400 [15:08<00:25,  1.15s/it]

 95%|███████████████████████████████████████████████████████████████████████████▊    | 379/400 [15:14<00:54,  2.60s/it]

 95%|████████████████████████████████████████████████████████████████████████████    | 380/400 [15:18<00:59,  2.99s/it]

 95%|████████████████████████████████████████████████████████████████████████████▏   | 381/400 [15:22<01:01,  3.25s/it]

 96%|████████████████████████████████████████████████████████████████████████████▍   | 382/400 [15:23<00:48,  2.70s/it]

 96%|████████████████████████████████████████████████████████████████████████████▌   | 383/400 [15:26<00:44,  2.59s/it]

 96%|████████████████████████████████████████████████████████████████████████████▊   | 384/400 [15:30<00:48,  3.03s/it]

 96%|█████████████████████████████████████████████████████████████████████████████   | 385/400 [15:31<00:38,  2.55s/it]

 97%|█████████████████████████████████████████████████████████████████████████████▍  | 387/400 [15:34<00:24,  1.92s/it]

 97%|█████████████████████████████████████████████████████████████████████████████▌  | 388/400 [15:35<00:21,  1.79s/it]

 97%|█████████████████████████████████████████████████████████████████████████████▊  | 389/400 [15:36<00:15,  1.45s/it]

 98%|██████████████████████████████████████████████████████████████████████████████  | 390/400 [15:36<00:11,  1.19s/it]

 98%|██████████████████████████████████████████████████████████████████████████████▏ | 391/400 [15:39<00:16,  1.78s/it]

 98%|██████████████████████████████████████████████████████████████████████████████▍ | 392/400 [15:42<00:17,  2.14s/it]

 98%|██████████████████████████████████████████████████████████████████████████████▌ | 393/400 [15:45<00:15,  2.26s/it]

 98%|██████████████████████████████████████████████████████████████████████████████▊ | 394/400 [15:46<00:11,  1.84s/it]

 99%|███████████████████████████████████████████████████████████████████████████████ | 395/400 [15:47<00:08,  1.62s/it]

 99%|███████████████████████████████████████████████████████████████████████████████▏| 396/400 [15:52<00:10,  2.59s/it]

 99%|███████████████████████████████████████████████████████████████████████████████▍| 397/400 [15:53<00:06,  2.08s/it]

100%|███████████████████████████████████████████████████████████████████████████████▊| 399/400 [15:53<00:01,  1.24s/it]

100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [16:19<00:00,  2.45s/it]


Tiempo búsqueda (s): 979
Best robust CV: 0.25065234257533436
Best params: {'kbest__k': 512, 'svc__C': np.float64(1221.212458656701), 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': np.float64(0.01543656015027835)}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                            params
        0.250652        0.132229  {'kbest__k': 512, 'svc__C': 1221.212458656701, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.01543656015027835}
        0.238619        0.100258  {'kbest__k': 512, 'svc__C': 1328.923467281017, 'svc__class_weight': {0: 2.5, 1: 5.0, 2: 1.0}, 'svc__gamma': 0.00791507439765622}
        0.169944        0.080814 {'kbest__k': 768, 'svc__C': 617.774408226366, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.0024424282201158257}
        0.138779        0.144990   {'kbest__k': 384, 'svc__C': 159.0841875063149, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.0445131431564921}
        0.133411        0.038574             {'kbest__k': 1023, 'svc__C': 784.8081929215668, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842181e-05}
        0.121660        0.118774  {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935042}
        0.113960        0.224990 {'kbest__k': 1023, 'svc__C': 852.6974999202718, 'svc__class_weight': {0: 3.0, 1: 6.0, 2: 1.0}, 'svc__gamma': 0.02589296092555009}
        0.082017        0.126799             {'kbest__k': 1023, 'svc__C': 1.2456144526856272, 'svc__class_weight': 'balanced', 'svc__gamma': 0.001731612063445476}
        0.061387        0.157447 {'kbest__k': 384, 'svc__C': 607.4373269037123, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.007931909113857064}
        0.058967        0.112766            {'kbest__k': 1023, 'svc__C': 30.036641140654908, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0035753161317240816}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.5081±0.0512 | AUC_train=0.7715 | AUC_test=0.8623
  Gap_CV=0.2635 | Gap_rep=-0.0907
  Acc=0.8462 | F1w=0.8480 | macroF1=0.8050 | balacc=0.8240

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.66      0.69      0.67       422
      Blazar       0.29      0.98      0.45       189
         QSO       1.00      0.48      0.65       899

    accuracy                           0.60      1510
   macro avg       0.65      0.72      0.59      1510
weighted avg       0.81      0.60      0.63      1510

  CM TEST
        AGN  Blazar  QSO
AGN      81       3   17
Blazar    2      24    4
QSO      25       7  214

  Report TEST
              precision    recall  f1-score   support

         AGN       0.75      0.80      0.78       101
      Blazar       0.71      0.80      0.75        30
         QSO       0.91      0.87      0.89       246

    accuracy                           0.85       377
   macro avg       0.79      0.82      0.80       377
weighted avg       0.85      0.85      0.85       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.5048±0.0570 | AUC_train=0.7560 | AUC_test=0.8422
  Gap_CV=0.2511 | Gap_rep=-0.0863
  Acc=0.7958 | F1w=0.8011 | macroF1=0.7742 | balacc=0.7982

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.61      0.66      0.63       422
      Blazar       0.28      0.99      0.44       189
         QSO       0.99      0.43      0.60       899

    accuracy                           0.57      1510
   macro avg       0.63      0.69      0.56      1510
weighted avg       0.80      0.57      0.59      1510

  CM TEST
        AGN  Blazar  QSO
AGN      81       2   18
Blazar    3      24    3
QSO      45       6  195

  Report TEST
              precision    recall  f1-score   support

         AGN       0.63      0.80      0.70       101
      Blazar       0.75      0.80      0.77        30
         QSO       0.90      0.79      0.84       246

    accuracy                           0.80       377
   macro avg       0.76      0.80      0.77       377
weighted avg       0.82      0.80      0.80       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.5102±0.0467 | AUC_train=0.7210 | AUC_test=0.7988
  Gap_CV=0.2108 | Gap_rep=-0.0778
  Acc=0.7188 | F1w=0.7290 | macroF1=0.7035 | balacc=0.7667

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.57      0.57      0.57       422
      Blazar       0.24      1.00      0.38       189
         QSO       0.99      0.33      0.49       899

    accuracy                           0.48      1510
   macro avg       0.60      0.63      0.48      1510
weighted avg       0.78      0.48      0.50      1510

  CM TEST
        AGN  Blazar  QSO
AGN      85       4   12
Blazar    3      24    3
QSO      73      11  162

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.84      0.65       101
      Blazar       0.62      0.80      0.70        30
         QSO       0.92      0.66      0.77       246

    accuracy                           0.72       377
   macro avg       0.69      0.77      0.70       377
weighted avg       0.79      0.72      0.73       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM3.png

===============================================================================================
Evaluando SVM4...

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [19:12<?, ?fit/s]

  AUC_CV=0.5082±0.0524 | AUC_train=0.7845 | AUC_test=0.8724
  Gap_CV=0.2763 | Gap_rep=-0.0880
  Acc=0.8223 | F1w=0.8256 | macroF1=0.8068 | balacc=0.8235

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.67      0.59      0.63       422
      Blazar       0.26      1.00      0.41       189
         QSO       1.00      0.44      0.61       899

    accuracy                           0.56      1510
   macro avg       0.64      0.68      0.55      1510
weighted avg       0.81      0.56      0.59      1510

  CM TEST
        AGN  Blazar  QSO
AGN      82       1   18
Blazar    1      25    4
QSO      38       5  203

  Report TEST
              precision    recall  f1-score   support

         AGN       0.68      0.81      0.74       101
      Blazar       0.81      0.83      0.82        30
         QSO       0.90      0.83      0.86       246

    accuracy                           0.82       377
   macro avg       0.80      0.82      0.81       377
weighted avg       0.83      0.82      0.83       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.5335±0.0269 | AUC_train=0.7181 | AUC_test=0.7657
  Gap_CV=0.1846 | Gap_rep=-0.0476
  Acc=0.7003 | F1w=0.7070 | macroF1=0.6477 | balacc=0.6969

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.52      0.37      0.43       422
      Blazar       0.52      0.56      0.54       189
         QSO       0.70      0.78      0.74       899

    accuracy                           0.64      1510
   macro avg       0.58      0.57      0.57      1510
weighted avg       0.63      0.64      0.63      1510

  CM TEST
        AGN  Blazar  QSO
AGN      59       9   33
Blazar    2      23    5
QSO      50      14  182

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.58      0.56       101
      Blazar       0.50      0.77      0.61        30
         QSO       0.83      0.74      0.78       246

    accuracy                           0.70       377
   macro avg       0.62      0.70      0.65       377
weighted avg       0.72      0.70      0.71       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_test  F1w_test  macroF1  balacc
SVM1  0.5081 0.0512  0.2635   0.8623  0.0512  -0.0907    0.8462    0.8480   0.8050  0.8240
SVM2  0.5048 0.0570  0.2511   0.8422  0.0570  -0.0863    0.7958    0.8011   0.7742  0.7982
SVM3  0.5102 0.0467  0.2108   0.7988  0.0467  -0.0778    0.7188    0.7290   0.7035  0.7667
SVM4  0.5082 0.0524  0.2763   0.8724  0.0524  -0.0880    0.8223    0.8256   0.8068  0.8235
SVM5  0.5335 0.0269  0.1846   0.7657  0.0269  -0.0476    0.7003    0.7070   0.6477  0.6969

>>> Mejor modelo: SVM4  AUC_test=0.8724

Tabla LaTeX guardada en: C:\Users\Gamer\Downloads\tabla_svm_top5.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.5081 & 0.0512 & 0.2635 & 0.8623 & 0.0512 & -0.0907 & 0.8462 & 0.8480 \\
        SVM$_{2}$ & 0.5048 & 0.0570 & 0.2511 & 0.8422 & 0.0570 & -0.0863 & 0.7958 & 0.8011 \\
        SVM$_{3}$ & 0.5102 & 0.0467 & 0.2108 & 0.7988 & 0.0467 & -0.0778 & 0.7188 & 0.7290 \\
        \rowcolor{BlueBest} SVM$_{4}$ & 0.5082 & 0.0524 & 0.2763 & 0.8724 & 0.0524 & -0.0880 & 0.8223 & 0.8256 \\
        SVM$_{5}$ & 0.5335 & 0.0269 & 0.1846 & 0.7657 & 0.0269 & -0.0476 & 0.7003 & 0.7070 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}
\label{tab:svm_top5_auc}
\end{table}

Tiempo total: 00:19:38

# =========================
# IMPORTANCIA POR NIVEL PARA SVM (RBF) CON PERMUTATION IMPORTANCE
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.inspection import permutation_importance

# Para evitar estilos raros del notebook
plt.style.use("default")

# -------------------------------------------------
# 1) Tomar el mejor modelo encontrado
# -------------------------------------------------
svm_best = rs.best_estimator_

# -------------------------------------------------
# 2) Calcular permutation importance sobre el pipeline completo
# -------------------------------------------------
perm = permutation_importance(
    estimator=svm_best,
    X=X_test,
    y=y_test_enc,
    n_repeats=20,
    random_state=42,
    scoring="balanced_accuracy",
    n_jobs=-1
)

# Importancia base por feature
importances_mean = perm.importances_mean

# -------------------------------------------------
# 3) DataFrame base con TODAS las variables originales
# -------------------------------------------------
orig_cols = pd.Index(X_test.columns)

df_all = pd.DataFrame({
    "feature": orig_cols.astype(str),
    "importance_mean": importances_mean
})

# -------------------------------------------------
# 4) Identificar cuáles variables realmente usa el SVM final
#    (mapeando VT -> KBest sobre las columnas originales)
# -------------------------------------------------
selected_cols = orig_cols.copy()

if hasattr(svm_best, "named_steps"):
    # VarianceThreshold
    if "vt" in svm_best.named_steps:
        vt_mask = svm_best.named_steps["vt"].get_support()
        selected_cols = selected_cols[vt_mask]

    # SelectKBest
    if "kbest" in svm_best.named_steps:
        kbest_mask = svm_best.named_steps["kbest"].get_support()
        selected_cols = selected_cols[kbest_mask]

print(f"Variables originales en X_test: {len(orig_cols)}")
print(f"Variables que realmente usa el SVM final: {len(selected_cols)}")

# Filtrar solo las que sobrevivieron al pipeline
df_imp_svm = df_all[df_all["feature"].isin(selected_cols.astype(str))].copy()

# -------------------------------------------------
# 5) Convertir nombres de columnas a índices numéricos
# -------------------------------------------------
feat_idx = pd.to_numeric(pd.Index(df_imp_svm["feature"]).astype(str), errors="coerce")

if feat_idx.isna().any():
    raise ValueError(
        "No se pudieron convertir las columnas seleccionadas a números. "
        "Revisa los nombres de df_imp_svm['feature']."
    )

feat_idx = feat_idx.astype(int)
min_idx, max_idx = int(feat_idx.min()), int(feat_idx.max())
print(f"Rango original columnas seleccionadas: {min_idx} .. {max_idx}")

# Si parte en 0, convertir a base 1
if min_idx == 0:
    feat_idx_1based = feat_idx + 1
    print("Detectado 0-based -> usando idx_1based = idx + 1")
else:
    feat_idx_1based = feat_idx

print(f"Rango idx_1based: {int(feat_idx_1based.min())} .. {int(feat_idx_1based.max())}")

# -------------------------------------------------
# 6) Asignar cada feature a un nivel de firma
# -------------------------------------------------
edges = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
lvl_labels = [f"N{i}" for i in range(0, 10)]  # N0..N9

niveles = pd.cut(
    feat_idx_1based,
    bins=edges,
    labels=lvl_labels,
    right=False,
    include_lowest=True
)

if niveles.isna().any():
    bad = np.array(df_imp_svm["feature"])[niveles.isna()][:10]
    raise ValueError(f"Hay features fuera del rango de bins. Ejemplos: {bad}")

df_imp_svm["nivel"] = niveles.astype(str)

# Excluir N0 para mantener estilo del gráfico RF
df_imp_svm = df_imp_svm[df_imp_svm["nivel"] != "N0"].copy()

# Ordenar niveles
df_imp_svm["nivel_num"] = (
    df_imp_svm["nivel"].str.replace("N", "", regex=False).astype(int)
)
df_imp_svm = df_imp_svm.sort_values(["nivel_num", "feature"])

# Verificación opcional
print("\nCantidad de variables por nivel seleccionadas por el modelo:")
print(df_imp_svm["nivel_num"].value_counts().sort_index())

# -------------------------------------------------
# 7) Resumen por nivel
#    mean y median salen ambos de importance_mean
# -------------------------------------------------
res_svm = df_imp_svm.groupby("nivel_num").agg(
    mean=("importance_mean", "mean"),
    median=("importance_mean", "median"),
    count=("importance_mean", "size")
).reset_index()

# Forzar presencia de todos los niveles 1..9
niveles_completos = pd.DataFrame({"nivel_num": np.arange(1, 10)})
res_svm = niveles_completos.merge(res_svm, on="nivel_num", how="left").fillna(0)

print("\nRESUMEN POR NIVEL — SVM")
print(res_svm.to_string(index=False))

# -------------------------------------------------
# 8) Gráfico
# -------------------------------------------------
x = np.arange(len(res_svm))
labels_plot = [f"Nivel {n}" for n in res_svm["nivel_num"]]

fig, ax = plt.subplots(figsize=(9.5, 5.2), facecolor="white")
ax.set_facecolor("white")

# Barras = promedio por nivel
ax.bar(
    x,
    res_svm["mean"],
    color="#8FA8C7",      # azul sobrio
    edgecolor="#355C7D",  # azul más oscuro
    linewidth=1.2,
    alpha=0.9,
    label="Importancia promedio"
)

# Puntos = mediana por nivel
ax.scatter(
    x,
    res_svm["median"],
    s=80,
    marker="o",
    color="#355C7D",
    edgecolors="#1F3A56",
    linewidths=1,
    zorder=3,
    label="Mediana"
)

ax.set_xticks(x)
ax.set_xticklabels(labels_plot)
ax.set_xlabel("Nivel")
ax.set_ylabel("Importancia")
ax.set_title("Importancia promedio y mediana por nivel (SVM)")
ax.grid(axis="y", linestyle="--", alpha=0.35, color="gray")

# Leyenda: Mediana primero
handles, labels = ax.get_legend_handles_labels()
order = [1, 0]
ax.legend([handles[i] for i in order], [labels[i] for i in order])

fig.tight_layout()

# -------------------------------------------------
# 9) Guardar
# -------------------------------------------------
downloads = Path.home() / "Downloads"
downloads.mkdir(parents=True, exist_ok=True)
ruta_guardado = downloads / "importancia_promedio_mediana_por_nivel_SVM.png"

fig.savefig(ruta_guardado, dpi=300, bbox_inches="tight", facecolor="white")
print(f"\nGráfico guardado en: {ruta_guardado}")

plt.show()

Variables originales en X_test: 1022
Variables que realmente usa el SVM final: 512
Rango original columnas seleccionadas: 11 .. 1019
Rango idx_1based: 11 .. 1019

Cantidad de variables por nivel seleccionadas por el modelo:
nivel_num
3      2
4      5
5     10
6     15
7     56
8    126
9    298
Name: count, dtype: int64

RESUMEN POR NIVEL — SVM
 nivel_num     mean   median  count
         1 0.000000 0.000000    0.0
         2 0.000000 0.000000    0.0
         3 0.113341 0.113341    2.0
         4 0.074336 0.062727    5.0
         5 0.088919 0.085037   10.0
         6 0.043351 0.038160   15.0
         7 0.027346 0.020342   56.0
         8 0.021154 0.013888  126.0
         9 0.014636 0.009346  298.0

Gráfico guardado en: C:\Users\Gamer\Downloads\importancia_promedio_mediana_por_nivel_SVM.png

# ==========================================================
# TABLA POR NIVELES (SVM5) — criterio AUC_rep
# - Usa el MISMO pipeline del SVM final de la tesis
# - Toma los parámetros del modelo etiquetado como SVM5
# - Evalúa por niveles 1..9: 2, 6, 14, ..., 1022 features
# - Reporta Acc/F1w + AUC (OVR macro) en train y test
# - Filas LaTeX listas para pegar
# ==========================================================

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from scipy.special import softmax

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# ----------------------------------------------------------
# 0) REQUISITOS PREVIOS
# ----------------------------------------------------------
# Este bloque asume que YA corriste tu notebook principal de SVM,
# por lo que ya existen estas variables:
# X_train, X_test, y_train_enc, y_test_enc, top10, make_pipe

y_train_encoded = y_train_enc
y_test_encoded  = y_test_enc

# ----------------------------------------------------------
# 1) TOMAR LOS PARÁMETROS DEL MODELO SVM5 REAL
# ----------------------------------------------------------
# En tu notebook:
# for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
#     r = eval_one(row.params, f"SVM{i}")
#
# Por eso SVM5 corresponde a top10.iloc[4]["params"]

SVM5_PARAMS = top10.iloc[4]["params"].copy()

print("Parámetros de SVM5:")
print(SVM5_PARAMS)

# Guardar k original del modelo
kbest_original = SVM5_PARAMS.get("kbest__k", X_train.shape[1])

# ----------------------------------------------------------
# 2) FUNCIÓN AUC — MISMA DEFINICIÓN QUE EN TU NOTEBOOK PRINCIPAL
# ----------------------------------------------------------
def auc_ovr_macro_from_scores(y_true, decision_scores):
    """
    Usa decision_function del SVM y luego softmax,
    igual que en tu evaluación principal.
    """
    if decision_scores.ndim == 1:
        decision_scores = decision_scores.reshape(-1, 1)
    proba = softmax(decision_scores, axis=1)
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

# ----------------------------------------------------------
# 3) COLUMNAS NUMÉRICAS 0..1022
# ----------------------------------------------------------
col_nums = pd.to_numeric(pd.Index(X_train.columns).astype(str), errors="coerce")
if col_nums.isna().any():
    raise ValueError(
        "X_train.columns no son numéricas (0..1022). "
        "Revisa los nombres de columnas."
    )
col_nums = col_nums.astype(int)

# Orden estable por índice
pairs = sorted(zip(col_nums.values, X_train.columns), key=lambda t: t[0])
nums_sorted = [n for n, _ in pairs]
cols_sorted = [c for _, c in pairs]

# Para que nivel 9 tenga 1022 features, excluimos la columna 0 (N0)
EXCLUDE_N0 = True

# ----------------------------------------------------------
# 4) LOOP POR NIVELES 1..9
# ----------------------------------------------------------
rows = []

for m in tqdm(range(1, 10), desc="Evaluando niveles (SVM5)", unit="nivel"):
    end_idx = (2 ** (m + 1)) - 2   # 2, 6, 14, 30, ..., 1022

    if EXCLUDE_N0:
        selected = [c for c, n in zip(cols_sorted, nums_sorted) if (1 <= n <= end_idx)]
    else:
        selected = [c for c, n in zip(cols_sorted, nums_sorted) if (0 <= n <= end_idx)]

    Xtr_m = X_train[selected].copy()
    Xte_m = X_test[selected].copy()

    # Copia de params del SVM5
    params_m = SVM5_PARAMS.copy()

    # Ajuste clave:
    # si el k del modelo final es mayor que las variables disponibles en ese nivel,
    # se reduce automáticamente para evitar error.
    if isinstance(kbest_original, (int, np.integer, float, np.floating)):
        params_m["kbest__k"] = int(min(int(kbest_original), Xtr_m.shape[1]))
    else:
        params_m["kbest__k"] = "all"

    # Construir EXACTAMENTE el mismo tipo de pipeline de tu notebook principal
    model_m = make_pipe(params_m)

    # Entrenar
    model_m.fit(Xtr_m, y_train_encoded)

    # Predicciones
    pred_tr = model_m.predict(Xtr_m)
    pred_te = model_m.predict(Xte_m)

    # Scores del SVM
    scores_tr = model_m.decision_function(Xtr_m)
    scores_te = model_m.decision_function(Xte_m)

    # Métricas
    acc_tr = accuracy_score(y_train_encoded, pred_tr)
    f1w_tr = f1_score(y_train_encoded, pred_tr, average="weighted", zero_division=0)
    auc_tr = auc_ovr_macro_from_scores(y_train_encoded, scores_tr)

    acc_te = accuracy_score(y_test_encoded, pred_te)
    f1w_te = f1_score(y_test_encoded, pred_te, average="weighted", zero_division=0)
    auc_te = auc_ovr_macro_from_scores(y_test_encoded, scores_te)

    rows.append({
        "NivelFirma": m,
        "N_features": Xtr_m.shape[1],
        "kbest_usado": params_m["kbest__k"],
        "AccTrain": acc_tr,
        "F1wTrain": f1w_tr,
        "AUCTrain": auc_tr,
        "AccTest": acc_te,
        "F1wTest": f1w_te,
        "AUCRep": auc_te
    })

df_levels = pd.DataFrame(rows)

# ----------------------------------------------------------
# 5) ELEGIR EL NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
# ----------------------------------------------------------
best_auc = df_levels["AUCRep"].max()
tol = 1e-4
best_candidates = df_levels[df_levels["AUCRep"] >= best_auc - tol]
best_simple = best_candidates.sort_values(["NivelFirma"]).iloc[0]

print("\n" + "="*100)
print("RESULTADOS POR NIVEL (SVM5) — criterio AUCRep")
print("="*100)
print(df_levels.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

print("\n" + "="*100)
print("NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)")
print("="*100)
print(best_simple.to_string())

# ----------------------------------------------------------
# 6) FILAS LaTeX
# ----------------------------------------------------------
print("\n" + "="*100)
print("FILAS LaTeX")
print("="*100)
for _, r in df_levels.iterrows():
    print(
        f"{int(r['NivelFirma'])} & {int(r['N_features'])} & "
        f"{r['AccTrain']:.4f} & {r['F1wTrain']:.4f} & {r['AUCTrain']:.4f} & "
        f"{r['AccTest']:.4f} & {r['F1wTest']:.4f} & {r['AUCRep']:.4f} \\\\"
    )

Parámetros de SVM5:
{'kbest__k': 1023, 'svc__C': np.float64(784.8081929215668), 'svc__class_weight': 'balanced', 'svc__gamma': np.float64(8.215507070842181e-05)}

Evaluando niveles (SVM5): 100%|███████████████████████████████████████████████████████| 9/9 [00:28<00:00,  3.20s/nivel]


====================================================================================================
RESULTADOS POR NIVEL (SVM5) — criterio AUCRep
====================================================================================================
 NivelFirma  N_features  kbest_usado  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2            2    0.3384    0.3707    0.5032   0.0796   0.0117  0.5135
          2           6            6    0.4185    0.4352    0.5260   0.2255   0.2485  0.5330
          3          14           14    0.5563    0.5182    0.5981   0.5809   0.5676  0.6035
          4          30           30    0.5583    0.5301    0.6172   0.6021   0.5955  0.6581
          5          62           62    0.5523    0.5358    0.6398   0.5703   0.5780  0.6803
          6         126          126    0.5530    0.5483    0.6578   0.5836   0.5967  0.7171
          7         254          254    0.5868    0.5815    0.6795   0.6207   0.6359  0.7395
          8         510          510    0.6225    0.6136    0.7061   0.6870   0.6943  0.7616
          9        1021         1021    0.6404    0.6294    0.7183   0.7003   0.7070  0.7658

====================================================================================================
NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)
====================================================================================================
NivelFirma        9.000000
N_features     1021.000000
kbest_usado    1021.000000
AccTrain          0.640397
F1wTrain          0.629370
AUCTrain          0.718256
AccTest           0.700265
F1wTest           0.706974
AUCRep            0.765800

====================================================================================================
FILAS LaTeX
====================================================================================================
1 & 2 & 0.3384 & 0.3707 & 0.5032 & 0.0796 & 0.0117 & 0.5135 \\
2 & 6 & 0.4185 & 0.4352 & 0.5260 & 0.2255 & 0.2485 & 0.5330 \\
3 & 14 & 0.5563 & 0.5182 & 0.5981 & 0.5809 & 0.5676 & 0.6035 \\
4 & 30 & 0.5583 & 0.5301 & 0.6172 & 0.6021 & 0.5955 & 0.6581 \\
5 & 62 & 0.5523 & 0.5358 & 0.6398 & 0.5703 & 0.5780 & 0.6803 \\
6 & 126 & 0.5530 & 0.5483 & 0.6578 & 0.5836 & 0.5967 & 0.7171 \\
7 & 254 & 0.5868 & 0.5815 & 0.6795 & 0.6207 & 0.6359 & 0.7395 \\
8 & 510 & 0.6225 & 0.6136 & 0.7061 & 0.6870 & 0.6943 & 0.7616 \\
9 & 1021 & 0.6404 & 0.6294 & 0.7183 & 0.7003 & 0.7070 & 0.7658 \\

IISIGNATURE LOGFIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

# pip install tqdm tqdm_joblib imbalanced-learn matplotlib seaborn
from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")          # backend sin pantalla; cambiar a "TkAgg" si se quiere ventana
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, cross_val_score
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS
# =========================
x = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\IISIGNATURE\logsignature_iisig_M9.csv')
y = pd.read_csv(r'C:\Users\Gamer\Desktop\Cata\ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid','survey_class_mapped','survey_class','survey_class_cat','id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'C:\Users\Gamer\Downloads'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# AUC OVR: decision_function → softmax → roc_auc_score
def _auc_ovr(y_true, y_score):
    proba = softmax(y_score, axis=1)   # convierte scores crudos a distribución [0,1] que suma 1
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

auc_scorer = make_scorer(_auc_ovr, response_method="decision_function")

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

param_dist = {
    "kbest__k":          [128, 256, 384, 512, 768, 1023],
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):   return v.item()
    if isinstance(v, dict):         return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) EVALUACIÓN COMPLETA + FIGURAS
# =========================
def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0


def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    """Guarda figura con CM de train y test — mismo estilo que el resto de tus CMs."""
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4,
                           width_ratios=[1, gap_width, 1, 0.08],
                           wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]);  ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real",     fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%",
                        ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})",
                        ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")
def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    # AUC en CV (5 folds, calcula sobreajuste respecto a validación)
    auc_cv_scores = cross_val_score(
        make_pipe(params), X_train, y_train_enc,
        cv=cv, scoring=auc_scorer, n_jobs=-1
    )
    auc_cv = float(np.mean(auc_cv_scores))
    sd_cv  = float(np.std(auc_cv_scores))

    # Reentrenar sobre todo el train
    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)
    df_tr   = softmax(model.decision_function(X_train), axis=1)
    df_te   = softmax(model.decision_function(X_test),  axis=1)

    auc_train = roc_auc_score(y_train_enc, df_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  df_te, multi_class="ovr", average="macro")
    gap_cv    = round(auc_train - auc_cv,   4)
    gap_rep   = round(auc_train - auc_test, 4)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro",    zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc={acc_test:.4f} | F1w={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={accuracy_score(y_train_enc, yhat_tr):.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":      tag,
        "AUC_CV":   round(auc_cv,    4),
        "SD_CV":    round(sd_cv,     4),
        "Gap_CV":   gap_cv,
        "AUC_rep":  round(auc_test,  4),
        "SD_rep":   round(sd_cv,     4),
        "Gap_rep":  gap_rep,
        "Acc_test": round(acc_test,  4),
        "F1w_test": round(f1w_test,  4),
        "macroF1":  round(mf1_test,  4),
        "balacc":   round(bacc_test, 4),
    }


results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 7) TABLA RESUMEN + LATEX AUTO-GENERADO
# =========================
summary_df = pd.DataFrame(results_summary)
print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]
print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

# Generar snippet LaTeX
latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}" + "\n"
    r"\label{tab:svm_top5_auc}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 8) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 127) (377, 127)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {np.int64(0): np.int64(422), np.int64(1): np.int64(189), np.int64(2): np.int64(899)}

Buscando hiperparámetros:   0%|                                                          | 0/400 fits [00:00<?, ?fit/s]



  0%|                                                                                          | 0/400 [00:00<?, ?it/s]



  0%|▏                                                                                 | 1/400 [00:03<24:47,  3.73s/it]



  1%|█                                                                                 | 5/400 [00:03<03:52,  1.70it/s]



  2%|█▍                                                                                | 7/400 [00:04<03:26,  1.91it/s]



  2%|█▋                                                                                | 8/400 [00:06<05:35,  1.17it/s]



  2%|██                                                                               | 10/400 [00:07<03:49,  1.70it/s]



  4%|██▊                                                                              | 14/400 [00:07<02:05,  3.07it/s]



  4%|███                                                                              | 15/400 [00:09<03:41,  1.74it/s]



  4%|███▏                                                                             | 16/400 [00:09<03:12,  1.99it/s]



  4%|███▍                                                                             | 17/400 [00:09<02:58,  2.14it/s]



  5%|████▎                                                                            | 21/400 [00:11<02:29,  2.54it/s]



  6%|████▍                                                                            | 22/400 [00:12<03:26,  1.83it/s]



  6%|█████                                                                            | 25/400 [00:13<02:29,  2.51it/s]



  7%|█████▍                                                                           | 27/400 [00:13<02:09,  2.89it/s]



  7%|█████▋                                                                           | 28/400 [00:13<02:00,  3.09it/s]



  7%|█████▊                                                                           | 29/400 [00:14<02:52,  2.15it/s]



  8%|██████▎                                                                          | 31/400 [00:15<02:06,  2.91it/s]



  8%|██████▍                                                                          | 32/400 [00:15<02:23,  2.56it/s]



  8%|██████▉                                                                          | 34/400 [00:16<02:00,  3.05it/s]



  9%|███████                                                                          | 35/400 [00:16<01:45,  3.47it/s]



  9%|███████▎                                                                         | 36/400 [00:17<03:17,  1.85it/s]



 10%|███████▋                                                                         | 38/400 [00:17<02:09,  2.80it/s]



 10%|███████▉                                                                         | 39/400 [00:18<02:23,  2.51it/s]



 10%|████████▎                                                                        | 41/400 [00:18<02:10,  2.76it/s]



 10%|████████▌                                                                        | 42/400 [00:19<02:12,  2.70it/s]



 11%|████████▋                                                                        | 43/400 [00:19<02:03,  2.90it/s]



 11%|████████▉                                                                        | 44/400 [00:19<01:58,  2.99it/s]



 11%|█████████                                                                        | 45/400 [00:20<02:39,  2.23it/s]



 12%|█████████▎                                                                       | 46/400 [00:22<04:15,  1.39it/s]



 12%|█████████▌                                                                       | 47/400 [00:22<03:45,  1.57it/s]



 12%|██████████▏                                                                      | 50/400 [00:22<02:06,  2.77it/s]



 13%|██████████▎                                                                      | 51/400 [00:23<02:00,  2.89it/s]



 13%|██████████▌                                                                      | 52/400 [00:23<02:12,  2.63it/s]



 13%|██████████▋                                                                      | 53/400 [00:23<02:08,  2.71it/s]



 14%|██████████▉                                                                      | 54/400 [00:24<03:00,  1.92it/s]



 14%|███████████▌                                                                     | 57/400 [00:25<01:59,  2.86it/s]



 14%|███████████▋                                                                     | 58/400 [00:25<01:51,  3.07it/s]



 15%|████████████▎                                                                    | 61/400 [00:25<01:04,  5.25it/s]



 16%|████████████▊                                                                    | 63/400 [00:26<00:59,  5.65it/s]



 16%|█████████████▏                                                                   | 65/400 [00:26<01:01,  5.41it/s]



 16%|█████████████▎                                                                   | 66/400 [00:28<02:57,  1.88it/s]



 17%|█████████████▊                                                                   | 68/400 [00:28<02:04,  2.67it/s]



 17%|█████████████▉                                                                   | 69/400 [00:29<02:17,  2.41it/s]



 18%|██████████████▊                                                                  | 73/400 [00:29<01:15,  4.31it/s]



 19%|███████████████▏                                                                 | 75/400 [00:29<01:03,  5.16it/s]



 19%|███████████████▍                                                                 | 76/400 [00:30<01:01,  5.29it/s]



 19%|███████████████▌                                                                 | 77/400 [00:30<01:00,  5.32it/s]



 20%|███████████████▊                                                                 | 78/400 [00:31<02:40,  2.01it/s]



 20%|███████████████▉                                                                 | 79/400 [00:31<02:11,  2.44it/s]



 20%|████████████████▍                                                                | 81/400 [00:33<02:44,  1.94it/s]



 21%|████████████████▊                                                                | 83/400 [00:33<01:52,  2.82it/s]



 22%|█████████████████▊                                                               | 88/400 [00:33<00:52,  5.95it/s]



 22%|██████████████████▏                                                              | 90/400 [00:34<01:19,  3.91it/s]



Buscando hiperparámetros:   0%|                                                          | 0/400 fits [01:03<?, ?fit/s]





 24%|███████████████████                                                              | 94/400 [00:37<02:22,  2.15it/s]



 24%|███████████████████▏                                                             | 95/400 [00:37<02:08,  2.37it/s]



 24%|███████████████████▋                                                             | 97/400 [00:37<01:48,  2.79it/s]



 24%|███████████████████▊                                                             | 98/400 [00:37<01:35,  3.16it/s]



 25%|████████████████████                                                            | 100/400 [00:39<02:34,  1.95it/s]



 25%|████████████████████▏                                                           | 101/400 [00:40<02:59,  1.67it/s]



 26%|████████████████████▍                                                           | 102/400 [00:40<02:31,  1.97it/s]



 26%|████████████████████▌                                                           | 103/400 [00:41<02:11,  2.26it/s]



 26%|█████████████████████▏                                                          | 106/400 [00:41<01:10,  4.19it/s]



 27%|█████████████████████▌                                                          | 108/400 [00:41<01:10,  4.13it/s]



 28%|██████████████████████                                                          | 110/400 [00:42<01:09,  4.18it/s]



 28%|██████████████████████▌                                                         | 113/400 [00:43<01:29,  3.19it/s]



 28%|██████████████████████▊                                                         | 114/400 [00:44<01:52,  2.55it/s]



 29%|███████████████████████                                                         | 115/400 [00:44<01:42,  2.78it/s]



 29%|███████████████████████▍                                                        | 117/400 [00:44<01:11,  3.94it/s]



 30%|███████████████████████▌                                                        | 118/400 [00:44<01:10,  4.00it/s]



 30%|███████████████████████▊                                                        | 119/400 [00:45<01:07,  4.16it/s]



 30%|████████████████████████                                                        | 120/400 [00:45<01:09,  4.02it/s]



 30%|████████████████████████▏                                                       | 121/400 [00:45<01:04,  4.36it/s]



 30%|████████████████████████▍                                                       | 122/400 [00:45<01:23,  3.33it/s]



 31%|█████████████████████████                                                       | 125/400 [00:47<01:39,  2.77it/s]



 32%|█████████████████████████▏                                                      | 126/400 [00:48<02:21,  1.94it/s]



 32%|█████████████████████████▍                                                      | 127/400 [00:48<02:00,  2.27it/s]



 32%|██████████████████████████                                                      | 130/400 [00:48<01:15,  3.59it/s]



 33%|██████████████████████████▍                                                     | 132/400 [00:49<01:03,  4.20it/s]



 33%|██████████████████████████▌                                                     | 133/400 [00:49<00:57,  4.68it/s]



 34%|██████████████████████████▊                                                     | 134/400 [00:49<01:02,  4.23it/s]



 34%|███████████████████████████▏                                                    | 136/400 [00:52<02:55,  1.50it/s]



 34%|███████████████████████████▍                                                    | 137/400 [00:52<02:30,  1.75it/s]



 35%|███████████████████████████▊                                                    | 139/400 [00:52<01:38,  2.64it/s]



 36%|████████████████████████████▍                                                   | 142/400 [00:52<00:58,  4.41it/s]



 36%|████████████████████████████▊                                                   | 144/400 [00:54<01:53,  2.26it/s]



 36%|█████████████████████████████                                                   | 145/400 [00:55<01:57,  2.17it/s]



 37%|█████████████████████████████▍                                                  | 147/400 [00:55<01:29,  2.83it/s]



 37%|█████████████████████████████▌                                                  | 148/400 [00:55<01:34,  2.66it/s]



 37%|█████████████████████████████▊                                                  | 149/400 [00:56<01:19,  3.14it/s]



 38%|██████████████████████████████▏                                                 | 151/400 [00:56<00:55,  4.46it/s]



 38%|██████████████████████████████▌                                                 | 153/400 [00:56<01:05,  3.77it/s]



 38%|██████████████████████████████▊                                                 | 154/400 [00:57<00:58,  4.22it/s]



 39%|███████████████████████████████▏                                                | 156/400 [00:57<00:43,  5.56it/s]



 39%|███████████████████████████████▍                                                | 157/400 [00:58<01:21,  2.99it/s]



 40%|███████████████████████████████▌                                                | 158/400 [00:58<01:10,  3.43it/s]



 40%|███████████████████████████████▊                                                | 159/400 [00:58<01:15,  3.18it/s]



 40%|████████████████████████████████                                                | 160/400 [00:59<02:02,  1.96it/s]



 40%|████████████████████████████████▏                                               | 161/400 [01:01<03:31,  1.13it/s]



 41%|████████████████████████████████▊                                               | 164/400 [01:01<01:46,  2.22it/s]



 41%|█████████████████████████████████                                               | 165/400 [01:02<01:40,  2.34it/s]



 42%|█████████████████████████████████▌                                              | 168/400 [01:03<01:29,  2.58it/s]



 42%|█████████████████████████████████▊                                              | 169/400 [01:03<01:34,  2.45it/s]



 42%|██████████████████████████████████                                              | 170/400 [01:04<01:28,  2.60it/s]



 43%|██████████████████████████████████▏                                             | 171/400 [01:04<01:34,  2.41it/s]



 43%|██████████████████████████████████▍                                             | 172/400 [01:04<01:19,  2.88it/s]



 44%|███████████████████████████████████                                             | 175/400 [01:04<00:42,  5.29it/s]



 44%|███████████████████████████████████▍                                            | 177/400 [01:05<00:43,  5.17it/s]



 45%|████████████████████████████████████                                            | 180/400 [01:06<01:04,  3.39it/s]



 45%|████████████████████████████████████▏                                           | 181/400 [01:06<01:02,  3.48it/s]



 46%|████████████████████████████████████▍                                           | 182/400 [01:07<01:08,  3.17it/s]



 46%|████████████████████████████████████▌                                           | 183/400 [01:08<01:36,  2.25it/s]



 46%|████████████████████████████████████▊                                           | 184/400 [01:08<01:22,  2.61it/s]



 46%|█████████████████████████████████████▏                                          | 186/400 [01:08<00:58,  3.68it/s]



 47%|█████████████████████████████████████▊                                          | 189/400 [01:08<00:40,  5.15it/s]



 48%|██████████████████████████████████████▍                                         | 192/400 [01:09<00:55,  3.73it/s]



 48%|██████████████████████████████████████▌                                         | 193/400 [01:10<00:54,  3.78it/s]



 48%|██████████████████████████████████████▊                                         | 194/400 [01:10<01:00,  3.38it/s]



 49%|███████████████████████████████████████                                         | 195/400 [01:11<01:32,  2.22it/s]



 49%|███████████████████████████████████████▏                                        | 196/400 [01:11<01:25,  2.40it/s]



 50%|███████████████████████████████████████▌                                        | 198/400 [01:12<00:59,  3.39it/s]



 50%|████████████████████████████████████████▏                                       | 201/400 [01:12<00:36,  5.43it/s]



 51%|████████████████████████████████████████▊                                       | 204/400 [01:13<00:45,  4.33it/s]



 51%|█████████████████████████████████████████                                       | 205/400 [01:13<00:46,  4.19it/s]



 52%|█████████████████████████████████████████▏                                      | 206/400 [01:15<01:36,  2.01it/s]



 52%|█████████████████████████████████████████▍                                      | 207/400 [01:16<02:15,  1.42it/s]



 52%|██████████████████████████████████████████                                      | 210/400 [01:16<01:17,  2.46it/s]



 53%|██████████████████████████████████████████▏                                     | 211/400 [01:17<01:06,  2.83it/s]



 54%|███████████████████████████████████████████▏                                    | 216/400 [01:17<00:34,  5.31it/s]



 55%|███████████████████████████████████████████▌                                    | 218/400 [01:17<00:32,  5.65it/s]



 55%|███████████████████████████████████████████▊                                    | 219/400 [01:19<01:31,  1.98it/s]



 55%|████████████████████████████████████████████                                    | 220/400 [01:20<01:21,  2.22it/s]



 56%|████████████████████████████████████████████▍                                   | 222/400 [01:20<00:58,  3.05it/s]



 56%|████████████████████████████████████████████▌                                   | 223/400 [01:20<00:55,  3.17it/s]



 56%|█████████████████████████████████████████████                                   | 225/400 [01:20<00:41,  4.26it/s]



 56%|█████████████████████████████████████████████▏                                  | 226/400 [01:20<00:36,  4.77it/s]



 57%|█████████████████████████████████████████████▍                                  | 227/400 [01:21<00:32,  5.33it/s]



 57%|█████████████████████████████████████████████▌                                  | 228/400 [01:21<00:31,  5.45it/s]



 58%|██████████████████████████████████████████████▏                                 | 231/400 [01:23<01:23,  2.02it/s]



 58%|██████████████████████████████████████████████▍                                 | 232/400 [01:23<01:14,  2.26it/s]



 58%|██████████████████████████████████████████████▊                                 | 234/400 [01:23<00:51,  3.25it/s]



 59%|███████████████████████████████████████████████                                 | 235/400 [01:24<00:51,  3.23it/s]



 59%|███████████████████████████████████████████████▍                                | 237/400 [01:24<00:41,  3.95it/s]



 60%|███████████████████████████████████████████████▌                                | 238/400 [01:24<00:38,  4.20it/s]



 60%|████████████████████████████████████████████████                                | 240/400 [01:24<00:30,  5.33it/s]



 60%|████████████████████████████████████████████████▏                               | 241/400 [01:27<01:39,  1.60it/s]



 60%|████████████████████████████████████████████████▍                               | 242/400 [01:27<01:25,  1.86it/s]



 61%|████████████████████████████████████████████████▊                               | 244/400 [01:27<00:57,  2.70it/s]



 62%|█████████████████████████████████████████████████▏                              | 246/400 [01:27<00:43,  3.58it/s]



 62%|█████████████████████████████████████████████████▍                              | 247/400 [01:28<00:41,  3.66it/s]



 62%|█████████████████████████████████████████████████▌                              | 248/400 [01:29<01:13,  2.08it/s]



 62%|█████████████████████████████████████████████████▊                              | 249/400 [01:30<01:22,  1.82it/s]



 62%|██████████████████████████████████████████████████                              | 250/400 [01:30<01:07,  2.23it/s]



 63%|██████████████████████████████████████████████████▏                             | 251/400 [01:31<01:45,  1.41it/s]



 63%|██████████████████████████████████████████████████▍                             | 252/400 [01:31<01:22,  1.80it/s]



 64%|██████████████████████████████████████████████████▊                             | 254/400 [01:32<00:50,  2.88it/s]



 64%|███████████████████████████████████████████████████                             | 255/400 [01:32<01:02,  2.30it/s]



 64%|███████████████████████████████████████████████████▌                            | 258/400 [01:33<00:55,  2.57it/s]



 65%|███████████████████████████████████████████████████▊                            | 259/400 [01:34<00:57,  2.47it/s]



 65%|████████████████████████████████████████████████████                            | 260/400 [01:34<00:54,  2.59it/s]



 65%|████████████████████████████████████████████████████▏                           | 261/400 [01:34<00:52,  2.66it/s]



 66%|████████████████████████████████████████████████████▌                           | 263/400 [01:35<00:33,  4.04it/s]



 66%|█████████████████████████████████████████████████████                           | 265/400 [01:35<00:38,  3.53it/s]



 66%|█████████████████████████████████████████████████████▏                          | 266/400 [01:37<01:05,  2.03it/s]



 67%|█████████████████████████████████████████████████████▍                          | 267/400 [01:37<01:07,  1.98it/s]



 67%|█████████████████████████████████████████████████████▊                          | 269/400 [01:37<00:47,  2.75it/s]



 68%|██████████████████████████████████████████████████████                          | 270/400 [01:37<00:39,  3.26it/s]



 68%|██████████████████████████████████████████████████████▍                         | 272/400 [01:38<00:29,  4.33it/s]



 68%|██████████████████████████████████████████████████████▌                         | 273/400 [01:38<00:39,  3.22it/s]



 68%|██████████████████████████████████████████████████████▊                         | 274/400 [01:39<01:05,  1.91it/s]



 69%|███████████████████████████████████████████████████████▏                        | 276/400 [01:42<01:37,  1.27it/s]



 70%|███████████████████████████████████████████████████████▌                        | 278/400 [01:44<01:48,  1.13it/s]



 70%|███████████████████████████████████████████████████████▊                        | 279/400 [01:44<01:28,  1.37it/s]



 70%|████████████████████████████████████████████████████████                        | 280/400 [01:46<02:03,  1.03s/it]



 70%|████████████████████████████████████████████████████████▏                       | 281/400 [01:47<01:44,  1.13it/s]



 70%|████████████████████████████████████████████████████████▍                       | 282/400 [01:49<02:32,  1.29s/it]



 71%|████████████████████████████████████████████████████████▌                       | 283/400 [01:50<02:13,  1.14s/it]



 71%|████████████████████████████████████████████████████████▊                       | 284/400 [01:51<02:01,  1.05s/it]



 72%|█████████████████████████████████████████████████████████▏                      | 286/400 [01:53<01:55,  1.02s/it]



 72%|█████████████████████████████████████████████████████████▌                      | 288/400 [01:53<01:17,  1.44it/s]



 72%|█████████████████████████████████████████████████████████▊                      | 289/400 [01:53<01:15,  1.47it/s]



 72%|██████████████████████████████████████████████████████████                      | 290/400 [01:54<01:19,  1.38it/s]



 73%|██████████████████████████████████████████████████████████▏                     | 291/400 [01:55<01:06,  1.63it/s]



 73%|██████████████████████████████████████████████████████████▌                     | 293/400 [01:55<00:45,  2.33it/s]



 74%|██████████████████████████████████████████████████████████▊                     | 294/400 [01:57<01:21,  1.31it/s]



 74%|███████████████████████████████████████████████████████████                     | 295/400 [01:57<01:10,  1.50it/s]



 74%|███████████████████████████████████████████████████████████▏                    | 296/400 [01:58<01:07,  1.53it/s]



 74%|███████████████████████████████████████████████████████████▍                    | 297/400 [01:59<01:17,  1.33it/s]



 75%|███████████████████████████████████████████████████████████▊                    | 299/400 [01:59<00:46,  2.19it/s]



 75%|████████████████████████████████████████████████████████████                    | 300/400 [01:59<00:39,  2.51it/s]



 76%|████████████████████████████████████████████████████████████▍                   | 302/400 [01:59<00:27,  3.62it/s]



 76%|████████████████████████████████████████████████████████████▌                   | 303/400 [02:00<00:25,  3.87it/s]



 76%|████████████████████████████████████████████████████████████▊                   | 304/400 [02:01<00:42,  2.26it/s]



 76%|█████████████████████████████████████████████████████████████                   | 305/400 [02:01<00:42,  2.24it/s]



 76%|█████████████████████████████████████████████████████████████▏                  | 306/400 [02:01<00:36,  2.58it/s]



 77%|█████████████████████████████████████████████████████████████▊                  | 309/400 [02:02<00:19,  4.76it/s]



 78%|██████████████████████████████████████████████████████████████                  | 310/400 [02:02<00:20,  4.33it/s]



 78%|██████████████████████████████████████████████████████████████▏                 | 311/400 [02:02<00:23,  3.73it/s]



 78%|██████████████████████████████████████████████████████████████▌                 | 313/400 [02:02<00:17,  5.03it/s]



 79%|███████████████████████████████████████████████████████████████                 | 315/400 [02:03<00:15,  5.38it/s]



 79%|███████████████████████████████████████████████████████████████▏                | 316/400 [02:04<00:34,  2.40it/s]



 79%|███████████████████████████████████████████████████████████████▍                | 317/400 [02:05<00:39,  2.12it/s]



 80%|███████████████████████████████████████████████████████████████▌                | 318/400 [02:05<00:35,  2.28it/s]



 80%|███████████████████████████████████████████████████████████████▊                | 319/400 [02:05<00:29,  2.75it/s]



 80%|████████████████████████████████████████████████████████████████▏               | 321/400 [02:06<00:26,  3.03it/s]



 80%|████████████████████████████████████████████████████████████████▍               | 322/400 [02:06<00:24,  3.17it/s]



 81%|████████████████████████████████████████████████████████████████▌               | 323/400 [02:06<00:25,  2.97it/s]



 81%|█████████████████████████████████████████████████████████████████               | 325/400 [02:07<00:20,  3.68it/s]



 82%|█████████████████████████████████████████████████████████████████▏              | 326/400 [02:07<00:17,  4.21it/s]



 82%|█████████████████████████████████████████████████████████████████▌              | 328/400 [02:08<00:22,  3.25it/s]



 82%|█████████████████████████████████████████████████████████████████▊              | 329/400 [02:08<00:27,  2.56it/s]



 82%|██████████████████████████████████████████████████████████████████              | 330/400 [02:09<00:32,  2.15it/s]



 83%|██████████████████████████████████████████████████████████████████▍             | 332/400 [02:09<00:20,  3.37it/s]



 83%|██████████████████████████████████████████████████████████████████▌             | 333/400 [02:10<00:22,  2.94it/s]



 84%|██████████████████████████████████████████████████████████████████▊             | 334/400 [02:10<00:21,  3.05it/s]



 84%|███████████████████████████████████████████████████████████████████             | 335/400 [02:10<00:22,  2.95it/s]



 84%|███████████████████████████████████████████████████████████████████▏            | 336/400 [02:13<00:56,  1.13it/s]



 84%|███████████████████████████████████████████████████████████████████▍            | 337/400 [02:13<00:44,  1.42it/s]



 85%|████████████████████████████████████████████████████████████████████▏           | 341/400 [02:13<00:17,  3.41it/s]



 86%|████████████████████████████████████████████████████████████████████▌           | 343/400 [02:13<00:13,  4.35it/s]



 86%|█████████████████████████████████████████████████████████████████████           | 345/400 [02:14<00:14,  3.79it/s]



 87%|█████████████████████████████████████████████████████████████████████▍          | 347/400 [02:14<00:11,  4.68it/s]



 87%|█████████████████████████████████████████████████████████████████████▌          | 348/400 [02:16<00:24,  2.16it/s]



 87%|█████████████████████████████████████████████████████████████████████▊          | 349/400 [02:16<00:24,  2.06it/s]



 88%|██████████████████████████████████████████████████████████████████████▏         | 351/400 [02:17<00:16,  3.00it/s]



 88%|██████████████████████████████████████████████████████████████████████▌         | 353/400 [02:17<00:11,  3.96it/s]



 89%|███████████████████████████████████████████████████████████████████████▏        | 356/400 [02:19<00:18,  2.44it/s]



 89%|███████████████████████████████████████████████████████████████████████▍        | 357/400 [02:19<00:21,  2.04it/s]



 90%|███████████████████████████████████████████████████████████████████████▌        | 358/400 [02:20<00:19,  2.17it/s]



 90%|███████████████████████████████████████████████████████████████████████▊        | 359/400 [02:20<00:16,  2.44it/s]



 90%|████████████████████████████████████████████████████████████████████████▏       | 361/400 [02:20<00:10,  3.64it/s]



 91%|█████████████████████████████████████████████████████████████████████████       | 365/400 [02:21<00:06,  5.48it/s]



 92%|█████████████████████████████████████████████████████████████████████████▍      | 367/400 [02:21<00:06,  5.13it/s]



 92%|█████████████████████████████████████████████████████████████████████████▌      | 368/400 [02:22<00:08,  3.89it/s]



 92%|█████████████████████████████████████████████████████████████████████████▊      | 369/400 [02:22<00:11,  2.75it/s]



 92%|██████████████████████████████████████████████████████████████████████████      | 370/400 [02:24<00:16,  1.85it/s]



 93%|██████████████████████████████████████████████████████████████████████████▏     | 371/400 [02:24<00:15,  1.83it/s]



 93%|██████████████████████████████████████████████████████████████████████████▍     | 372/400 [02:24<00:13,  2.10it/s]



 93%|██████████████████████████████████████████████████████████████████████████▌     | 373/400 [02:25<00:10,  2.65it/s]



 94%|███████████████████████████████████████████████████████████████████████████     | 375/400 [02:25<00:07,  3.44it/s]



 94%|███████████████████████████████████████████████████████████████████████████▏    | 376/400 [02:25<00:06,  3.65it/s]



 94%|███████████████████████████████████████████████████████████████████████████▍    | 377/400 [02:26<00:07,  2.98it/s]



 94%|███████████████████████████████████████████████████████████████████████████▌    | 378/400 [02:26<00:07,  3.02it/s]



 95%|███████████████████████████████████████████████████████████████████████████▊    | 379/400 [02:26<00:06,  3.15it/s]



 95%|████████████████████████████████████████████████████████████████████████████▏   | 381/400 [02:26<00:03,  4.80it/s]



 96%|████████████████████████████████████████████████████████████████████████████▌   | 383/400 [02:27<00:02,  5.73it/s]



 96%|████████████████████████████████████████████████████████████████████████████▊   | 384/400 [02:27<00:04,  3.56it/s]



 96%|█████████████████████████████████████████████████████████████████████████████   | 385/400 [02:28<00:04,  3.50it/s]



 97%|█████████████████████████████████████████████████████████████████████████████▍  | 387/400 [02:28<00:03,  3.95it/s]



 97%|█████████████████████████████████████████████████████████████████████████████▌  | 388/400 [02:28<00:03,  3.89it/s]



 97%|█████████████████████████████████████████████████████████████████████████████▊  | 389/400 [02:29<00:04,  2.62it/s]



 98%|██████████████████████████████████████████████████████████████████████████████  | 390/400 [02:30<00:04,  2.39it/s]



 98%|██████████████████████████████████████████████████████████████████████████████▍ | 392/400 [02:30<00:02,  3.68it/s]



 98%|██████████████████████████████████████████████████████████████████████████████▌ | 393/400 [02:30<00:01,  4.29it/s]



 99%|███████████████████████████████████████████████████████████████████████████████ | 395/400 [02:30<00:00,  6.12it/s]



 99%|███████████████████████████████████████████████████████████████████████████████▍| 397/400 [02:31<00:00,  4.36it/s]



100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [02:32<00:00,  2.62it/s]


Tiempo búsqueda (s): 152
Best robust CV: 0.2630677408836106
Best params: {'kbest__k': 1023, 'svc__C': np.float64(10.772186132342654), 'svc__class_weight': 'balanced', 'svc__gamma': np.float64(0.05825849149538057)}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                            params
        0.263068        0.103613              {'kbest__k': 1023, 'svc__C': 10.772186132342654, 'svc__class_weight': 'balanced', 'svc__gamma': 0.05825849149538057}
        0.205681        0.097874  {'kbest__k': 512, 'svc__C': 1221.212458656701, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.01543656015027835}
        0.205348        0.107474 {'kbest__k': 1023, 'svc__C': 852.6974999202718, 'svc__class_weight': {0: 3.0, 1: 6.0, 2: 1.0}, 'svc__gamma': 0.02589296092555009}
        0.196802        0.039029              {'kbest__k': 1023, 'svc__C': 2.5404580742256915, 'svc__class_weight': 'balanced', 'svc__gamma': 0.06395288218539363}
        0.186214        0.112701   {'kbest__k': 384, 'svc__C': 159.0841875063149, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.0445131431564921}
        0.169621        0.135971             {'kbest__k': 1023, 'svc__C': 784.8081929215668, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842181e-05}
        0.148461        0.161447              {'kbest__k': 128, 'svc__C': 442.0875300846289, 'svc__class_weight': 'balanced', 'svc__gamma': 3.341229373953389e-05}
        0.106438        0.100130  {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935042}
        0.102361        0.183794            {'kbest__k': 1023, 'svc__C': 34.57103872771002, 'svc__class_weight': 'balanced', 'svc__gamma': 0.00026555217129736324}
        0.071232        0.085421            {'kbest__k': 1023, 'svc__C': 30.036641140654908, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0035753161317240816}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.5491±0.0332 | AUC_train=0.7748 | AUC_test=0.8418
  Gap_CV=0.2257 | Gap_rep=-0.0671
  Acc=0.7268 | F1w=0.7367 | macroF1=0.6958 | balacc=0.7435

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.53      0.51      0.52       422
      Blazar       0.47      0.65      0.54       189
         QSO       0.75      0.70      0.72       899

    accuracy                           0.64      1510
   macro avg       0.58      0.62      0.60      1510
weighted avg       0.65      0.64      0.65      1510

  CM TEST
        AGN  Blazar  QSO
AGN      71       7   23
Blazar    5      24    1
QSO      58       9  179

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.70      0.60       101
      Blazar       0.60      0.80      0.69        30
         QSO       0.88      0.73      0.80       246

    accuracy                           0.73       377
   macro avg       0.67      0.74      0.70       377
weighted avg       0.77      0.73      0.74       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.5067±0.0577 | AUC_train=0.7380 | AUC_test=0.8101
  Gap_CV=0.2313 | Gap_rep=-0.0721
  Acc=0.7162 | F1w=0.7251 | macroF1=0.6828 | balacc=0.7517

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.55      0.63      0.59       422
      Blazar       0.26      0.98      0.42       189
         QSO       0.98      0.35      0.52       899

    accuracy                           0.51      1510
   macro avg       0.60      0.65      0.51      1510
weighted avg       0.77      0.51      0.52      1510

  CM TEST
        AGN  Blazar  QSO
AGN      83       2   16
Blazar    3      23    4
QSO      63      19  164

  Report TEST
              precision    recall  f1-score   support

         AGN       0.56      0.82      0.66       101
      Blazar       0.52      0.77      0.62        30
         QSO       0.89      0.67      0.76       246

    accuracy                           0.72       377
   macro avg       0.66      0.75      0.68       377
weighted avg       0.77      0.72      0.73       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.5032±0.0538 | AUC_train=0.7478 | AUC_test=0.8136
  Gap_CV=0.2446 | Gap_rep=-0.0658
  Acc=0.7294 | F1w=0.7380 | macroF1=0.6958 | balacc=0.7643

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.55      0.68      0.61       422
      Blazar       0.28      0.98      0.44       189
         QSO       1.00      0.36      0.53       899

    accuracy                           0.53      1510
   macro avg       0.61      0.68      0.53      1510
weighted avg       0.78      0.53      0.54      1510

  CM TEST
        AGN  Blazar  QSO
AGN      81       3   17
Blazar    2      24    4
QSO      58      18  170

  Report TEST
              precision    recall  f1-score   support

         AGN       0.57      0.80      0.67       101
      Blazar       0.53      0.80      0.64        30
         QSO       0.89      0.69      0.78       246

    accuracy                           0.73       377
   macro avg       0.67      0.76      0.70       377
weighted avg       0.78      0.73      0.74       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.5493±0.0252 | AUC_train=0.7438 | AUC_test=0.8205
  Gap_CV=0.1945 | Gap_rep=-0.0767
  Acc=0.6817 | F1w=0.6929 | macroF1=0.6488 | balacc=0.7030

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.54      0.38      0.45       422
      Blazar       0.41      0.62      0.50       189
         QSO       0.71      0.74      0.73       899

    accuracy                           0.62      1510
   macro avg       0.56      0.58      0.56      1510
weighted avg       0.63      0.62      0.62      1510

  CM TEST
        AGN  Blazar  QSO
AGN      62       9   30
Blazar    5      24    1
QSO      64      11  171

  Report TEST
              precision    recall  f1-score   support

         AGN       0.47      0.61      0.53       101
      Blazar       0.55      0.80      0.65        30
         QSO       0.85      0.70      0.76       246

    accuracy                           0.68       377
   macro avg       0.62      0.70      0.65       377
weighted avg       0.72      0.68      0.69       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.5035±0.0571 | AUC_train=0.7477 | AUC_test=0.8378
  Gap_CV=0.2442 | Gap_rep=-0.0901
  Acc=0.7321 | F1w=0.7411 | macroF1=0.6937 | balacc=0.7657

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.57      0.59      0.58       422
      Blazar       0.25      0.98      0.40       189
         QSO       0.98      0.36      0.52       899

    accuracy                           0.50      1510
   macro avg       0.60      0.64      0.50      1510
weighted avg       0.78      0.50      0.52      1510

  CM TEST
        AGN  Blazar  QSO
AGN      81       4   16
Blazar    2      24    4
QSO      56      19  171

  Report TEST
              precision    recall  f1-score   support

         AGN       0.58      0.80      0.68       101
      Blazar       0.51      0.80      0.62        30
         QSO       0.90      0.70      0.78       246

    accuracy                           0.73       377
   macro avg       0.66      0.77      0.69       377
weighted avg       0.78      0.73      0.74       377

  Figura guardada: C:\Users\Gamer\Downloads\SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_test  F1w_test  macroF1  balacc
SVM1  0.5491 0.0332  0.2257   0.8418  0.0332  -0.0671    0.7268    0.7367   0.6958  0.7435
SVM2  0.5067 0.0577  0.2313   0.8101  0.0577  -0.0721    0.7162    0.7251   0.6828  0.7517
SVM3  0.5032 0.0538  0.2446   0.8136  0.0538  -0.0658    0.7294    0.7380   0.6958  0.7643
SVM4  0.5493 0.0252  0.1945   0.8205  0.0252  -0.0767    0.6817    0.6929   0.6488  0.7030
SVM5  0.5035 0.0571  0.2442   0.8378  0.0571  -0.0901    0.7321    0.7411   0.6937  0.7657

>>> Mejor modelo: SVM1  AUC_test=0.8418

Tabla LaTeX guardada en: C:\Users\Gamer\Downloads\tabla_svm_top5.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        \rowcolor{BlueBest} SVM$_{1}$ & 0.5491 & 0.0332 & 0.2257 & 0.8418 & 0.0332 & -0.0671 & 0.7268 & 0.7367 \\
        SVM$_{2}$ & 0.5067 & 0.0577 & 0.2313 & 0.8101 & 0.0577 & -0.0721 & 0.7162 & 0.7251 \\
        SVM$_{3}$ & 0.5032 & 0.0538 & 0.2446 & 0.8136 & 0.0538 & -0.0658 & 0.7294 & 0.7380 \\
        SVM$_{4}$ & 0.5493 & 0.0252 & 0.1945 & 0.8205 & 0.0252 & -0.0767 & 0.6817 & 0.6929 \\
        SVM$_{5}$ & 0.5035 & 0.0571 & 0.2442 & 0.8378 & 0.0571 & -0.0901 & 0.7321 & 0.7411 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG).}
\label{tab:svm_top5_auc}
\end{table}

Tiempo total: 00:04:01

# =========================
# IMPORTANCIA POR NIVEL PARA SVM (RBF) CON PERMUTATION IMPORTANCE
# FIRMA ESTÁNDAR CON IISIGNATURE
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.inspection import permutation_importance

plt.style.use("default")

# -------------------------------------------------
# 1) Mejor modelo
# -------------------------------------------------
svm_best = rs.best_estimator_

# -------------------------------------------------
# 2) Permutation importance sobre el pipeline completo
# -------------------------------------------------
perm = permutation_importance(
    estimator=svm_best,
    X=X_test,
    y=y_test_enc,
    n_repeats=20,
    random_state=42,
    scoring="balanced_accuracy",
    n_jobs=-1
)

importances_mean = perm.importances_mean

# -------------------------------------------------
# 3) DataFrame base con posición ORIGINAL de cada variable
#    Aquí NO usamos nombres numéricos, usamos posición
# -------------------------------------------------
orig_cols = pd.Index(X_test.columns)

df_all = pd.DataFrame({
    "feature": orig_cols.astype(str),
    "importance_mean": importances_mean,
    "orig_pos": np.arange(len(orig_cols))   # posición 0-based
})

# -------------------------------------------------
# 4) Identificar qué variables llegan realmente al SVM final
#    usando máscaras, no nombres
# -------------------------------------------------
selected_mask = np.ones(len(orig_cols), dtype=bool)

if hasattr(svm_best, "named_steps"):

    # VarianceThreshold
    if "vt" in svm_best.named_steps:
        vt_mask = svm_best.named_steps["vt"].get_support()
        selected_mask = vt_mask.copy()

    # SelectKBest (aplicado sobre las que sobrevivieron a VT)
    if "kbest" in svm_best.named_steps:
        kbest_mask = svm_best.named_steps["kbest"].get_support()

        idx_after_vt = np.where(selected_mask)[0]
        new_mask = np.zeros(len(orig_cols), dtype=bool)
        new_mask[idx_after_vt[kbest_mask]] = True
        selected_mask = new_mask

print(f"Variables originales en X_test: {len(orig_cols)}")
print(f"Variables que realmente usa el SVM final: {selected_mask.sum()}")

df_imp_svm = df_all.loc[selected_mask].copy()

# -------------------------------------------------
# 5) Detectar si existe término constante N0
#    (muy común en firma estándar)
# -------------------------------------------------
first_col_values = np.asarray(X_test.iloc[:, 0], dtype=float)
has_n0 = np.allclose(first_col_values, 1.0)

print(f"¿Se detectó término constante N0?: {has_n0}")

# -------------------------------------------------
# 6) Asignar niveles usando POSICIÓN ORIGINAL
# -------------------------------------------------
if has_n0:
    # Firma estándar con término constante:
    # N0: 1
    # N1: 2
    # N2: 4
    # N3: 8
    # ...
    # acumulados incluyendo N0:
    # 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023
    bins = [0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]
    labels = [f"N{i}" for i in range(10)]   # N0..N9
else:
    # Firma estándar sin término constante:
    # acumulados:
    # 2, 6, 14, 30, 62, 126, 254, 510, 1022
    bins = [0, 2, 6, 14, 30, 62, 126, 254, 510, 1022]
    labels = [f"N{i}" for i in range(1, 10)]  # N1..N9

df_imp_svm["nivel"] = pd.cut(
    df_imp_svm["orig_pos"],
    bins=bins,
    labels=labels,
    right=False,
    include_lowest=True
)

if df_imp_svm["nivel"].isna().any():
    bad = df_imp_svm.loc[df_imp_svm["nivel"].isna(), ["feature", "orig_pos"]].head(10)
    raise ValueError(
        "Hay variables fuera del rango esperado para firma estándar. "
        f"Ejemplos:\n{bad}"
    )

# Excluir N0 si existe, para mantener el mismo estilo del gráfico anterior
df_imp_svm = df_imp_svm[df_imp_svm["nivel"] != "N0"].copy()

df_imp_svm["nivel_num"] = (
    df_imp_svm["nivel"].astype(str).str.replace("N", "", regex=False).astype(int)
)

df_imp_svm = df_imp_svm.sort_values(["nivel_num", "orig_pos"])

print("\nCantidad de variables seleccionadas por nivel:")
print(df_imp_svm["nivel_num"].value_counts().sort_index())

# -------------------------------------------------
# 7) Resumen por nivel
# -------------------------------------------------
res_svm = df_imp_svm.groupby("nivel_num").agg(
    mean=("importance_mean", "mean"),
    median=("importance_mean", "median"),
    count=("importance_mean", "size")
).reset_index()

# Forzar presencia de todos los niveles 1..9
niveles_completos = pd.DataFrame({"nivel_num": np.arange(1, 10)})
res_svm = niveles_completos.merge(res_svm, on="nivel_num", how="left").fillna(0)

print("\nRESUMEN POR NIVEL — SVM FIRMA IISIGNATURE")
print(res_svm.to_string(index=False))

# -------------------------------------------------
# 8) Gráfico
# -------------------------------------------------
x = np.arange(len(res_svm))
labels_plot = [f"Nivel {n}" for n in res_svm["nivel_num"]]

fig, ax = plt.subplots(figsize=(9.5, 5.2), facecolor="white")
ax.set_facecolor("white")

ax.bar(
    x,
    res_svm["mean"],
    color="#8FA8C7",
    edgecolor="#355C7D",
    linewidth=1.2,
    alpha=0.9,
    label="Importancia promedio"
)

ax.scatter(
    x,
    res_svm["median"],
    s=80,
    marker="o",
    color="#355C7D",
    edgecolors="#1F3A56",
    linewidths=1,
    zorder=3,
    label="Mediana"
)

ax.set_xticks(x)
ax.set_xticklabels(labels_plot)
ax.set_xlabel("Nivel")
ax.set_ylabel("Importancia")
ax.set_title("Importancia promedio y mediana por nivel (SVM, firma iisignature)")
ax.grid(axis="y", linestyle="--", alpha=0.35, color="gray")

handles, labels_legend = ax.get_legend_handles_labels()
order = [1, 0]
ax.legend([handles[i] for i in order], [labels_legend[i] for i in order])

fig.tight_layout()

# -------------------------------------------------
# 9) Guardar
# -------------------------------------------------
downloads = Path.home() / "Downloads"
downloads.mkdir(parents=True, exist_ok=True)
ruta_guardado = downloads / "importancia_SVM_IISIGNATURE.png"

fig.savefig(ruta_guardado, dpi=300, bbox_inches="tight", facecolor="white")
print(f"\nGráfico guardado en: {ruta_guardado}")

plt.show()

Variables originales en X_test: 127
Variables que realmente usa el SVM final: 127
¿Se detectó término constante N0?: False

Cantidad de variables seleccionadas por nivel:
nivel_num
1     2
2     4
3     8
4    16
5    32
6    64
7     1
Name: count, dtype: int64

RESUMEN POR NIVEL — SVM FIRMA IISIGNATURE
 nivel_num     mean   median  count
         1 0.016330 0.016330    2.0
         2 0.076683 0.062172    4.0
         3 0.025277 0.021838    8.0
         4 0.017199 0.013411   16.0
         5 0.014639 0.012302   32.0
         6 0.007212 0.005979   64.0
         7 0.002731 0.002731    1.0
         8 0.000000 0.000000    0.0
         9 0.000000 0.000000    0.0

Gráfico guardado en: C:\Users\Gamer\Downloads\importancia_SVM_IISIGNATURE.png

# ==========================================================
# TABLA POR NIVELES (SVM) — AUTO-DETECCIÓN
# - Funciona para firma estándar o log-firma
# - No depende de que X_train.columns sean numéricas
# - Usa el mismo pipeline del modelo final
# ==========================================================

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from scipy.special import softmax
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# ----------------------------------------------------------
# 0) REQUISITOS PREVIOS
# ----------------------------------------------------------
# Deben existir:
# X_train, X_test, y_train_enc, y_test_enc, top10, make_pipe

y_train_encoded = y_train_enc
y_test_encoded  = y_test_enc

# ----------------------------------------------------------
# 1) ELEGIR EL MODELO FINAL
# ----------------------------------------------------------
# Cambia este índice según el modelo que quieras evaluar:
# 0 -> SVM1, 1 -> SVM2, ..., 4 -> SVM5
MODEL_IDX = 0

FINAL_PARAMS = top10.iloc[MODEL_IDX]["params"].copy()

print("Parámetros del modelo final:")
print(FINAL_PARAMS)

kbest_original = FINAL_PARAMS.get("kbest__k", X_train.shape[1])

# ----------------------------------------------------------
# 2) FUNCIÓN AUC
# ----------------------------------------------------------
def auc_ovr_macro_from_scores(y_true, decision_scores):
    if decision_scores.ndim == 1:
        decision_scores = decision_scores.reshape(-1, 1)
    proba = softmax(decision_scores, axis=1)
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

# ----------------------------------------------------------
# 3) ORDEN DE COLUMNAS Y DETECCIÓN DE REPRESENTACIÓN
# ----------------------------------------------------------
cols_ordered = list(X_train.columns)
n_total = len(cols_ordered)

print(f"Número total de columnas en X_train: {n_total}")

# Detectar N0 si la primera columna es constante 1
first_col_values = np.asarray(X_train.iloc[:, 0], dtype=float)
has_n0 = np.allclose(first_col_values, 1.0)

print(f"¿Se detectó término constante N0?: {has_n0}")

start_idx = 1 if has_n0 else 0
n_useful = n_total - start_idx

# Detectar automáticamente firma estándar vs log-firma
if n_useful in (1022, 1023):
    rep_name = "firma estándar"
    cum_levels = {
        1: 2,
        2: 6,
        3: 14,
        4: 30,
        5: 62,
        6: 126,
        7: 254,
        8: 510,
        9: 1022
    }
elif n_useful in (127, 128):
    rep_name = "log-firma"
    cum_levels = {
        1: 2,
        2: 3,
        3: 5,
        4: 8,
        5: 14,
        6: 23,
        7: 41,
        8: 71,
        9: 127
    }
else:
    raise ValueError(
        f"No se pudo reconocer la representación. "
        f"Columnas útiles detectadas: {n_useful}. "
        f"Esperaba algo compatible con firma estándar (1022/1023) "
        f"o log-firma (127/128)."
    )

print(f"Representación detectada: {rep_name}")

max_needed = start_idx + cum_levels[9]
if n_total < max_needed:
    raise ValueError(
        f"X_train tiene {n_total} columnas, pero para {rep_name} "
        f"se esperaban al menos {max_needed} considerando has_n0={has_n0}."
    )

# ----------------------------------------------------------
# 4) LOOP POR NIVELES 1..9
# ----------------------------------------------------------
rows = []

for m in tqdm(range(1, 10), desc=f"Evaluando niveles ({rep_name})", unit="nivel"):
    n_feats = cum_levels[m]

    selected = cols_ordered[start_idx : start_idx + n_feats]

    Xtr_m = X_train[selected].copy()
    Xte_m = X_test[selected].copy()

    params_m = FINAL_PARAMS.copy()

    if isinstance(kbest_original, (int, np.integer, float, np.floating)):
        params_m["kbest__k"] = int(min(int(kbest_original), Xtr_m.shape[1]))
    else:
        params_m["kbest__k"] = "all"

    model_m = make_pipe(params_m)
    model_m.fit(Xtr_m, y_train_encoded)

    pred_tr = model_m.predict(Xtr_m)
    pred_te = model_m.predict(Xte_m)

    scores_tr = model_m.decision_function(Xtr_m)
    scores_te = model_m.decision_function(Xte_m)

    acc_tr = accuracy_score(y_train_encoded, pred_tr)
    f1w_tr = f1_score(y_train_encoded, pred_tr, average="weighted", zero_division=0)
    auc_tr = auc_ovr_macro_from_scores(y_train_encoded, scores_tr)

    acc_te = accuracy_score(y_test_encoded, pred_te)
    f1w_te = f1_score(y_test_encoded, pred_te, average="weighted", zero_division=0)
    auc_te = auc_ovr_macro_from_scores(y_test_encoded, scores_te)

    rows.append({
        "NivelFirma": m,
        "N_features": Xtr_m.shape[1],
        "kbest_usado": params_m["kbest__k"],
        "AccTrain": acc_tr,
        "F1wTrain": f1w_tr,
        "AUCTrain": auc_tr,
        "AccTest": acc_te,
        "F1wTest": f1w_te,
        "AUCRep": auc_te
    })

df_levels = pd.DataFrame(rows)

# ----------------------------------------------------------
# 5) ELEGIR EL NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
# ----------------------------------------------------------
best_auc = df_levels["AUCRep"].max()
tol = 1e-4
best_candidates = df_levels[df_levels["AUCRep"] >= best_auc - tol]
best_simple = best_candidates.sort_values(["NivelFirma"]).iloc[0]

print("\n" + "="*100)
print(f"RESULTADOS POR NIVEL (SVM, {rep_name}) — criterio AUCRep")
print("="*100)
print(df_levels.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

print("\n" + "="*100)
print("NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)")
print("="*100)
print(best_simple.to_string())

# ----------------------------------------------------------
# 6) FILAS LaTeX
# ----------------------------------------------------------
print("\n" + "="*100)
print("FILAS LaTeX")
print("="*100)
for _, r in df_levels.iterrows():
    print(
        f"{int(r['NivelFirma'])} & {int(r['N_features'])} & "
        f"{r['AccTrain']:.4f} & {r['F1wTrain']:.4f} & "
        f"{r['AccTest']:.4f} & {r['F1wTest']:.4f} & {r['AUCRep']:.4f} \\\\"
    )

Parámetros del modelo final:
{'kbest__k': 1023, 'svc__C': np.float64(10.772186132342654), 'svc__class_weight': 'balanced', 'svc__gamma': np.float64(0.05825849149538057)}
Número total de columnas en X_train: 127
¿Se detectó término constante N0?: False
Representación detectada: log-firma

Evaluando niveles (log-firma): 100%|██████████████████████████████████████████████████| 9/9 [00:07<00:00,  1.16nivel/s]


====================================================================================================
RESULTADOS POR NIVEL (SVM, log-firma) — criterio AUCRep
====================================================================================================
 NivelFirma  N_features  kbest_usado  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2            2    0.4305    0.4422    0.5394   0.5252   0.5281  0.5357
          2           3            3    0.5106    0.5064    0.5783   0.5358   0.5344  0.5977
          3           5            5    0.5073    0.5163    0.6424   0.5544   0.5729  0.7018
          4           8            8    0.5450    0.5497    0.6775   0.5889   0.6041  0.7254
          5          14           14    0.5795    0.5815    0.7094   0.6366   0.6523  0.7456
          6          23           23    0.5960    0.5988    0.7283   0.6684   0.6787  0.7860
          7          41           41    0.6132    0.6164    0.7512   0.6976   0.7061  0.8110
          8          71           71    0.6179    0.6215    0.7604   0.6897   0.7004  0.8361
          9         127          127    0.6424    0.6453    0.7748   0.7268   0.7367  0.8418

====================================================================================================
NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)
====================================================================================================
NivelFirma       9.000000
N_features     127.000000
kbest_usado    127.000000
AccTrain         0.642384
F1wTrain         0.645342
AUCTrain         0.774758
AccTest          0.726790
F1wTest          0.736721
AUCRep           0.841813

====================================================================================================
FILAS LaTeX
====================================================================================================
1 & 2 & 0.4305 & 0.4422 & 0.5252 & 0.5281 & 0.5357 \\
2 & 3 & 0.5106 & 0.5064 & 0.5358 & 0.5344 & 0.5977 \\
3 & 5 & 0.5073 & 0.5163 & 0.5544 & 0.5729 & 0.7018 \\
4 & 8 & 0.5450 & 0.5497 & 0.5889 & 0.6041 & 0.7254 \\
5 & 14 & 0.5795 & 0.5815 & 0.6366 & 0.6523 & 0.7456 \\
6 & 23 & 0.5960 & 0.5988 & 0.6684 & 0.6787 & 0.7860 \\
7 & 41 & 0.6132 & 0.6164 & 0.6976 & 0.7061 & 0.8110 \\
8 & 71 & 0.6179 & 0.6215 & 0.6897 & 0.7004 & 0.8361 \\
9 & 127 & 0.6424 & 0.6453 & 0.7268 & 0.7367 & 0.8418 \\

DATOS REALES

# %pip install -U imbalanced-learn

Defaulting to user installation because normal site-packages is not writeable

Collecting imbalanced-learn

  Downloading imbalanced_learn-0.12.4-py3-none-any.whl.metadata (8.3 kB)

Requirement already satisfied: numpy>=1.17.3 in /home/felorrieta/.local/lib/python3.8/site-packages (from imbalanced-learn) (1.24.4)

Requirement already satisfied: scipy>=1.5.0 in /home/felorrieta/.local/lib/python3.8/site-packages (from imbalanced-learn) (1.10.1)

Requirement already satisfied: scikit-learn>=1.0.2 in /home/felorrieta/.local/lib/python3.8/site-packages (from imbalanced-learn) (1.3.2)

Requirement already satisfied: joblib>=1.1.1 in /home/felorrieta/.local/lib/python3.8/site-packages (from imbalanced-learn) (1.4.2)

Requirement already satisfied: threadpoolctl>=2.0.0 in /home/felorrieta/.local/lib/python3.8/site-packages (from imbalanced-learn) (3.5.0)

Downloading imbalanced_learn-0.12.4-py3-none-any.whl (258 kB)

WARNING: Error parsing dependencies of distro-info: Invalid version: '0.23ubuntu1'

WARNING: Error parsing dependencies of python-debian: Invalid version: '0.1.36ubuntu1'

Installing collected packages: imbalanced-learn

Successfully installed imbalanced-learn-0.12.4

Note: you may need to restart the kernel to use updated packages.

ESIG FIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS REALES
# =========================
x = pd.read_csv('/home/felorrieta/Catalina/path_signature_esig_REALES_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'/home/felorrieta/Catalina'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

# máximo real de features disponibles antes de kbest:
# imputación + variance threshold
imp_tmp = SimpleImputer(strategy="median")
X_tmp = imp_tmp.fit_transform(X_train)

vt_tmp = VarianceThreshold(0.0)
X_tmp_vt = vt_tmp.fit_transform(X_tmp)

n_features_after_vt = X_tmp_vt.shape[1]

k_grid_base = [128, 256, 384, 512, 768, 1023]
k_grid = [k for k in k_grid_base if k <= n_features_after_vt]

if n_features_after_vt not in k_grid:
    k_grid.append(n_features_after_vt)

k_grid = sorted(set(k_grid))

print("Número de features originales :", X_train.shape[1])
print("Número de features post-VT    :", n_features_after_vt)
print("Grid kbest usado              :", k_grid)

param_dist = {
    "kbest__k":          k_grid,
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):
        return v.item()
    if isinstance(v, dict):
        return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) FUNCIONES DE EVALUACIÓN Y PLOTEO
# =========================
def cv_auc_manual(params, X, y, cv):
    aucs = []

    for tr_idx, va_idx in cv.split(X, y):
        Xtr = X.iloc[tr_idx].copy()
        Xva = X.iloc[va_idx].copy()
        ytr = y[tr_idx]
        yva = y[va_idx]

        model = make_pipe(params)
        model.fit(Xtr, ytr)

        scores = model.decision_function(Xva)
        if scores.ndim == 1:
            scores = scores.reshape(-1, 1)

        proba = softmax(scores, axis=1)
        auc = roc_auc_score(
            yva,
            proba,
            multi_class="ovr",
            average="macro"
        )
        aucs.append(auc)

    return float(np.mean(aucs)), float(np.std(aucs))

def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0

def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]); ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real", fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%", ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})", ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")

def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    auc_cv, sd_cv = cv_auc_manual(params, X_train, y_train_enc, cv)

    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)

    df_tr = model.decision_function(X_train)
    df_te = model.decision_function(X_test)

    if df_tr.ndim == 1:
        df_tr = df_tr.reshape(-1, 1)
    if df_te.ndim == 1:
        df_te = df_te.reshape(-1, 1)

    proba_tr = softmax(df_tr, axis=1)
    proba_te = softmax(df_te, axis=1)

    auc_train = roc_auc_score(y_train_enc, proba_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  proba_te, multi_class="ovr", average="macro")

    gap_cv  = round(auc_train - auc_cv, 4)
    gap_rep = round(auc_train - auc_test, 4)

    acc_train = accuracy_score(y_train_enc, yhat_tr)
    f1w_train = f1_score(y_train_enc, yhat_tr, average="weighted", zero_division=0)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro", zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc, yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc_train={acc_train:.4f} | F1w_train={f1w_train:.4f}")
    print(f"  Acc_test={acc_test:.4f} | F1w_test={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={acc_train:.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1w_train:.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":       tag,
        "AUC_CV":    round(auc_cv, 4),
        "SD_CV":     round(sd_cv, 4),
        "Gap_CV":    gap_cv,
        "AUC_rep":   round(auc_test, 4),
        "SD_rep":    round(sd_cv, 4),
        "Gap_rep":   gap_rep,
        "Acc_train": round(acc_train, 4),
        "F1w_train": round(f1w_train, 4),
        "Acc_test":  round(acc_test, 4),
        "F1w_test":  round(f1w_test, 4),
        "macroF1":   round(mf1_test, 4),
        "balacc":    round(bacc_test, 4),
    }

# =========================
# 7) EVALUAR TOP-5
# =========================
results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 8) TABLA RESUMEN + LATEX
# =========================
summary_df = pd.DataFrame(results_summary)

print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]

print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}" + "\n"
    r"\label{tab:svm_top5_auc_reales}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5_reales.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 9) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1438, 1023) (359, 1023)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 400, 1: 186, 2: 852}
Número de features originales : 1023
Número de features post-VT    : 1022
Grid kbest usado              : [128, 256, 384, 512, 768, 1022]


Tiempo búsqueda (s): 1149
Best robust CV: 0.5234135709118863
Best params: {'kbest__k': 1022, 'svc__C': 2.540458074225692, 'svc__class_weight': 'balanced', 'svc__gamma': 0.06395288218539359}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                             params
        0.523414        0.020234                {'kbest__k': 1022, 'svc__C': 2.540458074225692, 'svc__class_weight': 'balanced', 'svc__gamma': 0.06395288218539359}
        0.519272        0.023435              {'kbest__k': 1022, 'svc__C': 10.772186132342652, 'svc__class_weight': 'balanced', 'svc__gamma': 0.058258491495380586}
        0.493648        0.057261              {'kbest__k': 1022, 'svc__C': 784.8081929215672, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842176e-05}
        0.481633        0.020617  {'kbest__k': 1022, 'svc__C': 3.782991252375207, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.07842676320619435}
        0.436526        0.086372              {'kbest__k': 1022, 'svc__C': 34.57103872771001, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0002655521712973633}
        0.402986        0.155718             {'kbest__k': 1022, 'svc__C': 30.036641140654915, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0035753161317240768}
        0.383491        0.114271             {'kbest__k': 1022, 'svc__C': 1.2456144526856274, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0017316120634454752}
        0.202436        0.161274             {'kbest__k': 1022, 'svc__C': 0.6576892252744614, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0014851035064938562}
        0.103377        0.125536 {'kbest__k': 1022, 'svc__C': 852.6974999202719, 'svc__class_weight': {0: 3.0, 1: 6.0, 2: 1.0}, 'svc__gamma': 0.025892960925550088}
       -0.001291        0.339224   {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935036}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.7002±0.0152 | AUC_train=0.9195 | AUC_test=0.8484
  Gap_CV=0.2193 | Gap_rep=0.0711
  Acc_train=0.8227 | F1w_train=0.8230
  Acc_test=0.7632 | F1w_test=0.7620 | macroF1=0.7234 | balacc=0.7253

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.71      0.73      0.72       400
      Blazar       0.89      0.87      0.88       186
         QSO       0.86      0.86      0.86       852

    accuracy                           0.82      1438
   macro avg       0.82      0.82      0.82      1438
weighted avg       0.82      0.82      0.82      1438

  CM TEST
        AGN  Blazar  QSO
AGN      55       2   38
Blazar    4      22    3
QSO      32       6  197

  Report TEST
              precision    recall  f1-score   support

         AGN       0.60      0.58      0.59        95
      Blazar       0.73      0.76      0.75        29
         QSO       0.83      0.84      0.83       235

    accuracy                           0.76       359
   macro avg       0.72      0.73      0.72       359
weighted avg       0.76      0.76      0.76       359

  Figura guardada: /home/felorrieta/Catalina/SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.6857±0.0124 | AUC_train=0.9403 | AUC_test=0.8564
  Gap_CV=0.2546 | Gap_rep=0.0839
  Acc_train=0.8651 | F1w_train=0.8651
  Acc_test=0.8078 | F1w_test=0.8064 | macroF1=0.7733 | balacc=0.7706

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.78      0.78      0.78       400
      Blazar       0.95      0.90      0.92       186
         QSO       0.89      0.90      0.89       852

    accuracy                           0.87      1438
   macro avg       0.87      0.86      0.87      1438
weighted avg       0.87      0.87      0.87      1438

  CM TEST
        AGN  Blazar  QSO
AGN      61       3   31
Blazar    3      23    3
QSO      26       3  206

  Report TEST
              precision    recall  f1-score   support

         AGN       0.68      0.64      0.66        95
      Blazar       0.79      0.79      0.79        29
         QSO       0.86      0.88      0.87       235

    accuracy                           0.81       359
   macro avg       0.78      0.77      0.77       359
weighted avg       0.81      0.81      0.81       359

  Figura guardada: /home/felorrieta/Catalina/SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.7102±0.0309 | AUC_train=0.7974 | AUC_test=0.7484
  Gap_CV=0.0871 | Gap_rep=0.0490
  Acc_train=0.7010 | F1w_train=0.6898
  Acc_test=0.7019 | F1w_test=0.6870 | macroF1=0.5917 | balacc=0.5897

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.56      0.42      0.48       400
      Blazar       0.67      0.66      0.66       186
         QSO       0.75      0.84      0.79       852

    accuracy                           0.70      1438
   macro avg       0.66      0.64      0.65      1438
weighted avg       0.69      0.70      0.69      1438

  CM TEST
        AGN  Blazar  QSO
AGN      34       9   52
Blazar    5      16    8
QSO      28       5  202

  Report TEST
              precision    recall  f1-score   support

         AGN       0.51      0.36      0.42        95
      Blazar       0.53      0.55      0.54        29
         QSO       0.77      0.86      0.81       235

    accuracy                           0.70       359
   macro avg       0.60      0.59      0.59       359
weighted avg       0.68      0.70      0.69       359

  Figura guardada: /home/felorrieta/Catalina/SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.6568±0.0108 | AUC_train=0.9255 | AUC_test=0.8409
  Gap_CV=0.2687 | Gap_rep=0.0846
  Acc_train=0.7782 | F1w_train=0.7814
  Acc_test=0.6797 | F1w_test=0.6981 | macroF1=0.6207 | balacc=0.7196

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.67      0.81      0.74       400
      Blazar       0.64      0.97      0.77       186
         QSO       0.91      0.72      0.81       852

    accuracy                           0.78      1438
   macro avg       0.74      0.83      0.77      1438
weighted avg       0.81      0.78      0.78      1438

  CM TEST
        AGN  Blazar  QSO
AGN      63      14   18
Blazar    3      24    2
QSO      49      29  157

  Report TEST
              precision    recall  f1-score   support

         AGN       0.55      0.66      0.60        95
      Blazar       0.36      0.83      0.50        29
         QSO       0.89      0.67      0.76       235

    accuracy                           0.68       359
   macro avg       0.60      0.72      0.62       359
weighted avg       0.75      0.68      0.70       359

  Figura guardada: /home/felorrieta/Catalina/SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.6922±0.0229 | AUC_train=0.7634 | AUC_test=0.7227
  Gap_CV=0.0712 | Gap_rep=0.0407
  Acc_train=0.6641 | F1w_train=0.6454
  Acc_test=0.6825 | F1w_test=0.6612 | macroF1=0.5516 | balacc=0.5652

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.50      0.32      0.39       400
      Blazar       0.57      0.62      0.60       186
         QSO       0.73      0.84      0.78       852

    accuracy                           0.66      1438
   macro avg       0.60      0.59      0.59      1438
weighted avg       0.64      0.66      0.65      1438

  CM TEST
        AGN  Blazar  QSO
AGN      27      12   56
Blazar    4      16    9
QSO      25       8  202

  Report TEST
              precision    recall  f1-score   support

         AGN       0.48      0.28      0.36        95
      Blazar       0.44      0.55      0.49        29
         QSO       0.76      0.86      0.80       235

    accuracy                           0.68       359
   macro avg       0.56      0.57      0.55       359
weighted avg       0.66      0.68      0.66       359

  Figura guardada: /home/felorrieta/Catalina/SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_train  F1w_train  Acc_test  F1w_test  macroF1  balacc
SVM1  0.7002 0.0152  0.2193   0.8484  0.0152   0.0711     0.8227     0.8230    0.7632    0.7620   0.7234  0.7253
SVM2  0.6857 0.0124  0.2546   0.8564  0.0124   0.0839     0.8651     0.8651    0.8078    0.8064   0.7733  0.7706
SVM3  0.7102 0.0309  0.0871   0.7484  0.0309   0.0490     0.7010     0.6898    0.7019    0.6870   0.5917  0.5897
SVM4  0.6568 0.0108  0.2687   0.8409  0.0108   0.0846     0.7782     0.7814    0.6797    0.6981   0.6207  0.7196
SVM5  0.6922 0.0229  0.0712   0.7227  0.0229   0.0407     0.6641     0.6454    0.6825    0.6612   0.5516  0.5652

>>> Mejor modelo: SVM2  AUC_test=0.8564

Tabla LaTeX guardada en: /home/felorrieta/Catalina/tabla_svm_top5_reales.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.7002 & 0.0152 & 0.2193 & 0.8484 & 0.0152 & 0.0711 & 0.7632 & 0.7620 \\
        \rowcolor{BlueBest} SVM$_{2}$ & 0.6857 & 0.0124 & 0.2546 & 0.8564 & 0.0124 & 0.0839 & 0.8078 & 0.8064 \\
        SVM$_{3}$ & 0.7102 & 0.0309 & 0.0871 & 0.7484 & 0.0309 & 0.0490 & 0.7019 & 0.6870 \\
        SVM$_{4}$ & 0.6568 & 0.0108 & 0.2687 & 0.8409 & 0.0108 & 0.0846 & 0.6797 & 0.6981 \\
        SVM$_{5}$ & 0.6922 & 0.0229 & 0.0712 & 0.7227 & 0.0229 & 0.0407 & 0.6825 & 0.6612 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}
\label{tab:svm_top5_auc_reales}
\end{table}

Tiempo total: 00:23:25

ESIG LOGFIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS REALES
# =========================
x = pd.read_csv('/home/felorrieta/Catalina/logsignature_esig_REALES_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'/home/felorrieta/Catalina'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

# máximo real de features disponibles antes de kbest:
# imputación + variance threshold
imp_tmp = SimpleImputer(strategy="median")
X_tmp = imp_tmp.fit_transform(X_train)

vt_tmp = VarianceThreshold(0.0)
X_tmp_vt = vt_tmp.fit_transform(X_tmp)

n_features_after_vt = X_tmp_vt.shape[1]

k_grid_base = [128, 256, 384, 512, 768, 1023]
k_grid = [k for k in k_grid_base if k <= n_features_after_vt]

if n_features_after_vt not in k_grid:
    k_grid.append(n_features_after_vt)

k_grid = sorted(set(k_grid))

print("Número de features originales :", X_train.shape[1])
print("Número de features post-VT    :", n_features_after_vt)
print("Grid kbest usado              :", k_grid)

param_dist = {
    "kbest__k":          k_grid,
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):
        return v.item()
    if isinstance(v, dict):
        return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) FUNCIONES DE EVALUACIÓN Y PLOTEO
# =========================
def cv_auc_manual(params, X, y, cv):
    aucs = []

    for tr_idx, va_idx in cv.split(X, y):
        Xtr = X.iloc[tr_idx].copy()
        Xva = X.iloc[va_idx].copy()
        ytr = y[tr_idx]
        yva = y[va_idx]

        model = make_pipe(params)
        model.fit(Xtr, ytr)

        scores = model.decision_function(Xva)
        if scores.ndim == 1:
            scores = scores.reshape(-1, 1)

        proba = softmax(scores, axis=1)
        auc = roc_auc_score(
            yva,
            proba,
            multi_class="ovr",
            average="macro"
        )
        aucs.append(auc)

    return float(np.mean(aucs)), float(np.std(aucs))

def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0

def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]); ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real", fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%", ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})", ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")

def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    auc_cv, sd_cv = cv_auc_manual(params, X_train, y_train_enc, cv)

    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)

    df_tr = model.decision_function(X_train)
    df_te = model.decision_function(X_test)

    if df_tr.ndim == 1:
        df_tr = df_tr.reshape(-1, 1)
    if df_te.ndim == 1:
        df_te = df_te.reshape(-1, 1)

    proba_tr = softmax(df_tr, axis=1)
    proba_te = softmax(df_te, axis=1)

    auc_train = roc_auc_score(y_train_enc, proba_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  proba_te, multi_class="ovr", average="macro")

    gap_cv  = round(auc_train - auc_cv, 4)
    gap_rep = round(auc_train - auc_test, 4)

    acc_train = accuracy_score(y_train_enc, yhat_tr)
    f1w_train = f1_score(y_train_enc, yhat_tr, average="weighted", zero_division=0)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro", zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc, yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc_train={acc_train:.4f} | F1w_train={f1w_train:.4f}")
    print(f"  Acc_test={acc_test:.4f} | F1w_test={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={acc_train:.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1w_train:.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":       tag,
        "AUC_CV":    round(auc_cv, 4),
        "SD_CV":     round(sd_cv, 4),
        "Gap_CV":    gap_cv,
        "AUC_rep":   round(auc_test, 4),
        "SD_rep":    round(sd_cv, 4),
        "Gap_rep":   gap_rep,
        "Acc_train": round(acc_train, 4),
        "F1w_train": round(f1w_train, 4),
        "Acc_test":  round(acc_test, 4),
        "F1w_test":  round(f1w_test, 4),
        "macroF1":   round(mf1_test, 4),
        "balacc":    round(bacc_test, 4),
    }

# =========================
# 7) EVALUAR TOP-5
# =========================
results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 8) TABLA RESUMEN + LATEX
# =========================
summary_df = pd.DataFrame(results_summary)

print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]

print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}" + "\n"
    r"\label{tab:svm_top5_auc_reales}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5_reales.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 9) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1438, 127) (359, 127)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 400, 1: 186, 2: 852}
Número de features originales : 127
Número de features post-VT    : 127
Grid kbest usado              : [127]


Tiempo búsqueda (s): 162
Best robust CV: 0.3703095759597865
Best params: {'kbest__k': 127, 'svc__C': 212.3514489840471, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.027293781650374747}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                            params
        0.370310        0.036511 {'kbest__k': 127, 'svc__C': 212.3514489840471, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.027293781650374747}
        0.330110        0.120751 {'kbest__k': 127, 'svc__C': 1210.8812479497356, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.04298276297821863}
        0.261995        0.163258 {'kbest__k': 127, 'svc__C': 1558.2727568596022, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.04929053648241675}
        0.207006        0.086388              {'kbest__k': 127, 'svc__C': 10.727228488423368, 'svc__class_weight': 'balanced', 'svc__gamma': 0.004376044634724402}
        0.165991        0.121830             {'kbest__k': 127, 'svc__C': 14.322952287566451, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0016224010950061374}
        0.097500        0.067387             {'kbest__k': 127, 'svc__C': 1793.8906019043648, 'svc__class_weight': 'balanced', 'svc__gamma': 9.853225172032555e-06}
        0.090816        0.057863             {'kbest__k': 127, 'svc__C': 755.5775804589694, 'svc__class_weight': 'balanced', 'svc__gamma': 2.4794580392016226e-05}
        0.038322        0.081904              {'kbest__k': 127, 'svc__C': 5.042794667153304, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0014851035064938562}
        0.034671        0.095839              {'kbest__k': 127, 'svc__C': 34.57103872771001, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0002655521712973633}
        0.000748        0.134132             {'kbest__k': 127, 'svc__C': 1.1102063605007702, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0028489753426125634}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.5903±0.0205 | AUC_train=0.8034 | AUC_test=0.7368
  Gap_CV=0.2131 | Gap_rep=0.0665
  Acc_train=0.6384 | F1w_train=0.6496
  Acc_test=0.5933 | F1w_test=0.6230 | macroF1=0.5250 | balacc=0.6321

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.56      0.52      0.54       400
      Blazar       0.40      0.94      0.57       186
         QSO       0.84      0.63      0.72       852

    accuracy                           0.64      1438
   macro avg       0.60      0.70      0.61      1438
weighted avg       0.71      0.64      0.65      1438

  CM TEST
        AGN  Blazar  QSO
AGN      47      26   22
Blazar    2      23    4
QSO      49      43  143

  Report TEST
              precision    recall  f1-score   support

         AGN       0.48      0.49      0.49        95
      Blazar       0.25      0.79      0.38        29
         QSO       0.85      0.61      0.71       235

    accuracy                           0.59       359
   macro avg       0.53      0.63      0.53       359
weighted avg       0.70      0.59      0.62       359

  Figura guardada: /home/felorrieta/Catalina/SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.5902±0.0100 | AUC_train=0.8603 | AUC_test=0.7667
  Gap_CV=0.2700 | Gap_rep=0.0935
  Acc_train=0.7587 | F1w_train=0.7638
  Acc_test=0.6657 | F1w_test=0.6825 | macroF1=0.5914 | balacc=0.6715

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.70      0.69      0.69       400
      Blazar       0.55      0.95      0.70       186
         QSO       0.88      0.75      0.81       852

    accuracy                           0.76      1438
   macro avg       0.71      0.80      0.73      1438
weighted avg       0.79      0.76      0.76      1438

  CM TEST
        AGN  Blazar  QSO
AGN      53      15   27
Blazar    4      22    3
QSO      42      29  164

  Report TEST
              precision    recall  f1-score   support

         AGN       0.54      0.56      0.55        95
      Blazar       0.33      0.76      0.46        29
         QSO       0.85      0.70      0.76       235

    accuracy                           0.67       359
   macro avg       0.57      0.67      0.59       359
weighted avg       0.72      0.67      0.68       359

  Figura guardada: /home/felorrieta/Catalina/SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.5650±0.0157 | AUC_train=0.8706 | AUC_test=0.7809
  Gap_CV=0.3056 | Gap_rep=0.0897
  Acc_train=0.7267 | F1w_train=0.7309
  Acc_test=0.6462 | F1w_test=0.6646 | macroF1=0.6030 | balacc=0.6870

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.56      0.85      0.68       400
      Blazar       0.64      0.95      0.77       186
         QSO       0.94      0.62      0.75       852

    accuracy                           0.73      1438
   macro avg       0.72      0.81      0.73      1438
weighted avg       0.80      0.73      0.73      1438

  CM TEST
        AGN  Blazar  QSO
AGN      70      12   13
Blazar    6      21    2
QSO      75      19  141

  Report TEST
              precision    recall  f1-score   support

         AGN       0.46      0.74      0.57        95
      Blazar       0.40      0.72      0.52        29
         QSO       0.90      0.60      0.72       235

    accuracy                           0.65       359
   macro avg       0.59      0.69      0.60       359
weighted avg       0.75      0.65      0.66       359

  Figura guardada: /home/felorrieta/Catalina/SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.6608±0.0258 | AUC_train=0.7171 | AUC_test=0.7010
  Gap_CV=0.0563 | Gap_rep=0.0160
  Acc_train=0.6307 | F1w_train=0.5982
  Acc_test=0.6546 | F1w_test=0.6175 | macroF1=0.4900 | balacc=0.5099

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.44      0.21      0.29       400
      Blazar       0.55      0.59      0.57       186
         QSO       0.68      0.84      0.75       852

    accuracy                           0.63      1438
   macro avg       0.56      0.55      0.54      1438
weighted avg       0.60      0.63      0.60      1438

  CM TEST
        AGN  Blazar  QSO
AGN      17      11   67
Blazar    4      14   11
QSO      20      11  204

  Report TEST
              precision    recall  f1-score   support

         AGN       0.41      0.18      0.25        95
      Blazar       0.39      0.48      0.43        29
         QSO       0.72      0.87      0.79       235

    accuracy                           0.65       359
   macro avg       0.51      0.51      0.49       359
weighted avg       0.61      0.65      0.62       359

  Figura guardada: /home/felorrieta/Catalina/SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.6566±0.0236 | AUC_train=0.6877 | AUC_test=0.6876
  Gap_CV=0.0312 | Gap_rep=0.0001
  Acc_train=0.6134 | F1w_train=0.5708
  Acc_test=0.6435 | F1w_test=0.6020 | macroF1=0.4708 | balacc=0.4980

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.42      0.15      0.22       400
      Blazar       0.47      0.59      0.52       186
         QSO       0.67      0.84      0.74       852

    accuracy                           0.61      1438
   macro avg       0.52      0.52      0.50      1438
weighted avg       0.57      0.61      0.57      1438

  CM TEST
        AGN  Blazar  QSO
AGN      14      13   68
Blazar    2      14   13
QSO      21      11  203

  Report TEST
              precision    recall  f1-score   support

         AGN       0.38      0.15      0.21        95
      Blazar       0.37      0.48      0.42        29
         QSO       0.71      0.86      0.78       235

    accuracy                           0.64       359
   macro avg       0.49      0.50      0.47       359
weighted avg       0.60      0.64      0.60       359

  Figura guardada: /home/felorrieta/Catalina/SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_train  F1w_train  Acc_test  F1w_test  macroF1  balacc
SVM1  0.5903 0.0205  0.2131   0.7368  0.0205   0.0665     0.6384     0.6496    0.5933    0.6230   0.5250  0.6321
SVM2  0.5902 0.0100  0.2700   0.7667  0.0100   0.0935     0.7587     0.7638    0.6657    0.6825   0.5914  0.6715
SVM3  0.5650 0.0157  0.3056   0.7809  0.0157   0.0897     0.7267     0.7309    0.6462    0.6646   0.6030  0.6870
SVM4  0.6608 0.0258  0.0563   0.7010  0.0258   0.0160     0.6307     0.5982    0.6546    0.6175   0.4900  0.5099
SVM5  0.6566 0.0236  0.0312   0.6876  0.0236   0.0001     0.6134     0.5708    0.6435    0.6020   0.4708  0.4980

>>> Mejor modelo: SVM3  AUC_test=0.7809

Tabla LaTeX guardada en: /home/felorrieta/Catalina/tabla_svm_top5_reales.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.5903 & 0.0205 & 0.2131 & 0.7368 & 0.0205 & 0.0665 & 0.5933 & 0.6230 \\
        SVM$_{2}$ & 0.5902 & 0.0100 & 0.2700 & 0.7667 & 0.0100 & 0.0935 & 0.6657 & 0.6825 \\
        \rowcolor{BlueBest} SVM$_{3}$ & 0.5650 & 0.0157 & 0.3056 & 0.7809 & 0.0157 & 0.0897 & 0.6462 & 0.6646 \\
        SVM$_{4}$ & 0.6608 & 0.0258 & 0.0563 & 0.7010 & 0.0258 & 0.0160 & 0.6546 & 0.6175 \\
        SVM$_{5}$ & 0.6566 & 0.0236 & 0.0312 & 0.6876 & 0.0236 & 0.0001 & 0.6435 & 0.6020 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}
\label{tab:svm_top5_auc_reales}
\end{table}

Tiempo total: 00:03:29

IISIGNATURE FIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS REALES
# =========================
x = pd.read_csv('/home/felorrieta/Downloads/path_signature_iisignature_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'/home/felorrieta/Catalina'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

# máximo real de features disponibles antes de kbest:
# imputación + variance threshold
imp_tmp = SimpleImputer(strategy="median")
X_tmp = imp_tmp.fit_transform(X_train)

vt_tmp = VarianceThreshold(0.0)
X_tmp_vt = vt_tmp.fit_transform(X_tmp)

n_features_after_vt = X_tmp_vt.shape[1]

k_grid_base = [128, 256, 384, 512, 768, 1023]
k_grid = [k for k in k_grid_base if k <= n_features_after_vt]

if n_features_after_vt not in k_grid:
    k_grid.append(n_features_after_vt)

k_grid = sorted(set(k_grid))

print("Número de features originales :", X_train.shape[1])
print("Número de features post-VT    :", n_features_after_vt)
print("Grid kbest usado              :", k_grid)

param_dist = {
    "kbest__k":          k_grid,
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):
        return v.item()
    if isinstance(v, dict):
        return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) FUNCIONES DE EVALUACIÓN Y PLOTEO
# =========================
def cv_auc_manual(params, X, y, cv):
    aucs = []

    for tr_idx, va_idx in cv.split(X, y):
        Xtr = X.iloc[tr_idx].copy()
        Xva = X.iloc[va_idx].copy()
        ytr = y[tr_idx]
        yva = y[va_idx]

        model = make_pipe(params)
        model.fit(Xtr, ytr)

        scores = model.decision_function(Xva)
        if scores.ndim == 1:
            scores = scores.reshape(-1, 1)

        proba = softmax(scores, axis=1)
        auc = roc_auc_score(
            yva,
            proba,
            multi_class="ovr",
            average="macro"
        )
        aucs.append(auc)

    return float(np.mean(aucs)), float(np.std(aucs))

def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0

def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]); ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real", fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%", ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})", ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")

def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    auc_cv, sd_cv = cv_auc_manual(params, X_train, y_train_enc, cv)

    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)

    df_tr = model.decision_function(X_train)
    df_te = model.decision_function(X_test)

    if df_tr.ndim == 1:
        df_tr = df_tr.reshape(-1, 1)
    if df_te.ndim == 1:
        df_te = df_te.reshape(-1, 1)

    proba_tr = softmax(df_tr, axis=1)
    proba_te = softmax(df_te, axis=1)

    auc_train = roc_auc_score(y_train_enc, proba_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  proba_te, multi_class="ovr", average="macro")

    gap_cv  = round(auc_train - auc_cv, 4)
    gap_rep = round(auc_train - auc_test, 4)

    acc_train = accuracy_score(y_train_enc, yhat_tr)
    f1w_train = f1_score(y_train_enc, yhat_tr, average="weighted", zero_division=0)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro", zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc, yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc_train={acc_train:.4f} | F1w_train={f1w_train:.4f}")
    print(f"  Acc_test={acc_test:.4f} | F1w_test={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={acc_train:.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1w_train:.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":       tag,
        "AUC_CV":    round(auc_cv, 4),
        "SD_CV":     round(sd_cv, 4),
        "Gap_CV":    gap_cv,
        "AUC_rep":   round(auc_test, 4),
        "SD_rep":    round(sd_cv, 4),
        "Gap_rep":   gap_rep,
        "Acc_train": round(acc_train, 4),
        "F1w_train": round(f1w_train, 4),
        "Acc_test":  round(acc_test, 4),
        "F1w_test":  round(f1w_test, 4),
        "macroF1":   round(mf1_test, 4),
        "balacc":    round(bacc_test, 4),
    }

# =========================
# 7) EVALUAR TOP-5
# =========================
results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 8) TABLA RESUMEN + LATEX
# =========================
summary_df = pd.DataFrame(results_summary)

print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]

print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}" + "\n"
    r"\label{tab:svm_top5_auc_reales}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5_reales.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 9) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 1022) (377, 1022)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 422, 1: 189, 2: 899}
Número de features originales : 1022
Número de features post-VT    : 1022
Grid kbest usado              : [128, 256, 384, 512, 768, 1022]


Tiempo búsqueda (s): 1160
Best robust CV: 0.5443172816015224
Best params: {'kbest__k': 1022, 'svc__C': 784.8081929215672, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842176e-05}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                            params
        0.544317        0.007625             {'kbest__k': 1022, 'svc__C': 784.8081929215672, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842176e-05}
        0.534014        0.023643               {'kbest__k': 1022, 'svc__C': 2.540458074225692, 'svc__class_weight': 'balanced', 'svc__gamma': 0.06395288218539359}
        0.530168        0.024917             {'kbest__k': 1022, 'svc__C': 10.772186132342652, 'svc__class_weight': 'balanced', 'svc__gamma': 0.058258491495380586}
        0.529176        0.022424             {'kbest__k': 1022, 'svc__C': 34.57103872771001, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0002655521712973633}
        0.516400        0.024635 {'kbest__k': 1022, 'svc__C': 3.782991252375207, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.07842676320619435}
        0.511990        0.018056 {'kbest__k': 512, 'svc__C': 1221.212458656701, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.015436560150278347}
        0.499701        0.043360            {'kbest__k': 1022, 'svc__C': 30.036641140654915, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0035753161317240768}
        0.483330        0.071734            {'kbest__k': 1022, 'svc__C': 1.2456144526856274, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0017316120634454752}
        0.357605        0.112258            {'kbest__k': 1022, 'svc__C': 0.6576892252744614, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0014851035064938562}
        0.295434        0.231032  {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935036}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.7320±0.0056 | AUC_train=0.8010 | AUC_test=0.7609
  Gap_CV=0.0691 | Gap_rep=0.0402
  Acc_train=0.7126 | F1w_train=0.7030
  Acc_test=0.7135 | F1w_test=0.6999 | macroF1=0.6212 | balacc=0.6160

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.57      0.45      0.51       422
      Blazar       0.72      0.63      0.68       189
         QSO       0.76      0.85      0.80       899

    accuracy                           0.71      1510
   macro avg       0.68      0.65      0.66      1510
weighted avg       0.70      0.71      0.70      1510

  CM TEST
        AGN  Blazar  QSO
AGN      39       7   55
Blazar    5      18    7
QSO      29       5  212

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.39      0.45       101
      Blazar       0.60      0.60      0.60        30
         QSO       0.77      0.86      0.82       246

    accuracy                           0.71       377
   macro avg       0.64      0.62      0.62       377
weighted avg       0.70      0.71      0.70       377

  Figura guardada: /home/felorrieta/Catalina/SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.7069±0.0146 | AUC_train=0.9198 | AUC_test=0.8313
  Gap_CV=0.2129 | Gap_rep=0.0885
  Acc_train=0.8245 | F1w_train=0.8252
  Acc_test=0.7215 | F1w_test=0.7198 | macroF1=0.6731 | balacc=0.7078

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.71      0.74      0.72       422
      Blazar       0.87      0.87      0.87       189
         QSO       0.87      0.86      0.86       899

    accuracy                           0.82      1510
   macro avg       0.82      0.82      0.82      1510
weighted avg       0.83      0.82      0.83      1510

  CM TEST
        AGN  Blazar  QSO
AGN      49       9   43
Blazar    3      25    2
QSO      41       7  198

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.49      0.51       101
      Blazar       0.61      0.83      0.70        30
         QSO       0.81      0.80      0.81       246

    accuracy                           0.72       377
   macro avg       0.65      0.71      0.67       377
weighted avg       0.72      0.72      0.72       377

  Figura guardada: /home/felorrieta/Catalina/SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.6988±0.0197 | AUC_train=0.9442 | AUC_test=0.8469
  Gap_CV=0.2454 | Gap_rep=0.0973
  Acc_train=0.8662 | F1w_train=0.8669
  Acc_test=0.7613 | F1w_test=0.7604 | macroF1=0.7206 | balacc=0.7437

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.77      0.81      0.79       422
      Blazar       0.92      0.92      0.92       189
         QSO       0.90      0.88      0.89       899

    accuracy                           0.87      1510
   macro avg       0.86      0.87      0.87      1510
weighted avg       0.87      0.87      0.87      1510

  CM TEST
        AGN  Blazar  QSO
AGN      57       7   37
Blazar    3      25    2
QSO      36       5  205

  Report TEST
              precision    recall  f1-score   support

         AGN       0.59      0.56      0.58       101
      Blazar       0.68      0.83      0.75        30
         QSO       0.84      0.83      0.84       246

    accuracy                           0.76       377
   macro avg       0.70      0.74      0.72       377
weighted avg       0.76      0.76      0.76       377

  Figura guardada: /home/felorrieta/Catalina/SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.7270±0.0145 | AUC_train=0.7751 | AUC_test=0.7370
  Gap_CV=0.0480 | Gap_rep=0.0381
  Acc_train=0.6821 | F1w_train=0.6669
  Acc_test=0.7003 | F1w_test=0.6810 | macroF1=0.5985 | balacc=0.6073

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.53      0.37      0.44       422
      Blazar       0.66      0.61      0.64       189
         QSO       0.73      0.84      0.78       899

    accuracy                           0.68      1510
   macro avg       0.64      0.61      0.62      1510
weighted avg       0.66      0.68      0.67      1510

  CM TEST
        AGN  Blazar  QSO
AGN      33       9   59
Blazar    2      19    9
QSO      27       7  212

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.33      0.40       101
      Blazar       0.54      0.63      0.58        30
         QSO       0.76      0.86      0.81       246

    accuracy                           0.70       377
   macro avg       0.61      0.61      0.60       377
weighted avg       0.68      0.70      0.68       377

  Figura guardada: /home/felorrieta/Catalina/SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.6850±0.0187 | AUC_train=0.9247 | AUC_test=0.8254
  Gap_CV=0.2397 | Gap_rep=0.0993
  Acc_train=0.8053 | F1w_train=0.8088
  Acc_test=0.7056 | F1w_test=0.7195 | macroF1=0.6419 | balacc=0.7328

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.70      0.81      0.75       422
      Blazar       0.67      0.98      0.79       189
         QSO       0.93      0.77      0.84       899

    accuracy                           0.81      1510
   macro avg       0.76      0.85      0.79      1510
weighted avg       0.83      0.81      0.81      1510

  CM TEST
        AGN  Blazar  QSO
AGN      61      17   23
Blazar    3      26    1
QSO      45      22  179

  Report TEST
              precision    recall  f1-score   support

         AGN       0.56      0.60      0.58       101
      Blazar       0.40      0.87      0.55        30
         QSO       0.88      0.73      0.80       246

    accuracy                           0.71       377
   macro avg       0.61      0.73      0.64       377
weighted avg       0.76      0.71      0.72       377

  Figura guardada: /home/felorrieta/Catalina/SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_train  F1w_train  Acc_test  F1w_test  macroF1  balacc
SVM1  0.7320 0.0056  0.0691   0.7609  0.0056   0.0402     0.7126     0.7030    0.7135    0.6999   0.6212  0.6160
SVM2  0.7069 0.0146  0.2129   0.8313  0.0146   0.0885     0.8245     0.8252    0.7215    0.7198   0.6731  0.7078
SVM3  0.6988 0.0197  0.2454   0.8469  0.0197   0.0973     0.8662     0.8669    0.7613    0.7604   0.7206  0.7437
SVM4  0.7270 0.0145  0.0480   0.7370  0.0145   0.0381     0.6821     0.6669    0.7003    0.6810   0.5985  0.6073
SVM5  0.6850 0.0187  0.2397   0.8254  0.0187   0.0993     0.8053     0.8088    0.7056    0.7195   0.6419  0.7328

>>> Mejor modelo: SVM3  AUC_test=0.8469

Tabla LaTeX guardada en: /home/felorrieta/Catalina/tabla_svm_top5_reales.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.7320 & 0.0056 & 0.0691 & 0.7609 & 0.0056 & 0.0402 & 0.7135 & 0.6999 \\
        SVM$_{2}$ & 0.7069 & 0.0146 & 0.2129 & 0.8313 & 0.0146 & 0.0885 & 0.7215 & 0.7198 \\
        \rowcolor{BlueBest} SVM$_{3}$ & 0.6988 & 0.0197 & 0.2454 & 0.8469 & 0.0197 & 0.0973 & 0.7613 & 0.7604 \\
        SVM$_{4}$ & 0.7270 & 0.0145 & 0.0480 & 0.7370 & 0.0145 & 0.0381 & 0.7003 & 0.6810 \\
        SVM$_{5}$ & 0.6850 & 0.0187 & 0.2397 & 0.8254 & 0.0187 & 0.0993 & 0.7056 & 0.7195 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}
\label{tab:svm_top5_auc_reales}
\end{table}

Tiempo total: 00:23:53

IISIGNATURE LOGFIRMA SVM

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) DATOS REALES
# =========================
x = pd.read_csv('/home/felorrieta/Downloads/log_signature_iisignature_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

data_train = data.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test  = data.drop(data_train.index).reset_index(drop=True)

X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

DOWNLOADS = r'/home/felorrieta/Catalina'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    mf1     = f1_score(y_true, y_pred, average="macro", zero_division=0)
    bacc    = balanced_accuracy_score(y_true, y_pred)
    recalls = recall_score(y_true, y_pred, average=None, zero_division=0)
    minR    = float(np.min(recalls))
    penalty = 0.0 if minR >= 0.30 else (0.30 - minR) * 2.0
    return 0.50 * mf1 + 0.40 * bacc + 0.10 * minR - penalty

robust_scorer = make_scorer(robust_score)

# =========================
# 2) PIPELINE CON SMOTE
# =========================
min_class_count = min(np.bincount(y_train_enc))
smote_k = min(5, min_class_count - 1)

def make_pipe(params=None):
    p = ImbPipeline(steps=[
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000))
    ])
    if params:
        p.set_params(**params)
    return p

pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

# máximo real de features disponibles antes de kbest:
# imputación + variance threshold
imp_tmp = SimpleImputer(strategy="median")
X_tmp = imp_tmp.fit_transform(X_train)

vt_tmp = VarianceThreshold(0.0)
X_tmp_vt = vt_tmp.fit_transform(X_tmp)

n_features_after_vt = X_tmp_vt.shape[1]

k_grid_base = [128, 256, 384, 512, 768, 1023]
k_grid = [k for k in k_grid_base if k <= n_features_after_vt]

if n_features_after_vt not in k_grid:
    k_grid.append(n_features_after_vt)

k_grid = sorted(set(k_grid))

print("Número de features originales :", X_train.shape[1])
print("Número de features post-VT    :", n_features_after_vt)
print("Grid kbest usado              :", k_grid)

param_dist = {
    "kbest__k":          k_grid,
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

n_fits = 80 * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    if isinstance(v, np.generic):
        return v.item()
    if isinstance(v, dict):
        return tuple(sorted((int(k), float(w)) for k, w in v.items()))
    return v

def _params_key(d):
    return tuple(sorted((k, _canon_value(v)) for k, v in d.items()))

res = pd.DataFrame(rs.cv_results_).copy()
res["params_key"] = res["params"].apply(_params_key)
res = res.sort_values(["mean_test_score", "std_test_score"], ascending=[False, True]).reset_index(drop=True)
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)
top10 = res_unique.head(10)[["mean_test_score", "std_test_score", "params"]].copy()

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) FUNCIONES DE EVALUACIÓN Y PLOTEO
# =========================
def cv_auc_manual(params, X, y, cv):
    aucs = []

    for tr_idx, va_idx in cv.split(X, y):
        Xtr = X.iloc[tr_idx].copy()
        Xva = X.iloc[va_idx].copy()
        ytr = y[tr_idx]
        yva = y[va_idx]

        model = make_pipe(params)
        model.fit(Xtr, ytr)

        scores = model.decision_function(Xva)
        if scores.ndim == 1:
            scores = scores.reshape(-1, 1)

        proba = softmax(scores, axis=1)
        auc = roc_auc_score(
            yva,
            proba,
            multi_class="ovr",
            average="macro"
        )
        aucs.append(auc)

    return float(np.mean(aucs)), float(np.std(aucs))

def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0

def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    fig = plt.figure(figsize=(10.8, 4.8))
    gs = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)
    ax1    = fig.add_subplot(gs[0, 0])
    ax_gap = fig.add_subplot(gs[0, 1]); ax_gap.axis("off")
    ax2    = fig.add_subplot(gs[0, 2])
    ax_cb  = fig.add_subplot(gs[0, 3])

    im = None
    for ax, cm_pct, cm_cnt, title in [
        (ax1, cm_tr_pct, cm_train, "Train"),
        (ax2, cm_te_pct, cm_test,  "Test"),
    ]:
        im = ax.imshow(cm_pct, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xticks(np.arange(len(labels)))
        ax.set_yticks(np.arange(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real", fontsize=label_fontsize)

        for i in range(cm_pct.shape[0]):
            for j in range(cm_pct.shape[1]):
                pct = cm_pct[i, j]
                cnt = int(cm_cnt[i, j])
                color_txt = "white" if pct > 50 else "black"
                ax.text(j, i - 0.10, f"{pct:.1f}%", ha="center", va="center",
                        color=color_txt, fontsize=10, fontweight="bold")
                ax.text(j, i + 0.22, f"({cnt})", ha="center", va="center",
                        color=color_txt, fontsize=7)

    fig.colorbar(im, cax=ax_cb, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"  Figura guardada: {fname}")

def eval_one(params, tag):
    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    auc_cv, sd_cv = cv_auc_manual(params, X_train, y_train_enc, cv)

    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)

    df_tr = model.decision_function(X_train)
    df_te = model.decision_function(X_test)

    if df_tr.ndim == 1:
        df_tr = df_tr.reshape(-1, 1)
    if df_te.ndim == 1:
        df_te = df_te.reshape(-1, 1)

    proba_tr = softmax(df_tr, axis=1)
    proba_te = softmax(df_te, axis=1)

    auc_train = roc_auc_score(y_train_enc, proba_tr, multi_class="ovr", average="macro")
    auc_test  = roc_auc_score(y_test_enc,  proba_te, multi_class="ovr", average="macro")

    gap_cv  = round(auc_train - auc_cv, 4)
    gap_rep = round(auc_train - auc_test, 4)

    acc_train = accuracy_score(y_train_enc, yhat_tr)
    f1w_train = f1_score(y_train_enc, yhat_tr, average="weighted", zero_division=0)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro", zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc, yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc_train={acc_train:.4f} | F1w_train={f1w_train:.4f}")
    print(f"  Acc_test={acc_test:.4f} | F1w_test={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={acc_train:.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1w_train:.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":       tag,
        "AUC_CV":    round(auc_cv, 4),
        "SD_CV":     round(sd_cv, 4),
        "Gap_CV":    gap_cv,
        "AUC_rep":   round(auc_test, 4),
        "SD_rep":    round(sd_cv, 4),
        "Gap_rep":   gap_rep,
        "Acc_train": round(acc_train, 4),
        "F1w_train": round(f1w_train, 4),
        "Acc_test":  round(acc_test, 4),
        "F1w_test":  round(f1w_test, 4),
        "macroF1":   round(mf1_test, 4),
        "balacc":    round(bacc_test, 4),
    }

# =========================
# 7) EVALUAR TOP-5
# =========================
results_summary = []
for i, row in enumerate(top10.head(5).itertuples(index=False), start=1):
    r = eval_one(row.params, f"SVM{i}")
    results_summary.append(r)

# =========================
# 8) TABLA RESUMEN + LATEX
# =========================
summary_df = pd.DataFrame(results_summary)

print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]

print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    latex_rows.append(
        f"        {highlight}SVM$_{{{r['tag'][-1]}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}" + "\n"
    r"\label{tab:svm_top5_auc_reales}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5_reales.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 9) TIEMPO TOTAL
# =========================
elapsed = int(time.time() - start_time)
h, rem = divmod(elapsed, 3600)
m, s = divmod(rem, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 127) (377, 127)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 422, 1: 189, 2: 899}
Número de features originales : 127
Número de features post-VT    : 127
Grid kbest usado              : [127]


Tiempo búsqueda (s): 162
Best robust CV: 0.4398444155347854
Best params: {'kbest__k': 127, 'svc__C': 212.3514489840471, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.027293781650374747}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                             params
        0.439844        0.011299  {'kbest__k': 127, 'svc__C': 212.3514489840471, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.027293781650374747}
        0.360285        0.089240  {'kbest__k': 127, 'svc__C': 1210.8812479497356, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.04298276297821863}
        0.324175        0.093931  {'kbest__k': 127, 'svc__C': 1558.2727568596022, 'svc__class_weight': {0: 1.5, 1: 3.5, 2: 0.8}, 'svc__gamma': 0.04929053648241675}
        0.298744        0.196596 {'kbest__k': 127, 'svc__C': 1070.9566477401963, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.003012464909249056}
        0.231564        0.105747  {'kbest__k': 127, 'svc__C': 742.1094824436065, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.010979988817809663}
        0.231414        0.020787               {'kbest__k': 127, 'svc__C': 10.727228488423368, 'svc__class_weight': 'balanced', 'svc__gamma': 0.004376044634724402}
        0.136623        0.048075              {'kbest__k': 127, 'svc__C': 14.322952287566451, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0016224010950061374}
        0.083460        0.103483              {'kbest__k': 127, 'svc__C': 755.5775804589694, 'svc__class_weight': 'balanced', 'svc__gamma': 2.4794580392016226e-05}
        0.075111        0.112170              {'kbest__k': 127, 'svc__C': 1793.8906019043648, 'svc__class_weight': 'balanced', 'svc__gamma': 9.853225172032555e-06}
        0.053129        0.084173               {'kbest__k': 127, 'svc__C': 34.57103872771001, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0002655521712973633}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.6165±0.0073 | AUC_train=0.8214 | AUC_test=0.7161
  Gap_CV=0.2048 | Gap_rep=0.1053
  Acc_train=0.6914 | F1w_train=0.6994
  Acc_test=0.5915 | F1w_test=0.6068 | macroF1=0.5074 | balacc=0.6044

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.59      0.55      0.57       422
      Blazar       0.46      0.89      0.61       189
         QSO       0.85      0.72      0.78       899

    accuracy                           0.69      1510
   macro avg       0.64      0.72      0.65      1510
weighted avg       0.73      0.69      0.70      1510

  CM TEST
        AGN  Blazar  QSO
AGN      35      28   38
Blazar    3      24    3
QSO      52      30  164

  Report TEST
              precision    recall  f1-score   support

         AGN       0.39      0.35      0.37       101
      Blazar       0.29      0.80      0.43        30
         QSO       0.80      0.67      0.73       246

    accuracy                           0.59       377
   macro avg       0.49      0.60      0.51       377
weighted avg       0.65      0.59      0.61       377

  Figura guardada: /home/felorrieta/Catalina/SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.6072±0.0030 | AUC_train=0.8687 | AUC_test=0.7528
  Gap_CV=0.2615 | Gap_rep=0.1160
  Acc_train=0.7808 | F1w_train=0.7844
  Acc_test=0.6737 | F1w_test=0.6829 | macroF1=0.6040 | balacc=0.6756

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.68      0.71      0.69       422
      Blazar       0.62      0.95      0.75       189
         QSO       0.90      0.78      0.83       899

    accuracy                           0.78      1510
   macro avg       0.73      0.81      0.76      1510
weighted avg       0.80      0.78      0.78      1510

  CM TEST
        AGN  Blazar  QSO
AGN      50      17   34
Blazar    3      24    3
QSO      48      18  180

  Report TEST
              precision    recall  f1-score   support

         AGN       0.50      0.50      0.50       101
      Blazar       0.41      0.80      0.54        30
         QSO       0.83      0.73      0.78       246

    accuracy                           0.67       377
   macro avg       0.58      0.68      0.60       377
weighted avg       0.71      0.67      0.68       377

  Figura guardada: /home/felorrieta/Catalina/SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.6027±0.0039 | AUC_train=0.8835 | AUC_test=0.7707
  Gap_CV=0.2808 | Gap_rep=0.1128
  Acc_train=0.7861 | F1w_train=0.7906
  Acc_test=0.6790 | F1w_test=0.6921 | macroF1=0.6240 | balacc=0.6919

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.64      0.82      0.72       422
      Blazar       0.69      0.94      0.80       189
         QSO       0.93      0.74      0.82       899

    accuracy                           0.79      1510
   macro avg       0.75      0.83      0.78      1510
weighted avg       0.82      0.79      0.79      1510

  CM TEST
        AGN  Blazar  QSO
AGN      62      13   26
Blazar    4      23    3
QSO      58      17  171

  Report TEST
              precision    recall  f1-score   support

         AGN       0.50      0.61      0.55       101
      Blazar       0.43      0.77      0.55        30
         QSO       0.85      0.70      0.77       246

    accuracy                           0.68       377
   macro avg       0.60      0.69      0.62       377
weighted avg       0.73      0.68      0.69       377

  Figura guardada: /home/felorrieta/Catalina/SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.6304±0.0279 | AUC_train=0.7389 | AUC_test=0.6841
  Gap_CV=0.1085 | Gap_rep=0.0548
  Acc_train=0.4709 | F1w_train=0.5036
  Acc_test=0.3767 | F1w_test=0.4267 | macroF1=0.3323 | balacc=0.4830

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.53      0.34      0.41       422
      Blazar       0.23      0.93      0.36       189
         QSO       0.84      0.44      0.58       899

    accuracy                           0.47      1510
   macro avg       0.53      0.57      0.45      1510
weighted avg       0.68      0.47      0.50      1510

  CM TEST
        AGN  Blazar  QSO
AGN      19      58   24
Blazar    2      26    2
QSO      33     116   97

  Report TEST
              precision    recall  f1-score   support

         AGN       0.35      0.19      0.25       101
      Blazar       0.13      0.87      0.23        30
         QSO       0.79      0.39      0.53       246

    accuracy                           0.38       377
   macro avg       0.42      0.48      0.33       377
weighted avg       0.62      0.38      0.43       377

  Figura guardada: /home/felorrieta/Catalina/SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.6120±0.0110 | AUC_train=0.7853 | AUC_test=0.6965
  Gap_CV=0.1733 | Gap_rep=0.0888
  Acc_train=0.4689 | F1w_train=0.4337
  Acc_test=0.3979 | F1w_test=0.3794 | macroF1=0.4106 | balacc=0.5618

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.35      0.82      0.49       422
      Blazar       0.53      0.84      0.65       189
         QSO       0.95      0.22      0.36       899

    accuracy                           0.47      1510
   macro avg       0.61      0.63      0.50      1510
weighted avg       0.73      0.47      0.43      1510

  CM TEST
        AGN  Blazar  QSO
AGN      74      22    5
Blazar    7      22    1
QSO     170      22   54

  Report TEST
              precision    recall  f1-score   support

         AGN       0.29      0.73      0.42       101
      Blazar       0.33      0.73      0.46        30
         QSO       0.90      0.22      0.35       246

    accuracy                           0.40       377
   macro avg       0.51      0.56      0.41       377
weighted avg       0.69      0.40      0.38       377

  Figura guardada: /home/felorrieta/Catalina/SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_train  F1w_train  Acc_test  F1w_test  macroF1  balacc
SVM1  0.6165 0.0073  0.2048   0.7161  0.0073   0.1053     0.6914     0.6994    0.5915    0.6068   0.5074  0.6044
SVM2  0.6072 0.0030  0.2615   0.7528  0.0030   0.1160     0.7808     0.7844    0.6737    0.6829   0.6040  0.6756
SVM3  0.6027 0.0039  0.2808   0.7707  0.0039   0.1128     0.7861     0.7906    0.6790    0.6921   0.6240  0.6919
SVM4  0.6304 0.0279  0.1085   0.6841  0.0279   0.0548     0.4709     0.5036    0.3767    0.4267   0.3323  0.4830
SVM5  0.6120 0.0110  0.1733   0.6965  0.0110   0.0888     0.4689     0.4337    0.3979    0.3794   0.4106  0.5618

>>> Mejor modelo: SVM3  AUC_test=0.7707

Tabla LaTeX guardada en: /home/felorrieta/Catalina/tabla_svm_top5_reales.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.6165 & 0.0073 & 0.2048 & 0.7161 & 0.0073 & 0.1053 & 0.5915 & 0.6068 \\
        SVM$_{2}$ & 0.6072 & 0.0030 & 0.2615 & 0.7528 & 0.0030 & 0.1160 & 0.6737 & 0.6829 \\
        \rowcolor{BlueBest} SVM$_{3}$ & 0.6027 & 0.0039 & 0.2808 & 0.7707 & 0.0039 & 0.1128 & 0.6790 & 0.6921 \\
        SVM$_{4}$ & 0.6304 & 0.0279 & 0.1085 & 0.6841 & 0.0279 & 0.0548 & 0.3767 & 0.4267 \\
        SVM$_{5}$ & 0.6120 & 0.0110 & 0.1733 & 0.6965 & 0.0110 & 0.0888 & 0.3979 & 0.3794 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}
\label{tab:svm_top5_auc_reales}
\end{table}

Tiempo total: 00:03:35

GRÁFICO DE IMPORTANCIAS - DATOS REALES

# =========================
# IMPORTANCIA POR NIVEL PARA SVM (RBF) CON PERMUTATION IMPORTANCE
# ESIG FIRMA — DATOS REALES — MODELO FINAL SVM2
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.inspection import permutation_importance

plt.style.use("default")

# -------------------------------------------------
# 1) Tomar el modelo FINAL que elegiste para la tesis
#    En tus resultados de ESIG firma reales:
#    SVM1 = top10.iloc[0]
#    SVM2 = top10.iloc[1]   <-- este fue el seleccionado
# -------------------------------------------------
svm2_params = top10.iloc[1]["params"].copy()
svm_best = make_pipe(svm2_params)
svm_best.fit(X_train, y_train_enc)

print("Parámetros usados (SVM2):")
print(svm2_params)

# -------------------------------------------------
# 2) Calcular permutation importance sobre el pipeline completo
# -------------------------------------------------
perm = permutation_importance(
    estimator=svm_best,
    X=X_test,
    y=y_test_enc,
    n_repeats=20,
    random_state=42,
    scoring="balanced_accuracy",
    n_jobs=-1
)

importances_mean = perm.importances_mean

# -------------------------------------------------
# 3) DataFrame base con posición ORIGINAL de cada variable
# -------------------------------------------------
orig_cols = pd.Index(X_test.columns)

df_all = pd.DataFrame({
    "feature": orig_cols.astype(str),
    "importance_mean": importances_mean,
    "orig_pos": np.arange(len(orig_cols))   # posición 0-based
})

# -------------------------------------------------
# 4) Identificar qué variables llegan realmente al SVM final
#    usando máscaras, no nombres
# -------------------------------------------------
selected_mask = np.ones(len(orig_cols), dtype=bool)

if hasattr(svm_best, "named_steps"):

    # VarianceThreshold
    if "vt" in svm_best.named_steps:
        vt_mask = svm_best.named_steps["vt"].get_support()
        selected_mask = vt_mask.copy()

    # SelectKBest (aplicado sobre las que sobrevivieron a VT)
    if "kbest" in svm_best.named_steps:
        kbest_mask = svm_best.named_steps["kbest"].get_support()

        idx_after_vt = np.where(selected_mask)[0]
        new_mask = np.zeros(len(orig_cols), dtype=bool)
        new_mask[idx_after_vt[kbest_mask]] = True
        selected_mask = new_mask

print(f"Variables originales en X_test: {len(orig_cols)}")
print(f"Variables que realmente usa el SVM final: {selected_mask.sum()}")

df_imp_svm = df_all.loc[selected_mask].copy()

# -------------------------------------------------
# 5) Detectar si existe término constante N0
# -------------------------------------------------
first_col_values = np.asarray(X_test.iloc[:, 0], dtype=float)
has_n0 = np.allclose(first_col_values, 1.0)

print(f"¿Se detectó término constante N0?: {has_n0}")

# -------------------------------------------------
# 6) Asignar niveles usando POSICIÓN ORIGINAL
#    Firma estándar 2D, M=9
# -------------------------------------------------
if has_n0:
    # acumulados incluyendo N0:
    # 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023
    bins = [0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023]
    labels = [f"N{i}" for i in range(10)]   # N0..N9
else:
    # acumulados sin N0:
    # 2, 6, 14, 30, 62, 126, 254, 510, 1022
    bins = [0, 2, 6, 14, 30, 62, 126, 254, 510, 1022]
    labels = [f"N{i}" for i in range(1, 10)]  # N1..N9

df_imp_svm["nivel"] = pd.cut(
    df_imp_svm["orig_pos"],
    bins=bins,
    labels=labels,
    right=False,
    include_lowest=True
)

if df_imp_svm["nivel"].isna().any():
    bad = df_imp_svm.loc[df_imp_svm["nivel"].isna(), ["feature", "orig_pos"]].head(10)
    raise ValueError(
        "Hay variables fuera del rango esperado para firma estándar. "
        f"Ejemplos:\n{bad}"
    )

# Excluir N0 si existe
df_imp_svm = df_imp_svm[df_imp_svm["nivel"] != "N0"].copy()

df_imp_svm["nivel_num"] = (
    df_imp_svm["nivel"].astype(str).str.replace("N", "", regex=False).astype(int)
)

df_imp_svm = df_imp_svm.sort_values(["nivel_num", "orig_pos"])

print("\nCantidad de variables seleccionadas por nivel:")
print(df_imp_svm["nivel_num"].value_counts().sort_index())

# -------------------------------------------------
# 7) Resumen por nivel
# -------------------------------------------------
res_svm = df_imp_svm.groupby("nivel_num").agg(
    mean=("importance_mean", "mean"),
    median=("importance_mean", "median"),
    count=("importance_mean", "size")
).reset_index()

niveles_completos = pd.DataFrame({"nivel_num": np.arange(1, 10)})
res_svm = niveles_completos.merge(res_svm, on="nivel_num", how="left").fillna(0)

print("\nRESUMEN POR NIVEL — SVM ESIG FIRMA REALES")
print(res_svm.to_string(index=False))

# -------------------------------------------------
# 8) Gráfico
# -------------------------------------------------
x = np.arange(len(res_svm))
labels_plot = [f"Nivel {n}" for n in res_svm["nivel_num"]]

fig, ax = plt.subplots(figsize=(9.5, 5.2), facecolor="white")
ax.set_facecolor("white")

ax.bar(
    x,
    res_svm["mean"],
    color="#8FA8C7",
    edgecolor="#355C7D",
    linewidth=1.2,
    alpha=0.9,
    label="Importancia promedio"
)

ax.scatter(
    x,
    res_svm["median"],
    s=80,
    marker="o",
    color="#355C7D",
    edgecolors="#1F3A56",
    linewidths=1,
    zorder=3,
    label="Mediana"
)

ax.set_xticks(x)
ax.set_xticklabels(labels_plot)
ax.set_xlabel("Nivel")
ax.set_ylabel("Importancia")
ax.set_title("Importancia promedio y mediana por nivel (SVM, ESIG firma, datos reales)")
ax.grid(axis="y", linestyle="--", alpha=0.35, color="gray")

handles, labels_legend = ax.get_legend_handles_labels()
order = [1, 0]
ax.legend([handles[i] for i in order], [labels_legend[i] for i in order])

fig.tight_layout()

# -------------------------------------------------
# 9) Guardar
# -------------------------------------------------
ruta_guardado = Path("/home/felorrieta/Catalina") / "importancia_promedio_mediana_por_nivel_SVM_ESIG_firma_reales.png"
fig.savefig(ruta_guardado, dpi=300, bbox_inches="tight", facecolor="white")
print(f"\nGráfico guardado en: {ruta_guardado}")

plt.show()

Parámetros usados (SVM2):
{'kbest__k': 127, 'svc__C': 1210.8812479497356, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.04298276297821863}
Variables originales en X_test: 127
Variables que realmente usa el SVM final: 127
¿Se detectó término constante N0?: False

Cantidad de variables seleccionadas por nivel:
nivel_num
1     2
2     4
3     8
4    16
5    32
6    64
7     1
Name: count, dtype: int64

RESUMEN POR NIVEL — SVM ESIG FIRMA REALES
 nivel_num     mean   median  count
         1 0.025279 0.025279    2.0
         2 0.235251 0.221730    4.0
         3 0.107954 0.104148    8.0
         4 0.084784 0.025123   16.0
         5 0.026801 0.012563   32.0
         6 0.020072 0.000398   64.0
         7 0.000000 0.000000    1.0
         8 0.000000 0.000000    0.0
         9 0.000000 0.000000    0.0

Gráfico guardado en: /home/felorrieta/Catalina/importancia_promedio_mediana_por_nivel_SVM_ESIG_firma_reales.png

PROFUNDIDAD

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# =========================
# ESIG FIRMA — DATOS REALES
# =========================
x_sig = pd.read_csv('/home/felorrieta/Catalina/path_signature_esig_REALES_M9.csv')
y_meta = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')

y_meta["id"] = y_meta["oid"]
data_sig = pd.merge(x_sig, y_meta, on="id")

data_train_sig = data_sig.sample(frac=0.8, random_state=42).reset_index(drop=True)
data_test_sig  = data_sig.drop(data_train_sig.index).reset_index(drop=True)

X_train_sig = data_train_sig.drop(columns=['oid','survey_class_mapped','survey_class','survey_class_cat','id'])
y_train_sig = data_train_sig['survey_class_mapped']

X_test_sig = data_test_sig[X_train_sig.columns].copy()
y_test_sig = data_test_sig['survey_class_mapped']

le_sig = LabelEncoder()
y_train_sig_enc = le_sig.fit_transform(y_train_sig)
y_test_sig_enc  = le_sig.transform(y_test_sig)

print("Shape X_train_sig:", X_train_sig.shape)
print("Shape X_test_sig :", X_test_sig.shape)
print("Labels:", list(le_sig.classes_))

Shape X_train_sig: (1438, 1023)
Shape X_test_sig : (359, 1023)
Labels: ['AGN', 'Blazar', 'QSO']

# ==========================================================
# TABLA POR NIVELES (ESIG FIRMA, datos reales) — criterio AUCRep
# MODELO FINAL: SVM2
# ==========================================================

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from scipy.special import softmax
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold

# ----------------------------------------------------------
# 0) USAR EXPLÍCITAMENTE LOS DATOS DE ESIG FIRMA
# ----------------------------------------------------------
Xtr_full = X_train_sig.copy()
Xte_full = X_test_sig.copy()
ytr_full = y_train_sig_enc.copy()
yte_full = y_test_sig_enc.copy()

print("Shape Xtr_full:", Xtr_full.shape)
print("Shape Xte_full :", Xte_full.shape)

# ----------------------------------------------------------
# 1) PARÁMETROS REALES DEL MODELO FINAL SVM2
# ----------------------------------------------------------
FINAL_PARAMS = {
    "kbest__k": 1022,
    "svc__C": 10.772186132342652,
    "svc__class_weight": "balanced",
    "svc__gamma": 0.058258491495380586
}
MODEL_TAG = "SVM2"

print(f"Parámetros de {MODEL_TAG}:")
print(FINAL_PARAMS)

kbest_original = FINAL_PARAMS["kbest__k"]

# ----------------------------------------------------------
# 2) FUNCIÓN AUC
# ----------------------------------------------------------
def auc_ovr_macro_from_scores(y_true, decision_scores):
    if decision_scores.ndim == 1:
        decision_scores = decision_scores.reshape(-1, 1)
    proba = softmax(decision_scores, axis=1)
    return roc_auc_score(y_true, proba, multi_class="ovr", average="macro")

# ----------------------------------------------------------
# 3) ORDEN DE COLUMNAS POR POSICIÓN
# ----------------------------------------------------------
cols_ordered = list(Xtr_full.columns)
n_total = len(cols_ordered)

print(f"Número total de columnas en Xtr_full: {n_total}")

first_col_values = np.asarray(Xtr_full.iloc[:, 0], dtype=float)
has_n0 = np.allclose(first_col_values, 1.0)

print(f"¿Se detectó término constante N0?: {has_n0}")

start_idx = 1 if has_n0 else 0

# Firma estándar 2D, M=9
cum_levels = {
    1: 2,
    2: 6,
    3: 14,
    4: 30,
    5: 62,
    6: 126,
    7: 254,
    8: 510,
    9: 1022
}

max_needed = start_idx + cum_levels[9]
if n_total < max_needed:
    raise ValueError(
        f"Xtr_full tiene {n_total} columnas, pero para firma estándar 2D M=9 "
        f"se esperaban al menos {max_needed} considerando has_n0={has_n0}."
    )

# ----------------------------------------------------------
# 4) CONTAR FEATURES POST-VT EN CADA NIVEL
# ----------------------------------------------------------
def count_features_after_vt(X_df):
    imp = SimpleImputer(strategy="median")
    X_imp = imp.fit_transform(X_df)

    vt = VarianceThreshold(0.0)
    X_vt = vt.fit_transform(X_imp)

    return X_vt.shape[1]

# ----------------------------------------------------------
# 5) LOOP POR NIVELES 1..9
# ----------------------------------------------------------
rows = []

for m in tqdm(range(1, 10), desc=f"Evaluando niveles ({MODEL_TAG})", unit="nivel"):
    n_feats = cum_levels[m]

    selected = cols_ordered[start_idx : start_idx + n_feats]

    Xtr_m = Xtr_full[selected].copy()
    Xte_m = Xte_full[selected].copy()

    params_m = FINAL_PARAMS.copy()

    # número real de variables disponibles luego de VT
    n_features_post_vt = count_features_after_vt(Xtr_m)

    # kbest debe quedar acotado por las variables disponibles en ese nivel
    params_m["kbest__k"] = int(min(kbest_original, n_features_post_vt))

    model_m = make_pipe(params_m)
    model_m.fit(Xtr_m, ytr_full)

    pred_tr = model_m.predict(Xtr_m)
    pred_te = model_m.predict(Xte_m)

    scores_tr = model_m.decision_function(Xtr_m)
    scores_te = model_m.decision_function(Xte_m)

    acc_tr = accuracy_score(ytr_full, pred_tr)
    f1w_tr = f1_score(ytr_full, pred_tr, average="weighted", zero_division=0)
    auc_tr = auc_ovr_macro_from_scores(ytr_full, scores_tr)

    acc_te = accuracy_score(yte_full, pred_te)
    f1w_te = f1_score(yte_full, pred_te, average="weighted", zero_division=0)
    auc_te = auc_ovr_macro_from_scores(yte_full, scores_te)

    rows.append({
        "NivelFirma": m,
        "N_features": Xtr_m.shape[1],
        "N_features_postVT": n_features_post_vt,
        "kbest_usado": params_m["kbest__k"],
        "AccTrain": acc_tr,
        "F1wTrain": f1w_tr,
        "AUCTrain": auc_tr,
        "AccTest": acc_te,
        "F1wTest": f1w_te,
        "AUCRep": auc_te
    })

df_levels = pd.DataFrame(rows)

# ----------------------------------------------------------
# 6) NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
# ----------------------------------------------------------
best_auc = df_levels["AUCRep"].max()
tol = 1e-4
best_candidates = df_levels[df_levels["AUCRep"] >= best_auc - tol]
best_simple = best_candidates.sort_values(["NivelFirma"]).iloc[0]

print("\n" + "=" * 100)
print(f"RESULTADOS POR NIVEL ({MODEL_TAG}) — criterio AUCRep")
print("=" * 100)
print(df_levels.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

print("\n" + "=" * 100)
print("NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)")
print("=" * 100)
print(best_simple.to_string())

# ----------------------------------------------------------
# 7) FILAS LaTeX
# ----------------------------------------------------------
print("\n" + "=" * 100)
print("FILAS LaTeX")
print("=" * 100)
for _, r in df_levels.iterrows():
    print(
        f"{int(r['NivelFirma'])} & {int(r['N_features'])} & "
        f"{r['AccTrain']:.4f} & {r['F1wTrain']:.4f} & {r['AUCTrain']:.4f} & "
        f"{r['AccTest']:.4f} & {r['F1wTest']:.4f} & {r['AUCRep']:.4f} \\\\"
    )

Shape Xtr_full: (1438, 1023)
Shape Xte_full : (359, 1023)
Parámetros de SVM2:
{'kbest__k': 1022, 'svc__C': 10.772186132342652, 'svc__class_weight': 'balanced', 'svc__gamma': 0.058258491495380586}
Número total de columnas en Xtr_full: 1023
¿Se detectó término constante N0?: True


====================================================================================================
RESULTADOS POR NIVEL (SVM2) — criterio AUCRep
====================================================================================================
 NivelFirma  N_features  N_features_postVT  kbest_usado  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2                  2            2    0.5932    0.4829    0.5294   0.6462   0.5332  0.5100
          2           6                  6            6    0.5723    0.5196    0.6138   0.6212   0.5668  0.5811
          3          14                 14           14    0.6551    0.6404    0.7603   0.6713   0.6588  0.7396
          4          30                 30           30    0.6961    0.6876    0.7959   0.6741   0.6655  0.7571
          5          62                 62           62    0.7239    0.7199    0.8286   0.6825   0.6800  0.7815
          6         126                126          126    0.7740    0.7719    0.8648   0.7437   0.7405  0.8092
          7         254                254          254    0.8088    0.8079    0.8980   0.7549   0.7523  0.8320
          8         510                510          510    0.8394    0.8394    0.9242   0.7744   0.7735  0.8441
          9        1022               1022         1022    0.8651    0.8651    0.9403   0.8078   0.8064  0.8564

====================================================================================================
NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep (con tolerancia)
====================================================================================================
NivelFirma              9.000000
N_features           1022.000000
N_features_postVT    1022.000000
kbest_usado          1022.000000
AccTrain                0.865090
F1wTrain                0.865122
AUCTrain                0.940291
AccTest                 0.807799
F1wTest                 0.806352
AUCRep                  0.856357

====================================================================================================
FILAS LaTeX
====================================================================================================
1 & 2 & 0.5932 & 0.4829 & 0.5294 & 0.6462 & 0.5332 & 0.5100 \\
2 & 6 & 0.5723 & 0.5196 & 0.6138 & 0.6212 & 0.5668 & 0.5811 \\
3 & 14 & 0.6551 & 0.6404 & 0.7603 & 0.6713 & 0.6588 & 0.7396 \\
4 & 30 & 0.6961 & 0.6876 & 0.7959 & 0.6741 & 0.6655 & 0.7571 \\
5 & 62 & 0.7239 & 0.7199 & 0.8286 & 0.6825 & 0.6800 & 0.7815 \\
6 & 126 & 0.7740 & 0.7719 & 0.8648 & 0.7437 & 0.7405 & 0.8092 \\
7 & 254 & 0.8088 & 0.8079 & 0.8980 & 0.7549 & 0.7523 & 0.8320 \\
8 & 510 & 0.8394 & 0.8394 & 0.9242 & 0.7744 & 0.7735 & 0.8441 \\
9 & 1022 & 0.8651 & 0.8651 & 0.9403 & 0.8078 & 0.8064 & 0.8564 \\