DATOS SIMULADOS

# import sys
# print(sys.executable)
# !{sys.executable} -m pip install xgboost

/usr/bin/python3

Defaulting to user installation because normal site-packages is not writeable

Collecting xgboost

  Downloading xgboost-2.1.4-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)

Requirement already satisfied: numpy in /home/felorrieta/.local/lib/python3.8/site-packages (from xgboost) (1.24.4)

Collecting nvidia-nccl-cu12 (from xgboost)

  Downloading nvidia_nccl_cu12-2.29.7-py3-none-manylinux_2_18_x86_64.whl.metadata (2.1 kB)

Requirement already satisfied: scipy in /home/felorrieta/.local/lib/python3.8/site-packages (from xgboost) (1.10.1)

Downloading xgboost-2.1.4-py3-none-manylinux_2_28_x86_64.whl (223.6 MB)

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 223.6/223.6 MB 34.8 MB/s eta 0:00:0000:0100:01

Downloading nvidia_nccl_cu12-2.29.7-py3-none-manylinux_2_18_x86_64.whl (293.6 MB)

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 293.6/293.6 MB 33.7 MB/s eta 0:00:0000:0100:01

WARNING: Error parsing dependencies of distro-info: Invalid version: '0.23ubuntu1'

WARNING: Error parsing dependencies of python-debian: Invalid version: '0.1.36ubuntu1'

Installing collected packages: nvidia-nccl-cu12, xgboost

Successfully installed nvidia-nccl-cu12-2.29.7 xgboost-2.1.4

# ============================================================
# BENCHMARK DE TIEMPOS — XGBOOST SIMULADOS
# ESIG/IISIG + FIRMA/LOG-FIRMA
# Corre todo desde cero, pero solo resume tiempos
# ============================================================

import time
import inspect
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (
    StratifiedKFold,
    RepeatedStratifiedKFold,
    ParameterSampler
)
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import (
    roc_auc_score,
    accuracy_score,
    f1_score,
    balanced_accuracy_score
)
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import randint, uniform, loguniform

import xgboost as xgb
from xgboost import XGBClassifier


# ============================================================
# 0) RUTAS
# ============================================================
# Feature tables to benchmark, keyed by the display tag used in progress bars
# and in the timing summary. Each path is read with pd.read_csv and merged
# with the label table on the "id" column.
PATHS = {
    "ESIG firma": "/home/felorrieta/Catalina/path_signature_esig_M9.csv",
    "ESIG log-firma": "/home/felorrieta/Catalina/logsignature_esig_M9.csv",
    "IISIG firma": "/home/felorrieta/Catalina/path_signature_iisig_M9.csv",
    "IISIG log-firma": "/home/felorrieta/Catalina/logsignature_iisig_M9.csv",
}

# Label table, read with pd.read_csv. Its "oid" column is copied to "id" for
# the merge and its "survey_class_mapped" column is used as the target.
PATH_Y = "/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv"

# ============================================================
# 1) CONFIGURACIÓN
# ============================================================
# Number of hyper-parameter settings drawn by ParameterSampler per representation.
N_ITER = 30
# Number of best-ranked settings that are refit on the full training split.
TOP_K = 5
# Seed of the 80/20 row sampling performed in load_and_split.
RANDOM_STATE_SPLIT = 42

# Search space handed to ParameterSampler. With scipy.stats, uniform(loc, scale)
# covers [loc, loc + scale] and randint(low, high) excludes `high`.
param_dist = {
    "learning_rate": loguniform(0.007, 0.06),
    "max_depth": randint(2, 6),  # integers 2..5
    "min_child_weight": loguniform(10.0, 150.0),
    "subsample": uniform(0.65, 0.35),  # [0.65, 1.0]
    "colsample_bytree": uniform(0.65, 0.35),  # [0.65, 1.0]
    "colsample_bynode": uniform(0.65, 0.35),  # [0.65, 1.0]
    "gamma": loguniform(1e-4, 8.0),
    "reg_alpha": loguniform(1e-10, 1.0),
    "reg_lambda": loguniform(1.0, 200.0),
    "grow_policy": ["depthwise", "lossguide"],  # a list is sampled uniformly
    "max_leaves": randint(16, 129),  # integers 16..128
}


# ============================================================
# 2) HELPERS
# ============================================================
def hms(seconds: float) -> str:
    """Format a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    The value is rounded to the nearest whole second first. Hours are not
    wrapped at 24, so long runs print as e.g. ``100:00:00``.
    """
    whole = int(round(seconds))
    total_minutes, secs = divmod(whole, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"

def to_plain_params(d):
    """Return a copy of ``d`` whose NumPy scalar values are Python builtins.

    NumPy floating, integer and boolean scalars are converted to ``float``,
    ``int`` and ``bool`` respectively, so the dictionary prints and
    serialises cleanly. Every other value is passed through unchanged.

    Parameters
    ----------
    d : mapping
        Parameter name -> value.

    Returns
    -------
    dict
        New dictionary with the same keys, in the same order.
    """
    def _plain(value):
        if isinstance(value, np.floating):
            return float(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.bool_):
            return bool(value)
        return value

    return {key: _plain(value) for key, value in d.items()}

def make_sample_weight(y_enc, cw_dict):
    """Look up the weight of every encoded label and return them as a float array.

    ``cw_dict`` is keyed by the integer class id; a label without an entry
    raises ``KeyError``.
    """
    per_sample = [cw_dict[class_id] for class_id in map(int, y_enc)]
    return np.array(per_sample, dtype=float)

def load_and_split(path_x, path_y):
    """Read features and labels, join them on ``id`` and split the rows 80/20.

    The label table's ``oid`` column is copied to ``id`` before the (inner)
    merge. Training rows are drawn with ``DataFrame.sample`` seeded by
    ``RANDOM_STATE_SPLIT``; the rows that were not drawn form the test set.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the targets are the
        ``survey_class_mapped`` column and the features are every remaining
        column except the identifier / class columns.
    """
    non_feature_cols = ['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id']

    features = pd.read_csv(path_x)
    meta = pd.read_csv(path_y)
    meta["id"] = meta["oid"]
    merged = features.merge(meta, on="id")

    # Draw the training rows, then take the exact complement as the test rows.
    sampled = merged.sample(frac=0.8, random_state=RANDOM_STATE_SPLIT)
    data_train = merged.loc[sampled.index].reset_index(drop=True)
    data_test = merged.drop(index=sampled.index).reset_index(drop=True)

    X_train = data_train.drop(columns=non_feature_cols)
    X_test = data_test[X_train.columns].copy()

    y_train = data_train['survey_class_mapped']
    y_test = data_test['survey_class_mapped']
    return X_train, X_test, y_train, y_test

def encode_and_weights(y_train, y_test, hard_classes=("AGN", "QSO"), hard_factor=1.8):
    """Label-encode the targets and build the per-class weight dictionary.

    The encoder is fitted on ``y_train`` only, so a label that appears only
    in ``y_test`` makes ``LabelEncoder.transform`` raise. Weights start from
    scikit-learn's ``"balanced"`` class weights computed on the encoded
    training labels; the weight of every class named in ``hard_classes`` that
    is actually present is then multiplied by ``hard_factor``.

    Parameters
    ----------
    y_train, y_test : array-like
        Raw class labels of the training and test rows.
    hard_classes : iterable of label, optional
        Class names whose weight is boosted. Defaults to ``("AGN", "QSO")``;
        pass an empty tuple or ``None`` to disable the boost.
    hard_factor : float, optional
        Multiplier applied to the boosted classes. Defaults to ``1.8``.

    Returns
    -------
    tuple
        ``(y_train_enc, y_test_enc, labels, n_classes, class_weight_dict)``
        where ``labels`` is ``LabelEncoder.classes_`` and the dictionary maps
        the encoded class id to its weight.
    """
    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train)
    y_test_enc = le.transform(y_test)

    labels = le.classes_
    n_classes = len(labels)

    classes = np.unique(y_train_enc)
    cw = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_enc)
    class_weight_dict = {int(c): float(w) for c, w in zip(classes, cw)}

    for name in hard_classes or ():
        # The position of a name in `labels` is its encoded class id.
        matches = np.flatnonzero(labels == name)
        if matches.size:
            class_weight_dict[int(matches[0])] *= hard_factor

    return y_train_enc, y_test_enc, labels, n_classes, class_weight_dict

def preprocess_fit_transform(X_tr_raw, X_va_raw):
    """Median-impute and drop zero-variance columns, fitting on the training fold only.

    Both transformers are fitted on ``X_tr_raw`` and then applied unchanged
    to ``X_va_raw``. Returns ``(X_tr, X_va)``.
    """
    imputer = SimpleImputer(strategy="median")
    selector = VarianceThreshold(0.0)

    train_out = selector.fit_transform(imputer.fit_transform(X_tr_raw))
    valid_out = selector.transform(imputer.transform(X_va_raw))
    return train_out, valid_out

def preprocess_full(X_train_raw, X_test_raw):
    """Median-impute and drop zero-variance columns for the final train/test split.

    The imputer and the variance filter are fitted on ``X_train_raw`` and
    then applied to ``X_test_raw``. Returns ``(X_train, X_test)``.
    """
    imputer = SimpleImputer(strategy="median")
    selector = VarianceThreshold(0.0)

    train_out = selector.fit_transform(imputer.fit_transform(X_train_raw))
    test_out = selector.transform(imputer.transform(X_test_raw))
    return train_out, test_out

def fit_xgb_one_fold(params, X_tr, y_tr, X_va, y_va, n_classes, w_tr=None, w_va=None):
    """Fit one multi-class XGBClassifier with early stopping on the validation fold.

    ``params`` is merged with a fixed base configuration (hist tree method,
    up to 20000 boosting rounds, mlogloss metric). Training is monitored on
    ``(X_va, y_va)`` through an ``EarlyStopping`` callback with 250 rounds of
    patience and ``save_best=True``. Validation weights are forwarded only
    when ``w_va`` is given and ``fit`` accepts ``sample_weight_eval_set``.

    Returns the fitted model.
    """
    early_stop = xgb.callback.EarlyStopping(rounds=250, save_best=True)

    fixed = {
        "random_state": 42,
        "n_jobs": -1,
        "tree_method": "hist",
        "n_estimators": 20000,
        "verbosity": 0,
        "objective": "multi:softprob",
        "num_class": n_classes,
        "eval_metric": "mlogloss",
    }
    model = XGBClassifier(**fixed, **params, callbacks=[early_stop])

    fit_kwargs = {
        "X": X_tr,
        "y": y_tr,
        "sample_weight": w_tr,
        "eval_set": [(X_va, y_va)],
        "verbose": False,
    }

    # Probe the signature of fit() instead of assuming the keyword exists.
    fit_params = inspect.signature(model.fit).parameters
    if w_va is not None and "sample_weight_eval_set" in fit_params:
        fit_kwargs["sample_weight_eval_set"] = [w_va]

    model.fit(**fit_kwargs)
    return model

def f1_blazar_factory(labels):
    """Build a scorer that computes the binary F1 of "Blazar" versus the rest.

    ``labels`` is the array of class names; the position of "Blazar" in it is
    taken as the encoded class id. When "Blazar" is not among the labels the
    returned scorer always yields NaN.
    """
    if "Blazar" in labels:
        blazar_id = int(np.where(labels == "Blazar")[0][0])
    else:
        blazar_id = None

    def f1_blazar(y_true, y_pred):
        if blazar_id is not None:
            truth = (y_true == blazar_id).astype(int)
            guess = (y_pred == blazar_id).astype(int)
            return f1_score(truth, guess)
        return np.nan

    return f1_blazar

def oof_eval_xgb(params, X, y, cv, n_classes, class_weight_dict, f1_blazar):
    """Score one parameter setting with out-of-fold predictions.

    For every split of ``cv`` the fold is preprocessed, a model is fitted
    with early stopping on the validation part, and its validation
    probabilities are added to a per-sample accumulator. With repeated CV a
    sample is held out several times, so the accumulated probabilities are
    divided by the hold-out count before taking the arg-max.

    NOTE(review): the fold used for early stopping is the same fold that is
    scored here, so the reported metrics may be optimistic.

    Returns
    -------
    dict
        OOF macro-F1, balanced accuracy, accuracy and Blazar F1; mean/std of
        the per-fold macro-F1; mean per-fold balanced accuracy and Blazar F1;
        and ``best_n_cv`` (median best iteration + 1, or 800 when no fold
        reported a best iteration).
    """
    n_samples = len(y)
    n_cols = len(np.unique(y))
    proba_sum = np.zeros((n_samples, n_cols), dtype=float)
    proba_cnt = np.zeros(n_samples, dtype=float)

    per_fold = {"f1m": [], "bacc": [], "f1blz": []}
    its = []

    for tr_idx, va_idx in cv.split(X, y):
        y_tr, y_va = y[tr_idx], y[va_idx]
        X_tr, X_va = preprocess_fit_transform(X[tr_idx], X[va_idx])

        model = fit_xgb_one_fold(
            params, X_tr, y_tr, X_va, y_va, n_classes,
            w_tr=make_sample_weight(y_tr, class_weight_dict),
            w_va=make_sample_weight(y_va, class_weight_dict),
        )

        p_va = model.predict_proba(X_va)

        # Accumulate rather than overwrite: repeated CV revisits each sample.
        proba_sum[va_idx] += p_va
        proba_cnt[va_idx] += 1.0

        yhat_va = p_va.argmax(axis=1)
        per_fold["f1m"].append(f1_score(y_va, yhat_va, average="macro"))
        per_fold["bacc"].append(balanced_accuracy_score(y_va, yhat_va))
        per_fold["f1blz"].append(f1_blazar(y_va, yhat_va))

        best_it = getattr(model, "best_iteration", None)
        if best_it is not None:
            its.append(best_it)

    # The epsilon guards against division by zero for never-validated rows.
    proba_oof = proba_sum / (proba_cnt[:, None] + 1e-12)
    yhat_oof = proba_oof.argmax(axis=1)

    if its:
        best_n_cv = int(np.median(its) + 1)
    else:
        best_n_cv = 800

    summary = {}
    summary["oof_macroF1"] = float(f1_score(y, yhat_oof, average="macro"))
    summary["oof_bacc"] = float(balanced_accuracy_score(y, yhat_oof))
    summary["oof_acc"] = float(accuracy_score(y, yhat_oof))
    summary["oof_f1_blazar"] = float(f1_blazar(y, yhat_oof))
    summary["fold_f1m_mean"] = float(np.mean(per_fold["f1m"]))
    summary["fold_f1m_std"] = float(np.std(per_fold["f1m"]))
    summary["fold_bacc_mean"] = float(np.mean(per_fold["bacc"]))
    summary["fold_f1blz_mean"] = float(np.mean(per_fold["f1blz"]))
    summary["best_n_cv"] = int(best_n_cv)
    return summary

def cv_auc_gap(params, X, y, cv, n_classes, class_weight_dict):
    """Cross-validated weighted one-vs-rest AUC and its train/validation gap.

    A model is fitted per split of ``cv``; the AUC is computed on both the
    training and the validation part of the fold.

    Returns
    -------
    tuple of float
        ``(mean validation AUC, std of validation AUC,
        mean training AUC - mean validation AUC)``.
    """
    def weighted_ovr_auc(y_true, proba):
        return roc_auc_score(y_true, proba, multi_class="ovr", average="weighted")

    train_scores = []
    valid_scores = []

    for tr_idx, va_idx in cv.split(X, y):
        y_tr, y_va = y[tr_idx], y[va_idx]
        X_tr, X_va = preprocess_fit_transform(X[tr_idx], X[va_idx])

        model = fit_xgb_one_fold(
            params, X_tr, y_tr, X_va, y_va, n_classes,
            w_tr=make_sample_weight(y_tr, class_weight_dict),
            w_va=make_sample_weight(y_va, class_weight_dict),
        )

        train_scores.append(weighted_ovr_auc(y_tr, model.predict_proba(X_tr)))
        valid_scores.append(weighted_ovr_auc(y_va, model.predict_proba(X_va)))

    mean_va = float(np.mean(valid_scores))
    std_va = float(np.std(valid_scores))
    gap = float(np.mean(train_scores) - mean_va)
    return mean_va, std_va, gap


# ============================================================
# 3) PIPELINE COMPLETO POR REPRESENTACIÓN
# ============================================================
def run_one_xgb_timing(tag, path_x, path_y, n_iter=N_ITER, top_k=TOP_K):
    """Run the full XGBoost pipeline for one representation and time its stages.

    Stages, each wrapped in its own pair of ``time.time()`` checkpoints:

    1. search   - ``n_iter`` sampled settings scored with ``oof_eval_xgb``
                  under 5x2 repeated stratified CV, then ranked;
    2. top-k    - the ``top_k`` best settings refit on the full training
                  split (using ``best_n_cv`` rounds) and scored on the test split;
    3. table    - ``cv_auc_gap`` for each top-k setting under 5-fold and
                  5x2 repeated CV.

    Parameters
    ----------
    tag : str
        Display name of the representation (printed and stored in the result).
    path_x, path_y : str
        CSV paths of the feature table and the label table.
    n_iter : int
        Number of parameter settings to sample.
    top_k : int
        Number of best settings carried into stages 2 and 3.

    Returns
    -------
    dict
        The tag, the name of the best model and the four elapsed times
        formatted with ``hms``.
    """
    print("\n" + "=" * 110)
    print(tag)
    print("=" * 110)

    total_t0 = time.time()

    # ---------- data loading ----------
    X_train, X_test, y_train, y_test = load_and_split(path_x, path_y)
    y_train_enc, y_test_enc, labels, n_classes, class_weight_dict = encode_and_weights(y_train, y_test)
    f1_blazar = f1_blazar_factory(labels)

    X_train_np = np.asarray(X_train)
    X_test_np  = np.asarray(X_test)

    print("Shapes:", X_train.shape, X_test.shape)
    print("Labels:", list(labels))

    # ---------- hyper-parameter search ----------
    cv_rep = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)
    sampler = list(ParameterSampler(param_dist, n_iter=n_iter, random_state=42))

    search_t0 = time.time()
    rows = []

    for params in tqdm(sampler, total=n_iter, desc=f"OOF trials — {tag}"):
        stats = oof_eval_xgb(params, X_train_np, y_train_enc, cv_rep, n_classes, class_weight_dict, f1_blazar)
        rows.append({"params": params, **stats})

    # Rank by OOF macro-F1 (higher is better), break ties by lower fold
    # spread, then by higher balanced accuracy and Blazar F1.
    results = pd.DataFrame(rows)
    results = results.sort_values(
        ["oof_macroF1", "fold_f1m_std", "oof_bacc", "oof_f1_blazar"],
        ascending=[False, True, False, False]
    ).reset_index(drop=True)

    search_t1 = time.time()

    # The index is reset so that `i` below runs 0..top_k-1 and can index
    # top5_summary positionally.
    topk = results.head(top_k).reset_index(drop=True)

    # ---------- top-k evaluation ----------
    eval_t0 = time.time()

    X_train_v, X_test_v = preprocess_full(X_train_np, X_test_np)
    w_train_full = make_sample_weight(y_train_enc, class_weight_dict)

    top5_summary = []

    for i, row in topk.iterrows():
        params = row["params"]
        best_n = int(row["best_n_cv"])

        # No early stopping here: the number of rounds comes from the CV search.
        model = XGBClassifier(
            random_state=100 + i,
            n_jobs=-1,
            tree_method="hist",
            n_estimators=best_n,
            verbosity=0,
            objective="multi:softprob",
            num_class=n_classes,
            eval_metric="mlogloss",
            **params
        )

        model.fit(X_train_v, y_train_enc, sample_weight=w_train_full, verbose=False)

        p_tr = model.predict_proba(X_train_v)
        p_te = model.predict_proba(X_test_v)

        yhat_tr = np.argmax(p_tr, axis=1)
        yhat_te = np.argmax(p_te, axis=1)

        top5_summary.append({
            "Modelo": f"XGB_{i+1}",
            "Acc_train": accuracy_score(y_train_enc, yhat_tr),
            "F1_w_train": f1_score(y_train_enc, yhat_tr, average="weighted"),
            "Acc_test": accuracy_score(y_test_enc, yhat_te),
            "F1_w_test": f1_score(y_test_enc, yhat_te, average="weighted"),
            "macroF1_test": f1_score(y_test_enc, yhat_te, average="macro"),
            "bacc_test": balanced_accuracy_score(y_test_enc, yhat_te),
            "best_n": best_n,
        })

    eval_t1 = time.time()

    # ---------- metrics for the final summary table ----------
    latex_t0 = time.time()

    cv_5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_5x2 = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

    latex_rows = []

    for i, row in topk.iterrows():
        params = row["params"]

        auc_cv,  sd_cv,  gap_cv  = cv_auc_gap(params, X_train_np, y_train_enc, cv_5, n_classes, class_weight_dict)
        auc_rep, sd_rep, gap_rep = cv_auc_gap(params, X_train_np, y_train_enc, cv_5x2, n_classes, class_weight_dict)

        latex_rows.append({
            "Modelo": f"XGB_{i+1}",
            "AUC_CV": auc_cv,
            "SD_CV": sd_cv,
            "Gap_CV": gap_cv,
            "AUC_rep": auc_rep,
            "SD_rep": sd_rep,
            "Gap_rep": gap_rep,
            "Acc_test": top5_summary[i]["Acc_test"],
            "F1_w_test": top5_summary[i]["F1_w_test"],
        })

    latex_t1 = time.time()

    total_t1 = time.time()

    # NOTE(review): the "best" model is picked by test-set accuracy (then
    # weighted F1), i.e. the test split takes part in model selection.
    best_model = pd.DataFrame(latex_rows).sort_values(
        ["Acc_test", "F1_w_test"], ascending=False
    ).reset_index(drop=True).iloc[0]

    out = {
        "Representación": tag,
        "Mejor modelo": best_model["Modelo"],
        "Tiempo búsqueda": hms(search_t1 - search_t0),
        "Tiempo top5": hms(eval_t1 - eval_t0),
        "Tiempo tabla": hms(latex_t1 - latex_t0),
        "Tiempo total": hms(total_t1 - total_t0),
    }

    print("\nResumen de tiempos:")
    for k, v in out.items():
        print(f"{k}: {v}")

    return out


# ============================================================
# 4) CORRER LAS 4 REPRESENTACIONES
# ============================================================
# Run the benchmark once per representation, in the insertion order of PATHS,
# collecting one timing dictionary per run.
timings = []

for tag, path_x in PATHS.items():
    timings.append(run_one_xgb_timing(tag, path_x, PATH_Y, n_iter=N_ITER, top_k=TOP_K))

# One row per representation; the columns are the keys returned by
# run_one_xgb_timing.
timings_df = pd.DataFrame(timings)

print("\n" + "=" * 110)
print("RESUMEN FINAL DE TIEMPOS")
print("=" * 110)
print(timings_df.to_string(index=False))

# Persist the summary next to the input data.
timings_df.to_csv("/home/felorrieta/Catalina/tiempos_xgboost_simulados.csv", index=False)
print("\nResumen guardado en: /home/felorrieta/Catalina/tiempos_xgboost_simulados.csv")


==============================================================================================================
ESIG firma
==============================================================================================================
Shapes: (1510, 1023) (377, 1023)
Labels: ['AGN', 'Blazar', 'QSO']


Resumen de tiempos:
Representación: ESIG firma
Mejor modelo: XGB_3
Tiempo búsqueda: 02:04:09
Tiempo top5: 00:00:50
Tiempo tabla: 00:35:12
Tiempo total: 02:40:12

==============================================================================================================
ESIG log-firma
==============================================================================================================
Shapes: (1510, 127) (377, 127)
Labels: ['AGN', 'Blazar', 'QSO']


Resumen de tiempos:
Representación: ESIG log-firma
Mejor modelo: XGB_2
Tiempo búsqueda: 00:08:55
Tiempo top5: 00:00:03
Tiempo tabla: 00:02:22
Tiempo total: 00:11:20

==============================================================================================================
IISIG firma
==============================================================================================================
Shapes: (1510, 1022) (377, 1022)
Labels: ['AGN', 'Blazar', 'QSO']


Resumen de tiempos:
Representación: IISIG firma
Mejor modelo: XGB_4
Tiempo búsqueda: 02:06:07
Tiempo top5: 00:00:43
Tiempo tabla: 00:25:32
Tiempo total: 02:32:22

==============================================================================================================
IISIG log-firma
==============================================================================================================
Shapes: (1032, 127) (258, 127)
Labels: ['AGN', 'Blazar', 'QSO']


Resumen de tiempos:
Representación: IISIG log-firma
Mejor modelo: XGB_1
Tiempo búsqueda: 00:06:46
Tiempo top5: 00:00:04
Tiempo tabla: 00:02:45
Tiempo total: 00:09:34

==============================================================================================================
RESUMEN FINAL DE TIEMPOS
==============================================================================================================
 Representación Mejor modelo Tiempo búsqueda Tiempo top5 Tiempo tabla Tiempo total
     ESIG firma        XGB_3        02:04:09    00:00:50     00:35:12     02:40:12
 ESIG log-firma        XGB_2        00:08:55    00:00:03     00:02:22     00:11:20
    IISIG firma        XGB_4        02:06:07    00:00:43     00:25:32     02:32:22
IISIG log-firma        XGB_1        00:06:46    00:00:04     00:02:45     00:09:34

Resumen guardado en: /home/felorrieta/Catalina/tiempos_xgboost_simulados.csv

DATOS REALES

XGB: ESIG - FIRMA

# ============================================================
# XGB OOF-first (5x2) — ESIG FIRMA, DATOS REALES
# + FIXED OOF (no overwrite in repeated CV)
# + TOP-5 evaluation: TRAIN & TEST confusion matrices + reports
# + Timing (search / top5 eval / total)
# + Extra: build a LaTeX-ready table for TOP-5
# ============================================================

import time
import inspect
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (
    StratifiedKFold,
    RepeatedStratifiedKFold,
    ParameterSampler
)
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    classification_report,
    accuracy_score,
    f1_score,
    balanced_accuracy_score
)
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import randint, uniform, loguniform

import xgboost as xgb
from xgboost import XGBClassifier


# ============================================================
# 1) LOAD + MERGE + SPLIT (80/20)  [ESIG FIRMA, DATOS REALES]
# ============================================================
# Feature table (ESIG signature, real data) and label table.
x = pd.read_csv('/home/felorrieta/Catalina/path_signature_esig_REALES_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
# The label table identifies objects by "oid"; expose it as "id" for the merge.
y["id"] = y["oid"]

# pd.merge defaults to an inner join, so only ids present in both tables survive.
data = pd.merge(x, y, on="id")

# Correct split: sample the training indices first, then take their true complement.
train_idx = data.sample(frac=0.8, random_state=42).index
data_train = data.loc[train_idx].reset_index(drop=True)
data_test  = data.drop(train_idx).reset_index(drop=True)

# Everything except the identifier / class columns is a feature.
X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']

# Reuse the training column list so both matrices share the same column order.
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

# Plain arrays, indexed positionally by the CV splitters later on.
X_train_np = np.asarray(X_train)
X_test_np  = np.asarray(X_test)

print("Shapes:", X_train.shape, X_test.shape)

# ============================================================
# 2) Encoding + class weights (+ optional HARD_FACTOR)
# ============================================================
# Fit the encoder on the training labels only; transform() raises on a label
# that appears only in the test split.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)

# labels[i] is the original class name of encoded id i.
labels = le.classes_
n_classes = len(labels)

# "balanced" class weights computed on the encoded training labels,
# stored as {encoded id: weight}.
classes = np.unique(y_train_enc)
cw = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_enc)
class_weight_dict = {int(c): float(w) for c, w in zip(classes, cw)}

# Optional: raise the weight of the classes that get confused with each other.
HARD_CLASSES = ["AGN", "QSO"]
HARD_FACTOR  = 1.8

if HARD_CLASSES:
    for name in HARD_CLASSES:
        if name in labels:
            # Position of the name in `labels` == its encoded class id.
            hid = int(np.where(labels == name)[0][0])
            class_weight_dict[hid] *= HARD_FACTOR

def make_sample_weight(y_enc, cw_dict):
    """Look up the weight of every encoded label and return them as a float array.

    ``cw_dict`` is keyed by the integer class id; a label without an entry
    raises ``KeyError``.
    """
    per_sample = [cw_dict[class_id] for class_id in map(int, y_enc)]
    return np.array(per_sample, dtype=float)

# Encoded id of the "Blazar" class, or None when that label is absent.
if "Blazar" in labels:
    BLZ_ID = int(np.where(labels == "Blazar")[0][0])
else:
    BLZ_ID = None

def f1_blazar(y_true, y_pred):
    """Binary F1 of "Blazar" versus the rest; NaN when there is no such class."""
    if BLZ_ID is not None:
        truth = (y_true == BLZ_ID).astype(int)
        guess = (y_pred == BLZ_ID).astype(int)
        return f1_score(truth, guess)
    return np.nan

# Diagnostics: label order used by the encoder, per-class training counts
# (keyed by encoded id) and the final class weights after the optional boost.
print("Labels:", list(labels))
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))
print("Class weights:", class_weight_dict)

# ============================================================
# 3) Preprocess + XGB fit (early stopping)
# ============================================================
def preprocess_fit_transform(X_tr_raw, X_va_raw):
    """Median-impute and drop zero-variance columns, fitting on the training fold only.

    Returns ``(X_tr, X_va, imputer, variance_filter)`` so callers can reuse
    the fitted transformers.
    """
    imputer = SimpleImputer(strategy="median")
    selector = VarianceThreshold(0.0)

    train_out = selector.fit_transform(imputer.fit_transform(X_tr_raw))
    valid_out = selector.transform(imputer.transform(X_va_raw))
    return train_out, valid_out, imputer, selector

def preprocess_full(X_train_raw, X_test_raw):
    """Median-impute and drop zero-variance columns for the final train/test split.

    Both transformers are fitted on ``X_train_raw`` and applied to
    ``X_test_raw``. Returns ``(X_train, X_test, imputer, variance_filter)``.
    """
    imputer = SimpleImputer(strategy="median")
    selector = VarianceThreshold(0.0)

    train_out = selector.fit_transform(imputer.fit_transform(X_train_raw))
    test_out = selector.transform(imputer.transform(X_test_raw))
    return train_out, test_out, imputer, selector

def fit_xgb_one_fold(params, X_tr, y_tr, X_va, y_va, w_tr=None, w_va=None):
    """Fit one multi-class XGBClassifier with early stopping on the validation fold.

    ``params`` is merged with a fixed base configuration (hist tree method,
    up to 20000 boosting rounds, mlogloss metric; the class count comes from
    the module-level ``n_classes``). Training is monitored on ``(X_va, y_va)``
    through an ``EarlyStopping`` callback with 250 rounds of patience and
    ``save_best=True``. Validation weights are forwarded only when ``w_va``
    is given and ``fit`` accepts ``sample_weight_eval_set``.

    Returns the fitted model.
    """
    early_stop = xgb.callback.EarlyStopping(rounds=250, save_best=True)

    fixed = {
        "random_state": 42,
        "n_jobs": -1,
        "tree_method": "hist",
        "n_estimators": 20000,
        "verbosity": 0,
        "objective": "multi:softprob",
        "num_class": n_classes,
        "eval_metric": "mlogloss",
    }
    model = XGBClassifier(**fixed, **params, callbacks=[early_stop])

    fit_kwargs = {
        "X": X_tr,
        "y": y_tr,
        "sample_weight": w_tr,
        "eval_set": [(X_va, y_va)],
        "verbose": False,
    }

    # Probe the signature of fit() instead of assuming the keyword exists.
    fit_params = inspect.signature(model.fit).parameters
    if w_va is not None and "sample_weight_eval_set" in fit_params:
        fit_kwargs["sample_weight_eval_set"] = [w_va]

    model.fit(**fit_kwargs)
    return model

def to_plain_params(d):
    """Return a copy of ``d`` whose NumPy scalar values are Python builtins.

    NumPy floating, integer and boolean scalars are converted to ``float``,
    ``int`` and ``bool`` respectively, so the dictionary prints cleanly.
    Every other value is passed through unchanged.

    Parameters
    ----------
    d : mapping
        Parameter name -> value.

    Returns
    -------
    dict
        New dictionary with the same keys, in the same order.
    """
    def _plain(value):
        if isinstance(value, np.floating):
            return float(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.bool_):
            return bool(value)
        return value

    return {key: _plain(value) for key, value in d.items()}

# ============================================================
# 4) FIXED OOF eval for repeated CV (accumulate & average)
# ============================================================
def oof_eval_xgb(params, X, y, cv):
    """Score one parameter setting with out-of-fold predictions.

    For every split of ``cv`` the fold is preprocessed, a model is fitted
    with early stopping on the validation part, and its validation
    probabilities are added to a per-sample accumulator. With repeated CV a
    sample is held out several times, so the accumulated probabilities are
    divided by the hold-out count before taking the arg-max. Sample weights
    come from the module-level ``class_weight_dict``.

    NOTE(review): the fold used for early stopping is the same fold that is
    scored here, so the reported metrics may be optimistic.

    Returns
    -------
    dict
        OOF macro-F1, balanced accuracy, accuracy and Blazar F1; mean/std of
        the per-fold macro-F1; mean per-fold balanced accuracy and Blazar F1;
        and ``best_n_cv`` (median best iteration + 1, or 800 when no fold
        reported a best iteration).
    """
    n_samples = len(y)
    n_cols = len(np.unique(y))
    proba_sum = np.zeros((n_samples, n_cols), dtype=float)
    proba_cnt = np.zeros(n_samples, dtype=float)

    per_fold = {"f1m": [], "bacc": [], "f1blz": []}
    its = []

    for tr_idx, va_idx in cv.split(X, y):
        y_tr, y_va = y[tr_idx], y[va_idx]
        X_tr, X_va, _, _ = preprocess_fit_transform(X[tr_idx], X[va_idx])

        model = fit_xgb_one_fold(
            params, X_tr, y_tr, X_va, y_va,
            w_tr=make_sample_weight(y_tr, class_weight_dict),
            w_va=make_sample_weight(y_va, class_weight_dict),
        )

        p_va = model.predict_proba(X_va)

        # Accumulate the OOF probabilities instead of overwriting them.
        proba_sum[va_idx] += p_va
        proba_cnt[va_idx] += 1.0

        yhat_va = p_va.argmax(axis=1)
        per_fold["f1m"].append(f1_score(y_va, yhat_va, average="macro"))
        per_fold["bacc"].append(balanced_accuracy_score(y_va, yhat_va))
        per_fold["f1blz"].append(f1_blazar(y_va, yhat_va))

        best_it = getattr(model, "best_iteration", None)
        if best_it is not None:
            its.append(best_it)

    # The epsilon guards against division by zero for never-validated rows.
    proba_oof = proba_sum / (proba_cnt[:, None] + 1e-12)
    yhat_oof = proba_oof.argmax(axis=1)

    if its:
        best_n_cv = int(np.median(its) + 1)
    else:
        best_n_cv = 800

    summary = {}
    summary["oof_macroF1"] = float(f1_score(y, yhat_oof, average="macro"))
    summary["oof_bacc"] = float(balanced_accuracy_score(y, yhat_oof))
    summary["oof_acc"] = float(accuracy_score(y, yhat_oof))
    summary["oof_f1_blazar"] = float(f1_blazar(y, yhat_oof))
    summary["fold_f1m_mean"] = float(np.mean(per_fold["f1m"]))
    summary["fold_f1m_std"] = float(np.std(per_fold["f1m"]))
    summary["fold_bacc_mean"] = float(np.mean(per_fold["bacc"]))
    summary["fold_f1blz_mean"] = float(np.mean(per_fold["f1blz"]))
    summary["best_n_cv"] = int(best_n_cv)
    return summary

# ============================================================
# 5) Search space + OOF-first run
# ============================================================
# Start of the wall-clock measurement for the whole experiment.
TOTAL_T0 = time.time()

# 5 folds x 2 repeats, stratified by the encoded class.
cv_rep = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

# Search space handed to ParameterSampler. With scipy.stats, uniform(loc, scale)
# covers [loc, loc + scale] and randint(low, high) excludes `high`.
param_dist = {
    "learning_rate": loguniform(0.007, 0.06),
    "max_depth": randint(2, 6),
    "min_child_weight": loguniform(10.0, 150.0),
    "subsample": uniform(0.65, 0.35),
    "colsample_bytree": uniform(0.65, 0.35),
    "colsample_bynode": uniform(0.65, 0.35),
    "gamma": loguniform(1e-4, 8.0),
    "reg_alpha": loguniform(1e-10, 1.0),
    "reg_lambda": loguniform(1.0, 200.0),
    "grow_policy": ["depthwise", "lossguide"],
    "max_leaves": randint(16, 129),
}

# Number of sampled settings and number of top settings evaluated afterwards.
N_ITER = 30
TOP_K  = 5

# Materialise the sampled settings up front so the progress bar has a fixed length.
sampler = list(ParameterSampler(param_dist, n_iter=N_ITER, random_state=42))

print("\n########## XGB OOF-first (5x2) — ESIG FIRMA REALES ##########")
print(f"N_ITER={N_ITER} | CV=5x2 | TOP_K={TOP_K}")

SEARCH_T0 = time.time()
rows = []

# One OOF evaluation per sampled setting; each row keeps the raw params dict
# next to its metrics.
for params in tqdm(sampler, total=N_ITER, desc="OOF trials"):
    stats = oof_eval_xgb(params, X_train_np, y_train_enc, cv_rep)
    rows.append({"params": params, **stats})

results = pd.DataFrame(rows)

# Rank by OOF macro-F1 (higher is better), break ties by lower fold spread,
# then by higher balanced accuracy and Blazar F1.
results = results.sort_values(
    ["oof_macroF1", "fold_f1m_std", "oof_bacc", "oof_f1_blazar"],
    ascending=[False, True, False, False]
).reset_index(drop=True)

SEARCH_T1 = time.time()

print("\nTOP 10 by FIXED OOF macro-F1 (and stability):")
print(results.head(10)[[
    "oof_macroF1", "oof_bacc", "oof_acc", "oof_f1_blazar",
    "fold_f1m_std", "best_n_cv", "params"
]].to_string(index=False))

# NOTE(review): formatting an elapsed duration through gmtime()/"%H" wraps
# at 24 hours, so a longer search would be under-reported.
print("\nTiempo búsqueda (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(SEARCH_T1 - SEARCH_T0)))

# ============================================================
# 6) Train TOP-5 on full-train and show TRAIN+TEST confusion matrices
# ============================================================
EVAL_T0 = time.time()

# Keep the TOP_K best-ranked settings; the index is reset so that `i` in the
# loop below runs 0..TOP_K-1.
topk = results.head(TOP_K).reset_index(drop=True)

# Preprocessing is fitted on the full training split and applied to the test split.
X_train_v, X_test_v, _, _ = preprocess_full(X_train_np, X_test_np)
w_train_full = make_sample_weight(y_train_enc, class_weight_dict)

print("\n" + "#" * 95)
print("TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)")
print("#" * 95)

# One summary dict per model, appended in the same order as `topk`.
top5_summary = []

for i, row in topk.iterrows():
    params = row["params"]
    best_n = int(row["best_n_cv"])

    # No early stopping here: the number of rounds comes from the CV search.
    model = XGBClassifier(
        random_state=100 + i,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=best_n,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        **params
    )
    model.fit(X_train_v, y_train_enc, sample_weight=w_train_full, verbose=False)

    p_tr = model.predict_proba(X_train_v)
    p_te = model.predict_proba(X_test_v)

    yhat_tr = np.argmax(p_tr, axis=1)
    yhat_te = np.argmax(p_te, axis=1)

    # train/test metrics
    f1m_tr = f1_score(y_train_enc, yhat_tr, average="macro")
    f1m_te = f1_score(y_test_enc,  yhat_te, average="macro")
    bacc_tr = balanced_accuracy_score(y_train_enc, yhat_tr)
    bacc_te = balanced_accuracy_score(y_test_enc,  yhat_te)
    acc_tr = accuracy_score(y_train_enc, yhat_tr)
    acc_te = accuracy_score(y_test_enc,  yhat_te)
    f1w_te = f1_score(y_test_enc, yhat_te, average="weighted")
    f1blz_te = f1_blazar(y_test_enc, yhat_te)

    print("\n" + "=" * 95)
    print(f"OOF-TOP{TOP_K} | Model #{i+1}")
    print("=" * 95)
    print("FIXED OOF macroF1:", f"{row['oof_macroF1']:.4f}", "| fold std:", f"{row['fold_f1m_std']:.4f}")
    print("PARAMS:", {"best_n_estimators": best_n, **to_plain_params(params)})

    print("\nMÉTRICAS (TRAIN / TEST)")
    print(f"macro-F1 train={f1m_tr:.4f} | test={f1m_te:.4f}")
    print(f"bal_acc  train={bacc_tr:.4f} | test={bacc_te:.4f}")
    print(f"acc      train={acc_tr:.4f} | test={acc_te:.4f}")
    print(f"F1_w test={f1w_te:.4f} | F1(Blazar) test={f1blz_te:.4f}")
    print(f"GAP macro-F1 (train-test) = {f1m_tr - f1m_te:.4f}")

    # Rows are true classes, columns are predicted classes (sklearn convention).
    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print("\nMatriz de confusión (TRAIN)")
    print(pd.DataFrame(cm_tr, index=labels, columns=labels))

    print("\nMatriz de confusión (TEST)")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))

    print("\nClassification report (TRAIN)")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))

    print("\nClassification report (TEST)")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    top5_summary.append({
        "Modelo": f"XGB_{i+1}",
        "best_n": best_n,
        "oof_macroF1": row["oof_macroF1"],
        "oof_f1_blazar": row["oof_f1_blazar"],
        "fold_f1m_std": row["fold_f1m_std"],
        "Acc_test": acc_te,
        "F1_w_test": f1w_te,
        "macroF1_test": f1m_te,
        "bacc_test": bacc_te,
    })

EVAL_T1 = time.time()
# The "total" is closed here, i.e. before the extra table metrics are
# computed further down in the script.
TOTAL_T1 = time.time()

# NOTE(review): formatting an elapsed duration through gmtime()/"%H" wraps
# at 24 hours, so a longer run would be under-reported.
print("\nTiempo eval TOP-5 (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(EVAL_T1 - EVAL_T0)))
print("Tiempo TOTAL (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(TOTAL_T1 - TOTAL_T0)))

# ============================================================
# 7) Extra: Build LaTeX-ready table metrics for TOP-5
# ============================================================
def cv_auc_gap(params, X, y, cv):
    """Cross-validate one parameter set and summarise its weighted OvR ROC-AUC.

    For every split yielded by ``cv.split(X, y)`` the two parts are
    preprocessed with ``preprocess_fit_transform``, a model is fitted via
    ``fit_xgb_one_fold`` and the AUC is computed on both the fitting part and
    the held-out part.

    Args:
        params: Hyper-parameters forwarded to ``fit_xgb_one_fold``.
        X: Feature matrix, indexed positionally with the split indices.
        y: Encoded labels, indexed the same way as ``X``.
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Tuple ``(mean_val_auc, std_val_auc, gap)`` where ``gap`` is the mean
        training AUC minus the mean validation AUC.
    """
    def _weighted_ovr_auc(y_true, proba):
        return roc_auc_score(y_true, proba, multi_class="ovr", average="weighted")

    train_scores = []
    valid_scores = []

    for fit_rows, held_rows in cv.split(X, y):
        y_fit = y[fit_rows]
        y_held = y[held_rows]

        # Preprocessing is re-fitted inside every fold.
        X_fit, X_held, _, _ = preprocess_fit_transform(X[fit_rows], X[held_rows])

        # NOTE(review): the held-out part is also handed to the fitter; if it
        # is used there for early stopping, the validation AUC below is
        # measured on data that influenced the fit -- confirm.
        model = fit_xgb_one_fold(
            params, X_fit, y_fit, X_held, y_held,
            w_tr=make_sample_weight(y_fit, class_weight_dict),
            w_va=make_sample_weight(y_held, class_weight_dict),
        )

        proba_fit = model.predict_proba(X_fit)
        proba_held = model.predict_proba(X_held)

        train_scores.append(_weighted_ovr_auc(y_fit, proba_fit))
        valid_scores.append(_weighted_ovr_auc(y_held, proba_held))

    valid_mean = float(np.mean(valid_scores))
    valid_std = float(np.std(valid_scores))
    return valid_mean, valid_std, float(np.mean(train_scores) - valid_mean)

# Splitters for the table's AUC columns: plain 5-fold, and 5-fold repeated twice.
cv_5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_5x2 = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

LATEX_T0 = time.time()
latex_rows = []

# top5_summary is a plain list with one entry per evaluated model, so it must
# be addressed by position. The DataFrame index label returned by iterrows()
# only coincides with the position when topk carries a 0..n-1 index, so the
# label is discarded and enumerate() supplies the position instead.
for i, (_, row) in enumerate(
    tqdm(topk.iterrows(), total=len(topk), desc="Building LaTeX table metrics")
):
    params = row["params"]
    # Look the summary up before the expensive CV runs so a mismatch between
    # topk and top5_summary fails immediately.
    summary = top5_summary[i]

    auc_cv,  sd_cv,  gap_cv  = cv_auc_gap(params, X_train_np, y_train_enc, cv_5)
    auc_rep, sd_rep, gap_rep = cv_auc_gap(params, X_train_np, y_train_enc, cv_5x2)

    acc_test = summary["Acc_test"]
    f1w_test = summary["F1_w_test"]

    latex_rows.append({
        "Modelo": summary["Modelo"],
        "AUC_CV": auc_cv,
        "SD_CV": sd_cv,
        "Gap_CV": gap_cv,
        "AUC_rep": auc_rep,
        "SD_rep": sd_rep,
        "Gap_rep": gap_rep,
        "Acc_test": acc_test,
        "F1_w_test": f1w_test,
    })

LATEX_T1 = time.time()
latex_df = pd.DataFrame(latex_rows)

# Time spent computing the CV metrics for the table (hh:mm:ss).
print(
    "\nTiempo métricas tabla LaTeX (hh:mm:ss):",
    time.strftime("%H:%M:%S", time.gmtime(LATEX_T1 - LATEX_T0)),
)

# Summary table, best test accuracy first (ties broken by weighted F1).
print("\nTabla resumen (para elegir mejor modelo):")
ranked_df = latex_df.sort_values(by=["Acc_test", "F1_w_test"], ascending=False)
print(ranked_df.to_string(index=False))

# One "a & b & ... \\" line per model, in the original (unsorted) row order,
# ready to paste inside a LaTeX tabular.
print("\n--- LaTeX rows (pegables dentro de tu tabular) ---")
metric_cols = [
    "AUC_CV", "SD_CV", "Gap_CV",
    "AUC_rep", "SD_rep", "Gap_rep",
    "Acc_test", "F1_w_test",
]
for _, r in latex_df.iterrows():
    cells = [f"{r['Modelo']}"] + [f"{r[col]:.4f}" for col in metric_cols]
    print(" & ".join(cells) + " \\\\")

Shapes: (1438, 1023) (359, 1023)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 400, 1: 186, 2: 852}
Class weights: {0: 2.157, 1: 2.5770609318996414, 2: 1.0126760563380282}

########## XGB OOF-first (5x2) — ESIG FIRMA REALES ##########
N_ITER=30 | CV=5x2 | TOP_K=5


TOP 10 by FIXED OOF macro-F1 (and stability):
 oof_macroF1  oof_bacc  oof_acc  oof_f1_blazar  fold_f1m_std  best_n_cv                                                                                                                                                                                                                                                                                                                                                                params
    0.589677  0.581771 0.652295       0.519174      0.024102        172        {'colsample_bynode': 0.7810890415965769, 'colsample_bytree': 0.9827500072434707, 'gamma': 0.3881791159604766, 'grow_policy': 'depthwise', 'learning_rate': 0.0252342845499294, 'max_depth': 3, 'max_leaves': 98, 'min_child_weight': 13.109303695322625, 'reg_alpha': 3.912811440759049e-06, 'reg_lambda': 5.859674932405375, 'subsample': 0.7000033862726793}
    0.589554  0.585860 0.648818       0.522727      0.022725        143    {'colsample_bynode': 0.8894815592792549, 'colsample_bytree': 0.8040533728088605, 'gamma': 0.0003966107188259859, 'grow_policy': 'depthwise', 'learning_rate': 0.04192887667353639, 'max_depth': 4, 'max_leaves': 93, 'min_child_weight': 28.835199622758644, 'reg_alpha': 6.642948382671036e-09, 'reg_lambda': 54.71536021387711, 'subsample': 0.7988045560719357}
    0.588700  0.582151 0.648122       0.520468      0.024207         97 {'colsample_bynode': 0.7452462872846224, 'colsample_bytree': 0.8766915421894768, 'gamma': 0.00010058922341116494, 'grow_policy': 'depthwise', 'learning_rate': 0.044484168177941416, 'max_depth': 4, 'max_leaves': 85, 'min_child_weight': 39.873599110055316, 'reg_alpha': 1.4931726672035842e-06, 'reg_lambda': 3.2439692898781605, 'subsample': 0.691952878566789}
    0.588691  0.583614 0.652295       0.511494      0.023493         83       {'colsample_bynode': 0.994198099313195, 'colsample_bytree': 0.7895885548555936, 'gamma': 1.0070219632034507, 'grow_policy': 'lossguide', 'learning_rate': 0.042119161345132834, 'max_depth': 5, 'max_leaves': 74, 'min_child_weight': 16.571536957884465, 'reg_alpha': 2.557121805028822e-10, 'reg_lambda': 22.89087461386255, 'subsample': 0.8871475266447988}
    0.585722  0.581783 0.648122       0.512821      0.019529        154        {'colsample_bynode': 0.7185940018467009, 'colsample_bytree': 0.6658295511186884, 'gamma': 0.003936623559141479, 'grow_policy': 'lossguide', 'learning_rate': 0.03486482325766769, 'max_depth': 4, 'max_leaves': 68, 'min_child_weight': 48.986105915986826, 'reg_alpha': 0.4493172298007678, 'reg_lambda': 24.93469224439704, 'subsample': 0.7465997137078901}
    0.585465  0.580042 0.646732       0.514451      0.027379        227     {'colsample_bynode': 0.8716913147786483, 'colsample_bytree': 0.9550112065657013, 'gamma': 0.8719197460192358, 'grow_policy': 'lossguide', 'learning_rate': 0.017128683475627633, 'max_depth': 3, 'max_leaves': 90, 'min_child_weight': 24.069071892146443, 'reg_alpha': 1.6629513614106732e-09, 'reg_lambda': 6.604683530146009, 'subsample': 0.9673899545410138}
    0.585309  0.582354 0.645341       0.508571      0.023196       1546   {'colsample_bynode': 0.6558057401247497, 'colsample_bytree': 0.8292325704047483, 'gamma': 0.0012898201800603955, 'grow_policy': 'lossguide', 'learning_rate': 0.008606684434368822, 'max_depth': 2, 'max_leaves': 48, 'min_child_weight': 29.34816680866506, 'reg_alpha': 1.5049203166488291e-05, 'reg_lambda': 84.64398106421382, 'subsample': 0.8864915409637483}
    0.585276  0.579220 0.650904       0.508671      0.025674        331    {'colsample_bynode': 0.8778109655320985, 'colsample_bytree': 0.6697440526594851, 'gamma': 0.3467567716188714, 'grow_policy': 'lossguide', 'learning_rate': 0.011046424508353313, 'max_depth': 5, 'max_leaves': 75, 'min_child_weight': 16.432378919707624, 'reg_alpha': 1.1026112761510001e-07, 'reg_lambda': 16.124278458562614, 'subsample': 0.8011807565247405}
    0.584032  0.585607 0.643255       0.514905      0.021353        264   {'colsample_bynode': 0.8981188489185415, 'colsample_bytree': 0.8434869919177677, 'gamma': 0.0028432556862043037, 'grow_policy': 'lossguide', 'learning_rate': 0.03990037190387228, 'max_depth': 2, 'max_leaves': 39, 'min_child_weight': 118.59168268812645, 'reg_alpha': 1.2984462863721065e-05, 'reg_lambda': 14.25620846729104, 'subsample': 0.9294033126383713}
    0.583478  0.579072 0.641864       0.514451      0.022544        894    {'colsample_bynode': 0.7568773511205378, 'colsample_bytree': 0.8199648137551794, 'gamma': 0.015800248790431463, 'grow_policy': 'depthwise', 'learning_rate': 0.011333217908108557, 'max_depth': 2, 'max_leaves': 83, 'min_child_weight': 19.011946259792996, 'reg_alpha': 1.794721153226432e-07, 'reg_lambda': 52.20343909245747, 'subsample': 0.8773715146665251}

Tiempo búsqueda (hh:mm:ss): 02:04:43

###############################################################################################
TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)
###############################################################################################

===============================================================================================
OOF-TOP5 | Model #1
===============================================================================================
FIXED OOF macroF1: 0.5897 | fold std: 0.0241
PARAMS: {'best_n_estimators': 172, 'colsample_bynode': 0.7810890415965769, 'colsample_bytree': 0.9827500072434707, 'gamma': 0.3881791159604766, 'grow_policy': 'depthwise', 'learning_rate': 0.0252342845499294, 'max_depth': 3, 'max_leaves': 98, 'min_child_weight': 13.109303695322625, 'reg_alpha': 3.912811440759049e-06, 'reg_lambda': 5.859674932405375, 'subsample': 0.7000033862726793}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7831 | test=0.6218
bal_acc  train=0.7779 | test=0.6158
acc      train=0.8060 | test=0.6685
F1_w test=0.6747 | F1(Blazar) test=0.5867
GAP macro-F1 (train-test) = 0.1613

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     303       8   89
Blazar   23     136   27
QSO     109      23  720

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      57       3   39
Blazar   16      22    3
QSO      49       9  161

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.70      0.76      0.73       400
      Blazar       0.81      0.73      0.77       186
         QSO       0.86      0.85      0.85       852

    accuracy                           0.81      1438
   macro avg       0.79      0.78      0.78      1438
weighted avg       0.81      0.81      0.81      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.47      0.58      0.52        99
      Blazar       0.65      0.54      0.59        41
         QSO       0.79      0.74      0.76       219

    accuracy                           0.67       359
   macro avg       0.64      0.62      0.62       359
weighted avg       0.69      0.67      0.67       359


===============================================================================================
OOF-TOP5 | Model #2
===============================================================================================
FIXED OOF macroF1: 0.5896 | fold std: 0.0227
PARAMS: {'best_n_estimators': 143, 'colsample_bynode': 0.8894815592792549, 'colsample_bytree': 0.8040533728088605, 'gamma': 0.0003966107188259859, 'grow_policy': 'depthwise', 'learning_rate': 0.04192887667353639, 'max_depth': 4, 'max_leaves': 93, 'min_child_weight': 28.835199622758644, 'reg_alpha': 6.642948382671036e-09, 'reg_lambda': 54.71536021387711, 'subsample': 0.7988045560719357}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.8049 | test=0.6451
bal_acc  train=0.8073 | test=0.6447
acc      train=0.8303 | test=0.6992
F1_w test=0.7026 | F1(Blazar) test=0.5823
GAP macro-F1 (train-test) = 0.1598

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     329      12   59
Blazar   23     139   24
QSO     102      24  726

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      60       4   35
Blazar   13      23    5
QSO      40      11  168

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.72      0.82      0.77       400
      Blazar       0.79      0.75      0.77       186
         QSO       0.90      0.85      0.87       852

    accuracy                           0.83      1438
   macro avg       0.81      0.81      0.80      1438
weighted avg       0.84      0.83      0.83      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.53      0.61      0.57        99
      Blazar       0.61      0.56      0.58        41
         QSO       0.81      0.77      0.79       219

    accuracy                           0.70       359
   macro avg       0.65      0.64      0.65       359
weighted avg       0.71      0.70      0.70       359


===============================================================================================
OOF-TOP5 | Model #3
===============================================================================================
FIXED OOF macroF1: 0.5887 | fold std: 0.0242
PARAMS: {'best_n_estimators': 97, 'colsample_bynode': 0.7452462872846224, 'colsample_bytree': 0.8766915421894768, 'gamma': 0.00010058922341116494, 'grow_policy': 'depthwise', 'learning_rate': 0.044484168177941416, 'max_depth': 4, 'max_leaves': 85, 'min_child_weight': 39.873599110055316, 'reg_alpha': 1.4931726672035842e-06, 'reg_lambda': 3.2439692898781605, 'subsample': 0.691952878566789}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7909 | test=0.6518
bal_acc  train=0.7930 | test=0.6488
acc      train=0.8199 | test=0.7047
F1_w test=0.7066 | F1(Blazar) test=0.6076
GAP macro-F1 (train-test) = 0.1391

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     313      16   71
Blazar   25     138   23
QSO      98      26  728

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      57       3   39
Blazar   14      24    3
QSO      36      11  172

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.72      0.78      0.75       400
      Blazar       0.77      0.74      0.75       186
         QSO       0.89      0.85      0.87       852

    accuracy                           0.82      1438
   macro avg       0.79      0.79      0.79      1438
weighted avg       0.82      0.82      0.82      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.53      0.58      0.55        99
      Blazar       0.63      0.59      0.61        41
         QSO       0.80      0.79      0.79       219

    accuracy                           0.70       359
   macro avg       0.66      0.65      0.65       359
weighted avg       0.71      0.70      0.71       359


===============================================================================================
OOF-TOP5 | Model #4
===============================================================================================
FIXED OOF macroF1: 0.5887 | fold std: 0.0235
PARAMS: {'best_n_estimators': 83, 'colsample_bynode': 0.994198099313195, 'colsample_bytree': 0.7895885548555936, 'gamma': 1.0070219632034507, 'grow_policy': 'lossguide', 'learning_rate': 0.042119161345132834, 'max_depth': 5, 'max_leaves': 74, 'min_child_weight': 16.571536957884465, 'reg_alpha': 2.557121805028822e-10, 'reg_lambda': 22.89087461386255, 'subsample': 0.8871475266447988}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.8474 | test=0.6402
bal_acc  train=0.8528 | test=0.6360
acc      train=0.8623 | test=0.6880
F1_w test=0.6914 | F1(Blazar) test=0.6154
GAP macro-F1 (train-test) = 0.2072

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     350       5   45
Blazar   15     152   19
QSO      92      22  738

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      55       4   40
Blazar   14      24    3
QSO      42       9  168

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.77      0.88      0.82       400
      Blazar       0.85      0.82      0.83       186
         QSO       0.92      0.87      0.89       852

    accuracy                           0.86      1438
   macro avg       0.85      0.85      0.85      1438
weighted avg       0.87      0.86      0.86      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.50      0.56      0.52        99
      Blazar       0.65      0.59      0.62        41
         QSO       0.80      0.77      0.78       219

    accuracy                           0.69       359
   macro avg       0.65      0.64      0.64       359
weighted avg       0.70      0.69      0.69       359


===============================================================================================
OOF-TOP5 | Model #5
===============================================================================================
FIXED OOF macroF1: 0.5857 | fold std: 0.0195
PARAMS: {'best_n_estimators': 154, 'colsample_bynode': 0.7185940018467009, 'colsample_bytree': 0.6658295511186884, 'gamma': 0.003936623559141479, 'grow_policy': 'lossguide', 'learning_rate': 0.03486482325766769, 'max_depth': 4, 'max_leaves': 68, 'min_child_weight': 48.986105915986826, 'reg_alpha': 0.4493172298007678, 'reg_lambda': 24.93469224439704, 'subsample': 0.7465997137078901}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7647 | test=0.6298
bal_acc  train=0.7681 | test=0.6319
acc      train=0.7969 | test=0.6825
F1_w test=0.6879 | F1(Blazar) test=0.5750
GAP macro-F1 (train-test) = 0.1349

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     305      17   78
Blazar   27     132   27
QSO     111      32  709

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      58       5   36
Blazar   15      23    3
QSO      44      11  164

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.69      0.76      0.72       400
      Blazar       0.73      0.71      0.72       186
         QSO       0.87      0.83      0.85       852

    accuracy                           0.80      1438
   macro avg       0.76      0.77      0.76      1438
weighted avg       0.80      0.80      0.80      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.50      0.59      0.54        99
      Blazar       0.59      0.56      0.57        41
         QSO       0.81      0.75      0.78       219

    accuracy                           0.68       359
   macro avg       0.63      0.63      0.63       359
weighted avg       0.70      0.68      0.69       359


Tiempo eval TOP-5 (hh:mm:ss): 00:00:33
Tiempo TOTAL (hh:mm:ss): 02:05:16


Tiempo métricas tabla LaTeX (hh:mm:ss): 00:23:16

Tabla resumen (para elegir mejor modelo):
Modelo   AUC_CV    SD_CV   Gap_CV  AUC_rep   SD_rep  Gap_rep  Acc_test  F1_w_test
 XGB_3 0.757542 0.021354 0.179264 0.761073 0.016049 0.177197  0.704735   0.706641
 XGB_2 0.756722 0.021016 0.173117 0.761123 0.016055 0.176664  0.699164   0.702616
 XGB_4 0.757981 0.019249 0.203570 0.760244 0.013963 0.202548  0.688022   0.691402
 XGB_5 0.761508 0.021281 0.169757 0.763813 0.015937 0.163591  0.682451   0.687910
 XGB_1 0.759685 0.017415 0.178914 0.763062 0.013461 0.173777  0.668524   0.674723

--- LaTeX rows (pegables dentro de tu tabular) ---
XGB_1 & 0.7597 & 0.0174 & 0.1789 & 0.7631 & 0.0135 & 0.1738 & 0.6685 & 0.6747 \\
XGB_2 & 0.7567 & 0.0210 & 0.1731 & 0.7611 & 0.0161 & 0.1767 & 0.6992 & 0.7026 \\
XGB_3 & 0.7575 & 0.0214 & 0.1793 & 0.7611 & 0.0160 & 0.1772 & 0.7047 & 0.7066 \\
XGB_4 & 0.7580 & 0.0192 & 0.2036 & 0.7602 & 0.0140 & 0.2025 & 0.6880 & 0.6914 \\
XGB_5 & 0.7615 & 0.0213 & 0.1698 & 0.7638 & 0.0159 & 0.1636 & 0.6825 & 0.6879 \\

# ============================================================
# MATRICES DE CONFUSIÓN (TRAIN vs TEST) — XGBOOST TOP-5
# ESIG FIRMA — DATOS REALES
# Estilo igual a RF, con colormap VERDE ("Greens")
# Guarda 5 imágenes en /home/felorrieta/Catalina:
# XGB_ESIG_FIRMA_REALES_1.png ... XGB_ESIG_FIRMA_REALES_5.png
# ============================================================

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from pathlib import Path

from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

from xgboost import XGBClassifier

# Start the timer for the figure-saving stage.
start_time_save = time.time()

# -----------------------------
# Output folder (Linux)
# -----------------------------
downloads = Path("/home/felorrieta/Catalina")
downloads.mkdir(parents=True, exist_ok=True)

# -----------------------------
# Checks: topk (or results) and the prepared data must already exist
# -----------------------------
# Expected variables:
# X_train_np, X_test_np, y_train_enc, y_test_enc, labels, n_classes, class_weight_dict
required = [
    "X_train_np",
    "X_test_np",
    "y_train_enc",
    "y_test_enc",
    "labels",
    "n_classes",
    "class_weight_dict",
]
missing = [var_name for var_name in required if var_name not in globals()]
if missing:
    raise ValueError(f"Faltan variables en tu notebook: {missing}")

# topk: DataFrame with columns 'params' and 'best_n_cv'.
# When it is absent, rebuild it from the first rows of `results`.
if "topk" not in globals():
    if "results" not in globals():
        raise ValueError("No encuentro 'topk' ni 'results'. Necesito topk=results.head(5) de la búsqueda OOF.")
    topk = results.head(5).reset_index(drop=True)

# Keep at most TOP_K rows and give them a fresh 0..n-1 index.
TOP_K = 5
topk = topk.head(TOP_K).reset_index(drop=True)

# Make sure everything downstream works on NumPy arrays.
labels = np.array(labels)
X_train_np, X_test_np = np.asarray(X_train_np), np.asarray(X_test_np)
y_train_enc, y_test_enc = np.asarray(y_train_enc), np.asarray(y_test_enc)

def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0

def save_confusion_train_test(cm_train, cm_test, labels, outpath,
                              title_prefix="", subtitle="",
                              gap_width=0.28, wspace=0.15,
                              label_fontsize=13, tick_fontsize=13, title_fontsize=14,
                              cmap="Greens"):
    """
    Save the TRAIN and TEST confusion matrices side by side in one image.

    Each cell shows the row percentage in large bold text with the raw
    count, in parentheses and smaller, underneath.

    Args:
        cm_train: Count confusion matrix for the training set.
        cm_test: Count confusion matrix for the test set.
        labels: Class names used as tick labels on both axes.
        outpath: Destination handed to ``fig.savefig``.
        title_prefix: First line of the figure title.
        subtitle: Second line of the figure title.
        gap_width: Relative width of the blank column between both panels.
        wspace: Horizontal spacing passed to ``GridSpec``.
        label_fontsize: Font size of the axis labels.
        tick_fontsize: Font size of the tick labels.
        title_fontsize: Font size of the per-panel titles.
        cmap: Colormap used for both heatmaps.

    Returns:
        None. The figure is written to ``outpath`` and then closed.
    """
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    # Loop-invariant settings: fixed 0-100 colour scale, and the percentage
    # above which the cell text switches to white for contrast.
    vmin, vmax = 0, 100
    thr = 50
    tick_positions = np.arange(len(labels))

    fig = plt.figure(figsize=(10.8, 4.8))
    try:
        # Columns: train panel | blank spacer | test panel | colorbar.
        gs = gridspec.GridSpec(
            1, 4,
            width_ratios=[1, gap_width, 1, 0.08],
            wspace=wspace
        )

        ax1 = fig.add_subplot(gs[0, 0])
        ax_gap = fig.add_subplot(gs[0, 1])
        ax2 = fig.add_subplot(gs[0, 2])
        ax_cbar = fig.add_subplot(gs[0, 3])
        ax_gap.axis("off")

        panels = [
            (ax1, cm_tr_pct, cm_train, "Train"),
            (ax2, cm_te_pct, cm_test,  "Test"),
        ]

        for ax, cm_pct, cm_cnt, t in panels:
            im = ax.imshow(cm_pct, cmap=cmap, vmin=vmin, vmax=vmax)
            ax.set_title(t, fontsize=title_fontsize)

            ax.set_xticks(tick_positions)
            ax.set_yticks(tick_positions)
            ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
            ax.set_yticklabels(labels, fontsize=tick_fontsize)

            ax.set_xlabel("Predicho", fontsize=label_fontsize)
            ax.set_ylabel("Real", fontsize=label_fontsize)

            for row in range(cm_pct.shape[0]):
                for col in range(cm_pct.shape[1]):
                    pct = cm_pct[row, col]
                    cnt = int(cm_cnt[row, col])
                    color_txt = "white" if pct > thr else "black"

                    # Percentage slightly above the cell centre, count below.
                    ax.text(col, row - 0.10, f"{pct:.1f}%",
                            ha="center", va="center",
                            color=color_txt, fontsize=10, fontweight="bold")
                    ax.text(col, row + 0.22, f"({cnt})",
                            ha="center", va="center",
                            color=color_txt, fontsize=7)

        # Both panels share the same colour scale, so the last image can
        # drive the single colorbar.
        fig.colorbar(im, cax=ax_cbar, label="% por fila (clase real)")
        fig.suptitle(f"{title_prefix}\n{subtitle}", fontsize=13, y=0.98)

        fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

        fig.savefig(outpath, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving raises; otherwise
        # failed figures accumulate in pyplot's registry.
        plt.close(fig)

def to_plain_params(d):
    """Return a copy of ``d`` with NumPy scalar values as built-in Python scalars.

    NumPy floating, integer and boolean scalars become ``float``, ``int`` and
    ``bool`` respectively; every other value is passed through unchanged.
    Converting integers and booleans as well as floats keeps the printed or
    serialised parameters free of NumPy scalar types.

    Args:
        d: Mapping of parameter name to value.

    Returns:
        A new ``dict`` with the same keys in the same order.
    """
    out = {}
    for k, v in d.items():
        if isinstance(v, np.floating):
            out[k] = float(v)
        elif isinstance(v, np.integer):
            out[k] = int(v)
        elif isinstance(v, np.bool_):
            out[k] = bool(v)
        else:
            out[k] = v
    return out

def make_sample_weight(y_enc, cw_dict):
    """Look up the weight of every encoded label.

    Args:
        y_enc: Iterable of encoded class ids; each one is cast to ``int``
            before the lookup.
        cw_dict: Mapping of class id to weight.

    Returns:
        1-D float array with one weight per element of ``y_enc``.

    Raises:
        KeyError: If a label has no entry in ``cw_dict``.
    """
    lookups = (cw_dict[int(label)] for label in y_enc)
    return np.fromiter(lookups, dtype=float)

# -----------------------------
# Preprocess fijo (fit SOLO con train)
# igual para todos los modelos
# -----------------------------
# Median imputation: statistics come from the training matrix only.
imp = SimpleImputer(strategy="median")
imp.fit(X_train_np)
Xtr_i, Xte_i = imp.transform(X_train_np), imp.transform(X_test_np)

# Variance filter with threshold 0.0, also fitted on the training data only.
vt = VarianceThreshold(threshold=0.0)
vt.fit(Xtr_i)
Xtr, Xte = vt.transform(Xtr_i), vt.transform(Xte_i)

# One weight per training sample, looked up from the class-weight table.
wtr = make_sample_weight(y_train_enc, class_weight_dict)

# -----------------------------
# TOP-5 desde topk (en el orden 0..4)
# random_state = 100 + i para reproducibilidad
# -----------------------------
# One (name, n_estimators, params) triple per topk row, in row order.
# n_estimators falls back to 800 when topk has no 'best_n_cv' column.
top5_in_order = [
    (
        f"XGB_{i+1}",
        int(topk.loc[i, "best_n_cv"]) if "best_n_cv" in topk.columns else 800,
        to_plain_params(topk.loc[i, "params"]),
    )
    for i in range(len(topk))
]

# -----------------------------
# Entrenar + guardar imágenes
# -----------------------------
for i, (name, best_n, params_i) in enumerate(top5_in_order):
    # Settings shared by every model; the seed is 100 + i so each model gets
    # its own fixed seed.
    fixed_kwargs = dict(
        random_state=100 + i,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=best_n,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
    )
    # Two separate ** unpackings: a key present in both dicts raises
    # TypeError instead of being silently overridden.
    xgb_clf = XGBClassifier(**fixed_kwargs, **params_i)
    xgb_clf.fit(Xtr, y_train_enc, sample_weight=wtr, verbose=False)

    # Hard predictions = most probable class.
    proba_tr = xgb_clf.predict_proba(Xtr)
    proba_te = xgb_clf.predict_proba(Xte)
    y_pred_tr = np.argmax(proba_tr, axis=1)
    y_pred_te = np.argmax(proba_te, axis=1)

    cm_train = confusion_matrix(y_train_enc, y_pred_tr)
    cm_test = confusion_matrix(y_test_enc, y_pred_te)

    # Headline numbers shown in the figure subtitle.
    acc_tr = accuracy_score(y_train_enc, y_pred_tr)
    acc_te = accuracy_score(y_test_enc, y_pred_te)
    f1w_tr = f1_score(y_train_enc, y_pred_tr, average="weighted", zero_division=0)
    f1w_te = f1_score(y_test_enc, y_pred_te, average="weighted", zero_division=0)

    subtitle = f"Acc train={acc_tr:.3f} | Acc test={acc_te:.3f} | F1w train={f1w_tr:.3f} | F1w test={f1w_te:.3f}"

    outpath = downloads / f"XGB_ESIG_FIRMA_REALES_{i+1}.png"
    save_confusion_train_test(
        cm_train,
        cm_test,
        labels,
        outpath=outpath,
        title_prefix=f"{name} (ESIG firma, datos reales) | n_estimators={best_n}",
        subtitle=subtitle,
        gap_width=0.28,
        wspace=0.15,
        cmap="Greens",
    )

    print(f"✅ Guardado: {outpath}")

# Whole seconds since start_time_save, split into hours / minutes / seconds.
elapsed = int(time.time() - start_time_save)
h, r = divmod(elapsed, 3600)
m, s = divmod(r, 60)
print(f"\nTiempo total guardando figuras: {h:02d}:{m:02d}:{s:02d}")

✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_FIRMA_REALES_1.png
✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_FIRMA_REALES_2.png
✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_FIRMA_REALES_3.png
✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_FIRMA_REALES_4.png
✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_FIRMA_REALES_5.png

Tiempo total guardando figuras: 00:00:35

XGB: ESIG - LOGFIRMA

# ============================================================
# XGB OOF-first (5x2) — ESIG LOG-FIRMA, DATOS REALES
# + FIXED OOF (no overwrite in repeated CV)
# + TOP-5 evaluation: TRAIN & TEST confusion matrices + reports
# + Timing (search / top5 eval / total)
# + Extra: build a LaTeX-ready table for TOP-5
# ============================================================

import time
import inspect
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (
    StratifiedKFold,
    RepeatedStratifiedKFold,
    ParameterSampler
)
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    classification_report,
    accuracy_score,
    f1_score,
    balanced_accuracy_score
)
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import randint, uniform, loguniform

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import xgboost as xgb
from xgboost import XGBClassifier


# ============================================================
# 1) LOAD + MERGE + SPLIT (80/20)  [ESIG LOG-FIRMA, DATOS REALES]
# ============================================================
# Feature table and label catalogue.
x = pd.read_csv('/home/felorrieta/Catalina/logsignature_esig_REALES_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
# Copy "oid" into an "id" column so both tables share the join key.
y["id"] = y["oid"]

data = x.merge(y, on="id")

# Random 80/20 split by row: sampled rows form the training set, the
# remaining rows form the test set.
train_idx = data.sample(frac=0.8, random_state=42).index
data_train = data.loc[train_idx].reset_index(drop=True)
data_test = data.loc[~data.index.isin(train_idx)].reset_index(drop=True)

# Everything except identifiers and label columns is a feature.
non_feature_cols = ['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id']
X_train = data_train.drop(columns=non_feature_cols)
y_train = data_train['survey_class_mapped']

# The test matrix reuses the training columns, in the same order.
X_test = data_test[X_train.columns].copy()
y_test = data_test['survey_class_mapped']

X_train_np, X_test_np = np.asarray(X_train), np.asarray(X_test)

print("Shapes:", X_train.shape, X_test.shape)

# ============================================================
# 2) Encoding + class weights (+ optional HARD_FACTOR)
# ============================================================
# Encode the class names as integers; the encoder is fitted on train only.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

labels = le.classes_
n_classes = len(labels)

# "balanced" weights per encoded class, stored as plain int -> float.
classes = np.unique(y_train_enc)
cw = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_enc)
class_weight_dict = dict(zip(map(int, classes), map(float, cw)))

# Optional: multiply the weight of the listed classes by HARD_FACTOR.
HARD_CLASSES = ["AGN", "QSO"]
HARD_FACTOR = 1.8

for name in (HARD_CLASSES or ()):
    # Names that are not among the encoded labels are skipped.
    if name not in labels:
        continue
    hid = int(np.flatnonzero(labels == name)[0])
    class_weight_dict[hid] *= HARD_FACTOR

def make_sample_weight(y_enc, cw_dict):
    """Return the per-sample weight vector for the encoded labels ``y_enc``.

    Each label is cast to ``int`` and looked up in ``cw_dict``; a label with
    no entry raises ``KeyError``. The result is a 1-D float array with the
    same length as ``y_enc``.
    """
    per_sample = (cw_dict[int(cls)] for cls in y_enc)
    return np.fromiter(per_sample, dtype=float)

# Encoded id of the "Blazar" class, or None when the label set has no such class.
BLZ_ID = int(np.where(labels == "Blazar")[0][0]) if "Blazar" in labels else None

def f1_blazar(y_true, y_pred):
    """Binary F1 score of the Blazar class against all other classes.

    Args:
        y_true: Encoded true labels (array or plain sequence).
        y_pred: Encoded predicted labels (array or plain sequence).

    Returns:
        The F1 score with Blazar as the positive class, or ``np.nan`` when
        ``BLZ_ID`` is ``None``.
    """
    if BLZ_ID is None:
        return np.nan
    # np.asarray makes plain lists work too: `list == int` is a single bool,
    # not an element-wise mask, and has no .astype.
    true_is_blz = (np.asarray(y_true) == BLZ_ID).astype(int)
    pred_is_blz = (np.asarray(y_pred) == BLZ_ID).astype(int)
    # zero_division=0 matches the other metric calls in this file: the score
    # is 0 when Blazar is absent from both vectors, without a warning.
    return f1_score(true_is_blz, pred_is_blz, zero_division=0)

# Sanity-check printout: class names, number of training samples per encoded
# class id, and the class weights after any HARD_FACTOR adjustment.
print("Labels:", list(labels))
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))
print("Class weights:", class_weight_dict)

# ============================================================
# 3) Preprocess + XGB fit (early stopping)
# ============================================================
def preprocess_fit_transform(X_tr_raw, X_va_raw):
    """Impute and variance-filter a train/validation pair.

    Both transformers are fitted on ``X_tr_raw`` only and then applied to
    ``X_va_raw``: median imputation first, then ``VarianceThreshold`` with a
    threshold of 0.0.

    Returns:
        Tuple ``(X_tr, X_va, imputer, selector)`` holding the transformed
        matrices and the two fitted transformers.
    """
    imputer = SimpleImputer(strategy="median")
    selector = VarianceThreshold(threshold=0.0)

    train_ready = selector.fit_transform(imputer.fit_transform(X_tr_raw))
    valid_ready = selector.transform(imputer.transform(X_va_raw))
    return train_ready, valid_ready, imputer, selector

def preprocess_full(X_train_raw, X_test_raw):
    """Apply the median-imputation and variance-filter steps to train and test.

    Each step is fitted on the (already transformed) training matrix and then
    applied to the test matrix.

    Returns:
        Tuple ``(X_train, X_test, imputer, selector)`` holding the transformed
        matrices and the two fitted transformers.
    """
    imputer = SimpleImputer(strategy="median")
    selector = VarianceThreshold(0.0)

    train_out, test_out = X_train_raw, X_test_raw
    for step in (imputer, selector):
        train_out = step.fit_transform(train_out)
        test_out = step.transform(test_out)
    return train_out, test_out, imputer, selector

def fit_xgb_one_fold(params, X_tr, y_tr, X_va, y_va, w_tr=None, w_va=None):
    """Fit one XGBClassifier with early stopping on ``(X_va, y_va)``.

    The model gets an upper bound of 20000 trees plus an ``EarlyStopping``
    callback (250 rounds, ``save_best=True``) evaluated on the validation
    pair with the ``mlogloss`` metric.

    Args:
        params: Extra XGBClassifier keyword arguments. A key that duplicates
            one of the fixed settings raises ``TypeError``.
        X_tr, y_tr: Training features and encoded labels.
        X_va, y_va: Validation features and labels, used as the ``eval_set``.
        w_tr: Optional per-sample training weights.
        w_va: Optional per-sample validation weights. They are only forwarded
            when ``fit`` accepts ``sample_weight_eval_set``.

    Returns:
        The fitted ``XGBClassifier``.
    """
    stopper = xgb.callback.EarlyStopping(rounds=250, save_best=True)
    model = XGBClassifier(
        random_state=42,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=20000,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        **params,
        callbacks=[stopper],
    )

    # Validation weights are optional, and the fit signature is inspected
    # before passing them so the call also works when the parameter is absent.
    optional = {}
    if w_va is not None and "sample_weight_eval_set" in inspect.signature(model.fit).parameters:
        optional["sample_weight_eval_set"] = [w_va]

    model.fit(
        X_tr,
        y_tr,
        sample_weight=w_tr,
        eval_set=[(X_va, y_va)],
        verbose=False,
        **optional,
    )
    return model

def to_plain_params(d):
    """Return a copy of ``d`` with NumPy scalar values as built-in Python scalars.

    NumPy floating, integer and boolean scalars become ``float``, ``int`` and
    ``bool``; any other value is kept as is. Handling integers and booleans
    as well as floats keeps printed or serialised parameter dicts free of
    NumPy scalar types.

    Args:
        d: Mapping of parameter name to value.

    Returns:
        A new ``dict`` with the same keys in the same order.
    """
    def _plain(v):
        if isinstance(v, np.floating):
            return float(v)
        if isinstance(v, np.integer):
            return int(v)
        if isinstance(v, np.bool_):
            return bool(v)
        return v

    return {k: _plain(v) for k, v in d.items()}

# ============================================================
# 4) FIXED OOF eval for repeated CV (accumulate & average)
# ============================================================
def oof_eval_xgb(params, X, y, cv):
    """Score one hyper-parameter set from out-of-fold (OOF) predictions.

    For each split of ``cv`` the preprocessing is fitted on the training
    part, a model is trained with early stopping on the held-out part, and
    the held-out class probabilities are accumulated. With a repeated CV a
    row is held out several times; its probabilities are averaged over those
    occurrences before the OOF metrics are computed.

    Args:
        params: Sampled ``XGBClassifier`` keyword arguments.
        X: Feature array, indexable by the integer index arrays from ``cv``.
        y: Encoded label array aligned with ``X``.
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Dict with OOF macro-F1, balanced accuracy, accuracy and Blazar F1,
        the mean/std of the per-fold macro-F1, the mean per-fold balanced
        accuracy and Blazar F1, and ``best_n_cv`` (median ``best_iteration``
        over the folds plus one, or 800 when no fold reports one).
    """
    n_rows = len(y)
    n_labels = len(np.unique(y))

    # Running totals so rows validated more than once can be averaged.
    proba_total = np.zeros((n_rows, n_labels), dtype=float)
    times_held_out = np.zeros(n_rows, dtype=float)

    per_fold = {"f1m": [], "bacc": [], "f1blz": []}
    stop_rounds = []

    for fit_rows, held_rows in cv.split(X, y):
        y_fit, y_held = y[fit_rows], y[held_rows]
        X_fit, X_held, _, _ = preprocess_fit_transform(X[fit_rows], X[held_rows])

        fit_weights = make_sample_weight(y_fit, class_weight_dict)
        held_weights = make_sample_weight(y_held, class_weight_dict)
        model = fit_xgb_one_fold(
            params, X_fit, y_fit, X_held, y_held,
            w_tr=fit_weights, w_va=held_weights,
        )

        held_proba = model.predict_proba(X_held)
        proba_total[held_rows] += held_proba
        times_held_out[held_rows] += 1.0

        held_pred = np.argmax(held_proba, axis=1)
        per_fold["f1m"].append(f1_score(y_held, held_pred, average="macro"))
        per_fold["bacc"].append(balanced_accuracy_score(y_held, held_pred))
        per_fold["f1blz"].append(f1_blazar(y_held, held_pred))

        # Absent when the model exposes no best_iteration attribute.
        stop_rounds.append(getattr(model, "best_iteration", None))

    # The small epsilon guards the division for rows that were never held out.
    proba_oof = proba_total / (times_held_out[:, None] + 1e-12)
    pred_oof = np.argmax(proba_oof, axis=1)

    known_rounds = [r for r in stop_rounds if r is not None]
    if known_rounds:
        best_n_cv = int(np.median(known_rounds) + 1)
    else:
        best_n_cv = 800

    return {
        "oof_macroF1": float(f1_score(y, pred_oof, average="macro")),
        "oof_bacc": float(balanced_accuracy_score(y, pred_oof)),
        "oof_acc": float(accuracy_score(y, pred_oof)),
        "oof_f1_blazar": float(f1_blazar(y, pred_oof)),
        "fold_f1m_mean": float(np.mean(per_fold["f1m"])),
        "fold_f1m_std": float(np.std(per_fold["f1m"])),
        "fold_bacc_mean": float(np.mean(per_fold["bacc"])),
        "fold_f1blz_mean": float(np.mean(per_fold["f1blz"])),
        "best_n_cv": int(best_n_cv),
    }

# ============================================================
# 5) Search space + OOF-first run
# ============================================================
# Wall-clock start of the whole run; compared with TOTAL_T1 after the TOP-K evaluation.
TOTAL_T0 = time.time()

# 5 stratified folds repeated twice (10 fits per sampled configuration).
cv_rep = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

# Random-search space.
# - scipy's uniform(loc, scale) covers [loc, loc + scale], so the three
#   sampling fractions range over [0.65, 1.0].
# - scipy's randint(low, high) excludes `high`: max_depth is 2..5 and
#   max_leaves is 16..128.
param_dist = {
    "learning_rate": loguniform(0.007, 0.06),
    "max_depth": randint(2, 6),
    "min_child_weight": loguniform(10.0, 150.0),
    "subsample": uniform(0.65, 0.35),
    "colsample_bytree": uniform(0.65, 0.35),
    "colsample_bynode": uniform(0.65, 0.35),
    "gamma": loguniform(1e-4, 8.0),
    "reg_alpha": loguniform(1e-10, 1.0),
    "reg_lambda": loguniform(1.0, 200.0),
    "grow_policy": ["depthwise", "lossguide"],
    "max_leaves": randint(16, 129),
}

# N_ITER: number of sampled configurations; TOP_K: how many of the best are kept
# for the full-train evaluation further below.
N_ITER = 30
TOP_K  = 5

# Materialize the sampled configurations up front (fixed seed => reproducible list).
sampler = list(ParameterSampler(param_dist, n_iter=N_ITER, random_state=42))

print("\n########## XGB OOF-first (5x2) — ESIG LOG-FIRMA REALES ##########")
print(f"N_ITER={N_ITER} | CV=5x2 | TOP_K={TOP_K}")

SEARCH_T0 = time.time()
rows = []

# One repeated-CV OOF evaluation per sampled configuration; each row stores the
# parameter dict next to the metrics returned by oof_eval_xgb.
for params in tqdm(sampler, total=N_ITER, desc="OOF trials"):
    stats = oof_eval_xgb(params, X_train_np, y_train_enc, cv_rep)
    rows.append({"params": params, **stats})

results = pd.DataFrame(rows)

# Ranking: highest OOF macro-F1 first; ties broken by lower per-fold std,
# then higher OOF balanced accuracy, then higher OOF Blazar F1.
results = results.sort_values(
    ["oof_macroF1", "fold_f1m_std", "oof_bacc", "oof_f1_blazar"],
    ascending=[False, True, False, False]
).reset_index(drop=True)

SEARCH_T1 = time.time()

print("\nTOP 10 by FIXED OOF macro-F1 (and stability):")
print(results.head(10)[[
    "oof_macroF1", "oof_bacc", "oof_acc", "oof_f1_blazar",
    "fold_f1m_std", "best_n_cv", "params"
]].to_string(index=False))

# time.gmtime turns the elapsed seconds into a struct so strftime can render hh:mm:ss.
print("\nTiempo búsqueda (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(SEARCH_T1 - SEARCH_T0)))

# ============================================================
# 6) Train TOP-5 on full-train and show TRAIN+TEST confusion matrices
# ============================================================
def _row_normalize(cm):
    """Convert a confusion matrix of counts into row percentages.

    Each row is divided by its own total and scaled to 0-100. Rows whose
    total is zero are divided by 1 instead, so they stay all-zero. The input
    array is not modified.
    """
    counts = cm.astype(float)
    totals = counts.sum(axis=1, keepdims=True)
    # Replace zero totals by 1.0 to avoid a 0/0 division on empty rows.
    safe_totals = np.where(totals == 0, 1.0, totals)
    return (counts / safe_totals) * 100.0

def save_confusion_train_test(cm_train, cm_test, labels, outpath,
                              title_prefix="", subtitle="",
                              gap_width=0.28, wspace=0.15,
                              label_fontsize=13, tick_fontsize=13, title_fontsize=14,
                              cmap="Greens"):
    """Draw the train and test confusion matrices side by side and save the figure.

    Both matrices are shown as row-normalized percentages (0-100 colour
    scale). Every cell is annotated with its percentage and, below it, the
    raw count in parentheses. The figure is written to ``outpath`` at 300 dpi
    and then closed.

    Args:
        cm_train, cm_test: Confusion matrices of counts (rows = true class).
        labels: Class names used for both tick axes.
        outpath: Destination file path of the image.
        title_prefix, subtitle: The two lines of the figure title.
        gap_width: Relative width of the empty column between the panels.
        wspace: Horizontal spacing passed to the ``GridSpec``.
        label_fontsize, tick_fontsize, title_fontsize: Font sizes.
        cmap: Matplotlib colormap name for the heatmaps.
    """
    pct_train = _row_normalize(cm_train)
    pct_test = _row_normalize(cm_test)

    # Cells above this row percentage get white text, the others black.
    white_text_above = 50
    n_labels = len(labels)

    fig = plt.figure(figsize=(10.8, 4.8))
    grid = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)

    # Grid columns: train heatmap, empty spacer, test heatmap, colorbar.
    ax_train, ax_spacer, ax_test, ax_colorbar = [
        fig.add_subplot(grid[0, col]) for col in range(4)
    ]
    ax_spacer.axis("off")

    for axis, percents, counts, panel_title in (
        (ax_train, pct_train, cm_train, "Train"),
        (ax_test, pct_test, cm_test, "Test"),
    ):
        heatmap = axis.imshow(percents, cmap=cmap, vmin=0, vmax=100)
        axis.set_title(panel_title, fontsize=title_fontsize)

        axis.set_xticks(np.arange(n_labels))
        axis.set_yticks(np.arange(n_labels))
        axis.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        axis.set_yticklabels(labels, fontsize=tick_fontsize)

        axis.set_xlabel("Predicho", fontsize=label_fontsize)
        axis.set_ylabel("Real", fontsize=label_fontsize)

        # Row-major walk over the cells: percentage on top, raw count below.
        for (r, c), pct in np.ndenumerate(percents):
            text_color = "white" if pct > white_text_above else "black"
            axis.text(c, r - 0.10, f"{pct:.1f}%",
                      ha="center", va="center",
                      color=text_color, fontsize=10, fontweight="bold")
            axis.text(c, r + 0.22, f"({int(counts[r, c])})",
                      ha="center", va="center",
                      color=text_color, fontsize=7)

    # Both panels share the 0-100 scale, so the last image can feed the colorbar.
    fig.colorbar(heatmap, cax=ax_colorbar, label="% por fila (clase real)")
    fig.suptitle(f"{title_prefix}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fig.savefig(outpath, dpi=300, bbox_inches="tight")
    plt.close(fig)

EVAL_T0 = time.time()

# The TOP_K best configurations from the OOF ranking, re-indexed 0..TOP_K-1 so
# the loop index `i` below can double as a position.
topk = results.head(TOP_K).reset_index(drop=True)

# Preprocessing is fitted once on the full training split and applied to test.
X_train_v, X_test_v, _, _ = preprocess_full(X_train_np, X_test_np)
w_train_full = make_sample_weight(y_train_enc, class_weight_dict)

print("\n" + "#" * 95)
print("TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)")
print("#" * 95)

# One summary dict per evaluated model; read again by the LaTeX-table section.
top5_summary = []

# Output directory for the confusion-matrix images.
downloads = "/home/felorrieta/Catalina"
os.makedirs(downloads, exist_ok=True)

for i, row in topk.iterrows():
    params = row["params"]
    # Number of boosting rounds taken from the CV early-stopping summary
    # (best_n_cv); no early stopping is used in this final fit.
    best_n = int(row["best_n_cv"])

    model = XGBClassifier(
        random_state=100 + i,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=best_n,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        **params
    )
    model.fit(X_train_v, y_train_enc, sample_weight=w_train_full, verbose=False)

    p_tr = model.predict_proba(X_train_v)
    p_te = model.predict_proba(X_test_v)

    # Hard predictions = class with the highest predicted probability.
    yhat_tr = np.argmax(p_tr, axis=1)
    yhat_te = np.argmax(p_te, axis=1)

    f1m_tr = f1_score(y_train_enc, yhat_tr, average="macro")
    f1m_te = f1_score(y_test_enc,  yhat_te, average="macro")
    bacc_tr = balanced_accuracy_score(y_train_enc, yhat_tr)
    bacc_te = balanced_accuracy_score(y_test_enc,  yhat_te)
    acc_tr = accuracy_score(y_train_enc, yhat_tr)
    acc_te = accuracy_score(y_test_enc,  yhat_te)
    f1w_te = f1_score(y_test_enc, yhat_te, average="weighted")
    f1blz_te = f1_blazar(y_test_enc, yhat_te)

    print("\n" + "=" * 95)
    print(f"OOF-TOP{TOP_K} | Model #{i+1}")
    print("=" * 95)
    print("FIXED OOF macroF1:", f"{row['oof_macroF1']:.4f}", "| fold std:", f"{row['fold_f1m_std']:.4f}")
    print("PARAMS:", {"best_n_estimators": best_n, **to_plain_params(params)})

    print("\nMÉTRICAS (TRAIN / TEST)")
    print(f"macro-F1 train={f1m_tr:.4f} | test={f1m_te:.4f}")
    print(f"bal_acc  train={bacc_tr:.4f} | test={bacc_te:.4f}")
    print(f"acc      train={acc_tr:.4f} | test={acc_te:.4f}")
    print(f"F1_w test={f1w_te:.4f} | F1(Blazar) test={f1blz_te:.4f}")
    # Train-minus-test macro-F1: a simple overfitting indicator.
    print(f"GAP macro-F1 (train-test) = {f1m_tr - f1m_te:.4f}")

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print("\nMatriz de confusión (TRAIN)")
    print(pd.DataFrame(cm_tr, index=labels, columns=labels))

    print("\nMatriz de confusión (TEST)")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))

    print("\nClassification report (TRAIN)")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))

    print("\nClassification report (TEST)")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    # Second title line of the saved figure; the train weighted-F1 is computed inline
    # because it is not needed anywhere else.
    subtitle = (
        f"Acc train={acc_tr:.3f} | Acc test={acc_te:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_te:.3f}"
    )

    outpath = os.path.join(downloads, f"XGB_ESIG_LOGFIRMA_REALES_{i+1}.png")
    save_confusion_train_test(
        cm_tr, cm_te, labels,
        outpath=outpath,
        title_prefix=f"XGB_{i+1} (ESIG log-firma, datos reales) | n_estimators={best_n}",
        subtitle=subtitle,
        gap_width=0.28,
        wspace=0.15,
        cmap="Greens"
    )
    print(f"✅ Guardado: {outpath}")

    top5_summary.append({
        "Modelo": f"XGB_{i+1}",
        "best_n": best_n,
        "oof_macroF1": row["oof_macroF1"],
        "oof_f1_blazar": row["oof_f1_blazar"],
        "fold_f1m_std": row["fold_f1m_std"],
        "Acc_test": acc_te,
        "F1_w_test": f1w_te,
        "macroF1_test": f1m_te,
        "bacc_test": bacc_te,
    })

# Both timers stop here: TOTAL covers search + TOP-K evaluation, not the LaTeX section.
EVAL_T1 = time.time()
TOTAL_T1 = time.time()

print("\nTiempo eval TOP-5 (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(EVAL_T1 - EVAL_T0)))
print("Tiempo TOTAL (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(TOTAL_T1 - TOTAL_T0)))

# ============================================================
# 7) Extra: Build LaTeX-ready table metrics for TOP-5
# ============================================================
def cv_auc_gap(params, X, y, cv):
    """Cross-validated weighted one-vs-rest AUC and its train/validation gap.

    For every split of ``cv`` the preprocessing is fitted on the training
    part, a model is trained with early stopping on the held-out part, and
    the weighted OvR ROC-AUC is computed on both parts.

    Args:
        params: ``XGBClassifier`` keyword arguments of the configuration.
        X: Feature array, indexable by the integer index arrays from ``cv``.
        y: Encoded label array aligned with ``X``.
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Tuple ``(mean_val_auc, std_val_auc, gap)`` where ``gap`` is the mean
        training AUC minus the mean validation AUC.
    """
    def _weighted_ovr_auc(y_true, proba):
        # The same AUC definition is used for both parts of every fold.
        return roc_auc_score(y_true, proba, multi_class="ovr", average="weighted")

    train_aucs = []
    valid_aucs = []

    for fit_rows, held_rows in cv.split(X, y):
        y_fit, y_held = y[fit_rows], y[held_rows]
        X_fit, X_held, _, _ = preprocess_fit_transform(X[fit_rows], X[held_rows])

        fit_weights = make_sample_weight(y_fit, class_weight_dict)
        held_weights = make_sample_weight(y_held, class_weight_dict)
        model = fit_xgb_one_fold(
            params, X_fit, y_fit, X_held, y_held,
            w_tr=fit_weights, w_va=held_weights,
        )

        train_aucs.append(_weighted_ovr_auc(y_fit, model.predict_proba(X_fit)))
        valid_aucs.append(_weighted_ovr_auc(y_held, model.predict_proba(X_held)))

    valid_mean = float(np.mean(valid_aucs))
    valid_std = float(np.std(valid_aucs))
    return valid_mean, valid_std, float(np.mean(train_aucs) - valid_mean)

# Two CV schemes for the table: a single shuffled 5-fold and the 5x2 repeated
# 5-fold, both seeded with 42.
cv_5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_5x2 = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

LATEX_T0 = time.time()
latex_rows = []

# `topk` was re-indexed 0..len-1, so `i` also addresses the matching entry of
# top5_summary built during the TOP-K evaluation.
for i, row in tqdm(list(topk.iterrows()), total=len(topk), desc="Building LaTeX table metrics"):
    params = row["params"]

    # Each call re-trains the configuration on every fold of the given scheme.
    auc_cv,  sd_cv,  gap_cv  = cv_auc_gap(params, X_train_np, y_train_enc, cv_5)
    auc_rep, sd_rep, gap_rep = cv_auc_gap(params, X_train_np, y_train_enc, cv_5x2)

    # Test metrics are reused from the earlier evaluation, not recomputed.
    acc_test = top5_summary[i]["Acc_test"]
    f1w_test = top5_summary[i]["F1_w_test"]

    latex_rows.append({
        "Modelo": top5_summary[i]["Modelo"],
        "AUC_CV": auc_cv,
        "SD_CV": sd_cv,
        "Gap_CV": gap_cv,
        "AUC_rep": auc_rep,
        "SD_rep": sd_rep,
        "Gap_rep": gap_rep,
        "Acc_test": acc_test,
        "F1_w_test": f1w_test,
    })

LATEX_T1 = time.time()
latex_df = pd.DataFrame(latex_rows)

print("\nTiempo métricas tabla LaTeX (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(LATEX_T1 - LATEX_T0)))
print("\nTabla resumen (para elegir mejor modelo):")
# The printed summary is sorted by test accuracy, then test weighted-F1 (both descending).
print(latex_df.sort_values(["Acc_test", "F1_w_test"], ascending=False).to_string(index=False))

print("\n--- LaTeX rows (pegables dentro de tu tabular) ---")
# The LaTeX rows keep the original model order; each line ends with a LaTeX
# row terminator (two backslashes in the printed output).
for _, r in latex_df.iterrows():
    print(
        f"{r['Modelo']} & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1_w_test']:.4f} \\\\"
    )

Shapes: (1438, 127) (359, 127)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 400, 1: 186, 2: 852}
Class weights: {0: 2.157, 1: 2.5770609318996414, 2: 1.0126760563380282}

########## XGB OOF-first (5x2) — ESIG LOG-FIRMA REALES ##########
N_ITER=30 | CV=5x2 | TOP_K=5


TOP 10 by FIXED OOF macro-F1 (and stability):
 oof_macroF1  oof_bacc  oof_acc  oof_f1_blazar  fold_f1m_std  best_n_cv                                                                                                                                                                                                                                                                                                                                                                params
    0.574601  0.586720 0.623088       0.513854      0.023086        131      {'colsample_bynode': 0.8774873757722178, 'colsample_bytree': 0.8956884070401961, 'gamma': 0.7977063868174838, 'grow_policy': 'lossguide', 'learning_rate': 0.04702827154794471, 'max_depth': 4, 'max_leaves': 107, 'min_child_weight': 47.951030377685704, 'reg_alpha': 2.425164655536382e-06, 'reg_lambda': 35.18457384151612, 'subsample': 0.7648534336161562}
    0.573482  0.583503 0.621697       0.510309      0.022854       1107    {'colsample_bynode': 0.7568773511205378, 'colsample_bytree': 0.8199648137551794, 'gamma': 0.015800248790431463, 'grow_policy': 'depthwise', 'learning_rate': 0.011333217908108557, 'max_depth': 2, 'max_leaves': 83, 'min_child_weight': 19.011946259792996, 'reg_alpha': 1.794721153226432e-07, 'reg_lambda': 52.20343909245747, 'subsample': 0.8773715146665251}
    0.570688  0.580671 0.625174       0.503778      0.024016        101 {'colsample_bynode': 0.7452462872846224, 'colsample_bytree': 0.8766915421894768, 'gamma': 0.00010058922341116494, 'grow_policy': 'depthwise', 'learning_rate': 0.044484168177941416, 'max_depth': 4, 'max_leaves': 85, 'min_child_weight': 39.873599110055316, 'reg_alpha': 1.4931726672035842e-06, 'reg_lambda': 3.2439692898781605, 'subsample': 0.691952878566789}
    0.569917  0.571922 0.625869       0.514745      0.018538        133 {'colsample_bynode': 0.7681653099912698, 'colsample_bytree': 0.9800183963693817, 'gamma': 0.0038432006551539924, 'grow_policy': 'lossguide', 'learning_rate': 0.023218646777297004, 'max_depth': 5, 'max_leaves': 128, 'min_child_weight': 11.921178877039386, 'reg_alpha': 3.460621800595392e-08, 'reg_lambda': 3.6988713446272268, 'subsample': 0.8937064954939259}
    0.569284  0.582240 0.616829       0.507538      0.022707        552    {'colsample_bynode': 0.8830728248161238, 'colsample_bytree': 0.8569542256977045, 'gamma': 0.0022232365438083607, 'grow_policy': 'depthwise', 'learning_rate': 0.019306328518668175, 'max_depth': 2, 'max_leaves': 62, 'min_child_weight': 68.99870818520014, 'reg_alpha': 0.004053689557345564, 'reg_lambda': 19.566613546446423, 'subsample': 0.9198385129840964}
    0.566854  0.577646 0.616134       0.501279      0.023950       1517   {'colsample_bynode': 0.6558057401247497, 'colsample_bytree': 0.8292325704047483, 'gamma': 0.0012898201800603955, 'grow_policy': 'lossguide', 'learning_rate': 0.008606684434368822, 'max_depth': 2, 'max_leaves': 48, 'min_child_weight': 29.34816680866506, 'reg_alpha': 1.5049203166488291e-05, 'reg_lambda': 84.64398106421382, 'subsample': 0.8864915409637483}
    0.565838  0.575905 0.614743       0.500000      0.026245        302      {'colsample_bynode': 0.870898519099042, 'colsample_bytree': 0.928183956239577, 'gamma': 0.029139017098458005, 'grow_policy': 'lossguide', 'learning_rate': 0.029460477479655373, 'max_depth': 2, 'max_leaves': 84, 'min_child_weight': 18.651719774462304, 'reg_alpha': 9.815983028687125e-06, 'reg_lambda': 20.71089276523123, 'subsample': 0.9189939050072081}
    0.563333  0.581882 0.605702       0.522782      0.029166        283  {'colsample_bynode': 0.6652613201140518, 'colsample_bytree': 0.9980926787790694, 'gamma': 0.020145601595883543, 'grow_policy': 'lossguide', 'learning_rate': 0.04997067275212073, 'max_depth': 4, 'max_leaves': 120, 'min_child_weight': 132.09898131000074, 'reg_alpha': 2.0300327610941567e-07, 'reg_lambda': 18.703753458105492, 'subsample': 0.8503023642097934}
    0.561952  0.564766 0.618915       0.501333      0.021375         74    {'colsample_bynode': 0.7519301990693147, 'colsample_bytree': 0.8641485131528328, 'gamma': 0.00048300424915854063, 'grow_policy': 'lossguide', 'learning_rate': 0.05671053612296404, 'max_depth': 4, 'max_leaves': 77, 'min_child_weight': 12.78089996481106, 'reg_alpha': 0.0001527074036654596, 'reg_lambda': 7.586762940448893, 'subsample': 0.9941308100323758}
    0.561851  0.571891 0.609179       0.501292      0.021909        154       {'colsample_bynode': 0.919944621340081, 'colsample_bytree': 0.6759156281069316, 'gamma': 0.005722551691267099, 'grow_policy': 'depthwise', 'learning_rate': 0.049980903508955704, 'max_depth': 2, 'max_leaves': 87, 'min_child_weight': 33.77497290007452, 'reg_alpha': 8.997071356223067e-10, 'reg_lambda': 7.13286363374483, 'subsample': 0.8840944384322625}

Tiempo búsqueda (hh:mm:ss): 00:11:57

###############################################################################################
TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)
###############################################################################################

===============================================================================================
OOF-TOP5 | Model #1
===============================================================================================
FIXED OOF macroF1: 0.5746 | fold std: 0.0231
PARAMS: {'best_n_estimators': 131, 'colsample_bynode': 0.8774873757722178, 'colsample_bytree': 0.8956884070401961, 'gamma': 0.7977063868174838, 'grow_policy': 'lossguide', 'learning_rate': 0.04702827154794471, 'max_depth': 4, 'max_leaves': 107, 'min_child_weight': 47.951030377685704, 'reg_alpha': 2.425164655536382e-06, 'reg_lambda': 35.18457384151612, 'subsample': 0.7648534336161562}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7104 | test=0.5935
bal_acc  train=0.7285 | test=0.5980
acc      train=0.7497 | test=0.6407
F1_w test=0.6471 | F1(Blazar) test=0.5610
GAP macro-F1 (train-test) = 0.1168

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     285      28   87
Blazar   30     129   27
QSO     128      60  664

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      52       6   41
Blazar   13      23    5
QSO      52      12  155

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.64      0.71      0.68       400
      Blazar       0.59      0.69      0.64       186
         QSO       0.85      0.78      0.81       852

    accuracy                           0.75      1438
   macro avg       0.70      0.73      0.71      1438
weighted avg       0.76      0.75      0.75      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.44      0.53      0.48        99
      Blazar       0.56      0.56      0.56        41
         QSO       0.77      0.71      0.74       219

    accuracy                           0.64       359
   macro avg       0.59      0.60      0.59       359
weighted avg       0.66      0.64      0.65       359

✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_LOGFIRMA_REALES_1.png

===============================================================================================
OOF-TOP5 | Model #2
===============================================================================================
FIXED OOF macroF1: 0.5735 | fold std: 0.0229
PARAMS: {'best_n_estimators': 1107, 'colsample_bynode': 0.7568773511205378, 'colsample_bytree': 0.8199648137551794, 'gamma': 0.015800248790431463, 'grow_policy': 'depthwise', 'learning_rate': 0.011333217908108557, 'max_depth': 2, 'max_leaves': 83, 'min_child_weight': 19.011946259792996, 'reg_alpha': 1.794721153226432e-07, 'reg_lambda': 52.20343909245747, 'subsample': 0.8773715146665251}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.6770 | test=0.5612
bal_acc  train=0.6929 | test=0.5607
acc      train=0.7135 | test=0.6100
F1_w test=0.6183 | F1(Blazar) test=0.5316
GAP macro-F1 (train-test) = 0.1158

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     276      24  100
Blazar   40     121   25
QSO     164      59  629

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      48       7   44
Blazar   15      21    5
QSO      59      10  150

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.57      0.69      0.63       400
      Blazar       0.59      0.65      0.62       186
         QSO       0.83      0.74      0.78       852

    accuracy                           0.71      1438
   macro avg       0.67      0.69      0.68      1438
weighted avg       0.73      0.71      0.72      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.39      0.48      0.43        99
      Blazar       0.55      0.51      0.53        41
         QSO       0.75      0.68      0.72       219

    accuracy                           0.61       359
   macro avg       0.57      0.56      0.56       359
weighted avg       0.63      0.61      0.62       359

✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_LOGFIRMA_REALES_2.png

===============================================================================================
OOF-TOP5 | Model #3
===============================================================================================
FIXED OOF macroF1: 0.5707 | fold std: 0.0240
PARAMS: {'best_n_estimators': 101, 'colsample_bynode': 0.7452462872846224, 'colsample_bytree': 0.8766915421894768, 'gamma': 0.00010058922341116494, 'grow_policy': 'depthwise', 'learning_rate': 0.044484168177941416, 'max_depth': 4, 'max_leaves': 85, 'min_child_weight': 39.873599110055316, 'reg_alpha': 1.4931726672035842e-06, 'reg_lambda': 3.2439692898781605, 'subsample': 0.691952878566789}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7122 | test=0.5726
bal_acc  train=0.7291 | test=0.5770
acc      train=0.7483 | test=0.6212
F1_w test=0.6285 | F1(Blazar) test=0.5366
GAP macro-F1 (train-test) = 0.1396

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     285      24   91
Blazar   30     130   26
QSO     133      58  661

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      50       7   42
Blazar   13      22    6
QSO      56      12  151

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.64      0.71      0.67       400
      Blazar       0.61      0.70      0.65       186
         QSO       0.85      0.78      0.81       852

    accuracy                           0.75      1438
   macro avg       0.70      0.73      0.71      1438
weighted avg       0.76      0.75      0.75      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.42      0.51      0.46        99
      Blazar       0.54      0.54      0.54        41
         QSO       0.76      0.69      0.72       219

    accuracy                           0.62       359
   macro avg       0.57      0.58      0.57       359
weighted avg       0.64      0.62      0.63       359

✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_LOGFIRMA_REALES_3.png

===============================================================================================
OOF-TOP5 | Model #4
===============================================================================================
FIXED OOF macroF1: 0.5699 | fold std: 0.0185
PARAMS: {'best_n_estimators': 133, 'colsample_bynode': 0.7681653099912698, 'colsample_bytree': 0.9800183963693817, 'gamma': 0.0038432006551539924, 'grow_policy': 'lossguide', 'learning_rate': 0.023218646777297004, 'max_depth': 5, 'max_leaves': 128, 'min_child_weight': 11.921178877039386, 'reg_alpha': 3.460621800595392e-08, 'reg_lambda': 3.6988713446272268, 'subsample': 0.8937064954939259}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.8294 | test=0.5896
bal_acc  train=0.8373 | test=0.5856
acc      train=0.8428 | test=0.6295
F1_w test=0.6331 | F1(Blazar) test=0.6076
GAP macro-F1 (train-test) = 0.2398

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     327       3   70
Blazar   17     156   13
QSO      92      31  729

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      45       4   50
Blazar   12      24    5
QSO      52      10  157

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.75      0.82      0.78       400
      Blazar       0.82      0.84      0.83       186
         QSO       0.90      0.86      0.88       852

    accuracy                           0.84      1438
   macro avg       0.82      0.84      0.83      1438
weighted avg       0.85      0.84      0.84      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.41      0.45      0.43        99
      Blazar       0.63      0.59      0.61        41
         QSO       0.74      0.72      0.73       219

    accuracy                           0.63       359
   macro avg       0.59      0.59      0.59       359
weighted avg       0.64      0.63      0.63       359

✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_LOGFIRMA_REALES_4.png

===============================================================================================
OOF-TOP5 | Model #5
===============================================================================================
FIXED OOF macroF1: 0.5693 | fold std: 0.0227
PARAMS: {'best_n_estimators': 552, 'colsample_bynode': 0.8830728248161238, 'colsample_bytree': 0.8569542256977045, 'gamma': 0.0022232365438083607, 'grow_policy': 'depthwise', 'learning_rate': 0.019306328518668175, 'max_depth': 2, 'max_leaves': 62, 'min_child_weight': 68.99870818520014, 'reg_alpha': 0.004053689557345564, 'reg_lambda': 19.566613546446423, 'subsample': 0.9198385129840964}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.6735 | test=0.5553
bal_acc  train=0.6909 | test=0.5509
acc      train=0.7072 | test=0.5989
F1_w test=0.6086 | F1(Blazar) test=0.5455
GAP macro-F1 (train-test) = 0.1182

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     273      25  102
Blazar   38     123   25
QSO     172      59  621

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      46       7   46
Blazar   16      21    4
QSO      63       8  148

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.57      0.68      0.62       400
      Blazar       0.59      0.66      0.63       186
         QSO       0.83      0.73      0.78       852

    accuracy                           0.71      1438
   macro avg       0.66      0.69      0.67      1438
weighted avg       0.73      0.71      0.71      1438


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.37      0.46      0.41        99
      Blazar       0.58      0.51      0.55        41
         QSO       0.75      0.68      0.71       219

    accuracy                           0.60       359
   macro avg       0.57      0.55      0.56       359
weighted avg       0.62      0.60      0.61       359

✅ Guardado: /home/felorrieta/Catalina/XGB_ESIG_LOGFIRMA_REALES_5.png

Tiempo eval TOP-5 (hh:mm:ss): 00:00:08
Tiempo TOTAL (hh:mm:ss): 00:12:05


Tiempo métricas tabla LaTeX (hh:mm:ss): 00:02:40

Tabla resumen (para elegir mejor modelo):
Modelo   AUC_CV    SD_CV   Gap_CV  AUC_rep   SD_rep  Gap_rep  Acc_test  F1_w_test
 XGB_1 0.724764 0.027276 0.150819 0.727142 0.021136 0.158595  0.640669   0.647102
 XGB_4 0.722220 0.023777 0.238291 0.722134 0.020072 0.237952  0.629526   0.633142
 XGB_3 0.727686 0.027799 0.167410 0.727711 0.022728 0.163806  0.621170   0.628517
 XGB_2 0.725049 0.029409 0.125062 0.725364 0.022357 0.133193  0.610028   0.618326
 XGB_5 0.724209 0.028485 0.115026 0.724952 0.021513 0.121061  0.598886   0.608573

--- LaTeX rows (pegables dentro de tu tabular) ---
XGB_1 & 0.7248 & 0.0273 & 0.1508 & 0.7271 & 0.0211 & 0.1586 & 0.6407 & 0.6471 \\
XGB_2 & 0.7250 & 0.0294 & 0.1251 & 0.7254 & 0.0224 & 0.1332 & 0.6100 & 0.6183 \\
XGB_3 & 0.7277 & 0.0278 & 0.1674 & 0.7277 & 0.0227 & 0.1638 & 0.6212 & 0.6285 \\
XGB_4 & 0.7222 & 0.0238 & 0.2383 & 0.7221 & 0.0201 & 0.2380 & 0.6295 & 0.6331 \\
XGB_5 & 0.7242 & 0.0285 & 0.1150 & 0.7250 & 0.0215 & 0.1211 & 0.5989 & 0.6086 \\

XGB: IISIGNATURE - FIRMA

# ============================================================
# XGB OOF-first (5x2) — IISIGNATURE FIRMA, DATOS REALES
# + FIXED OOF (no overwrite in repeated CV)
# + TOP-5 evaluation: TRAIN & TEST confusion matrices + reports
# + Timing (search / top5 eval / total)
# + Extra: build a LaTeX-ready table for TOP-5
# ============================================================

import time
import inspect
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (
    StratifiedKFold,
    RepeatedStratifiedKFold,
    ParameterSampler
)
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    classification_report,
    accuracy_score,
    f1_score,
    balanced_accuracy_score
)
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import randint, uniform, loguniform

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import xgboost as xgb
from xgboost import XGBClassifier


# ============================================================
# 1) LOAD + MERGE + SPLIT (80/20)  [IISIGNATURE FIRMA, DATOS REALES]
# ============================================================
# If your file has a different name, change only this path.
# x: feature table; y: label table. They are joined on the shared "id" column.
x = pd.read_csv('/home/felorrieta/Downloads/path_signature_iisignature_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
# The label table names its identifier "oid"; copy it to "id" to match the join key.
y["id"] = y["oid"]

data = pd.merge(x, y, on="id")

# Random (non-stratified) 80/20 split with a fixed seed: the sampled index
# labels form the training set and the remaining rows form the test set.
train_idx = data.sample(frac=0.8, random_state=42).index
data_train = data.loc[train_idx].reset_index(drop=True)
data_test  = data.drop(train_idx).reset_index(drop=True)

# Drop the identifier and class columns so that only feature columns remain.
X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']

# Select exactly the training columns, in the same order, for the test split.
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

# Plain NumPy views of the features, used for positional indexing inside the CV loops.
X_train_np = np.asarray(X_train)
X_test_np  = np.asarray(X_test)

print("Shapes:", X_train.shape, X_test.shape)

# ============================================================
# 2) Encoding + class weights (+ optional HARD_FACTOR)
# ============================================================
# The encoder is fitted on the training labels only; a test label that is
# absent from training would make transform() raise.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)

# labels[k] is the original class name of encoded class k.
labels = le.classes_
n_classes = len(labels)

# "balanced" class weights computed from the training label frequencies.
classes = np.unique(y_train_enc)
cw = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_enc)
class_weight_dict = {int(c): float(w) for c, w in zip(classes, cw)}

# optional: raise the weight of the harder classes
HARD_CLASSES = ["AGN", "QSO"]
HARD_FACTOR  = 1.8

# Multiply the balanced weight of every listed class that actually exists in
# `labels` by HARD_FACTOR; names not present are silently ignored.
if HARD_CLASSES:
    for name in HARD_CLASSES:
        if name in labels:
            hid = int(np.where(labels == name)[0][0])
            class_weight_dict[hid] *= HARD_FACTOR

def make_sample_weight(y_enc, cw_dict):
    """Look up a per-sample weight for every encoded label.

    Args:
        y_enc: Iterable of encoded class ids (each must be accepted by ``int()``).
        cw_dict: Mapping from integer class id to its weight.

    Returns:
        1-D float numpy array with one weight per element of ``y_enc``,
        in the same order.

    Raises:
        KeyError: If a label has no entry in ``cw_dict``.
    """
    per_sample = (cw_dict[int(label)] for label in y_enc)
    return np.fromiter(per_sample, dtype=float)

# Encoded id of the "Blazar" class, or None when that class is absent from `labels`.
if "Blazar" in labels:
    BLZ_ID = int(np.where(labels == "Blazar")[0][0])
else:
    BLZ_ID = None

def f1_blazar(y_true, y_pred):
    """Binary F1 of the Blazar class treated as one-vs-rest.

    Args:
        y_true: Array of encoded true labels.
        y_pred: Array of encoded predicted labels.

    Returns:
        The F1 score of "is Blazar" vs. "is not Blazar", or ``np.nan`` when
        the module-level ``BLZ_ID`` is ``None``.
    """
    if BLZ_ID is not None:
        true_is_blz = (y_true == BLZ_ID).astype(int)
        pred_is_blz = (y_pred == BLZ_ID).astype(int)
        return f1_score(true_is_blz, pred_is_blz)
    return np.nan

# Sanity summary: class names in encoder order, per-class training counts
# keyed by encoded id, and the final class weights (after any HARD_FACTOR boost).
print("Labels:", list(labels))
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))
print("Class weights:", class_weight_dict)

# ============================================================
# 3) Preprocess + XGB fit (early stopping)
# ============================================================
def preprocess_fit_transform(X_tr_raw, X_va_raw):
    """Impute and variance-filter a train/validation pair without leakage.

    Both transformers are fitted on the training matrix only and then
    applied to the validation matrix.

    Args:
        X_tr_raw: Raw training feature matrix.
        X_va_raw: Raw validation feature matrix with the same columns.

    Returns:
        Tuple ``(X_tr, X_va, imputer, selector)`` with the transformed
        matrices followed by the fitted ``SimpleImputer`` (median strategy)
        and the fitted ``VarianceThreshold`` (threshold 0.0).
    """
    imp = SimpleImputer(strategy="median")
    vt = VarianceThreshold(0.0)

    # Training side: fit + transform in one go, imputation first.
    X_tr_v = vt.fit_transform(imp.fit_transform(X_tr_raw))
    # Validation side: reuse the statistics learned on the training side.
    X_va_v = vt.transform(imp.transform(X_va_raw))

    return X_tr_v, X_va_v, imp, vt

def preprocess_full(X_train_raw, X_test_raw):
    """Preprocess the full training set and the held-out test set.

    This applies exactly the same pipeline as ``preprocess_fit_transform``
    (median imputation followed by ``VarianceThreshold(0.0)``, both fitted on
    the first argument only), so it delegates to that function instead of
    keeping a second copy of the same steps.

    Args:
        X_train_raw: Raw full-training feature matrix (used for fitting).
        X_test_raw: Raw test feature matrix with the same columns.

    Returns:
        Tuple ``(X_train, X_test, imputer, selector)``: the transformed
        matrices followed by the fitted imputer and variance selector.
    """
    return preprocess_fit_transform(X_train_raw, X_test_raw)

def fit_xgb_one_fold(params, X_tr, y_tr, X_va, y_va, w_tr=None, w_va=None):
    """Fit one XGBoost classifier with early stopping on a validation fold.

    Args:
        params: Dict of ``XGBClassifier`` keyword arguments for this trial.
            Keys that also appear in the built-in defaults below override
            those defaults.
        X_tr: Preprocessed training features.
        y_tr: Encoded training labels.
        X_va: Preprocessed validation features (used as the early-stopping
            evaluation set).
        y_va: Encoded validation labels.
        w_tr: Optional per-sample training weights.
        w_va: Optional per-sample validation weights; forwarded only when the
            installed ``fit`` signature exposes ``sample_weight_eval_set``.

    Returns:
        The fitted ``XGBClassifier``.
    """
    # Defaults shared by every trial. `n_classes` is the module-level class count.
    # n_estimators is deliberately large: early stopping decides the real length.
    base = dict(
        random_state=42,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=20000,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
    )

    # Merge before unpacking: `XGBClassifier(**base, **params)` raises
    # TypeError ("multiple values for keyword argument") as soon as `params`
    # repeats one of the default keys, whereas a merged dict lets it override.
    model_kwargs = {**base, **params}

    # A fresh callback per fit; stops after 250 rounds without improvement.
    # NOTE(review): save_best=True is expected to leave the returned model at
    # its best iteration — confirm against the installed XGBoost version.
    es = xgb.callback.EarlyStopping(rounds=250, save_best=True)
    model = XGBClassifier(**model_kwargs, callbacks=[es])

    fit_kwargs = dict(
        X=X_tr,
        y=y_tr,
        sample_weight=w_tr,
        eval_set=[(X_va, y_va)],
        verbose=False
    )

    # Only pass validation weights when this `fit` accepts them.
    sig = inspect.signature(model.fit)
    if "sample_weight_eval_set" in sig.parameters and (w_va is not None):
        fit_kwargs["sample_weight_eval_set"] = [w_va]

    model.fit(**fit_kwargs)
    return model

def to_plain_params(d):
    """Return a copy of ``d`` with numpy scalar values turned into Python scalars.

    Sampled hyper-parameters may carry numpy scalar types; converting them
    keeps printed parameter dicts free of numpy wrappers. Floating, integer
    and boolean numpy scalars are converted; every other value is passed
    through unchanged.

    Args:
        d: Mapping of parameter name to value.

    Returns:
        A new dict with the same keys, in the same order.
    """
    out = {}
    for k, v in d.items():
        if isinstance(v, np.floating):
            out[k] = float(v)
        elif isinstance(v, np.integer):
            # Previously left as a numpy integer (e.g. values drawn from randint).
            out[k] = int(v)
        elif isinstance(v, np.bool_):
            out[k] = bool(v)
        else:
            out[k] = v
    return out

# ============================================================
# 4) FIXED OOF eval for repeated CV (accumulate & average)
# ============================================================
def oof_eval_xgb(params, X, y, cv):
    """Evaluate one parameter set with (possibly repeated) cross-validation.

    For every split, the fold is preprocessed, a model is fitted with early
    stopping on the validation part, and the validation probabilities are
    accumulated per sample. With repeated CV a sample is validated several
    times, so the accumulated probabilities are averaged before the
    out-of-fold (OOF) prediction is taken.

    Args:
        params: Dict of ``XGBClassifier`` keyword arguments for this trial.
        X: Raw feature matrix (numpy array, indexable by integer arrays).
        y: Encoded labels (numpy array).
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Dict with OOF macro-F1, balanced accuracy, accuracy and Blazar F1,
        per-fold macro-F1 mean/std, per-fold balanced-accuracy and Blazar-F1
        means, and ``best_n_cv`` (median best iteration + 1, or 800 when no
        fold reported a best iteration).
    """
    n_samples = len(y)
    n_cols = len(np.unique(y))
    proba_sum = np.zeros((n_samples, n_cols), dtype=float)
    proba_cnt = np.zeros(n_samples, dtype=float)

    f1m_per_fold, bacc_per_fold, f1blz_per_fold = [], [], []
    its = []

    for tr_idx, va_idx in cv.split(X, y):
        y_tr, y_va = y[tr_idx], y[va_idx]
        X_tr, X_va, _, _ = preprocess_fit_transform(X[tr_idx], X[va_idx])

        model = fit_xgb_one_fold(
            params, X_tr, y_tr, X_va, y_va,
            w_tr=make_sample_weight(y_tr, class_weight_dict),
            w_va=make_sample_weight(y_va, class_weight_dict),
        )

        p_va = model.predict_proba(X_va)

        # Accumulate; the count lets repeated CV average over repeats later.
        proba_sum[va_idx] += p_va
        proba_cnt[va_idx] += 1.0

        yhat_va = np.argmax(p_va, axis=1)
        f1m_per_fold.append(f1_score(y_va, yhat_va, average="macro"))
        bacc_per_fold.append(balanced_accuracy_score(y_va, yhat_va))
        f1blz_per_fold.append(f1_blazar(y_va, yhat_va))

        # Keep only folds that actually expose a best iteration.
        fold_best = getattr(model, "best_iteration", None)
        if fold_best is not None:
            its.append(fold_best)

    # The small epsilon guards the division for samples never validated.
    proba_oof = proba_sum / (proba_cnt[:, None] + 1e-12)
    yhat_oof = np.argmax(proba_oof, axis=1)

    if len(its):
        best_n_cv = int(np.median(its) + 1)
    else:
        best_n_cv = 800

    # Key order matters: it becomes the column order of the results table.
    summary = {}
    summary["oof_macroF1"] = float(f1_score(y, yhat_oof, average="macro"))
    summary["oof_bacc"] = float(balanced_accuracy_score(y, yhat_oof))
    summary["oof_acc"] = float(accuracy_score(y, yhat_oof))
    summary["oof_f1_blazar"] = float(f1_blazar(y, yhat_oof))
    summary["fold_f1m_mean"] = float(np.mean(f1m_per_fold))
    summary["fold_f1m_std"] = float(np.std(f1m_per_fold))
    summary["fold_bacc_mean"] = float(np.mean(bacc_per_fold))
    summary["fold_f1blz_mean"] = float(np.mean(f1blz_per_fold))
    summary["best_n_cv"] = int(best_n_cv)
    return summary

# ============================================================
# 5) Search space + OOF-first run
# ============================================================
# Start of the overall timer; it is stopped right after the TOP-K evaluation.
TOTAL_T0 = time.time()

# 5 stratified folds repeated twice -> 10 model fits per sampled configuration.
cv_rep = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

# Distributions sampled by ParameterSampler. With scipy's conventions,
# uniform(loc, scale) spans [loc, loc + scale] (here [0.65, 1.0]) and
# randint(low, high) excludes `high` (max_depth in 2..5, max_leaves in 16..128).
param_dist = {
    "learning_rate": loguniform(0.007, 0.06),
    "max_depth": randint(2, 6),
    "min_child_weight": loguniform(10.0, 150.0),
    "subsample": uniform(0.65, 0.35),
    "colsample_bytree": uniform(0.65, 0.35),
    "colsample_bynode": uniform(0.65, 0.35),
    "gamma": loguniform(1e-4, 8.0),
    "reg_alpha": loguniform(1e-10, 1.0),
    "reg_lambda": loguniform(1.0, 200.0),
    "grow_policy": ["depthwise", "lossguide"],
    "max_leaves": randint(16, 129),
}

# Number of sampled configurations, and how many of the best ones are
# re-trained on the full training set afterwards.
N_ITER = 30
TOP_K  = 5

# Materialise the sampler so the exact same configurations can be iterated with a progress bar.
sampler = list(ParameterSampler(param_dist, n_iter=N_ITER, random_state=42))

print("\n########## XGB OOF-first (5x2) — IISIGNATURE FIRMA REALES ##########")
print(f"N_ITER={N_ITER} | CV=5x2 | TOP_K={TOP_K}")

SEARCH_T0 = time.time()
rows = []

# One row per configuration: the sampled params plus every OOF statistic.
for params in tqdm(sampler, total=N_ITER, desc="OOF trials"):
    stats = oof_eval_xgb(params, X_train_np, y_train_enc, cv_rep)
    rows.append({"params": params, **stats})

results = pd.DataFrame(rows)

# Rank by OOF macro-F1 (higher is better); ties are broken by lower fold
# std, then higher balanced accuracy, then higher Blazar F1.
results = results.sort_values(
    ["oof_macroF1", "fold_f1m_std", "oof_bacc", "oof_f1_blazar"],
    ascending=[False, True, False, False]
).reset_index(drop=True)

SEARCH_T1 = time.time()

print("\nTOP 10 by FIXED OOF macro-F1 (and stability):")
print(results.head(10)[[
    "oof_macroF1", "oof_bacc", "oof_acc", "oof_f1_blazar",
    "fold_f1m_std", "best_n_cv", "params"
]].to_string(index=False))

# NOTE: formatting the elapsed seconds through gmtime/%H wraps around after 24 hours.
print("\nTiempo búsqueda (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(SEARCH_T1 - SEARCH_T0)))

# ============================================================
# 6) Train TOP-5 on full-train and show TRAIN+TEST confusion matrices
# ============================================================
def _row_normalize(cm):
    """Convert a confusion matrix of counts into row-wise percentages.

    Args:
        cm: 2-D numpy array of counts (rows are true classes).

    Returns:
        Float array of the same shape where each row sums to 100; rows whose
        counts are all zero stay all zero instead of dividing by zero.
    """
    counts = cm.astype(float)
    totals = counts.sum(axis=1, keepdims=True)
    # Empty rows get a divisor of 1 so they stay all-zero.
    divisors = np.where(totals == 0, 1.0, totals)
    return (counts / divisors) * 100.0

def save_confusion_train_test(cm_train, cm_test, labels, outpath,
                              title_prefix="", subtitle="",
                              gap_width=0.28, wspace=0.15,
                              label_fontsize=13, tick_fontsize=13, title_fontsize=14,
                              cmap="Greens"):
    """Draw train and test confusion matrices side by side and save the figure.

    Each cell shows the row-normalised percentage and, below it, the raw
    count in parentheses. Both panels share one colour bar on a 0-100 scale.

    Args:
        cm_train: Confusion matrix (counts) on the training set.
        cm_test: Confusion matrix (counts) on the test set.
        labels: Class names used for both axes' tick labels.
        outpath: Destination path of the saved image.
        title_prefix: First line of the figure title.
        subtitle: Second line of the figure title.
        gap_width: Relative width of the blank column between the two panels.
        wspace: Horizontal spacing passed to the grid layout.
        label_fontsize: Font size of the axis labels.
        tick_fontsize: Font size of the tick labels.
        title_fontsize: Font size of each panel title.
        cmap: Matplotlib colormap name.
    """
    # Percentages above this value get white text, the rest black.
    white_text_above = 50
    tick_positions = np.arange(len(labels))

    fig = plt.figure(figsize=(10.8, 4.8))
    layout = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)

    # Columns: train panel | blank spacer | test panel | colour bar.
    ax_train = fig.add_subplot(layout[0, 0])
    ax_spacer = fig.add_subplot(layout[0, 1])
    ax_test = fig.add_subplot(layout[0, 2])
    ax_cbar = fig.add_subplot(layout[0, 3])
    ax_spacer.axis("off")

    for ax, counts, panel_title in ((ax_train, cm_train, "Train"), (ax_test, cm_test, "Test")):
        pct_matrix = _row_normalize(counts)

        im = ax.imshow(pct_matrix, cmap=cmap, vmin=0, vmax=100)
        ax.set_title(panel_title, fontsize=title_fontsize)

        ax.set_xticks(tick_positions)
        ax.set_yticks(tick_positions)
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)

        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real", fontsize=label_fontsize)

        # Annotate every cell: percentage slightly above centre, count below.
        for (r, c), pct in np.ndenumerate(pct_matrix):
            text_color = "white" if pct > white_text_above else "black"
            ax.text(c, r - 0.10, f"{pct:.1f}%",
                    ha="center", va="center",
                    color=text_color, fontsize=10, fontweight="bold")
            ax.text(c, r + 0.22, f"({int(counts[r, c])})",
                    ha="center", va="center",
                    color=text_color, fontsize=7)

    # Both panels use the same 0-100 scale, so the last image drives the bar.
    fig.colorbar(im, cax=ax_cbar, label="% por fila (clase real)")
    fig.suptitle(f"{title_prefix}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)
    fig.savefig(outpath, dpi=300, bbox_inches="tight")
    plt.close(fig)

# Timer for the TOP-K re-training / evaluation stage.
EVAL_T0 = time.time()

# `results` is already sorted best-first, so its head is the TOP-K selection.
topk = results.head(TOP_K).reset_index(drop=True)

# Preprocessing is fitted once on the full training set and applied to the test set.
X_train_v, X_test_v, _, _ = preprocess_full(X_train_np, X_test_np)
w_train_full = make_sample_weight(y_train_enc, class_weight_dict)

print("\n" + "#" * 95)
print("TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)")
print("#" * 95)

# One summary dict per evaluated model; consumed later when building the LaTeX table.
top5_summary = []

# Output directory for the confusion-matrix figures.
# NOTE(review): `os`, `confusion_matrix` and `classification_report` are not
# imported in the visible part of this cell — presumably imported earlier; confirm.
downloads = "/home/felorrieta/Catalina"
os.makedirs(downloads, exist_ok=True)

for i, row in topk.iterrows():
    params = row["params"]
    # Number of trees fixed from cross-validation (no early stopping here).
    best_n = int(row["best_n_cv"])

    # Each of the TOP-K models gets its own seed (100, 101, ...).
    model = XGBClassifier(
        random_state=100 + i,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=best_n,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        **params
    )
    model.fit(X_train_v, y_train_enc, sample_weight=w_train_full, verbose=False)

    p_tr = model.predict_proba(X_train_v)
    p_te = model.predict_proba(X_test_v)

    # Hard predictions = class with the highest predicted probability.
    yhat_tr = np.argmax(p_tr, axis=1)
    yhat_te = np.argmax(p_te, axis=1)

    f1m_tr = f1_score(y_train_enc, yhat_tr, average="macro")
    f1m_te = f1_score(y_test_enc,  yhat_te, average="macro")
    bacc_tr = balanced_accuracy_score(y_train_enc, yhat_tr)
    bacc_te = balanced_accuracy_score(y_test_enc,  yhat_te)
    acc_tr = accuracy_score(y_train_enc, yhat_tr)
    acc_te = accuracy_score(y_test_enc,  yhat_te)
    f1w_te = f1_score(y_test_enc, yhat_te, average="weighted")
    f1blz_te = f1_blazar(y_test_enc, yhat_te)

    print("\n" + "=" * 95)
    print(f"OOF-TOP{TOP_K} | Model #{i+1}")
    print("=" * 95)
    print("FIXED OOF macroF1:", f"{row['oof_macroF1']:.4f}", "| fold std:", f"{row['fold_f1m_std']:.4f}")
    print("PARAMS:", {"best_n_estimators": best_n, **to_plain_params(params)})

    print("\nMÉTRICAS (TRAIN / TEST)")
    print(f"macro-F1 train={f1m_tr:.4f} | test={f1m_te:.4f}")
    print(f"bal_acc  train={bacc_tr:.4f} | test={bacc_te:.4f}")
    print(f"acc      train={acc_tr:.4f} | test={acc_te:.4f}")
    print(f"F1_w test={f1w_te:.4f} | F1(Blazar) test={f1blz_te:.4f}")
    # Train-minus-test macro-F1, printed as a quick overfitting indicator.
    print(f"GAP macro-F1 (train-test) = {f1m_tr - f1m_te:.4f}")

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print("\nMatriz de confusión (TRAIN)")
    print(pd.DataFrame(cm_tr, index=labels, columns=labels))

    print("\nMatriz de confusión (TEST)")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))

    print("\nClassification report (TRAIN)")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))

    print("\nClassification report (TEST)")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    # Second title line of the figure: accuracy and weighted F1 on both sets.
    subtitle = (
        f"Acc train={acc_tr:.3f} | Acc test={acc_te:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_te:.3f}"
    )

    outpath = os.path.join(downloads, f"XGB_IISIG_FIRMA_REALES_{i+1}.png")
    save_confusion_train_test(
        cm_tr, cm_te, labels,
        outpath=outpath,
        title_prefix=f"XGB_{i+1} (IISIG firma, datos reales) | n_estimators={best_n}",
        subtitle=subtitle,
        gap_width=0.28,
        wspace=0.15,
        cmap="Greens"
    )
    print(f"✅ Guardado: {outpath}")

    # Appended in TOP-K order, so position i matches row i of `topk`.
    top5_summary.append({
        "Modelo": f"XGB_{i+1}",
        "best_n": best_n,
        "oof_macroF1": row["oof_macroF1"],
        "oof_f1_blazar": row["oof_f1_blazar"],
        "fold_f1m_std": row["fold_f1m_std"],
        "Acc_test": acc_te,
        "F1_w_test": f1w_te,
        "macroF1_test": f1m_te,
        "bacc_test": bacc_te,
    })

EVAL_T1 = time.time()
# The "total" timer stops here, so it covers the search and this evaluation
# stage but not any work that runs after this point.
TOTAL_T1 = time.time()

print("\nTiempo eval TOP-5 (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(EVAL_T1 - EVAL_T0)))
print("Tiempo TOTAL (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(TOTAL_T1 - TOTAL_T0)))

# ============================================================
# 7) Extra: Build LaTeX-ready table metrics for TOP-5
# ============================================================
def cv_auc_gap(params, X, y, cv):
    """Cross-validated weighted one-vs-rest AUC and its train/validation gap.

    For every split the fold is preprocessed, a model is fitted with early
    stopping on the validation part, and the AUC is computed on both the
    training and the validation part of that split.

    Args:
        params: Dict of ``XGBClassifier`` keyword arguments.
        X: Raw feature matrix (numpy array).
        y: Encoded labels (numpy array).
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Tuple ``(mean_va, std_va, gap)``: mean and standard deviation of the
        validation AUC over the splits, and mean training AUC minus mean
        validation AUC.
    """
    def _weighted_ovr_auc(y_true, proba):
        return roc_auc_score(y_true, proba, multi_class="ovr", average="weighted")

    train_aucs = []
    valid_aucs = []

    for tr_idx, va_idx in cv.split(X, y):
        y_tr, y_va = y[tr_idx], y[va_idx]
        X_tr, X_va, _, _ = preprocess_fit_transform(X[tr_idx], X[va_idx])

        model = fit_xgb_one_fold(
            params, X_tr, y_tr, X_va, y_va,
            w_tr=make_sample_weight(y_tr, class_weight_dict),
            w_va=make_sample_weight(y_va, class_weight_dict),
        )

        # Same order as scoring: training probabilities first, then validation.
        p_tr = model.predict_proba(X_tr)
        p_va = model.predict_proba(X_va)

        train_aucs.append(_weighted_ovr_auc(y_tr, p_tr))
        valid_aucs.append(_weighted_ovr_auc(y_va, p_va))

    mean_va = float(np.mean(valid_aucs))
    std_va = float(np.std(valid_aucs))
    gap = float(np.mean(train_aucs) - mean_va)
    return mean_va, std_va, gap

# Two evaluation schemes for the table: plain 5-fold and 5-fold repeated twice.
cv_5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_5x2 = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

LATEX_T0 = time.time()
latex_rows = []

# For every TOP-K model: AUC mean / std / train-validation gap under both CV
# schemes, joined with the test metrics computed during the TOP-K evaluation.
for i, row in tqdm(list(topk.iterrows()), total=len(topk), desc="Building LaTeX table metrics"):
    params = row["params"]

    auc_cv,  sd_cv,  gap_cv  = cv_auc_gap(params, X_train_np, y_train_enc, cv_5)
    auc_rep, sd_rep, gap_rep = cv_auc_gap(params, X_train_np, y_train_enc, cv_5x2)

    # `topk` has a 0..K-1 index, so `i` also indexes the summary list.
    acc_test = top5_summary[i]["Acc_test"]
    f1w_test = top5_summary[i]["F1_w_test"]

    latex_rows.append({
        "Modelo": top5_summary[i]["Modelo"],
        "AUC_CV": auc_cv,
        "SD_CV": sd_cv,
        "Gap_CV": gap_cv,
        "AUC_rep": auc_rep,
        "SD_rep": sd_rep,
        "Gap_rep": gap_rep,
        "Acc_test": acc_test,
        "F1_w_test": f1w_test,
    })

LATEX_T1 = time.time()
latex_df = pd.DataFrame(latex_rows)

print("\nTiempo métricas tabla LaTeX (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(LATEX_T1 - LATEX_T0)))
print("\nTabla resumen (para elegir mejor modelo):")
# NOTE(review): this ranking is by test-set metrics; choosing the final model
# from it would use the test set for model selection — confirm that is intended.
print(latex_df.sort_values(["Acc_test", "F1_w_test"], ascending=False).to_string(index=False))

# Rows are emitted in TOP-K order, ready to paste inside a LaTeX tabular.
print("\n--- LaTeX rows (pegables dentro de tu tabular) ---")
for _, r in latex_df.iterrows():
    print(
        f"{r['Modelo']} & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1_w_test']:.4f} \\\\"
    )

Shapes: (1510, 1022) (377, 1022)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 422, 1: 189, 2: 899}
Class weights: {0: 2.146919431279621, 1: 2.6631393298059964, 2: 1.0077864293659624}

########## XGB OOF-first (5x2) — IISIGNATURE FIRMA REALES ##########
N_ITER=30 | CV=5x2 | TOP_K=5


TOP 10 by FIXED OOF macro-F1 (and stability):
 oof_macroF1  oof_bacc  oof_acc  oof_f1_blazar  fold_f1m_std  best_n_cv                                                                                                                                                                                                                                                                                                                                                                params
    0.624304  0.625665 0.666887       0.578512      0.028515        287   {'colsample_bynode': 0.8981188489185415, 'colsample_bytree': 0.8434869919177677, 'gamma': 0.0028432556862043037, 'grow_policy': 'lossguide', 'learning_rate': 0.03990037190387228, 'max_depth': 2, 'max_leaves': 39, 'min_child_weight': 118.59168268812645, 'reg_alpha': 1.2984462863721065e-05, 'reg_lambda': 14.25620846729104, 'subsample': 0.9294033126383713}
    0.623138  0.624362 0.669536       0.567493      0.031271        515   {'colsample_bynode': 0.8228284587275367, 'colsample_bytree': 0.8329564902836979, 'gamma': 0.012481652467320591, 'grow_policy': 'lossguide', 'learning_rate': 0.017989635897986907, 'max_depth': 4, 'max_leaves': 78, 'min_child_weight': 113.10966992904058, 'reg_alpha': 5.671556094422195e-06, 'reg_lambda': 19.774885928261973, 'subsample': 0.8934306302491446}
    0.622944  0.617994 0.672848       0.564841      0.026862        195       {'colsample_bynode': 0.919944621340081, 'colsample_bytree': 0.6759156281069316, 'gamma': 0.005722551691267099, 'grow_policy': 'depthwise', 'learning_rate': 0.049980903508955704, 'max_depth': 2, 'max_leaves': 87, 'min_child_weight': 33.77497290007452, 'reg_alpha': 8.997071356223067e-10, 'reg_lambda': 7.13286363374483, 'subsample': 0.8840944384322625}
    0.622017  0.610866 0.678808       0.562130      0.027085        146 {'colsample_bynode': 0.7681653099912698, 'colsample_bytree': 0.9800183963693817, 'gamma': 0.0038432006551539924, 'grow_policy': 'lossguide', 'learning_rate': 0.023218646777297004, 'max_depth': 5, 'max_leaves': 128, 'min_child_weight': 11.921178877039386, 'reg_alpha': 3.460621800595392e-08, 'reg_lambda': 3.6988713446272268, 'subsample': 0.8937064954939259}
    0.620659  0.616879 0.668874       0.568182      0.027061        193        {'colsample_bynode': 0.7185940018467009, 'colsample_bytree': 0.6658295511186884, 'gamma': 0.003936623559141479, 'grow_policy': 'lossguide', 'learning_rate': 0.03486482325766769, 'max_depth': 4, 'max_leaves': 68, 'min_child_weight': 48.986105915986826, 'reg_alpha': 0.4493172298007678, 'reg_lambda': 24.93469224439704, 'subsample': 0.7465997137078901}
    0.620596  0.619994 0.666225       0.572222      0.031975        715     {'colsample_bynode': 0.9073256417342703, 'colsample_bytree': 0.7231750672581998, 'gamma': 0.04516147720002087, 'grow_policy': 'lossguide', 'learning_rate': 0.012183478572923461, 'max_depth': 5, 'max_leaves': 31, 'min_child_weight': 91.43824593314153, 'reg_alpha': 3.564577947646552e-05, 'reg_lambda': 16.547861144770245, 'subsample': 0.7346483018151582}
    0.620373  0.611991 0.672848       0.565598      0.024600         99       {'colsample_bynode': 0.994198099313195, 'colsample_bytree': 0.7895885548555936, 'gamma': 1.0070219632034507, 'grow_policy': 'lossguide', 'learning_rate': 0.042119161345132834, 'max_depth': 5, 'max_leaves': 74, 'min_child_weight': 16.571536957884465, 'reg_alpha': 2.557121805028822e-10, 'reg_lambda': 22.89087461386255, 'subsample': 0.8871475266447988}
    0.619962  0.612978 0.672848       0.555556      0.023980        200        {'colsample_bynode': 0.7810890415965769, 'colsample_bytree': 0.9827500072434707, 'gamma': 0.3881791159604766, 'grow_policy': 'depthwise', 'learning_rate': 0.0252342845499294, 'max_depth': 3, 'max_leaves': 98, 'min_child_weight': 13.109303695322625, 'reg_alpha': 3.912811440759049e-06, 'reg_lambda': 5.859674932405375, 'subsample': 0.7000033862726793}
    0.618684  0.615206 0.670861       0.555241      0.025579        174      {'colsample_bynode': 0.8774873757722178, 'colsample_bytree': 0.8956884070401961, 'gamma': 0.7977063868174838, 'grow_policy': 'lossguide', 'learning_rate': 0.04702827154794471, 'max_depth': 4, 'max_leaves': 107, 'min_child_weight': 47.951030377685704, 'reg_alpha': 2.425164655536382e-06, 'reg_lambda': 35.18457384151612, 'subsample': 0.7648534336161562}
    0.617908  0.623238 0.667550       0.559367      0.026170        571    {'colsample_bynode': 0.722779582003866, 'colsample_bytree': 0.848695114736997, 'gamma': 0.0001424071813891538, 'grow_policy': 'lossguide', 'learning_rate': 0.037011629776466666, 'max_depth': 3, 'max_leaves': 89, 'min_child_weight': 112.82325501221334, 'reg_alpha': 9.527956991613819e-05, 'reg_lambda': 132.20877067405615, 'subsample': 0.6809723757181718}

Tiempo búsqueda (hh:mm:ss): 02:27:42

###############################################################################################
TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)
###############################################################################################

===============================================================================================
OOF-TOP5 | Model #1
===============================================================================================
FIXED OOF macroF1: 0.6243 | fold std: 0.0285
PARAMS: {'best_n_estimators': 287, 'colsample_bynode': 0.8981188489185415, 'colsample_bytree': 0.8434869919177677, 'gamma': 0.0028432556862043037, 'grow_policy': 'lossguide', 'learning_rate': 0.03990037190387228, 'max_depth': 2, 'max_leaves': 39, 'min_child_weight': 118.59168268812645, 'reg_alpha': 1.2984462863721065e-05, 'reg_lambda': 14.25620846729104, 'subsample': 0.9294033126383713}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7325 | test=0.5363
bal_acc  train=0.7415 | test=0.5333
acc      train=0.7629 | test=0.5995
F1_w test=0.6100 | F1(Blazar) test=0.4889
GAP macro-F1 (train-test) = 0.1961

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     318      17   87
Blazar   37     130   22
QSO     156      39  704

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      44      12   42
Blazar   19      22    7
QSO      63       8  160

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.62      0.75      0.68       422
      Blazar       0.70      0.69      0.69       189
         QSO       0.87      0.78      0.82       899

    accuracy                           0.76      1510
   macro avg       0.73      0.74      0.73      1510
weighted avg       0.78      0.76      0.77      1510


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.35      0.45      0.39        98
      Blazar       0.52      0.46      0.49        48
         QSO       0.77      0.69      0.73       231

    accuracy                           0.60       377
   macro avg       0.55      0.53      0.54       377
weighted avg       0.63      0.60      0.61       377

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_FIRMA_REALES_1.png

===============================================================================================
OOF-TOP5 | Model #2
===============================================================================================
FIXED OOF macroF1: 0.6231 | fold std: 0.0313
PARAMS: {'best_n_estimators': 515, 'colsample_bynode': 0.8228284587275367, 'colsample_bytree': 0.8329564902836979, 'gamma': 0.012481652467320591, 'grow_policy': 'lossguide', 'learning_rate': 0.017989635897986907, 'max_depth': 4, 'max_leaves': 78, 'min_child_weight': 113.10966992904058, 'reg_alpha': 5.671556094422195e-06, 'reg_lambda': 19.774885928261973, 'subsample': 0.8934306302491446}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7673 | test=0.5380
bal_acc  train=0.7791 | test=0.5342
acc      train=0.8007 | test=0.6048
F1_w test=0.6138 | F1(Blazar) test=0.4889
GAP macro-F1 (train-test) = 0.2293

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     344      17   61
Blazar   33     134   22
QSO     129      39  731

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      43      12   43
Blazar   19      22    7
QSO      60       8  163

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.68      0.82      0.74       422
      Blazar       0.71      0.71      0.71       189
         QSO       0.90      0.81      0.85       899

    accuracy                           0.80      1510
   macro avg       0.76      0.78      0.77      1510
weighted avg       0.81      0.80      0.80      1510


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.35      0.44      0.39        98
      Blazar       0.52      0.46      0.49        48
         QSO       0.77      0.71      0.73       231

    accuracy                           0.60       377
   macro avg       0.55      0.53      0.54       377
weighted avg       0.63      0.60      0.61       377

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_FIRMA_REALES_2.png

===============================================================================================
OOF-TOP5 | Model #3
===============================================================================================
FIXED OOF macroF1: 0.6229 | fold std: 0.0269
PARAMS: {'best_n_estimators': 195, 'colsample_bynode': 0.919944621340081, 'colsample_bytree': 0.6759156281069316, 'gamma': 0.005722551691267099, 'grow_policy': 'depthwise', 'learning_rate': 0.049980903508955704, 'max_depth': 2, 'max_leaves': 87, 'min_child_weight': 33.77497290007452, 'reg_alpha': 8.997071356223067e-10, 'reg_lambda': 7.13286363374483, 'subsample': 0.8840944384322625}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7600 | test=0.5357
bal_acc  train=0.7668 | test=0.5293
acc      train=0.7834 | test=0.6021
F1_w test=0.6112 | F1(Blazar) test=0.4828
GAP macro-F1 (train-test) = 0.2243

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     331      10   81
Blazar   36     136   17
QSO     150      33  716

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      44      10   44
Blazar   20      21    7
QSO      61       8  162

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.64      0.78      0.71       422
      Blazar       0.76      0.72      0.74       189
         QSO       0.88      0.80      0.84       899

    accuracy                           0.78      1510
   macro avg       0.76      0.77      0.76      1510
weighted avg       0.80      0.78      0.79      1510


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.35      0.45      0.39        98
      Blazar       0.54      0.44      0.48        48
         QSO       0.76      0.70      0.73       231

    accuracy                           0.60       377
   macro avg       0.55      0.53      0.54       377
weighted avg       0.63      0.60      0.61       377

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_FIRMA_REALES_3.png

===============================================================================================
OOF-TOP5 | Model #4
===============================================================================================
FIXED OOF macroF1: 0.6220 | fold std: 0.0271
PARAMS: {'best_n_estimators': 146, 'colsample_bynode': 0.7681653099912698, 'colsample_bytree': 0.9800183963693817, 'gamma': 0.0038432006551539924, 'grow_policy': 'lossguide', 'learning_rate': 0.023218646777297004, 'max_depth': 5, 'max_leaves': 128, 'min_child_weight': 11.921178877039386, 'reg_alpha': 3.460621800595392e-08, 'reg_lambda': 3.6988713446272268, 'subsample': 0.8937064954939259}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.9029 | test=0.5319
bal_acc  train=0.9062 | test=0.5213
acc      train=0.9106 | test=0.6048
F1_w test=0.6119 | F1(Blazar) test=0.4762
GAP macro-F1 (train-test) = 0.3710

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     384       1   37
Blazar    9     169   11
QSO      62      15  822

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      42      10   46
Blazar   21      20    7
QSO      59       6  166

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.84      0.91      0.88       422
      Blazar       0.91      0.89      0.90       189
         QSO       0.94      0.91      0.93       899

    accuracy                           0.91      1510
   macro avg       0.90      0.91      0.90      1510
weighted avg       0.91      0.91      0.91      1510


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.34      0.43      0.38        98
      Blazar       0.56      0.42      0.48        48
         QSO       0.76      0.72      0.74       231

    accuracy                           0.60       377
   macro avg       0.55      0.52      0.53       377
weighted avg       0.62      0.60      0.61       377

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_FIRMA_REALES_4.png

===============================================================================================
OOF-TOP5 | Model #5
===============================================================================================
FIXED OOF macroF1: 0.6207 | fold std: 0.0271
PARAMS: {'best_n_estimators': 193, 'colsample_bynode': 0.7185940018467009, 'colsample_bytree': 0.6658295511186884, 'gamma': 0.003936623559141479, 'grow_policy': 'lossguide', 'learning_rate': 0.03486482325766769, 'max_depth': 4, 'max_leaves': 68, 'min_child_weight': 48.986105915986826, 'reg_alpha': 0.4493172298007678, 'reg_lambda': 24.93469224439704, 'subsample': 0.7465997137078901}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7953 | test=0.5362
bal_acc  train=0.8072 | test=0.5314
acc      train=0.8225 | test=0.5995
F1_w test=0.6090 | F1(Blazar) test=0.4944
GAP macro-F1 (train-test) = 0.2591

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     352      14   56
Blazar   26     143   20
QSO     117      35  747

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      43      11   44
Blazar   19      22    7
QSO      62       8  161

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.71      0.83      0.77       422
      Blazar       0.74      0.76      0.75       189
         QSO       0.91      0.83      0.87       899

    accuracy                           0.82      1510
   macro avg       0.79      0.81      0.80      1510
weighted avg       0.83      0.82      0.83      1510


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.35      0.44      0.39        98
      Blazar       0.54      0.46      0.49        48
         QSO       0.76      0.70      0.73       231

    accuracy                           0.60       377
   macro avg       0.55      0.53      0.54       377
weighted avg       0.62      0.60      0.61       377

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_FIRMA_REALES_5.png

Tiempo eval TOP-5 (hh:mm:ss): 00:00:50
Tiempo TOTAL (hh:mm:ss): 02:28:32


Tiempo métricas tabla LaTeX (hh:mm:ss): 00:23:22

Tabla resumen (para elegir mejor modelo):
Modelo   AUC_CV    SD_CV   Gap_CV  AUC_rep   SD_rep  Gap_rep  Acc_test  F1_w_test
 XGB_2 0.782503 0.012681 0.140546 0.783070 0.026996 0.136221  0.604775   0.613750
 XGB_4 0.788071 0.013103 0.198956 0.786239 0.024258 0.199283  0.604775   0.611942
 XGB_3 0.784585 0.019379 0.143540 0.786218 0.028188 0.136208  0.602122   0.611174
 XGB_1 0.780012 0.012938 0.134095 0.781641 0.023678 0.130896  0.599469   0.609991
 XGB_5 0.784962 0.013402 0.151678 0.784013 0.025772 0.152230  0.599469   0.609017

--- LaTeX rows (pegables dentro de tu tabular) ---
XGB_1 & 0.7800 & 0.0129 & 0.1341 & 0.7816 & 0.0237 & 0.1309 & 0.5995 & 0.6100 \\
XGB_2 & 0.7825 & 0.0127 & 0.1405 & 0.7831 & 0.0270 & 0.1362 & 0.6048 & 0.6138 \\
XGB_3 & 0.7846 & 0.0194 & 0.1435 & 0.7862 & 0.0282 & 0.1362 & 0.6021 & 0.6112 \\
XGB_4 & 0.7881 & 0.0131 & 0.1990 & 0.7862 & 0.0243 & 0.1993 & 0.6048 & 0.6119 \\
XGB_5 & 0.7850 & 0.0134 & 0.1517 & 0.7840 & 0.0258 & 0.1522 & 0.5995 & 0.6090 \\

XGB: IISIGNATURE - LOGFIRMA

import numpy as np
import pandas as pd
import time
import warnings
import os
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
from tqdm_joblib import tqdm_joblib
import joblib

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold, SelectKBest, mutual_info_classif
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import (
    confusion_matrix, classification_report,
    f1_score, balanced_accuracy_score, recall_score,
    accuracy_score, roc_auc_score, make_scorer
)
from scipy.stats import loguniform
from scipy.special import softmax

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

start_time = time.time()

# =========================
# 0) REAL DATA
# =========================
# Signature features (x) are joined to the labelled catalogue (y) on "id",
# which is a copy of the catalogue's "oid" column.
x = pd.read_csv('/home/felorrieta/Downloads/path_signature_iisignature_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]
data = pd.merge(x, y, on="id")

# 80/20 split.
# The sampled row labels must be captured BEFORE the index is reset.
# Dropping by the index of an already re-indexed frame removes rows
# 0..n_train-1 of `data` rather than the sampled rows, which lets training
# rows leak into the test set.
train_idx = data.sample(frac=0.8, random_state=42).index
data_train = data.loc[train_idx].reset_index(drop=True)
data_test  = data.drop(train_idx).reset_index(drop=True)

# Everything except identifiers and label columns is a feature.
X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
y_train = data_train['survey_class_mapped']
X_test  = data_test[X_train.columns].copy()
y_test  = data_test['survey_class_mapped']

# Integer-encode the class names; the encoder is fitted on train only.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)
labels = list(le.classes_)

# Output directory for figures and the LaTeX table.
DOWNLOADS = r'/home/felorrieta/Catalina'
os.makedirs(DOWNLOADS, exist_ok=True)

print("Shapes:", X_train.shape, X_test.shape)
print("Labels:", labels)
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))

# =========================
# 1) SCORERS
# =========================
def robust_score(y_true, y_pred):
    """Composite model-selection score that punishes neglected classes.

    The score is ``0.50 * macro-F1 + 0.40 * balanced accuracy
    + 0.10 * worst per-class recall``, minus a penalty of twice the
    shortfall whenever the worst per-class recall falls below 0.30.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels.

    Returns:
        The composite score as a float (it can be negative when the
        penalty dominates).
    """
    per_class_recall = recall_score(y_true, y_pred, average=None, zero_division=0)
    worst_recall = float(np.min(per_class_recall))

    macro_f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
    balanced_acc = balanced_accuracy_score(y_true, y_pred)

    # Only a worst-class recall under 0.30 is penalised.
    if worst_recall >= 0.30:
        shortfall_penalty = 0.0
    else:
        shortfall_penalty = (0.30 - worst_recall) * 2.0

    return 0.50 * macro_f1 + 0.40 * balanced_acc + 0.10 * worst_recall - shortfall_penalty


# Scorer wrapper so the composite score can be passed as `scoring=`.
robust_scorer = make_scorer(robust_score)

# =========================
# 2) PIPELINE CON SMOTE
# =========================
# SMOTE's neighbour count is capped at 5 and kept below the size of the
# rarest training class.
class_counts = np.bincount(y_train_enc)
min_class_count = class_counts.min()
smote_k = min(5, min_class_count - 1)


def make_pipe(params=None):
    """Build a fresh imputer -> scaler -> VT -> SMOTE -> k-best -> SVC pipeline.

    Args:
        params: Optional mapping of ``step__param`` names to values, applied
            with ``set_params``. ``None`` or an empty mapping leaves the
            defaults untouched.

    Returns:
        An unfitted imblearn pipeline.
    """
    steps = [
        ("imp",   SimpleImputer(strategy="median")),
        ("sc",    StandardScaler()),
        ("vt",    VarianceThreshold(0.0)),
        ("smote", SMOTE(k_neighbors=smote_k, random_state=42)),
        ("kbest", SelectKBest(score_func=mutual_info_classif)),
        ("svc",   SVC(kernel="rbf", decision_function_shape="ovr", cache_size=4000)),
    ]
    pipeline = ImbPipeline(steps=steps)
    if not params:
        return pipeline
    pipeline.set_params(**params)
    return pipeline


# Default-configured pipeline used as the estimator of the search.
pipe = make_pipe()

# =========================
# 3) ESPACIO DE BÚSQUEDA
# =========================
# Candidate SVC class weights: sklearn's "balanced" heuristic plus several
# hand-picked per-class weight mappings keyed by encoded class id.
cw_options = [
    "balanced",
    {0: 2.0, 1: 4.0, 2: 1.0},
    {0: 2.5, 1: 5.0, 2: 1.0},
    {0: 1.5, 1: 3.5, 2: 0.8},
    {0: 3.0, 1: 6.0, 2: 1.0},
    {0: 1.0, 1: 3.0, 2: 0.7},
]

# Count how many features survive imputation + variance threshold on the full
# training set; this is the upper bound used for the k-best grid.
imp_tmp = SimpleImputer(strategy="median")
vt_tmp = VarianceThreshold(0.0)
X_tmp = imp_tmp.fit_transform(X_train)
X_tmp_vt = vt_tmp.fit_transform(X_tmp)
n_features_after_vt = X_tmp_vt.shape[1]

# Keep only grid values that fit, and always include "all surviving features".
k_grid_base = [128, 256, 384, 512, 768, 1023]
k_grid = sorted(
    {k for k in k_grid_base if k <= n_features_after_vt} | {n_features_after_vt}
)

print("Número de features originales :", X_train.shape[1])
print("Número de features post-VT    :", n_features_after_vt)
print("Grid kbest usado              :", k_grid)

# Distributions sampled by the randomized search.
param_dist = {
    "kbest__k":          k_grid,
    "svc__C":            loguniform(0.5, 2000),
    "svc__gamma":        loguniform(1e-6, 0.1),
    "svc__class_weight": cw_options,
}

# Shared 5-fold stratified splitter (fixed seed).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# =========================
# 4) BÚSQUEDA CON BARRA DE PROGRESO
# =========================
t0 = time.time()

# Randomized hyper-parameter search scored with the composite robust score.
# error_score=0.0 means a failing fit is scored 0 instead of aborting the run.
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_dist,
    n_iter=80,
    scoring=robust_scorer,
    cv=cv,
    random_state=42,
    n_jobs=-1,
    verbose=0,
    refit=True,
    return_train_score=False,
    error_score=0.0
)

# Derive the progress-bar total from the search object itself so it cannot
# drift from `n_iter` if the number of candidates is changed later.
n_fits = rs.n_iter * cv.n_splits
with tqdm_joblib(tqdm(
    desc="Buscando hiperparámetros",
    total=n_fits,
    unit="fit",
    colour="cyan",
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} fits [{elapsed}<{remaining}, {rate_fmt}]"
)):
    # Run the search's joblib workers on the threading backend.
    with joblib.parallel_config(backend="threading"):
        rs.fit(X_train, y_train_enc)

print("\nTiempo búsqueda (s):", int(time.time() - t0))
print("Best robust CV:", rs.best_score_)
print("Best params:", rs.best_params_)

# =========================
# 5) TOP-10 ÚNICOS
# =========================
def _canon_value(v):
    """Turn a sampled hyper-parameter value into a hashable, comparable form.

    Dicts become a sorted tuple of ``(int key, float weight)`` pairs, NumPy
    scalars become the matching Python scalar, and anything else is returned
    unchanged.
    """
    if isinstance(v, dict):
        pairs = [(int(label), float(weight)) for label, weight in v.items()]
        pairs.sort()
        return tuple(pairs)
    return v.item() if isinstance(v, np.generic) else v

def _params_key(d):
    """Return a hashable key for a parameter dict, independent of key order."""
    canon_items = [(name, _canon_value(value)) for name, value in d.items()]
    canon_items.sort()
    return tuple(canon_items)

# Rank every sampled candidate: best mean score first, ties broken by the
# smaller standard deviation across folds.
res = (
    pd.DataFrame(rs.cv_results_)
    .copy()
    .assign(params_key=lambda frame: frame["params"].apply(_params_key))
    .sort_values(["mean_test_score", "std_test_score"], ascending=[False, True])
    .reset_index(drop=True)
)

# Drop repeated parameter combinations, keeping the best-ranked occurrence.
res_unique = res.drop_duplicates(subset=["params_key"], keep="first").reset_index(drop=True)

top10 = (
    res_unique
    .loc[:, ["mean_test_score", "std_test_score", "params"]]
    .head(10)
    .copy()
)

print("\nTOP-10 CV robust (únicos):")
print(top10.to_string(index=False))

# =========================
# 6) FUNCIONES DE EVALUACIÓN Y PLOTEO
# =========================
def cv_auc_manual(params, X, y, cv):
    """Cross-validated macro one-vs-rest ROC AUC for one parameter set.

    A fresh pipeline is built with ``make_pipe(params)`` and fitted on each
    training split; the validation split is scored from the SVC decision
    function.

    Args:
        params: ``step__param`` mapping forwarded to ``make_pipe``.
        X: Feature table indexed positionally with ``.iloc`` (a DataFrame).
        y: Encoded labels, indexable by integer arrays.
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        ``(mean_auc, std_auc)`` over the folds, as floats.
    """
    aucs = []

    for tr_idx, va_idx in cv.split(X, y):
        model = make_pipe(params)
        model.fit(X.iloc[tr_idx], y[tr_idx])

        yva = y[va_idx]
        scores = model.decision_function(X.iloc[va_idx])

        if scores.ndim == 1:
            # Binary problem: the decision function is a single margin per
            # sample. Softmax over a single column would turn every score
            # into 1.0 and destroy the ranking, so the raw margins are used.
            auc = roc_auc_score(yva, scores)
        else:
            # Multiclass: roc_auc_score needs rows that sum to 1, so the
            # per-class margins are mapped through a softmax first.
            auc = roc_auc_score(
                yva,
                softmax(scores, axis=1),
                multi_class="ovr",
                average="macro"
            )
        aucs.append(auc)

    return float(np.mean(aucs)), float(np.std(aucs))

def _row_normalize(cm):
    """Convert a confusion matrix of counts into row-wise percentages.

    Rows whose total is zero are divided by 1 instead, so they come out as
    all zeros rather than NaN.
    """
    counts = cm.astype(float)
    totals = counts.sum(axis=1, keepdims=True)
    safe_totals = np.where(totals == 0, 1.0, totals)
    return (counts / safe_totals) * 100.0

def plot_cms(cm_train, cm_test, tag, save_dir, subtitle="",
             gap_width=0.28, wspace=0.15,
             label_fontsize=13, tick_fontsize=13, title_fontsize=14):
    """Draw train/test confusion matrices side by side and save them as a PNG.

    Each cell shows the row-normalised percentage and, below it, the raw
    count. Both panels share one colour bar on a fixed 0-100 scale. Tick
    labels come from the module-level ``labels``.

    Args:
        cm_train: Confusion matrix of counts for the training set.
        cm_test: Confusion matrix of counts for the test set.
        tag: Figure title (first line) and PNG file stem.
        save_dir: Directory where ``<tag>.png`` is written.
        subtitle: Second line of the figure title.
        gap_width: Relative width of the empty spacer between the panels.
        wspace: Horizontal spacing passed to the grid spec.
        label_fontsize: Font size of the axis labels.
        tick_fontsize: Font size of the tick labels.
        title_fontsize: Font size of the panel titles.
    """
    pct_train = _row_normalize(cm_train)
    pct_test = _row_normalize(cm_test)

    # Layout: [train panel | spacer | test panel | colour bar].
    fig = plt.figure(figsize=(10.8, 4.8))
    grid = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)
    ax_train = fig.add_subplot(grid[0, 0])
    spacer = fig.add_subplot(grid[0, 1])
    spacer.axis("off")
    ax_test = fig.add_subplot(grid[0, 2])
    ax_cbar = fig.add_subplot(grid[0, 3])

    tick_positions = np.arange(len(labels))
    panels = (
        (ax_train, pct_train, cm_train, "Train"),
        (ax_test, pct_test, cm_test, "Test"),
    )

    heatmap = None
    for ax, pct_matrix, count_matrix, panel_title in panels:
        heatmap = ax.imshow(pct_matrix, cmap="Blues", vmin=0, vmax=100)
        ax.set_title(panel_title, fontsize=title_fontsize)
        ax.set_xticks(tick_positions)
        ax.set_yticks(tick_positions)
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
        ax.set_yticklabels(labels, fontsize=tick_fontsize)
        ax.set_xlabel("Predicho", fontsize=label_fontsize)
        ax.set_ylabel("Real", fontsize=label_fontsize)

        # Annotate every cell; switch to white text on dark (>50 %) cells.
        for (row, col), pct in np.ndenumerate(pct_matrix):
            text_color = "white" if pct > 50 else "black"
            count = int(count_matrix[row, col])
            ax.text(col, row - 0.10, f"{pct:.1f}%", ha="center", va="center",
                    color=text_color, fontsize=10, fontweight="bold")
            ax.text(col, row + 0.22, f"({count})", ha="center", va="center",
                    color=text_color, fontsize=7)

    fig.colorbar(heatmap, cax=ax_cbar, label="% por fila (clase real)")
    fig.suptitle(f"{tag}\n{subtitle}", fontsize=13, y=0.98)
    fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

    fname = os.path.join(save_dir, f"{tag}.png")
    fig.savefig(fname, dpi=300, bbox_inches="tight")
    plt.close(fig)  # release the figure
    print(f"  Figura guardada: {fname}")

def eval_one(params, tag):
    """Evaluate one hyper-parameter set on CV, train and test, and plot it.

    Computes the cross-validated AUC with ``cv_auc_manual``, refits the
    pipeline on the full training set, prints metrics and classification
    reports, and saves the train/test confusion-matrix figure.

    Args:
        params: ``step__param`` mapping forwarded to ``make_pipe``.
        tag: Model name used in the printed output, the figure title and the
            figure file name.

    Returns:
        A dict of rounded summary metrics for the final table.
    """
    def _macro_ovr_auc(y_true, scores):
        # Binary problems yield one margin per sample; softmax over a single
        # column would make every score 1.0, so raw margins are used there.
        if scores.ndim == 1:
            return roc_auc_score(y_true, scores)
        # Multiclass: roc_auc_score needs rows summing to 1, hence softmax.
        return roc_auc_score(y_true, softmax(scores, axis=1),
                             multi_class="ovr", average="macro")

    print("\n" + "="*95)
    print(f"Evaluando {tag}...")

    auc_cv, sd_cv = cv_auc_manual(params, X_train, y_train_enc, cv)

    # Refit on the full training set for the train/test figures.
    model = make_pipe(params)
    model.fit(X_train, y_train_enc)

    yhat_tr = model.predict(X_train)
    yhat_te = model.predict(X_test)

    auc_train = _macro_ovr_auc(y_train_enc, model.decision_function(X_train))
    auc_test  = _macro_ovr_auc(y_test_enc,  model.decision_function(X_test))

    # Overfitting gaps: train AUC versus CV AUC and versus test AUC.
    gap_cv  = round(auc_train - auc_cv, 4)
    gap_rep = round(auc_train - auc_test, 4)

    acc_train = accuracy_score(y_train_enc, yhat_tr)
    f1w_train = f1_score(y_train_enc, yhat_tr, average="weighted", zero_division=0)

    acc_test  = accuracy_score(y_test_enc, yhat_te)
    f1w_test  = f1_score(y_test_enc, yhat_te, average="weighted", zero_division=0)
    mf1_test  = f1_score(y_test_enc, yhat_te, average="macro", zero_division=0)
    bacc_test = balanced_accuracy_score(y_test_enc, yhat_te)

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc, yhat_te)

    print(f"  AUC_CV={auc_cv:.4f}±{sd_cv:.4f} | AUC_train={auc_train:.4f} | AUC_test={auc_test:.4f}")
    print(f"  Gap_CV={gap_cv:.4f} | Gap_rep={gap_rep:.4f}")
    print(f"  Acc_train={acc_train:.4f} | F1w_train={f1w_train:.4f}")
    print(f"  Acc_test={acc_test:.4f} | F1w_test={f1w_test:.4f} | macroF1={mf1_test:.4f} | balacc={bacc_test:.4f}")

    print("\n  Report TRAIN")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))
    print("  CM TEST")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))
    print("\n  Report TEST")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    subtitle = (
        f"Acc train={acc_train:.3f} | "
        f"Acc test={acc_test:.3f} | "
        f"F1w train={f1w_train:.3f} | "
        f"F1w test={f1w_test:.3f}"
    )
    plot_cms(cm_tr, cm_te, tag, DOWNLOADS, subtitle=subtitle)

    return {
        "tag":       tag,
        "AUC_CV":    round(auc_cv, 4),
        "SD_CV":     round(sd_cv, 4),
        "Gap_CV":    gap_cv,
        # "AUC_rep" holds the AUC on the held-out test set.
        "AUC_rep":   round(auc_test, 4),
        # NOTE(review): "SD_rep" repeats the CV standard deviation; no separate
        # dispersion is computed for the single test evaluation. Confirm this
        # is what the table column is meant to report.
        "SD_rep":    round(sd_cv, 4),
        "Gap_rep":   gap_rep,
        "Acc_train": round(acc_train, 4),
        "F1w_train": round(f1w_train, 4),
        "Acc_test":  round(acc_test, 4),
        "F1w_test":  round(f1w_test, 4),
        "macroF1":   round(mf1_test, 4),
        "balacc":    round(bacc_test, 4),
    }

# =========================
# 7) EVALUAR TOP-5
# =========================
# Evaluate the five best-ranked candidates, tagging them SVM1..SVM5 in
# ranking order.
results_summary = [
    eval_one(candidate.params, f"SVM{rank}")
    for rank, candidate in enumerate(top10.head(5).itertuples(index=False), start=1)
]

# =========================
# 8) TABLA RESUMEN + LATEX
# =========================
summary_df = pd.DataFrame(results_summary)

print("\n" + "="*95)
print("RESUMEN FINAL:")
print(summary_df.to_string(index=False))

# The highlighted row is the one with the highest "AUC_rep".
# NOTE(review): "AUC_rep" is filled from the held-out test AUC in eval_one, so
# the winner is picked on test data; confirm this selection is intended.
best_idx = summary_df["AUC_rep"].idxmax()
best_tag = summary_df.loc[best_idx, "tag"]

print(f"\n>>> Mejor modelo: {best_tag}  AUC_test={summary_df.loc[best_idx,'AUC_rep']:.4f}")

latex_rows = []
for _, r in summary_df.iterrows():
    highlight = r"\rowcolor{BlueBest} " if r["tag"] == best_tag else ""
    # Strip the "SVM" prefix instead of taking only the last character, so a
    # tag such as "SVM10" yields the subscript 10 rather than 0.
    model_no = r["tag"][len("SVM"):]
    latex_rows.append(
        f"        {highlight}SVM$_{{{model_no}}}$ & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1w_test']:.4f} \\\\"
    )

# Full table: fixed preamble/header, one row per model, fixed footer.
latex_table = (
    r"\begin{table}[!ht]" + "\n"
    r"\centering" + "\n"
    r"\small" + "\n"
    r"\renewcommand{\arraystretch}{1.18}" + "\n"
    r"\setlength{\tabcolsep}{6pt}" + "\n"
    r"\begin{adjustbox}{width=\textwidth}" + "\n"
    r"\rowcolors{2}{white}{gray!8}" + "\n"
    r"\begin{tabular}{ccccccccc}" + "\n"
    r"\toprule" + "\n"
    r"\rowcolor{BlueHeader}" + "\n"
    r"\color{white}\textbf{Modelo} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{CV}} &" + "\n"
    r"\color{white}\textbf{AUC\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{SD\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Gap\textsubscript{rep}} &" + "\n"
    r"\color{white}\textbf{Acc\textsubscript{test}} &" + "\n"
    r"\color{white}\textbf{F1\textsubscript{w,test}} \\" + "\n"
    r"\midrule" + "\n"
    + "\n".join(latex_rows) + "\n"
    r"\bottomrule" + "\n"
    r"\end{tabular}" + "\n"
    r"\end{adjustbox}" + "\n"
    r"\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}" + "\n"
    r"\label{tab:svm_top5_auc_reales}" + "\n"
    r"\end{table}"
)

latex_path = os.path.join(DOWNLOADS, "tabla_svm_top5_reales.tex")
with open(latex_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"\nTabla LaTeX guardada en: {latex_path}")
print("\n--- PREVIEW ---\n" + latex_table)

# =========================
# 9) TIEMPO TOTAL
# =========================
# Wall-clock time since the start of the cell, reported as hh:mm:ss.
elapsed = int(time.time() - start_time)
m, s = divmod(elapsed, 60)
h, m = divmod(m, 60)
print(f"\nTiempo total: {h:02d}:{m:02d}:{s:02d}")

Shapes: (1510, 1022) (377, 1022)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 422, 1: 189, 2: 899}
Número de features originales : 1022
Número de features post-VT    : 1022
Grid kbest usado              : [128, 256, 384, 512, 768, 1022]


Tiempo búsqueda (s): 1160
Best robust CV: 0.5443172816015224
Best params: {'kbest__k': 1022, 'svc__C': 784.8081929215672, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842176e-05}

TOP-10 CV robust (únicos):
 mean_test_score  std_test_score                                                                                                                            params
        0.544317        0.007625             {'kbest__k': 1022, 'svc__C': 784.8081929215672, 'svc__class_weight': 'balanced', 'svc__gamma': 8.215507070842176e-05}
        0.534014        0.023643               {'kbest__k': 1022, 'svc__C': 2.540458074225692, 'svc__class_weight': 'balanced', 'svc__gamma': 0.06395288218539359}
        0.530168        0.024917             {'kbest__k': 1022, 'svc__C': 10.772186132342652, 'svc__class_weight': 'balanced', 'svc__gamma': 0.058258491495380586}
        0.529176        0.022424             {'kbest__k': 1022, 'svc__C': 34.57103872771001, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0002655521712973633}
        0.516400        0.024635 {'kbest__k': 1022, 'svc__C': 3.782991252375207, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.07842676320619435}
        0.511990        0.018056 {'kbest__k': 512, 'svc__C': 1221.212458656701, 'svc__class_weight': {0: 2.0, 1: 4.0, 2: 1.0}, 'svc__gamma': 0.015436560150278347}
        0.499701        0.043360            {'kbest__k': 1022, 'svc__C': 30.036641140654915, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0035753161317240768}
        0.483330        0.071734            {'kbest__k': 1022, 'svc__C': 1.2456144526856274, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0017316120634454752}
        0.357605        0.112258            {'kbest__k': 1022, 'svc__C': 0.6576892252744614, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0014851035064938562}
        0.295434        0.231032  {'kbest__k': 512, 'svc__C': 61.75015483640841, 'svc__class_weight': {0: 1.0, 1: 3.0, 2: 0.7}, 'svc__gamma': 0.05019578058935036}

===============================================================================================
Evaluando SVM1...
  AUC_CV=0.7320±0.0056 | AUC_train=0.8010 | AUC_test=0.7609
  Gap_CV=0.0691 | Gap_rep=0.0402
  Acc_train=0.7126 | F1w_train=0.7030
  Acc_test=0.7135 | F1w_test=0.6999 | macroF1=0.6212 | balacc=0.6160

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.57      0.45      0.51       422
      Blazar       0.72      0.63      0.68       189
         QSO       0.76      0.85      0.80       899

    accuracy                           0.71      1510
   macro avg       0.68      0.65      0.66      1510
weighted avg       0.70      0.71      0.70      1510

  CM TEST
        AGN  Blazar  QSO
AGN      39       7   55
Blazar    5      18    7
QSO      29       5  212

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.39      0.45       101
      Blazar       0.60      0.60      0.60        30
         QSO       0.77      0.86      0.82       246

    accuracy                           0.71       377
   macro avg       0.64      0.62      0.62       377
weighted avg       0.70      0.71      0.70       377

  Figura guardada: /home/felorrieta/Catalina/SVM1.png

===============================================================================================
Evaluando SVM2...
  AUC_CV=0.7069±0.0146 | AUC_train=0.9198 | AUC_test=0.8313
  Gap_CV=0.2129 | Gap_rep=0.0885
  Acc_train=0.8245 | F1w_train=0.8252
  Acc_test=0.7215 | F1w_test=0.7198 | macroF1=0.6731 | balacc=0.7078

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.71      0.74      0.72       422
      Blazar       0.87      0.87      0.87       189
         QSO       0.87      0.86      0.86       899

    accuracy                           0.82      1510
   macro avg       0.82      0.82      0.82      1510
weighted avg       0.83      0.82      0.83      1510

  CM TEST
        AGN  Blazar  QSO
AGN      49       9   43
Blazar    3      25    2
QSO      41       7  198

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.49      0.51       101
      Blazar       0.61      0.83      0.70        30
         QSO       0.81      0.80      0.81       246

    accuracy                           0.72       377
   macro avg       0.65      0.71      0.67       377
weighted avg       0.72      0.72      0.72       377

  Figura guardada: /home/felorrieta/Catalina/SVM2.png

===============================================================================================
Evaluando SVM3...
  AUC_CV=0.6988±0.0197 | AUC_train=0.9442 | AUC_test=0.8469
  Gap_CV=0.2454 | Gap_rep=0.0973
  Acc_train=0.8662 | F1w_train=0.8669
  Acc_test=0.7613 | F1w_test=0.7604 | macroF1=0.7206 | balacc=0.7437

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.77      0.81      0.79       422
      Blazar       0.92      0.92      0.92       189
         QSO       0.90      0.88      0.89       899

    accuracy                           0.87      1510
   macro avg       0.86      0.87      0.87      1510
weighted avg       0.87      0.87      0.87      1510

  CM TEST
        AGN  Blazar  QSO
AGN      57       7   37
Blazar    3      25    2
QSO      36       5  205

  Report TEST
              precision    recall  f1-score   support

         AGN       0.59      0.56      0.58       101
      Blazar       0.68      0.83      0.75        30
         QSO       0.84      0.83      0.84       246

    accuracy                           0.76       377
   macro avg       0.70      0.74      0.72       377
weighted avg       0.76      0.76      0.76       377

  Figura guardada: /home/felorrieta/Catalina/SVM3.png

===============================================================================================
Evaluando SVM4...
  AUC_CV=0.7270±0.0145 | AUC_train=0.7751 | AUC_test=0.7370
  Gap_CV=0.0480 | Gap_rep=0.0381
  Acc_train=0.6821 | F1w_train=0.6669
  Acc_test=0.7003 | F1w_test=0.6810 | macroF1=0.5985 | balacc=0.6073

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.53      0.37      0.44       422
      Blazar       0.66      0.61      0.64       189
         QSO       0.73      0.84      0.78       899

    accuracy                           0.68      1510
   macro avg       0.64      0.61      0.62      1510
weighted avg       0.66      0.68      0.67      1510

  CM TEST
        AGN  Blazar  QSO
AGN      33       9   59
Blazar    2      19    9
QSO      27       7  212

  Report TEST
              precision    recall  f1-score   support

         AGN       0.53      0.33      0.40       101
      Blazar       0.54      0.63      0.58        30
         QSO       0.76      0.86      0.81       246

    accuracy                           0.70       377
   macro avg       0.61      0.61      0.60       377
weighted avg       0.68      0.70      0.68       377

  Figura guardada: /home/felorrieta/Catalina/SVM4.png

===============================================================================================
Evaluando SVM5...
  AUC_CV=0.6850±0.0187 | AUC_train=0.9247 | AUC_test=0.8254
  Gap_CV=0.2397 | Gap_rep=0.0993
  Acc_train=0.8053 | F1w_train=0.8088
  Acc_test=0.7056 | F1w_test=0.7195 | macroF1=0.6419 | balacc=0.7328

  Report TRAIN
              precision    recall  f1-score   support

         AGN       0.70      0.81      0.75       422
      Blazar       0.67      0.98      0.79       189
         QSO       0.93      0.77      0.84       899

    accuracy                           0.81      1510
   macro avg       0.76      0.85      0.79      1510
weighted avg       0.83      0.81      0.81      1510

  CM TEST
        AGN  Blazar  QSO
AGN      61      17   23
Blazar    3      26    1
QSO      45      22  179

  Report TEST
              precision    recall  f1-score   support

         AGN       0.56      0.60      0.58       101
      Blazar       0.40      0.87      0.55        30
         QSO       0.88      0.73      0.80       246

    accuracy                           0.71       377
   macro avg       0.61      0.73      0.64       377
weighted avg       0.76      0.71      0.72       377

  Figura guardada: /home/felorrieta/Catalina/SVM5.png

===============================================================================================
RESUMEN FINAL:
 tag  AUC_CV  SD_CV  Gap_CV  AUC_rep  SD_rep  Gap_rep  Acc_train  F1w_train  Acc_test  F1w_test  macroF1  balacc
SVM1  0.7320 0.0056  0.0691   0.7609  0.0056   0.0402     0.7126     0.7030    0.7135    0.6999   0.6212  0.6160
SVM2  0.7069 0.0146  0.2129   0.8313  0.0146   0.0885     0.8245     0.8252    0.7215    0.7198   0.6731  0.7078
SVM3  0.6988 0.0197  0.2454   0.8469  0.0197   0.0973     0.8662     0.8669    0.7613    0.7604   0.7206  0.7437
SVM4  0.7270 0.0145  0.0480   0.7370  0.0145   0.0381     0.6821     0.6669    0.7003    0.6810   0.5985  0.6073
SVM5  0.6850 0.0187  0.2397   0.8254  0.0187   0.0993     0.8053     0.8088    0.7056    0.7195   0.6419  0.7328

>>> Mejor modelo: SVM3  AUC_test=0.8469

Tabla LaTeX guardada en: /home/felorrieta/Catalina/tabla_svm_top5_reales.tex

--- PREVIEW ---
\begin{table}[!ht]
\centering
\small
\renewcommand{\arraystretch}{1.18}
\setlength{\tabcolsep}{6pt}
\begin{adjustbox}{width=\textwidth}
\rowcolors{2}{white}{gray!8}
\begin{tabular}{ccccccccc}
\toprule
\rowcolor{BlueHeader}
\color{white}\textbf{Modelo} &
\color{white}\textbf{AUC\textsubscript{CV}} &
\color{white}\textbf{SD\textsubscript{CV}} &
\color{white}\textbf{Gap\textsubscript{CV}} &
\color{white}\textbf{AUC\textsubscript{rep}} &
\color{white}\textbf{SD\textsubscript{rep}} &
\color{white}\textbf{Gap\textsubscript{rep}} &
\color{white}\textbf{Acc\textsubscript{test}} &
\color{white}\textbf{F1\textsubscript{w,test}} \\
\midrule
        SVM$_{1}$ & 0.7320 & 0.0056 & 0.0691 & 0.7609 & 0.0056 & 0.0402 & 0.7135 & 0.6999 \\
        SVM$_{2}$ & 0.7069 & 0.0146 & 0.2129 & 0.8313 & 0.0146 & 0.0885 & 0.7215 & 0.7198 \\
        \rowcolor{BlueBest} SVM$_{3}$ & 0.6988 & 0.0197 & 0.2454 & 0.8469 & 0.0197 & 0.0973 & 0.7613 & 0.7604 \\
        SVM$_{4}$ & 0.7270 & 0.0145 & 0.0480 & 0.7370 & 0.0145 & 0.0381 & 0.7003 & 0.6810 \\
        SVM$_{5}$ & 0.6850 & 0.0187 & 0.2397 & 0.8254 & 0.0187 & 0.0993 & 0.7056 & 0.7195 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Desempeño de los cinco mejores modelos SVM (path signature, ESIG, datos reales).}
\label{tab:svm_top5_auc_reales}
\end{table}

Tiempo total: 00:23:53

# ============================================================
# XGB OOF-first (5x2) — IISIGNATURE LOG-FIRMA, DATOS REALES
# + FIXED OOF (no overwrite in repeated CV)
# + TOP-5 evaluation: TRAIN & TEST confusion matrices + reports
# + Timing (search / top5 eval / total)
# + Extra: build a LaTeX-ready table for TOP-5
# ============================================================

import time
import inspect
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (
    StratifiedKFold,
    RepeatedStratifiedKFold,
    ParameterSampler
)
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    classification_report,
    accuracy_score,
    f1_score,
    balanced_accuracy_score
)
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import randint, uniform, loguniform

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import xgboost as xgb
from xgboost import XGBClassifier


# ============================================================
# 1) LOAD + MERGE + SPLIT (80/20)  [IISIGNATURE LOG-FIRMA, DATOS REALES]
# ============================================================
# Log-signature features (x) joined to the labelled catalogue (y) on "id",
# which is a copy of the catalogue's "oid" column.
x = pd.read_csv('/home/felorrieta/Catalina/logsignature_iisig_M9.csv')
y = pd.read_csv('/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv')
y["id"] = y["oid"]

data = x.merge(y, on="id")

# Sample 80 % of the row labels for training; every remaining label is test.
# The labels are taken before any index reset so the two sets are disjoint.
train_idx = data.sample(frac=0.8, random_state=42).index
data_train = data.loc[train_idx].reset_index(drop=True)
data_test  = data.drop(index=train_idx).reset_index(drop=True)

# Everything except identifiers and label columns is a feature.
X_train = data_train.drop(columns=['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id'])
X_test  = data_test.loc[:, X_train.columns].copy()

y_train = data_train['survey_class_mapped']
y_test  = data_test['survey_class_mapped']

# Plain NumPy copies of the feature tables.
X_train_np = np.asarray(X_train)
X_test_np  = np.asarray(X_test)

print("Shapes:", X_train.shape, X_test.shape)

# ============================================================
# 2) Encoding + class weights
# ============================================================
# Integer-encode the class names; the encoder is fitted on train only.
le = LabelEncoder().fit(y_train)
y_train_enc = le.transform(y_train)
y_test_enc  = le.transform(y_test)

labels = le.classes_
n_classes = len(labels)

# "balanced" class weights computed from the training labels, keyed by
# encoded class id.
classes = np.unique(y_train_enc)
cw = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_enc)
class_weight_dict = dict(zip((int(c) for c in classes), (float(w) for w in cw)))

# Classes listed here get their weight multiplied by HARD_FACTOR.
HARD_CLASSES = ["AGN", "QSO"]
HARD_FACTOR  = 1.8

for name in HARD_CLASSES or []:
    matches = np.where(labels == name)[0]
    if matches.size == 0:
        continue  # class name not present in the encoder
    hid = int(matches[0])
    class_weight_dict[hid] *= HARD_FACTOR

def make_sample_weight(y_enc, cw_dict):
    """Map each encoded label to its class weight.

    Args:
        y_enc: Iterable of encoded class ids.
        cw_dict: Mapping from ``int`` class id to weight.

    Returns:
        A float array with one weight per entry of ``y_enc``.
    """
    weights = [cw_dict[int(label)] for label in y_enc]
    return np.array(weights, dtype=float)

# Encoded id of the "Blazar" class, or None when that class is absent.
BLZ_ID = next(
    (int(pos) for pos, name in enumerate(labels) if name == "Blazar"),
    None,
)


def f1_blazar(y_true, y_pred):
    """Binary F1 of "Blazar versus everything else".

    Returns NaN when the label set contains no "Blazar" class.
    """
    if BLZ_ID is None:
        return np.nan
    true_is_blazar = (y_true == BLZ_ID).astype(int)
    pred_is_blazar = (y_pred == BLZ_ID).astype(int)
    return f1_score(true_is_blazar, pred_is_blazar)


print("Labels:", list(labels))
print("Distribución train:", dict(zip(*np.unique(y_train_enc, return_counts=True))))
print("Class weights:", class_weight_dict)

# ============================================================
# 3) Preprocess + XGB fit (early stopping)
# ============================================================
def preprocess_fit_transform(X_tr_raw, X_va_raw):
    """Median-impute and drop zero-variance columns, fitting on train only.

    Args:
        X_tr_raw: Raw training features; both transformers are fitted on it.
        X_va_raw: Raw validation features; only transformed.

    Returns:
        ``(X_tr, X_va, imputer, variance_filter)`` with the transformed
        matrices and the two fitted transformers.
    """
    imp = SimpleImputer(strategy="median").fit(X_tr_raw)
    X_tr_i, X_va_i = imp.transform(X_tr_raw), imp.transform(X_va_raw)

    vt = VarianceThreshold(0.0).fit(X_tr_i)
    return vt.transform(X_tr_i), vt.transform(X_va_i), imp, vt

def preprocess_full(X_train_raw, X_test_raw):
    """Preprocess the full train/test pair, fitting on train only.

    Same steps as the per-fold preprocessing (median imputation followed by
    a zero-variance filter), so it delegates to ``preprocess_fit_transform``
    instead of keeping a second copy of that logic.

    Args:
        X_train_raw: Raw training features; the transformers are fitted on it.
        X_test_raw: Raw test features; only transformed.

    Returns:
        ``(X_train, X_test, imputer, variance_filter)``.
    """
    return preprocess_fit_transform(X_train_raw, X_test_raw)

def fit_xgb_one_fold(params, X_tr, y_tr, X_va, y_va, w_tr=None, w_va=None):
    """Fit one XGBoost classifier with early stopping on a validation set.

    Args:
        params: Extra ``XGBClassifier`` keyword arguments. They are passed
            alongside the fixed settings below, so a key that repeats one of
            them raises ``TypeError``.
        X_tr, y_tr: Training features and encoded labels.
        X_va, y_va: Validation features and labels used as the ``eval_set``.
        w_tr: Optional per-sample training weights.
        w_va: Optional per-sample validation weights; forwarded only when
            ``fit`` accepts ``sample_weight_eval_set``.

    Returns:
        The fitted ``XGBClassifier``.
    """
    fixed_settings = {
        "random_state": 42,
        "n_jobs": -1,
        "tree_method": "hist",
        # Upper bound on boosting rounds; early stopping can end the fit sooner.
        "n_estimators": 20000,
        "verbosity": 0,
        "objective": "multi:softprob",
        "num_class": n_classes,
        "eval_metric": "mlogloss",
    }

    early_stop = xgb.callback.EarlyStopping(rounds=250, save_best=True)
    model = XGBClassifier(**fixed_settings, **params, callbacks=[early_stop])

    fit_kwargs = {
        "X": X_tr,
        "y": y_tr,
        "sample_weight": w_tr,
        "eval_set": [(X_va, y_va)],
        "verbose": False,
    }

    # Pass validation weights only if this `fit` signature supports them.
    fit_param_names = inspect.signature(model.fit).parameters
    if "sample_weight_eval_set" in fit_param_names and w_va is not None:
        fit_kwargs["sample_weight_eval_set"] = [w_va]

    model.fit(**fit_kwargs)
    return model

def to_plain_params(d):
    """Return a copy of ``d`` with NumPy floating scalars cast to ``float``.

    All other values (including NumPy integers) are passed through unchanged.
    """
    return {
        key: (float(val) if isinstance(val, np.floating) else val)
        for key, val in d.items()
    }

# ============================================================
# 4) FIXED OOF eval for repeated CV
# ============================================================
def oof_eval_xgb(params, X, y, cv):
    """Evaluate one hyper-parameter set with out-of-fold (OOF) predictions.

    For every split produced by ``cv`` the fold is preprocessed with
    ``preprocess_fit_transform``, a model is trained with
    ``fit_xgb_one_fold`` and its validation probabilities are accumulated.
    When a sample is validated more than once (repeated CV) its
    probabilities are averaged before the OOF label is taken.

    Args:
        params: Hyper-parameters forwarded to ``fit_xgb_one_fold``.
        X: Feature matrix indexable by integer index arrays.
        y: Encoded labels indexable by integer index arrays.
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Dict with OOF metrics (``oof_macroF1``, ``oof_bacc``, ``oof_acc``,
        ``oof_f1_blazar``), per-fold aggregates (``fold_f1m_mean``,
        ``fold_f1m_std``, ``fold_bacc_mean``, ``fold_f1blz_mean``) and
        ``best_n_cv``: the median ``best_iteration`` over folds plus one,
        or 800 when no fold reports a ``best_iteration``.

    Note:
        Reads the module-level ``class_weight_dict`` and calls the
        module-level ``make_sample_weight`` and ``f1_blazar``.
    """
    n_samples = len(y)
    n_cls = len(np.unique(y))
    acc_proba = np.zeros((n_samples, n_cls), dtype=float)
    acc_hits = np.zeros(n_samples, dtype=float)

    per_fold = {"f1m": [], "bacc": [], "f1blz": []}
    stop_rounds = []

    for fit_rows, hold_rows in cv.split(X, y):
        y_fit = y[fit_rows]
        y_hold = y[hold_rows]
        X_fit, X_hold, _, _ = preprocess_fit_transform(X[fit_rows], X[hold_rows])

        booster = fit_xgb_one_fold(
            params, X_fit, y_fit, X_hold, y_hold,
            w_tr=make_sample_weight(y_fit, class_weight_dict),
            w_va=make_sample_weight(y_hold, class_weight_dict),
        )

        hold_proba = booster.predict_proba(X_hold)
        acc_proba[hold_rows] += hold_proba
        acc_hits[hold_rows] += 1.0

        hold_pred = np.argmax(hold_proba, axis=1)
        per_fold["f1m"].append(f1_score(y_hold, hold_pred, average="macro"))
        per_fold["bacc"].append(balanced_accuracy_score(y_hold, hold_pred))
        per_fold["f1blz"].append(f1_blazar(y_hold, hold_pred))

        stop_rounds.append(getattr(booster, "best_iteration", None))

    # Average over the number of times each sample was held out; the small
    # epsilon guards the division for samples never held out.
    oof_proba = acc_proba / (acc_hits[:, None] + 1e-12)
    oof_pred = np.argmax(oof_proba, axis=1)

    known_rounds = [r for r in stop_rounds if r is not None]
    if len(known_rounds):
        best_n_cv = int(np.median(known_rounds) + 1)
    else:
        best_n_cv = 800

    summary = {}
    summary["oof_macroF1"] = float(f1_score(y, oof_pred, average="macro"))
    summary["oof_bacc"] = float(balanced_accuracy_score(y, oof_pred))
    summary["oof_acc"] = float(accuracy_score(y, oof_pred))
    summary["oof_f1_blazar"] = float(f1_blazar(y, oof_pred))
    summary["fold_f1m_mean"] = float(np.mean(per_fold["f1m"]))
    summary["fold_f1m_std"] = float(np.std(per_fold["f1m"]))
    summary["fold_bacc_mean"] = float(np.mean(per_fold["bacc"]))
    summary["fold_f1blz_mean"] = float(np.mean(per_fold["f1blz"]))
    summary["best_n_cv"] = int(best_n_cv)
    return summary

# ============================================================
# 5) Search space + OOF-first run
# ============================================================
# Wall-clock start of the whole run (search + later evaluation).
TOTAL_T0 = time.time()

# 5 folds x 2 repeats with a fixed seed; the same splitter is passed to
# every trial below.
cv_rep = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

# Sampling distributions for the random search.
# NOTE(review): loguniform / randint / uniform are imported outside this
# view — presumably scipy.stats, where uniform(loc, scale) spans
# [loc, loc + scale] and randint excludes its upper bound; confirm.
param_dist = {
    "learning_rate": loguniform(0.007, 0.06),
    "max_depth": randint(2, 6),
    "min_child_weight": loguniform(10.0, 150.0),
    "subsample": uniform(0.65, 0.35),
    "colsample_bytree": uniform(0.65, 0.35),
    "colsample_bynode": uniform(0.65, 0.35),
    "gamma": loguniform(1e-4, 8.0),
    "reg_alpha": loguniform(1e-10, 1.0),
    "reg_lambda": loguniform(1.0, 200.0),
    "grow_policy": ["depthwise", "lossguide"],
    "max_leaves": randint(16, 129),
}

# Number of sampled configurations, and how many of the best are kept for
# the train/test evaluation further down.
N_ITER = 30
TOP_K  = 5

# Draw all candidate configurations up front.
sampler = list(ParameterSampler(param_dist, n_iter=N_ITER, random_state=42))

print("\n########## XGB OOF-first (5x2) — IISIGNATURE LOG-FIRMA REALES ##########")
print(f"N_ITER={N_ITER} | CV=5x2 | TOP_K={TOP_K}")

SEARCH_T0 = time.time()
rows = []

# One OOF evaluation per sampled configuration; the params dict is stored
# next to its metrics so it can be recovered from the results table.
for params in tqdm(sampler, total=N_ITER, desc="OOF trials"):
    stats = oof_eval_xgb(params, X_train_np, y_train_enc, cv_rep)
    rows.append({"params": params, **stats})

results = pd.DataFrame(rows)

# Rank by OOF macro-F1 (descending); ties are broken by lower fold-to-fold
# std, then higher balanced accuracy, then higher Blazar F1.
results = results.sort_values(
    ["oof_macroF1", "fold_f1m_std", "oof_bacc", "oof_f1_blazar"],
    ascending=[False, True, False, False]
).reset_index(drop=True)

SEARCH_T1 = time.time()

print("\nTOP 10 by FIXED OOF macro-F1 (and stability):")
print(results.head(10)[[
    "oof_macroF1", "oof_bacc", "oof_acc", "oof_f1_blazar",
    "fold_f1m_std", "best_n_cv", "params"
]].to_string(index=False))

# Elapsed search time formatted as hh:mm:ss.
print("\nTiempo búsqueda (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(SEARCH_T1 - SEARCH_T0)))

# ============================================================
# 6) MATRICES TRAIN/TEST PARA EL TOP-5
# ============================================================
def _row_normalize(cm):
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return (cm / row_sums) * 100.0

def save_confusion_train_test(cm_train, cm_test, labels, outpath,
                              title_prefix="", subtitle="",
                              gap_width=0.28, wspace=0.15,
                              label_fontsize=13, tick_fontsize=13, title_fontsize=14,
                              cmap="Greens"):
    """Save a side-by-side train/test confusion-matrix figure to ``outpath``.

    Both matrices are drawn as row-normalized percentage heatmaps on a shared
    0-100 color scale. Every cell is annotated with its percentage and, below
    it, the raw count in parentheses. The figure is always closed before the
    function returns, including when drawing or saving raises.

    Args:
        cm_train: Confusion matrix of counts for the training set.
        cm_test: Confusion matrix of counts for the test set.
        labels: Class names used for both axes' tick labels.
        outpath: Destination path handed to ``fig.savefig``.
        title_prefix: First line of the figure title.
        subtitle: Second line of the figure title.
        gap_width: Width ratio of the blank spacer column between panels.
        wspace: Horizontal spacing passed to the ``GridSpec``.
        label_fontsize: Font size of the axis labels.
        tick_fontsize: Font size of the tick labels.
        title_fontsize: Font size of the per-panel titles.
        cmap: Matplotlib colormap name for the heatmaps.

    Returns:
        None. The figure is written to ``outpath`` at 300 dpi.
    """
    cm_tr_pct = _row_normalize(cm_train)
    cm_te_pct = _row_normalize(cm_test)

    # Cells above this row percentage get white text for contrast.
    thr = 50

    fig = plt.figure(figsize=(10.8, 4.8))
    try:
        # Columns: train panel | spacer | test panel | colorbar.
        gs = gridspec.GridSpec(1, 4, width_ratios=[1, gap_width, 1, 0.08], wspace=wspace)

        ax1 = fig.add_subplot(gs[0, 0])
        ax_gap = fig.add_subplot(gs[0, 1])
        ax2 = fig.add_subplot(gs[0, 2])
        ax_cbar = fig.add_subplot(gs[0, 3])
        ax_gap.axis("off")

        panels = [
            (ax1, cm_tr_pct, cm_train, "Train"),
            (ax2, cm_te_pct, cm_test,  "Test"),
        ]

        for ax, cm_pct, cm_cnt, t in panels:
            im = ax.imshow(cm_pct, cmap=cmap, vmin=0, vmax=100)
            ax.set_title(t, fontsize=title_fontsize)

            ax.set_xticks(np.arange(len(labels)))
            ax.set_yticks(np.arange(len(labels)))
            ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=tick_fontsize)
            ax.set_yticklabels(labels, fontsize=tick_fontsize)

            ax.set_xlabel("Predicho", fontsize=label_fontsize)
            ax.set_ylabel("Real", fontsize=label_fontsize)

            for i in range(cm_pct.shape[0]):
                for j in range(cm_pct.shape[1]):
                    pct = cm_pct[i, j]
                    cnt = int(cm_cnt[i, j])
                    color_txt = "white" if pct > thr else "black"

                    # Percentage slightly above the cell center, count below.
                    ax.text(j, i - 0.10, f"{pct:.1f}%",
                            ha="center", va="center",
                            color=color_txt, fontsize=10, fontweight="bold")
                    ax.text(j, i + 0.22, f"({cnt})",
                            ha="center", va="center",
                            color=color_txt, fontsize=7)

        # ``im`` is the last panel's image; both panels share vmin/vmax, so a
        # single colorbar describes both.
        fig.colorbar(im, cax=ax_cbar, label="% por fila (clase real)")
        fig.suptitle(f"{title_prefix}\n{subtitle}", fontsize=13, y=0.98)
        fig.subplots_adjust(left=0.08, right=0.92, bottom=0.22, top=0.82)

        fig.savefig(outpath, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even on failure so repeated calls in a
        # long-lived kernel do not accumulate open figures.
        plt.close(fig)

# Wall-clock start of the TOP-K train/test evaluation.
EVAL_T0 = time.time()

# Best TOP_K configurations according to the OOF ranking of `results`.
topk = results.head(TOP_K).reset_index(drop=True)

# Preprocessing fitted on the full training set and applied to the test set.
X_train_v, X_test_v, _, _ = preprocess_full(X_train_np, X_test_np)
# Per-sample weights for the full training set.
w_train_full = make_sample_weight(y_train_enc, class_weight_dict)

print("\n" + "#" * 95)
print("TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)")
print("#" * 95)

# One summary dict per evaluated model; reused by the LaTeX table section.
top5_summary = []

# Output directory for the confusion-matrix figures (created if missing).
downloads = "/home/felorrieta/Catalina"
os.makedirs(downloads, exist_ok=True)

# Refit each selected configuration on the full training set and report
# train/test metrics, confusion matrices and a saved figure.
for i, row in topk.iterrows():
    params = row["params"]
    # Number of trees taken from the CV early-stopping estimate.
    best_n = int(row["best_n_cv"])

    # No early stopping here: the tree count is fixed to best_n.
    # The seed varies per model (100 + position in the top-k table).
    model = XGBClassifier(
        random_state=100 + i,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=best_n,
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        **params
    )
    model.fit(X_train_v, y_train_enc, sample_weight=w_train_full, verbose=False)

    p_tr = model.predict_proba(X_train_v)
    p_te = model.predict_proba(X_test_v)

    # Hard predictions = class with the highest probability.
    yhat_tr = np.argmax(p_tr, axis=1)
    yhat_te = np.argmax(p_te, axis=1)

    f1m_tr = f1_score(y_train_enc, yhat_tr, average="macro")
    f1m_te = f1_score(y_test_enc,  yhat_te, average="macro")
    bacc_tr = balanced_accuracy_score(y_train_enc, yhat_tr)
    bacc_te = balanced_accuracy_score(y_test_enc,  yhat_te)
    acc_tr = accuracy_score(y_train_enc, yhat_tr)
    acc_te = accuracy_score(y_test_enc,  yhat_te)
    f1w_te = f1_score(y_test_enc, yhat_te, average="weighted")
    f1blz_te = f1_blazar(y_test_enc, yhat_te)

    print("\n" + "=" * 95)
    print(f"OOF-TOP{TOP_K} | Model #{i+1}")
    print("=" * 95)
    print("FIXED OOF macroF1:", f"{row['oof_macroF1']:.4f}", "| fold std:", f"{row['fold_f1m_std']:.4f}")
    print("PARAMS:", {"best_n_estimators": best_n, **to_plain_params(params)})

    print("\nMÉTRICAS (TRAIN / TEST)")
    print(f"macro-F1 train={f1m_tr:.4f} | test={f1m_te:.4f}")
    print(f"bal_acc  train={bacc_tr:.4f} | test={bacc_te:.4f}")
    print(f"acc      train={acc_tr:.4f} | test={acc_te:.4f}")
    print(f"F1_w test={f1w_te:.4f} | F1(Blazar) test={f1blz_te:.4f}")
    # Train-minus-test macro-F1, printed as an overfitting indicator.
    print(f"GAP macro-F1 (train-test) = {f1m_tr - f1m_te:.4f}")

    cm_tr = confusion_matrix(y_train_enc, yhat_tr)
    cm_te = confusion_matrix(y_test_enc,  yhat_te)

    print("\nMatriz de confusión (TRAIN)")
    print(pd.DataFrame(cm_tr, index=labels, columns=labels))

    print("\nMatriz de confusión (TEST)")
    print(pd.DataFrame(cm_te, index=labels, columns=labels))

    print("\nClassification report (TRAIN)")
    print(classification_report(y_train_enc, yhat_tr, target_names=labels, zero_division=0))

    print("\nClassification report (TEST)")
    print(classification_report(y_test_enc, yhat_te, target_names=labels, zero_division=0))

    # Second title line of the figure: accuracy and weighted F1 on both sets.
    subtitle = (
        f"Acc train={acc_tr:.3f} | Acc test={acc_te:.3f} | "
        f"F1w train={f1_score(y_train_enc, yhat_tr, average='weighted', zero_division=0):.3f} | "
        f"F1w test={f1w_te:.3f}"
    )

    # One PNG per model, numbered by its rank in the top-k table.
    outpath = os.path.join(downloads, f"XGB_IISIG_LOGFIRMA_REALES_{i+1}.png")
    save_confusion_train_test(
        cm_tr, cm_te, labels,
        outpath=outpath,
        title_prefix=f"XGB_{i+1} (IISIG log-firma, datos reales) | n_estimators={best_n}",
        subtitle=subtitle,
        gap_width=0.28,
        wspace=0.15,
        cmap="Greens"
    )
    print(f"✅ Guardado: {outpath}")

    # Keep the numbers needed later for the summary / LaTeX table.
    top5_summary.append({
        "Modelo": f"XGB_{i+1}",
        "best_n": best_n,
        "oof_macroF1": row["oof_macroF1"],
        "oof_f1_blazar": row["oof_f1_blazar"],
        "fold_f1m_std": row["fold_f1m_std"],
        "Acc_test": acc_te,
        "F1_w_test": f1w_te,
        "macroF1_test": f1m_te,
        "bacc_test": bacc_te,
    })

# Both timers stop here: the total covers the search plus this evaluation,
# but not the LaTeX-table section that follows.
EVAL_T1 = time.time()
TOTAL_T1 = time.time()

print("\nTiempo eval TOP-5 (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(EVAL_T1 - EVAL_T0)))
print("Tiempo TOTAL (hh:mm:ss):", time.strftime("%H:%M:%S", time.gmtime(TOTAL_T1 - TOTAL_T0)))

# ============================================================
# 7) TABLA FINAL PARA LATEX
# ============================================================
def cv_auc_gap(params, X, y, cv):
    """Cross-validated weighted one-vs-rest AUC and its train/validation gap.

    For each split of ``cv`` the fold is preprocessed, a model is trained
    with ``fit_xgb_one_fold`` and the weighted OvR ROC AUC is computed on
    both the training and the validation part.

    Args:
        params: Hyper-parameters forwarded to ``fit_xgb_one_fold``.
        X: Feature matrix indexable by integer index arrays.
        y: Encoded labels indexable by integer index arrays.
        cv: Splitter exposing ``split(X, y)``.

    Returns:
        Tuple ``(mean_val_auc, std_val_auc, gap)`` of floats, where ``gap``
        is the mean training AUC minus the mean validation AUC.

    Note:
        Reads the module-level ``class_weight_dict`` and calls the
        module-level ``make_sample_weight``.
    """
    def _weighted_ovr_auc(y_true, proba):
        return roc_auc_score(y_true, proba, multi_class="ovr", average="weighted")

    train_scores = []
    valid_scores = []

    for fit_rows, hold_rows in cv.split(X, y):
        y_fit = y[fit_rows]
        y_hold = y[hold_rows]
        X_fit, X_hold, _, _ = preprocess_fit_transform(X[fit_rows], X[hold_rows])

        booster = fit_xgb_one_fold(
            params, X_fit, y_fit, X_hold, y_hold,
            w_tr=make_sample_weight(y_fit, class_weight_dict),
            w_va=make_sample_weight(y_hold, class_weight_dict),
        )

        fit_proba = booster.predict_proba(X_fit)
        hold_proba = booster.predict_proba(X_hold)

        train_scores.append(_weighted_ovr_auc(y_fit, fit_proba))
        valid_scores.append(_weighted_ovr_auc(y_hold, hold_proba))

    mean_va = float(np.mean(valid_scores))
    std_va = float(np.std(valid_scores))
    # Positive gap: the model scores higher on data it was trained on.
    gap = float(np.mean(train_scores) - mean_va)
    return mean_va, std_va, gap

# Two CV schemes for the table: plain 5-fold and 5-fold repeated twice.
cv_5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_5x2 = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)

latex_rows = []

# For every top-k model, recompute CV AUC statistics under both schemes and
# join them with the test metrics stored in top5_summary (same positional
# index i, since topk was reset to a 0..k-1 index).
for i, row in tqdm(list(topk.iterrows()), total=len(topk), desc="Building LaTeX table metrics"):
    params = row["params"]

    auc_cv,  sd_cv,  gap_cv  = cv_auc_gap(params, X_train_np, y_train_enc, cv_5)
    auc_rep, sd_rep, gap_rep = cv_auc_gap(params, X_train_np, y_train_enc, cv_5x2)

    acc_test = top5_summary[i]["Acc_test"]
    f1w_test = top5_summary[i]["F1_w_test"]

    latex_rows.append({
        "Modelo": top5_summary[i]["Modelo"],
        "AUC_CV": auc_cv,
        "SD_CV": sd_cv,
        "Gap_CV": gap_cv,
        "AUC_rep": auc_rep,
        "SD_rep": sd_rep,
        "Gap_rep": gap_rep,
        "Acc_test": acc_test,
        "F1_w_test": f1w_test,
    })

latex_df = pd.DataFrame(latex_rows)

# Human-readable view, sorted by test accuracy then weighted F1 (descending).
print("\nTabla resumen (para elegir mejor modelo):")
print(latex_df.sort_values(["Acc_test", "F1_w_test"], ascending=False).to_string(index=False))

# Rows ready to paste into a LaTeX tabular, in original model order; the
# four backslashes in the f-string print as the LaTeX row terminator "\\".
print("\n--- LaTeX rows (pegables dentro de tu tabular) ---")
for _, r in latex_df.iterrows():
    print(
        f"{r['Modelo']} & "
        f"{r['AUC_CV']:.4f} & {r['SD_CV']:.4f} & {r['Gap_CV']:.4f} & "
        f"{r['AUC_rep']:.4f} & {r['SD_rep']:.4f} & {r['Gap_rep']:.4f} & "
        f"{r['Acc_test']:.4f} & {r['F1_w_test']:.4f} \\\\"
    )

Shapes: (1032, 127) (258, 127)
Labels: ['AGN', 'Blazar', 'QSO']
Distribución train: {0: 284, 1: 126, 2: 622}
Class weights: {0: 2.180281690140845, 1: 2.7301587301587302, 2: 0.9954983922829582}

########## XGB OOF-first (5x2) — IISIGNATURE LOG-FIRMA REALES ##########
N_ITER=30 | CV=5x2 | TOP_K=5


TOP 10 by FIXED OOF macro-F1 (and stability):
 oof_macroF1  oof_bacc  oof_acc  oof_f1_blazar  fold_f1m_std  best_n_cv                                                                                                                                                                                                                                                                                                                                                                params
    0.403404  0.395904 0.484496       0.277228      0.029700         33    {'colsample_bynode': 0.7519301990693147, 'colsample_bytree': 0.8641485131528328, 'gamma': 0.00048300424915854063, 'grow_policy': 'lossguide', 'learning_rate': 0.05671053612296404, 'max_depth': 4, 'max_leaves': 77, 'min_child_weight': 12.78089996481106, 'reg_alpha': 0.0001527074036654596, 'reg_lambda': 7.586762940448893, 'subsample': 0.9941308100323758}
    0.402051  0.391594 0.495155       0.281407      0.018700         80 {'colsample_bynode': 0.7681653099912698, 'colsample_bytree': 0.9800183963693817, 'gamma': 0.0038432006551539924, 'grow_policy': 'lossguide', 'learning_rate': 0.023218646777297004, 'max_depth': 5, 'max_leaves': 128, 'min_child_weight': 11.921178877039386, 'reg_alpha': 3.460621800595392e-08, 'reg_lambda': 3.6988713446272268, 'subsample': 0.8937064954939259}
    0.399616  0.391889 0.482558       0.266667      0.029786        130     {'colsample_bynode': 0.8716913147786483, 'colsample_bytree': 0.9550112065657013, 'gamma': 0.8719197460192358, 'grow_policy': 'lossguide', 'learning_rate': 0.017128683475627633, 'max_depth': 3, 'max_leaves': 90, 'min_child_weight': 24.069071892146443, 'reg_alpha': 1.6629513614106732e-09, 'reg_lambda': 6.604683530146009, 'subsample': 0.9673899545410138}
    0.397504  0.386066 0.491279       0.278351      0.036972        195    {'colsample_bynode': 0.8778109655320985, 'colsample_bytree': 0.6697440526594851, 'gamma': 0.3467567716188714, 'grow_policy': 'lossguide', 'learning_rate': 0.011046424508353313, 'max_depth': 5, 'max_leaves': 75, 'min_child_weight': 16.432378919707624, 'reg_alpha': 1.1026112761510001e-07, 'reg_lambda': 16.124278458562614, 'subsample': 0.8011807565247405}
    0.395381  0.387994 0.487403       0.252525      0.026273         46       {'colsample_bynode': 0.994198099313195, 'colsample_bytree': 0.7895885548555936, 'gamma': 1.0070219632034507, 'grow_policy': 'lossguide', 'learning_rate': 0.042119161345132834, 'max_depth': 5, 'max_leaves': 74, 'min_child_weight': 16.571536957884465, 'reg_alpha': 2.557121805028822e-10, 'reg_lambda': 22.89087461386255, 'subsample': 0.8871475266447988}
    0.393269  0.385553 0.497093       0.229508      0.033838         86        {'colsample_bynode': 0.7810890415965769, 'colsample_bytree': 0.9827500072434707, 'gamma': 0.3881791159604766, 'grow_policy': 'depthwise', 'learning_rate': 0.0252342845499294, 'max_depth': 3, 'max_leaves': 98, 'min_child_weight': 13.109303695322625, 'reg_alpha': 3.912811440759049e-06, 'reg_lambda': 5.859674932405375, 'subsample': 0.7000033862726793}
    0.392312  0.383971 0.487403       0.248705      0.035718         63   {'colsample_bynode': 0.7536957269964288, 'colsample_bytree': 0.7078434286720509, 'gamma': 0.00011930721535161477, 'grow_policy': 'depthwise', 'learning_rate': 0.036782691203668554, 'max_depth': 5, 'max_leaves': 23, 'min_child_weight': 10.150665434429309, 'reg_alpha': 0.014276254602325636, 'reg_lambda': 42.31554618260076, 'subsample': 0.9051525088143455}
    0.392051  0.383314 0.467054       0.272251      0.032057         64       {'colsample_bynode': 0.919944621340081, 'colsample_bytree': 0.6759156281069316, 'gamma': 0.005722551691267099, 'grow_policy': 'depthwise', 'learning_rate': 0.049980903508955704, 'max_depth': 2, 'max_leaves': 87, 'min_child_weight': 33.77497290007452, 'reg_alpha': 8.997071356223067e-10, 'reg_lambda': 7.13286363374483, 'subsample': 0.8840944384322625}
    0.390569  0.382601 0.473837       0.262626      0.027543        103    {'colsample_bynode': 0.993116054300616, 'colsample_bytree': 0.6763711896021449, 'gamma': 0.0031539876927125796, 'grow_policy': 'lossguide', 'learning_rate': 0.04357665034915617, 'max_depth': 5, 'max_leaves': 82, 'min_child_weight': 15.824825794808286, 'reg_alpha': 3.698389305376279e-05, 'reg_lambda': 142.60022669606684, 'subsample': 0.8936104288362405}
    0.389850  0.380363 0.468992       0.265957      0.026737         99      {'colsample_bynode': 0.870898519099042, 'colsample_bytree': 0.928183956239577, 'gamma': 0.029139017098458005, 'grow_policy': 'lossguide', 'learning_rate': 0.029460477479655373, 'max_depth': 2, 'max_leaves': 84, 'min_child_weight': 18.651719774462304, 'reg_alpha': 9.815983028687125e-06, 'reg_lambda': 20.71089276523123, 'subsample': 0.9189939050072081}

Tiempo búsqueda (hh:mm:ss): 00:06:50

###############################################################################################
TOP-5 (selected by FIXED OOF) — TRAIN & TEST evaluation (reference)
###############################################################################################

===============================================================================================
OOF-TOP5 | Model #1
===============================================================================================
FIXED OOF macroF1: 0.4034 | fold std: 0.0297
PARAMS: {'best_n_estimators': 33, 'colsample_bynode': 0.7519301990693147, 'colsample_bytree': 0.8641485131528328, 'gamma': 0.00048300424915854063, 'grow_policy': 'lossguide', 'learning_rate': 0.05671053612296404, 'max_depth': 4, 'max_leaves': 77, 'min_child_weight': 12.78089996481106, 'reg_alpha': 0.0001527074036654596, 'reg_lambda': 7.586762940448893, 'subsample': 0.9941308100323758}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.7736 | test=0.4116
bal_acc  train=0.7697 | test=0.4040
acc      train=0.7926 | test=0.4922
F1_w test=0.4907 | F1(Blazar) test=0.2692
GAP macro-F1 (train-test) = 0.3620

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     235       1   48
Blazar   14      86   26
QSO     111      14  497

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      30       6   39
Blazar    7       7   19
QSO      54       6   90

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.65      0.83      0.73       284
      Blazar       0.85      0.68      0.76       126
         QSO       0.87      0.80      0.83       622

    accuracy                           0.79      1032
   macro avg       0.79      0.77      0.77      1032
weighted avg       0.81      0.79      0.80      1032


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.33      0.40      0.36        75
      Blazar       0.37      0.21      0.27        33
         QSO       0.61      0.60      0.60       150

    accuracy                           0.49       258
   macro avg       0.44      0.40      0.41       258
weighted avg       0.50      0.49      0.49       258

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_LOGFIRMA_REALES_1.png

===============================================================================================
OOF-TOP5 | Model #2
===============================================================================================
FIXED OOF macroF1: 0.4021 | fold std: 0.0187
PARAMS: {'best_n_estimators': 80, 'colsample_bynode': 0.7681653099912698, 'colsample_bytree': 0.9800183963693817, 'gamma': 0.0038432006551539924, 'grow_policy': 'lossguide', 'learning_rate': 0.023218646777297004, 'max_depth': 5, 'max_leaves': 128, 'min_child_weight': 11.921178877039386, 'reg_alpha': 3.460621800595392e-08, 'reg_lambda': 3.6988713446272268, 'subsample': 0.8937064954939259}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.8682 | test=0.3950
bal_acc  train=0.8660 | test=0.3875
acc      train=0.8828 | test=0.4690
F1_w test=0.4690 | F1(Blazar) test=0.2857
GAP macro-F1 (train-test) = 0.4733

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     255       2   27
Blazar    7     102   17
QSO      59       9  554

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      25       7   43
Blazar    7       8   18
QSO      54       8   88

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.79      0.90      0.84       284
      Blazar       0.90      0.81      0.85       126
         QSO       0.93      0.89      0.91       622

    accuracy                           0.88      1032
   macro avg       0.87      0.87      0.87      1032
weighted avg       0.89      0.88      0.88      1032


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.29      0.33      0.31        75
      Blazar       0.35      0.24      0.29        33
         QSO       0.59      0.59      0.59       150

    accuracy                           0.47       258
   macro avg       0.41      0.39      0.39       258
weighted avg       0.47      0.47      0.47       258

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_LOGFIRMA_REALES_2.png

===============================================================================================
OOF-TOP5 | Model #3
===============================================================================================
FIXED OOF macroF1: 0.3996 | fold std: 0.0298
PARAMS: {'best_n_estimators': 130, 'colsample_bynode': 0.8716913147786483, 'colsample_bytree': 0.9550112065657013, 'gamma': 0.8719197460192358, 'grow_policy': 'lossguide', 'learning_rate': 0.017128683475627633, 'max_depth': 3, 'max_leaves': 90, 'min_child_weight': 24.069071892146443, 'reg_alpha': 1.6629513614106732e-09, 'reg_lambda': 6.604683530146009, 'subsample': 0.9673899545410138}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.6956 | test=0.3967
bal_acc  train=0.6842 | test=0.3873
acc      train=0.7393 | test=0.4845
F1_w test=0.4799 | F1(Blazar) test=0.2500
GAP macro-F1 (train-test) = 0.2989

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     210       6   68
Blazar   17      67   42
QSO     120      16  486

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      28       4   43
Blazar    8       6   19
QSO      54       5   91

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.61      0.74      0.67       284
      Blazar       0.75      0.53      0.62       126
         QSO       0.82      0.78      0.80       622

    accuracy                           0.74      1032
   macro avg       0.72      0.68      0.70      1032
weighted avg       0.75      0.74      0.74      1032


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.31      0.37      0.34        75
      Blazar       0.40      0.18      0.25        33
         QSO       0.59      0.61      0.60       150

    accuracy                           0.48       258
   macro avg       0.44      0.39      0.40       258
weighted avg       0.49      0.48      0.48       258

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_LOGFIRMA_REALES_3.png

===============================================================================================
OOF-TOP5 | Model #4
===============================================================================================
FIXED OOF macroF1: 0.3975 | fold std: 0.0370
PARAMS: {'best_n_estimators': 195, 'colsample_bynode': 0.8778109655320985, 'colsample_bytree': 0.6697440526594851, 'gamma': 0.3467567716188714, 'grow_policy': 'lossguide', 'learning_rate': 0.011046424508353313, 'max_depth': 5, 'max_leaves': 75, 'min_child_weight': 16.432378919707624, 'reg_alpha': 1.1026112761510001e-07, 'reg_lambda': 16.124278458562614, 'subsample': 0.8011807565247405}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.8355 | test=0.3723
bal_acc  train=0.8331 | test=0.3618
acc      train=0.8595 | test=0.4496
F1_w test=0.4484 | F1(Blazar) test=0.2692
GAP macro-F1 (train-test) = 0.4632

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     250       4   30
Blazar    8      94   24
QSO      66      13  543

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      22       6   47
Blazar    8       7   18
QSO      57       6   87

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.77      0.88      0.82       284
      Blazar       0.85      0.75      0.79       126
         QSO       0.91      0.87      0.89       622

    accuracy                           0.86      1032
   macro avg       0.84      0.83      0.84      1032
weighted avg       0.86      0.86      0.86      1032


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.25      0.29      0.27        75
      Blazar       0.37      0.21      0.27        33
         QSO       0.57      0.58      0.58       150

    accuracy                           0.45       258
   macro avg       0.40      0.36      0.37       258
weighted avg       0.45      0.45      0.45       258

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_LOGFIRMA_REALES_4.png

===============================================================================================
OOF-TOP5 | Model #5
===============================================================================================
FIXED OOF macroF1: 0.3954 | fold std: 0.0263
PARAMS: {'best_n_estimators': 46, 'colsample_bynode': 0.994198099313195, 'colsample_bytree': 0.7895885548555936, 'gamma': 1.0070219632034507, 'grow_policy': 'lossguide', 'learning_rate': 0.042119161345132834, 'max_depth': 5, 'max_leaves': 74, 'min_child_weight': 16.571536957884465, 'reg_alpha': 2.557121805028822e-10, 'reg_lambda': 22.89087461386255, 'subsample': 0.8871475266447988}

MÉTRICAS (TRAIN / TEST)
macro-F1 train=0.8154 | test=0.3813
bal_acc  train=0.8149 | test=0.3739
acc      train=0.8411 | test=0.4690
F1_w test=0.4673 | F1(Blazar) test=0.2353
GAP macro-F1 (train-test) = 0.4341

Matriz de confusión (TRAIN)
        AGN  Blazar  QSO
AGN     244       5   35
Blazar   14      92   20
QSO      76      14  532

Matriz de confusión (TEST)
        AGN  Blazar  QSO
AGN      26       6   43
Blazar   10       6   17
QSO      55       6   89

Classification report (TRAIN)
              precision    recall  f1-score   support

         AGN       0.73      0.86      0.79       284
      Blazar       0.83      0.73      0.78       126
         QSO       0.91      0.86      0.88       622

    accuracy                           0.84      1032
   macro avg       0.82      0.81      0.82      1032
weighted avg       0.85      0.84      0.84      1032


Classification report (TEST)
              precision    recall  f1-score   support

         AGN       0.29      0.35      0.31        75
      Blazar       0.33      0.18      0.24        33
         QSO       0.60      0.59      0.60       150

    accuracy                           0.47       258
   macro avg       0.41      0.37      0.38       258
weighted avg       0.47      0.47      0.47       258

✅ Guardado: /home/felorrieta/Catalina/XGB_IISIG_LOGFIRMA_REALES_5.png

Tiempo eval TOP-5 (hh:mm:ss): 00:00:05
Tiempo TOTAL (hh:mm:ss): 00:06:56


Tabla resumen (para elegir mejor modelo):
Modelo   AUC_CV    SD_CV   Gap_CV  AUC_rep   SD_rep  Gap_rep  Acc_test  F1_w_test
 XGB_1 0.558107 0.019116 0.370534 0.554170 0.016394 0.367236  0.492248   0.490686
 XGB_3 0.545037 0.026811 0.313597 0.545670 0.020693 0.314375  0.484496   0.479859
 XGB_2 0.552490 0.011157 0.414528 0.556986 0.010912 0.411099  0.468992   0.469050
 XGB_5 0.558878 0.012493 0.378837 0.559015 0.012790 0.373290  0.468992   0.467273
 XGB_4 0.557906 0.010890 0.388831 0.556905 0.010916 0.385972  0.449612   0.448368

--- LaTeX rows (pegables dentro de tu tabular) ---
XGB_1 & 0.5581 & 0.0191 & 0.3705 & 0.5542 & 0.0164 & 0.3672 & 0.4922 & 0.4907 \\
XGB_2 & 0.5525 & 0.0112 & 0.4145 & 0.5570 & 0.0109 & 0.4111 & 0.4690 & 0.4690 \\
XGB_3 & 0.5450 & 0.0268 & 0.3136 & 0.5457 & 0.0207 & 0.3144 & 0.4845 & 0.4799 \\
XGB_4 & 0.5579 & 0.0109 & 0.3888 & 0.5569 & 0.0109 & 0.3860 & 0.4496 & 0.4484 \\
XGB_5 & 0.5589 & 0.0125 & 0.3788 & 0.5590 & 0.0128 & 0.3733 & 0.4690 & 0.4673 \\

Profundidad

# ==========================================================
# XGBOOST — ESTUDIO POR NIVELES EN DATOS REALES
# Sirve para:
#   - ESIG firma
#   - ESIG log-firma
#   - IISIG firma
#   - IISIG log-firma
# ==========================================================

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight
from xgboost import XGBClassifier

# CSV holding the labels / metadata that load_real_data merges with the
# features.
PATH_Y = "/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv"

# One entry per feature representation. Each entry holds:
#   path_x      - CSV with the feature matrix
#   rep         - "signature" or "logsignature" (the values get_cum_levels accepts)
#   best_n      - number of boosting rounds for that representation
#   best_params - XGBoost hyper-parameters for that representation
# NOTE(review): best_n / best_params look like the rank-1 results of earlier
# random-search runs (the "IISIG log-firma" entry matches Model #1 printed
# above) — confirm before reusing them on different data.
CONFIGS = {
    "ESIG firma": {
        "path_x": "/home/felorrieta/Catalina/path_signature_esig_REALES_M9.csv",
        "rep": "signature",
        "best_n": 97,
        "best_params": {
            "colsample_bynode": 0.7452462872846224,
            "colsample_bytree": 0.8766915421894768,
            "gamma": 0.00010058922341116494,
            "grow_policy": "depthwise",
            "learning_rate": 0.044484168177941416,
            "max_depth": 4,
            "max_leaves": 85,
            "min_child_weight": 39.873599110055316,
            "reg_alpha": 1.4931726672035842e-06,
            "reg_lambda": 3.2439692898781605,
            "subsample": 0.691952878566789
        }
    },
    "ESIG log-firma": {
        "path_x": "/home/felorrieta/Catalina/logsignature_esig_REALES_M9.csv",
        "rep": "logsignature",
        "best_n": 131,
        "best_params": {
            "colsample_bynode": 0.8774873757722178,
            "colsample_bytree": 0.8956884070401961,
            "gamma": 0.7977063868174838,
            "grow_policy": "lossguide",
            "learning_rate": 0.04702827154794471,
            "max_depth": 4,
            "max_leaves": 107,
            "min_child_weight": 47.951030377685704,
            "reg_alpha": 2.425164655536382e-06,
            "reg_lambda": 35.18457384151612,
            "subsample": 0.7648534336161562
        }
    },
    "IISIG firma": {
        "path_x": "/home/felorrieta/Catalina/path_signature_iisig_M9.csv",
        "rep": "signature",
        "best_n": 515,
        "best_params": {
            "colsample_bynode": 0.8228284587275367,
            "colsample_bytree": 0.8329564902836979,
            "gamma": 0.012481652467320591,
            "grow_policy": "lossguide",
            "learning_rate": 0.017989635897986907,
            "max_depth": 4,
            "max_leaves": 78,
            "min_child_weight": 113.10966992904058,
            "reg_alpha": 5.671556094422195e-06,
            "reg_lambda": 19.774885928261973,
            "subsample": 0.8934306302491446
        }
    },
    "IISIG log-firma": {
        "path_x": "/home/felorrieta/Catalina/logsignature_iisig_M9.csv",
        "rep": "logsignature",
        "best_n": 33,
        "best_params": {
            "colsample_bynode": 0.7519301990693147,
            "colsample_bytree": 0.8641485131528328,
            "gamma": 0.00048300424915854063,
            "grow_policy": "lossguide",
            "learning_rate": 0.05671053612296404,
            "max_depth": 4,
            "max_leaves": 77,
            "min_child_weight": 12.78089996481106,
            "reg_alpha": 0.0001527074036654596,
            "reg_lambda": 7.586762940448893,
            "subsample": 0.9941308100323758
        }
    }
}

def make_sample_weight(y_enc, cw_dict):
    """Return a float array holding, for each encoded label in ``y_enc``,
    the weight stored under that label (cast to ``int``) in ``cw_dict``."""
    weights = [cw_dict[int(label)] for label in y_enc]
    return np.array(weights, dtype=float)

def auc_ovr_macro_from_proba(y_true, proba):
    """Compute the ROC AUC of ``proba`` against ``y_true`` using the
    one-vs-rest multiclass scheme with macro averaging."""
    scoring_options = {"multi_class": "ovr", "average": "macro"}
    return roc_auc_score(y_true, proba, **scoring_options)

def load_real_data(path_x, path_y):
    """Load features and labels, split them 80/20 and build class weights.

    Args:
        path_x: CSV with the feature columns plus an "id" column.
        path_y: CSV with the label columns; its "oid" column is the join key.

    Returns:
        Tuple ``(X_train, X_test, y_train_enc, y_test_enc, labels, n_classes,
        class_weight_dict)`` where the targets are integer-encoded, ``labels``
        are the encoder classes and ``class_weight_dict`` maps encoded class
        to its (possibly boosted) balanced weight.
    """
    features = pd.read_csv(path_x)
    targets = pd.read_csv(path_y)
    # Mirror "oid" under the name "id" so both tables share the join key.
    targets["id"] = targets["oid"]

    merged = pd.merge(features, targets, on="id")

    # Reproducible random 80/20 split; the complement of the sample is the test set.
    train_rows = merged.sample(frac=0.8, random_state=42).index
    train_part = merged.loc[train_rows].reset_index(drop=True)
    test_part = merged.drop(train_rows).reset_index(drop=True)

    non_feature_cols = ['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id']
    X_train = train_part.drop(columns=non_feature_cols)
    X_test = test_part[X_train.columns].copy()

    # The encoder is fitted on the training targets only.
    encoder = LabelEncoder()
    y_train_enc = encoder.fit_transform(train_part['survey_class_mapped'])
    y_test_enc = encoder.transform(test_part['survey_class_mapped'])

    labels = encoder.classes_
    n_classes = len(labels)

    present = np.unique(y_train_enc)
    balanced = compute_class_weight(class_weight="balanced", classes=present, y=y_train_enc)
    class_weight_dict = {int(cls): float(weight) for cls, weight in zip(present, balanced)}

    # Up-weight the classes listed here on top of the balanced weights.
    boost = 1.8
    for hard_name in ["AGN", "QSO"]:
        if hard_name not in labels:
            continue
        hits = np.where(labels == hard_name)[0]
        class_weight_dict[int(hits[0])] *= boost

    return X_train, X_test, y_train_enc, y_test_enc, labels, n_classes, class_weight_dict

def get_cum_levels(rep, n_total, has_n0=False):
    """Return the cumulative number of features up to each level 1..9.

    Args:
        rep: "signature" or "logsignature".
        n_total: Number of feature columns available in the data.
        has_n0: Whether the data carries a leading constant column that is
            skipped by the caller and therefore is not a usable feature.

    Returns:
        Dict mapping level (1..9) to the cumulative feature count. The last
        level is clamped to the number of usable columns so a truncated file
        never reports more features than it has.

    Raises:
        ValueError: If ``rep`` is not one of the two supported values.
    """
    if rep == "signature":
        # With a constant term one column is not a feature; every branch now
        # clamps level 9 the same way (previously has_n0=True ignored n_total).
        usable = n_total - 1 if has_n0 else n_total
        last = min(1022, max(usable, 0))
        return {1: 2, 2: 6, 3: 14, 4: 30, 5: 62, 6: 126, 7: 254, 8: 510, 9: last}
    if rep == "logsignature":
        last = min(127, n_total)
        return {1: 2, 2: 3, 3: 5, 4: 8, 5: 14, 6: 23, 7: 41, 8: 71, 9: last}
    raise ValueError("rep debe ser 'signature' o 'logsignature'.")

def run_xgb_levels_real(tag, cfg):
    """Fit one XGBoost classifier per truncation level (1..9) and report metrics.

    For each level ``m`` the first ``cum_levels[m]`` feature columns (after
    skipping a leading constant column, when one is detected) are
    median-imputed, filtered with a zero-variance threshold and used to train
    a model with the hyper-parameters stored in ``cfg``. A per-level table,
    the selected level and LaTeX table rows are printed.

    Args:
        tag: Label printed in the section header.
        cfg: Dict with keys "path_x", "rep" ("signature" or "logsignature"),
            "best_n" (number of estimators) and "best_params" (extra
            XGBClassifier keyword arguments).

    Returns:
        ``(df_levels, best_simple)``: the per-level metrics DataFrame and the
        row of the lowest level whose "AUCRep" is within 1e-4 of the maximum.
    """
    X_train, X_test, y_train_enc, y_test_enc, labels, n_classes, class_weight_dict = load_real_data(
        cfg["path_x"], PATH_Y
    )

    print("\n" + "="*100)
    print(tag)
    print("="*100)
    print("Shapes:", X_train.shape, X_test.shape)

    cols_ordered = list(X_train.columns)
    n_total = len(cols_ordered)

    # A first column that is (numerically) all ones is treated as the constant
    # term of a signature and is skipped when slicing the levels.
    first_col_values = np.asarray(X_train.iloc[:, 0], dtype=float)
    has_n0 = np.allclose(first_col_values, 1.0) if cfg["rep"] == "signature" else False
    start_idx = 1 if has_n0 else 0

    cum_levels = get_cum_levels(cfg["rep"], n_total, has_n0=has_n0)

    # The weights depend only on the training labels, so build them once
    # instead of once per level.
    wtr = make_sample_weight(y_train_enc, class_weight_dict)

    rows = []

    for m in range(1, 10):
        n_feats = cum_levels[m]

        selected = cols_ordered[start_idx:start_idx + n_feats]

        # Selecting with a list of columns already yields new frames.
        Xtr_m = X_train[selected]
        Xte_m = X_test[selected]

        # Imputer and variance filter are fitted on the training split only.
        imp = SimpleImputer(strategy="median")
        Xtr_i = imp.fit_transform(Xtr_m)
        Xte_i = imp.transform(Xte_m)

        vt = VarianceThreshold(0.0)
        Xtr_v = vt.fit_transform(Xtr_i)
        Xte_v = vt.transform(Xte_i)

        model = XGBClassifier(
            random_state=42,
            n_jobs=-1,
            tree_method="hist",
            n_estimators=cfg["best_n"],
            verbosity=0,
            objective="multi:softprob",
            num_class=n_classes,
            eval_metric="mlogloss",
            **cfg["best_params"]
        )

        model.fit(Xtr_v, y_train_enc, sample_weight=wtr, verbose=False)

        proba_tr = model.predict_proba(Xtr_v)
        proba_te = model.predict_proba(Xte_v)

        pred_tr = np.argmax(proba_tr, axis=1)
        pred_te = np.argmax(proba_te, axis=1)

        acc_tr = accuracy_score(y_train_enc, pred_tr)
        f1w_tr = f1_score(y_train_enc, pred_tr, average="weighted", zero_division=0)
        auc_tr = auc_ovr_macro_from_proba(y_train_enc, proba_tr)

        acc_te = accuracy_score(y_test_enc, pred_te)
        f1w_te = f1_score(y_test_enc, pred_te, average="weighted", zero_division=0)
        auc_te = auc_ovr_macro_from_proba(y_test_enc, proba_te)

        rows.append({
            "NivelFirma": m,
            # Report the columns actually selected; the slice is silently
            # shorter than n_feats when the file has fewer columns.
            "N_features": len(selected),
            "N_features_postVT": Xtr_v.shape[1],
            "AccTrain": acc_tr,
            "F1wTrain": f1w_tr,
            "AUCTrain": auc_tr,
            "AccTest": acc_te,
            "F1wTest": f1w_te,
            "AUCRep": auc_te
        })

    df_levels = pd.DataFrame(rows)

    # Pick the lowest level whose test AUC is within `tol` of the best one.
    best_auc = df_levels["AUCRep"].max()
    tol = 1e-4
    best_candidates = df_levels[df_levels["AUCRep"] >= best_auc - tol]
    best_simple = best_candidates.sort_values(["NivelFirma"]).iloc[0]

    print("\nRESULTADOS POR NIVEL")
    print(df_levels.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

    print("\nNIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep")
    print(best_simple.to_string())

    print("\nFILAS LaTeX")
    for _, r in df_levels.iterrows():
        print(
            f"{int(r['NivelFirma'])} & {int(r['N_features'])} & "
            f"{r['AccTrain']:.4f} & {r['F1wTrain']:.4f} & {r['AUCTrain']:.4f} & "
            f"{r['AccTest']:.4f} & {r['F1wTest']:.4f} & {r['AUCRep']:.4f} \\\\"
        )

    return df_levels, best_simple


# ==========================================================
# RUN ONE CONFIGURATION OR ALL OF THEM
# ==========================================================

# example: a single configuration
# df_esig_firma, best_esig_firma = run_xgb_levels_real("ESIG firma", CONFIGS["ESIG firma"])

# example: every configuration in CONFIGS (the returned frames are discarded;
# only the printed report is kept)
for name, cfg in CONFIGS.items():
    run_xgb_levels_real(name, cfg)


====================================================================================================
ESIG firma
====================================================================================================
Shapes: (1438, 1023) (359, 1023)

RESULTADOS POR NIVEL
 NivelFirma  N_features  N_features_postVT  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2                  2    0.5848    0.4723    0.5239   0.5850   0.4714  0.4862
          2           6                  6    0.5076    0.5178    0.6646   0.4596   0.4685  0.5607
          3          14                 14    0.6551    0.6621    0.8089   0.5432   0.5554  0.6860
          4          30                 30    0.6905    0.6957    0.8444   0.5850   0.5942  0.7136
          5          62                 62    0.7309    0.7346    0.8760   0.6351   0.6427  0.7417
          6         126                126    0.7608    0.7633    0.8961   0.6490   0.6576  0.7754
          7         254                254    0.7872    0.7893    0.9121   0.6741   0.6799  0.7952
          8         510                510    0.7990    0.8008    0.9263   0.6769   0.6822  0.8026
          9        1022               1022    0.8206    0.8218    0.9377   0.6852   0.6889  0.8131

NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
NivelFirma              9.000000
N_features           1022.000000
N_features_postVT    1022.000000
AccTrain                0.820584
F1wTrain                0.821843
AUCTrain                0.937744
AccTest                 0.685237
F1wTest                 0.688928
AUCRep                  0.813108

FILAS LaTeX
1 & 2 & 0.5848 & 0.4723 & 0.5239 & 0.5850 & 0.4714 & 0.4862 \\
2 & 6 & 0.5076 & 0.5178 & 0.6646 & 0.4596 & 0.4685 & 0.5607 \\
3 & 14 & 0.6551 & 0.6621 & 0.8089 & 0.5432 & 0.5554 & 0.6860 \\
4 & 30 & 0.6905 & 0.6957 & 0.8444 & 0.5850 & 0.5942 & 0.7136 \\
5 & 62 & 0.7309 & 0.7346 & 0.8760 & 0.6351 & 0.6427 & 0.7417 \\
6 & 126 & 0.7608 & 0.7633 & 0.8961 & 0.6490 & 0.6576 & 0.7754 \\
7 & 254 & 0.7872 & 0.7893 & 0.9121 & 0.6741 & 0.6799 & 0.7952 \\
8 & 510 & 0.7990 & 0.8008 & 0.9263 & 0.6769 & 0.6822 & 0.8026 \\
9 & 1022 & 0.8206 & 0.8218 & 0.9377 & 0.6852 & 0.6889 & 0.8131 \\

====================================================================================================
ESIG log-firma
====================================================================================================
Shapes: (1438, 127) (359, 127)

RESULTADOS POR NIVEL
 NivelFirma  N_features  N_features_postVT  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2                  2    0.5848    0.4723    0.5227   0.5850   0.4714  0.4840
          2           3                  3    0.5083    0.5142    0.6306   0.4763   0.4807  0.5671
          3           5                  5    0.6328    0.6405    0.7791   0.5432   0.5591  0.6946
          4           8                  8    0.6509    0.6587    0.8116   0.5627   0.5738  0.7025
          5          14                 14    0.6864    0.6925    0.8396   0.5682   0.5807  0.7244
          6          23                 23    0.7045    0.7104    0.8533   0.5961   0.6044  0.7301
          7          41                 41    0.7246    0.7292    0.8676   0.6128   0.6205  0.7369
          8          71                 71    0.7177    0.7229    0.8778   0.6184   0.6256  0.7433
          9         127                127    0.7490    0.7535    0.8913   0.6184   0.6243  0.7449

NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
NivelFirma             9.000000
N_features           127.000000
N_features_postVT    127.000000
AccTrain               0.748957
F1wTrain               0.753505
AUCTrain               0.891279
AccTest                0.618384
F1wTest                0.624266
AUCRep                 0.744941

FILAS LaTeX
1 & 2 & 0.5848 & 0.4723 & 0.5227 & 0.5850 & 0.4714 & 0.4840 \\
2 & 3 & 0.5083 & 0.5142 & 0.6306 & 0.4763 & 0.4807 & 0.5671 \\
3 & 5 & 0.6328 & 0.6405 & 0.7791 & 0.5432 & 0.5591 & 0.6946 \\
4 & 8 & 0.6509 & 0.6587 & 0.8116 & 0.5627 & 0.5738 & 0.7025 \\
5 & 14 & 0.6864 & 0.6925 & 0.8396 & 0.5682 & 0.5807 & 0.7244 \\
6 & 23 & 0.7045 & 0.7104 & 0.8533 & 0.5961 & 0.6044 & 0.7301 \\
7 & 41 & 0.7246 & 0.7292 & 0.8676 & 0.6128 & 0.6205 & 0.7369 \\
8 & 71 & 0.7177 & 0.7229 & 0.8778 & 0.6184 & 0.6256 & 0.7433 \\
9 & 127 & 0.7490 & 0.7535 & 0.8913 & 0.6184 & 0.6243 & 0.7449 \\

====================================================================================================
IISIG firma
====================================================================================================
Shapes: (1510, 1022) (377, 1022)

RESULTADOS POR NIVEL
 NivelFirma  N_features  N_features_postVT  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2                  2    0.4987    0.4792    0.5960   0.4164   0.4115  0.5012
          2           6                  6    0.5377    0.5322    0.6871   0.4324   0.4354  0.5190
          3          14                 14    0.6119    0.6186    0.7598   0.4589   0.4727  0.5783
          4          30                 30    0.6411    0.6467    0.8002   0.4854   0.4976  0.5722
          5          62                 62    0.6887    0.6936    0.8365   0.5199   0.5285  0.5937
          6         126                126    0.7212    0.7243    0.8642   0.5172   0.5264  0.6026
          7         254                254    0.7656    0.7676    0.8942   0.5172   0.5251  0.6098
          8         510                510    0.7834    0.7856    0.9121   0.5172   0.5248  0.6162
          9        1022               1022    0.8033    0.8050    0.9290   0.5411   0.5462  0.6216

NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
NivelFirma              9.000000
N_features           1022.000000
N_features_postVT    1022.000000
AccTrain                0.803311
F1wTrain                0.805023
AUCTrain                0.928991
AccTest                 0.541114
F1wTest                 0.546171
AUCRep                  0.621586

FILAS LaTeX
1 & 2 & 0.4987 & 0.4792 & 0.5960 & 0.4164 & 0.4115 & 0.5012 \\
2 & 6 & 0.5377 & 0.5322 & 0.6871 & 0.4324 & 0.4354 & 0.5190 \\
3 & 14 & 0.6119 & 0.6186 & 0.7598 & 0.4589 & 0.4727 & 0.5783 \\
4 & 30 & 0.6411 & 0.6467 & 0.8002 & 0.4854 & 0.4976 & 0.5722 \\
5 & 62 & 0.6887 & 0.6936 & 0.8365 & 0.5199 & 0.5285 & 0.5937 \\
6 & 126 & 0.7212 & 0.7243 & 0.8642 & 0.5172 & 0.5264 & 0.6026 \\
7 & 254 & 0.7656 & 0.7676 & 0.8942 & 0.5172 & 0.5251 & 0.6098 \\
8 & 510 & 0.7834 & 0.7856 & 0.9121 & 0.5172 & 0.5248 & 0.6162 \\
9 & 1022 & 0.8033 & 0.8050 & 0.9290 & 0.5411 & 0.5462 & 0.6216 \\

====================================================================================================
IISIG log-firma
====================================================================================================
Shapes: (1032, 127) (258, 127)

RESULTADOS POR NIVEL
 NivelFirma  N_features  N_features_postVT  AccTrain  F1wTrain  AUCTrain  AccTest  F1wTest  AUCRep
          1           2                  2    0.5591    0.5460    0.6665   0.4496   0.4353  0.5174
          2           3                  3    0.5630    0.5621    0.7039   0.4070   0.4098  0.5165
          3           5                  5    0.6647    0.6673    0.7954   0.4264   0.4227  0.5428
          4           8                  8    0.6647    0.6699    0.8136   0.4961   0.5039  0.5808
          5          14                 14    0.7045    0.7089    0.8489   0.4302   0.4400  0.5781
          6          23                 23    0.7238    0.7266    0.8568   0.4535   0.4576  0.5794
          7          41                 41    0.7771    0.7793    0.8945   0.4729   0.4788  0.5760
          8          71                 71    0.7897    0.7910    0.9043   0.4612   0.4662  0.6037
          9         127                127    0.7955    0.7983    0.9186   0.4806   0.4801  0.5997

NIVEL MÁS SIMPLE QUE MAXIMIZA AUCRep
NivelFirma            8.000000
N_features           71.000000
N_features_postVT    71.000000
AccTrain              0.789729
F1wTrain              0.790991
AUCTrain              0.904285
AccTest               0.461240
F1wTest               0.466226
AUCRep                0.603666

FILAS LaTeX
1 & 2 & 0.5591 & 0.5460 & 0.6665 & 0.4496 & 0.4353 & 0.5174 \\
2 & 3 & 0.5630 & 0.5621 & 0.7039 & 0.4070 & 0.4098 & 0.5165 \\
3 & 5 & 0.6647 & 0.6673 & 0.7954 & 0.4264 & 0.4227 & 0.5428 \\
4 & 8 & 0.6647 & 0.6699 & 0.8136 & 0.4961 & 0.5039 & 0.5808 \\
5 & 14 & 0.7045 & 0.7089 & 0.8489 & 0.4302 & 0.4400 & 0.5781 \\
6 & 23 & 0.7238 & 0.7266 & 0.8568 & 0.4535 & 0.4576 & 0.5794 \\
7 & 41 & 0.7771 & 0.7793 & 0.8945 & 0.4729 & 0.4788 & 0.5760 \\
8 & 71 & 0.7897 & 0.7910 & 0.9043 & 0.4612 & 0.4662 & 0.6037 \\
9 & 127 & 0.7955 & 0.7983 & 0.9186 & 0.4806 & 0.4801 & 0.5997 \\

Gráfico de importancias

# ==========================================================
# XGBOOST — GRÁFICO DE IMPORTANCIAS POR NIVEL EN DATOS REALES
# Usa el mismo CONFIGS que el bloque anterior
# ==========================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.utils.class_weight import compute_class_weight
from xgboost import XGBClassifier

# Label CSV; passed as `path_y` to load_real_data, which reads its "oid" and
# "survey_class_mapped" columns.
PATH_Y = "/home/felorrieta/Catalina/ts_v9.0.1_SMBH_ZTF_xmatch.csv"
# Directory where the importance figures are written; created here if missing.
OUTDIR = Path("/home/felorrieta/Catalina")
OUTDIR.mkdir(parents=True, exist_ok=True)

# ==========================================================
# HELPERS
# ==========================================================
def make_sample_weight(y_enc, cw_dict):
    """Build the per-sample weight vector: each encoded label in ``y_enc`` is
    cast to ``int`` and looked up in ``cw_dict``; the result is a float array."""
    per_sample = []
    for encoded_label in y_enc:
        per_sample.append(cw_dict[int(encoded_label)])
    return np.array(per_sample, dtype=float)

def load_real_data(path_x, path_y):
    """Read the feature and label CSVs, split 80/20 and derive class weights.

    Args:
        path_x: CSV with the feature columns plus an "id" column.
        path_y: CSV with the label columns; its "oid" column is the join key.

    Returns:
        Tuple ``(X_train, X_test, y_train_enc, y_test_enc, labels, n_classes,
        class_weight_dict)``; targets are integer-encoded, ``labels`` are the
        encoder classes and ``class_weight_dict`` maps each encoded class to
        its balanced weight, boosted for the hard classes.
    """
    feature_table = pd.read_csv(path_x)
    label_table = pd.read_csv(path_y)
    # Expose "oid" as "id" so the two tables can be joined on one column name.
    label_table["id"] = label_table["oid"]

    joined = pd.merge(feature_table, label_table, on="id")

    # Seeded random sample of 80% of the rows for training; the rest is test.
    sampled_index = joined.sample(frac=0.8, random_state=42).index
    train_df = joined.loc[sampled_index].reset_index(drop=True)
    test_df = joined.drop(sampled_index).reset_index(drop=True)

    to_drop = ['oid', 'survey_class_mapped', 'survey_class', 'survey_class_cat', 'id']
    X_train = train_df.drop(columns=to_drop)
    X_test = test_df[X_train.columns].copy()

    # Fit the label encoder on the training targets, then reuse it for test.
    label_encoder = LabelEncoder()
    y_train_enc = label_encoder.fit_transform(train_df['survey_class_mapped'])
    y_test_enc = label_encoder.transform(test_df['survey_class_mapped'])

    labels = label_encoder.classes_
    n_classes = len(labels)

    encoded_classes = np.unique(y_train_enc)
    balanced_weights = compute_class_weight(
        class_weight="balanced", classes=encoded_classes, y=y_train_enc
    )
    class_weight_dict = {
        int(cls): float(weight) for cls, weight in zip(encoded_classes, balanced_weights)
    }

    # Multiply the balanced weight of the listed classes by a fixed factor.
    hard_factor = 1.8
    for hard_class in ["AGN", "QSO"]:
        if hard_class not in labels:
            continue
        positions = np.where(labels == hard_class)[0]
        class_weight_dict[int(positions[0])] *= hard_factor

    return X_train, X_test, y_train_enc, y_test_enc, labels, n_classes, class_weight_dict

def get_signature_bins(n_total, has_n0):
    """Return ``(bins, labels_n)`` for assigning signature column positions to levels.

    ``bins`` are cumulative position edges starting at 0, with the final edge
    capped at ``n_total``; ``labels_n`` names each interval ("N0".."N9" when a
    constant term is present, otherwise "N1".."N9").
    """
    if has_n0:
        # cumulative counts including N0: 1, 3, 7, ..., 511, 1023
        first_level = 0
        inner_edges = [1, 3, 7, 15, 31, 63, 127, 255, 511]
        cap = 1023
    else:
        # cumulative counts without N0: 2, 6, 14, ..., 510, 1022
        first_level = 1
        inner_edges = [2, 6, 14, 30, 62, 126, 254, 510]
        cap = 1022

    bins = [0, *inner_edges, min(cap, n_total)]
    labels_n = ["N" + str(level) for level in range(first_level, 10)]
    return bins, labels_n

def get_logsig_bins(n_total):
    """Return ``(bins, labels_n)`` for assigning log-signature column positions
    to levels "N1".."N9"; the final edge is capped at ``n_total``."""
    # cumulative counts for the 2D log-signature: 2, 3, 5, 8, 14, 23, 41, 71, 127
    bins = [0]
    bins.extend((2, 3, 5, 8, 14, 23, 41, 71))
    bins.append(min(127, n_total))
    labels_n = ["N" + str(level) for level in range(1, 10)]
    return bins, labels_n

def map_levels_importance(df_imp, rep, n_total, has_n0=False):
    """Tag every feature with its level and return the frame sorted by level.

    Args:
        df_imp: DataFrame with at least "feature" and "orig_pos" columns, where
            "orig_pos" is the 0-based column position in the original matrix.
        rep: "signature" or "logsignature".
        n_total: Number of columns in the original feature matrix.
        has_n0: Whether position 0 holds the constant term (signature only).

    Returns:
        A new DataFrame with the extra columns "nivel" (categorical "N<k>")
        and "nivel_num" (int), without any "N0" rows, sorted by
        ("nivel_num", "orig_pos"). The input frame is left untouched.

    Raises:
        ValueError: If ``rep`` is unknown or a position falls outside the bins.
    """
    if rep == "signature":
        bins, labels_n = get_signature_bins(n_total, has_n0)
    elif rep == "logsignature":
        bins, labels_n = get_logsig_bins(n_total)
    else:
        raise ValueError("rep debe ser 'signature' o 'logsignature'.")

    # Work on a copy: the original assigned columns straight onto the caller's
    # frame, silently mutating it.
    df_imp = df_imp.copy()

    # Intervals are left-closed, so position p falls in the level whose
    # cumulative range [lo, hi) contains it.
    df_imp["nivel"] = pd.cut(
        df_imp["orig_pos"],
        bins=bins,
        labels=labels_n,
        right=False,
        include_lowest=True
    )

    unmapped = df_imp["nivel"].isna()
    if unmapped.any():
        bad = df_imp.loc[unmapped, ["feature", "orig_pos"]].head(10)
        raise ValueError(f"Hay variables fuera del rango esperado. Ejemplos:\n{bad}")

    # The constant term is not a real level; drop it if it is present.
    nivel_str = df_imp["nivel"].astype(str)
    is_n0 = nivel_str == "N0"
    if is_n0.any():
        df_imp = df_imp[~is_n0].copy()
        nivel_str = nivel_str[~is_n0]

    df_imp["nivel_num"] = nivel_str.str.replace("N", "", regex=False).astype(int)

    return df_imp.sort_values(["nivel_num", "orig_pos"])

def run_xgb_importance_real(tag, cfg):
    """Train the configured XGBoost model on all features and plot importance by level.

    The training matrix is median-imputed and filtered with a zero-variance
    threshold, the model is fitted with the hyper-parameters in ``cfg``, and
    its ``feature_importances_`` are grouped by level. A bar (mean) plus
    scatter (median) chart is saved as a PNG under ``OUTDIR`` and shown.

    Args:
        tag: Label used in the printed header, the plot title and the file name.
        cfg: Dict with keys "path_x", "rep" ("signature" or "logsignature"),
            "best_n" (number of estimators) and "best_params" (extra
            XGBClassifier keyword arguments).

    Returns:
        ``(df_imp, res_imp)``: the per-feature importance table with its level
        assignment, and the per-level summary (mean, median, count) for levels
        1..9, with 0 for levels that have no surviving feature.

    Raises:
        ValueError: If the number of importances differs from the number of
            columns kept by the variance filter.
    """
    X_train, X_test, y_train_enc, y_test_enc, labels, n_classes, class_weight_dict = load_real_data(
        cfg["path_x"], PATH_Y
    )

    print("\n" + "="*100)
    print(f"IMPORTANCIAS — {tag}")
    print("="*100)
    print("Shapes:", X_train.shape, X_test.shape)

    # -------- preprocessing on the full training matrix --------
    # Only the training split is needed here; the test split was previously
    # transformed as well but never used.
    imp = SimpleImputer(strategy="median")
    Xtr_i = imp.fit_transform(X_train)

    vt = VarianceThreshold(0.0)
    Xtr_v = vt.fit_transform(Xtr_i)

    wtr = make_sample_weight(y_train_enc, class_weight_dict)

    # Keep both names and original positions of the surviving columns so that
    # importances can be mapped back to levels.
    orig_cols = pd.Index(X_train.columns)
    vt_mask = vt.get_support()
    kept_cols = orig_cols[vt_mask]
    kept_pos  = np.where(vt_mask)[0]

    print("Variables originales:", len(orig_cols))
    print("Variables post-VT   :", len(kept_cols))

    # -------- final model --------
    model = XGBClassifier(
        random_state=42,
        n_jobs=-1,
        tree_method="hist",
        n_estimators=cfg["best_n"],
        verbosity=0,
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        **cfg["best_params"]
    )

    model.fit(Xtr_v, y_train_enc, sample_weight=wtr, verbose=False)

    # -------- importances --------
    importances = model.feature_importances_

    if len(importances) != len(kept_cols):
        raise ValueError(
            f"feature_importances_ tiene largo {len(importances)} "
            f"pero post-VT hay {len(kept_cols)} columnas."
        )

    df_imp = pd.DataFrame({
        "feature": kept_cols.astype(str),
        "orig_pos": kept_pos,
        "importance": importances
    })

    # A first column that is (numerically) all ones is treated as the constant
    # term of a signature.
    first_col_values = np.asarray(X_train.iloc[:, 0], dtype=float)
    has_n0 = np.allclose(first_col_values, 1.0) if cfg["rep"] == "signature" else False

    print("¿Se detectó término constante N0?:", has_n0)

    df_imp = map_levels_importance(
        df_imp,
        rep=cfg["rep"],
        n_total=len(orig_cols),
        has_n0=has_n0
    )

    print("\nCantidad de variables por nivel:")
    print(df_imp["nivel_num"].value_counts().sort_index())

    res_imp = df_imp.groupby("nivel_num").agg(
        mean=("importance", "mean"),
        median=("importance", "median"),
        count=("importance", "size")
    ).reset_index()

    # Make sure every level 1..9 has a row, even if no feature survived.
    niveles_completos = pd.DataFrame({"nivel_num": np.arange(1, 10)})
    res_imp = niveles_completos.merge(res_imp, on="nivel_num", how="left").fillna(0)

    print("\nRESUMEN POR NIVEL — IMPORTANCIAS")
    print(res_imp.to_string(index=False))

    # -------- plot --------
    xpos = np.arange(len(res_imp))
    labels_plot = [f"Nivel {n}" for n in res_imp["nivel_num"]]

    fig, ax = plt.subplots(figsize=(9.5, 5.2), facecolor="white")
    ax.set_facecolor("white")

    ax.bar(
        xpos,
        res_imp["mean"],
        color="#A5D6A7",
        edgecolor="#2E7D32",
        linewidth=1.2,
        alpha=0.9,
        label="Importancia promedio"
    )

    ax.scatter(
        xpos,
        res_imp["median"],
        s=80,
        marker="o",
        color="#388E3C",
        edgecolors="#1B5E20",
        linewidths=1,
        zorder=3,
        label="Mediana"
    )

    ax.set_xticks(xpos)
    ax.set_xticklabels(labels_plot)
    ax.set_xlabel("Nivel")
    ax.set_ylabel("Importancia")
    ax.set_title(f"Importancia promedio y mediana por nivel ({tag}, reales)")
    ax.grid(axis="y", linestyle="--", alpha=0.35, color="gray")

    # Swap the two legend entries relative to the order matplotlib reports.
    handles, labels_leg = ax.get_legend_handles_labels()
    order = [1, 0]
    ax.legend([handles[i] for i in order], [labels_leg[i] for i in order])

    fig.tight_layout()

    # Build a file-system friendly version of the tag for the output name.
    safe_tag = (
        tag.replace(" ", "_")
           .replace("(", "")
           .replace(")", "")
           .replace("-", "")
           .replace("á", "a")
           .replace("í", "i")
    )
    outpath = OUTDIR / f"importancia_promedio_mediana_por_nivel_{safe_tag}_reales.png"
    fig.savefig(outpath, dpi=300, bbox_inches="tight", facecolor="white")

    print(f"\nGráfico guardado en: {outpath}")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    return df_imp, res_imp

# ==========================================================
# USAGE EXAMPLES
# ==========================================================

# a single configuration:
# df_imp, res_imp = run_xgb_importance_real("ESIG firma", CONFIGS["ESIG firma"])

# every configuration in CONFIGS (the returned frames are discarded; the
# function prints its report and saves one figure per configuration):
for name, cfg in CONFIGS.items():
    run_xgb_importance_real(name, cfg)


====================================================================================================
IMPORTANCIAS — ESIG firma
====================================================================================================
Shapes: (1438, 1023) (359, 1023)
Variables originales: 1023
Variables post-VT   : 1022
¿Se detectó término constante N0?: True

Cantidad de variables por nivel:
nivel_num
1      2
2      4
3      8
4     16
5     32
6     64
7    128
8    256
9    512
Name: count, dtype: int64

RESUMEN POR NIVEL — IMPORTANCIAS
 nivel_num     mean   median  count
         1 0.000550 0.000550      2
         2 0.001271 0.001336      4
         3 0.000634 0.000000      8
         4 0.000588 0.000330     16
         5 0.000647 0.000062     32
         6 0.000810 0.000781     64
         7 0.000791 0.000791    128
         8 0.000986 0.000941    256
         9 0.001080 0.000970    512

Gráfico guardado en: /home/felorrieta/Catalina/importancia_promedio_mediana_por_nivel_ESIG_firma_reales.png


====================================================================================================
IMPORTANCIAS — ESIG log-firma
====================================================================================================
Shapes: (1438, 127) (359, 127)
Variables originales: 127
Variables post-VT   : 127
¿Se detectó término constante N0?: False

Cantidad de variables por nivel:
nivel_num
1     2
2     1
3     2
4     3
5     6
6     9
7    18
8    30
9    56
Name: count, dtype: int64

RESUMEN POR NIVEL — IMPORTANCIAS
 nivel_num     mean   median  count
         1 0.000000 0.000000      2
         2 0.006996 0.006996      1
         3 0.011064 0.011064      2
         4 0.006139 0.005971      3
         5 0.012237 0.007840      6
         6 0.006463 0.006061      9
         7 0.009155 0.007891     18
         8 0.007320 0.007140     30
         9 0.007795 0.007085     56

Gráfico guardado en: /home/felorrieta/Catalina/importancia_promedio_mediana_por_nivel_ESIG_logfirma_reales.png


====================================================================================================
IMPORTANCIAS — IISIG firma
====================================================================================================
Shapes: (1510, 1022) (377, 1022)
Variables originales: 1022
Variables post-VT   : 1022
¿Se detectó término constante N0?: False

Cantidad de variables por nivel:
nivel_num
1      2
2      4
3      8
4     16
5     32
6     64
7    128
8    256
9    512
Name: count, dtype: int64

RESUMEN POR NIVEL — IMPORTANCIAS
 nivel_num     mean   median  count
         1 0.000584 0.000584      2
         2 0.000620 0.000535      4
         3 0.000898 0.001212      8
         4 0.000879 0.000991     16
         5 0.000755 0.000939     32
         6 0.000917 0.000979     64
         7 0.001033 0.001061    128
         8 0.000943 0.000997    256
         9 0.001013 0.001068    512

Gráfico guardado en: /home/felorrieta/Catalina/importancia_promedio_mediana_por_nivel_IISIG_firma_reales.png


====================================================================================================
IMPORTANCIAS — IISIG log-firma
====================================================================================================
Shapes: (1032, 127) (258, 127)
Variables originales: 127
Variables post-VT   : 127
¿Se detectó término constante N0?: False

Cantidad de variables por nivel:
nivel_num
1     2
2     1
3     2
4     3
5     6
6     9
7    18
8    30
9    56
Name: count, dtype: int64

RESUMEN POR NIVEL — IMPORTANCIAS
 nivel_num     mean   median  count
         1 0.009416 0.009416      2
         2 0.005089 0.005089      1
         3 0.007231 0.007231      2
         4 0.009897 0.010743      3
         5 0.010655 0.008660      6
         6 0.007037 0.007187      9
         7 0.007268 0.007020     18
         8 0.007721 0.007685     30
         9 0.007896 0.007904     56

Gráfico guardado en: /home/felorrieta/Catalina/importancia_promedio_mediana_por_nivel_IISIG_logfirma_reales.png