library(ggplot2)
library(gridExtra)
library(GGally)
library(corrplot)
library(dplyr)
library(tidyr)
library(viridis)
library(reshape2)

# General configuration: every plot that does not set its own complete theme
# uses this one.
theme_set(theme_minimal(base_size = 11))

# NOTE: replace 'ckd_data' with the actual name of your data frame.
getwd()
# Console output of getwd() from a previous run, kept as a comment because a
# bare `[1] "..."` line is not valid R and would stop the script from parsing:
# [1] "/Users/samircabrera/Development/Universidad/Inteligencia Artificial/Inteligencia-Artificial/Proyecto/Code/plots"

# The path below is absolute and machine-specific; adjust it when running the
# script on another machine.
ckd_data <- read.csv("/Users/samircabrera/Development/Universidad/Inteligencia Artificial/Inteligencia-Artificial/Proyecto/Dataset/Chronic_Kidney_Dsease_data.csv")
# Pair plot of the renal markers, coloured by diagnosis.
#
# Draws a balanced random sample (the same number of rows from each diagnosis
# group, not a proportional one) and shows pairwise scatter plots,
# correlations and densities for GFR, SerumCreatinine, BUNLevels,
# ProteinInUrine and ACR.
#
# data:        data frame with the five marker columns and `Diagnosis`.
#              `Diagnosis` must have exactly two distinct values; they are
#              labelled "No CKD" / "CKD" in sorted order (presumably 0 / 1 --
#              confirm against the dataset coding).
# sample_size: total number of rows to plot; half is drawn from each group.
#              An odd value is rounded down to the nearest even total. A group
#              with fewer rows than requested contributes all of its rows.
# Returns the GGally pair-plot object.
plot_renal_pairs <- function(data, sample_size = 400) {
  # Integer division keeps `n` a whole number: recent dplyr versions reject a
  # fractional `n` in slice_sample().
  n_per_group <- sample_size %/% 2

  renal_data <- data %>%
    select(GFR, SerumCreatinine, BUNLevels, ProteinInUrine, ACR, Diagnosis) %>%
    mutate(Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))) %>%
    group_by(Diagnosis) %>%
    slice_sample(n = n_per_group) %>%  # equal count per group
    ungroup()

  ggpairs(renal_data,
          columns = 1:5,  # the five markers; Diagnosis is only used for colour
          aes(color = Diagnosis, alpha = 0.6),
          upper = list(continuous = wrap("cor", size = 3.5, stars = FALSE)),
          lower = list(continuous = wrap("points", alpha = 0.4, size = 0.8)),
          diag = list(continuous = wrap("densityDiag", alpha = 0.6)),
          title = "Relaciones entre Marcadores Renales (Muestra Estratificada)") +
    theme_bw() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      strip.text = element_text(size = 9)
    )
}
# Scatter plot of GFR against serum creatinine, one panel per diagnosis, with
# Age mapped to colour and BMI to point size, plus a loess trend per panel.
#
# data:        data frame with SerumCreatinine, GFR, Age, BMI and Diagnosis.
#              `Diagnosis` must have exactly two distinct values; they are
#              labelled "No CKD" / "CKD" in sorted order (presumably 0 / 1 --
#              confirm against the dataset coding).
# n_per_group: number of rows sampled at random from each diagnosis group.
#              A group with fewer rows contributes all of them, in which case
#              the count shown in the subtitle overstates that group's size.
# Returns a ggplot object.
plot_gfr_3d <- function(data, n_per_group = 250) {
  data_plot <- data %>%
    mutate(Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))) %>%
    group_by(Diagnosis) %>%
    slice_sample(n = n_per_group) %>%  # at most n_per_group rows per group
    ungroup()

  ggplot(data_plot, aes(x = SerumCreatinine, y = GFR)) +
    geom_point(aes(color = Age, size = BMI), alpha = 0.6) +
    scale_color_viridis_c(option = "plasma") +
    scale_size_continuous(range = c(2, 6)) +
    geom_smooth(method = "loess", se = TRUE, color = "black", linewidth = 1) +
    # Free scales: each diagnosis panel gets its own x and y ranges.
    facet_wrap(~Diagnosis, scales = "free") +
    labs(title = "GFR vs Creatinina Sérica por Diagnóstico",
         subtitle = sprintf("%d pacientes por grupo | Edad (color) y BMI (tamaño)", n_per_group),
         x = "Creatinina Sérica (mg/dL)",
         y = "GFR (mL/min/1.73m²)") +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      strip.text = element_text(face = "bold", size = 11),
      legend.position = "bottom"
    )
}
# Heatmap of the mean clinical profile per CKD stage.
#
# Rows are assigned a stage from their GFR value, eight clinical variables are
# averaged within each stage, and each variable is then standardised across
# the stages (z-score) to drive the tile colour. The tile label shows the
# unstandardised mean rounded to one decimal.
#
# data: data frame with GFR, BMI, SystolicBP, HbA1c, SerumCreatinine,
#       HemoglobinLevels, ProteinInUrine, FatigueLevels and
#       QualityOfLifeScore. Rows whose GFR is missing are left out, because
#       they cannot be assigned a stage.
# Returns a ggplot object. No complete theme is applied here, so the
# session's active theme is used.
plot_clinical_heatmap <- function(data) {
  # Derive the CKD stage from GFR. The last branch is an explicit `GFR < 15`
  # test rather than a catch-all, so a missing GFR yields NA instead of being
  # counted as kidney failure.
  data_staged <- data %>%
    mutate(CKD_Stage = case_when(
      GFR >= 90 ~ "Normal (≥90)",
      GFR >= 60 ~ "Mild (60-89)",
      GFR >= 30 ~ "Moderate (30-59)",
      GFR >= 15 ~ "Severe (15-29)",
      GFR < 15 ~ "Kidney Failure (<15)"
    )) %>%
    filter(!is.na(CKD_Stage)) %>%
    mutate(CKD_Stage = factor(CKD_Stage,
                              levels = c("Normal (≥90)", "Mild (60-89)",
                                         "Moderate (30-59)", "Severe (15-29)",
                                         "Kidney Failure (<15)")))

  # Mean of each variable per stage, reshaped to one row per (stage, variable).
  heatmap_data <- data_staged %>%
    group_by(CKD_Stage) %>%
    summarise(
      BMI = mean(BMI, na.rm = TRUE),
      SystolicBP = mean(SystolicBP, na.rm = TRUE),
      HbA1c = mean(HbA1c, na.rm = TRUE),
      Creatinine = mean(SerumCreatinine, na.rm = TRUE),
      Hemoglobin = mean(HemoglobinLevels, na.rm = TRUE),
      ProteinUrine = mean(ProteinInUrine, na.rm = TRUE),
      Fatigue = mean(FatigueLevels, na.rm = TRUE),
      QoL = mean(QualityOfLifeScore, na.rm = TRUE)
    ) %>%
    pivot_longer(-CKD_Stage, names_to = "Variable", values_to = "Value") %>%
    group_by(Variable) %>%
    # scale() returns a one-column matrix; `[, 1]` takes it as a plain vector.
    mutate(Value_scaled = scale(Value)[, 1]) %>%
    ungroup()

  ggplot(heatmap_data, aes(x = CKD_Stage, y = Variable, fill = Value_scaled)) +
    # `linewidth` is the border-width argument; `size` is deprecated for lines.
    geom_tile(color = "white", linewidth = 0.5) +
    geom_text(aes(label = round(Value, 1)), size = 3, color = "white") +
    scale_fill_gradient2(low = "#3B9AB2", mid = "#EBCC2A", high = "#F21A00",
                         midpoint = 0, name = "Z-score") +
    labs(title = "Perfil Clínico Promedio por Estadio de CKD",
         x = "Estadio de Enfermedad Renal",
         y = "Variable Clínica",
         caption = "Valores estandarizados (Z-scores) - números = valor real promedio") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          plot.title = element_text(hjust = 0.5, face = "bold"))
}
# Distribution of the standardised lifestyle variables by diagnosis.
#
# Despite the function name, the figure is a set of side-by-side violin and
# box plots (one pair per variable), not a parallel-coordinates chart. Every
# row of `data` is used; there is no sampling.
#
# data: data frame with BMI, PhysicalActivity, DietQuality, SleepQuality,
#       AlcoholConsumption and Diagnosis. `Diagnosis` must have exactly two
#       distinct values; they are labelled "No CKD" / "CKD" in sorted order
#       (presumably 0 / 1 -- confirm against the dataset coding).
# Returns a ggplot object.
plot_parallel_coordinates <- function(data) {
  # Single source of truth for the variables and their order on the x axis.
  lifestyle_vars <- c("BMI", "PhysicalActivity", "DietQuality",
                      "SleepQuality", "AlcoholConsumption")

  parallel_long <- data %>%
    select(all_of(lifestyle_vars), Diagnosis) %>%
    mutate(Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))) %>%
    # scale() returns a one-column matrix; as.numeric() turns it back into a
    # plain vector so the reshaped `Value` column is an ordinary numeric.
    mutate(across(all_of(lifestyle_vars),
                  function(x) as.numeric(scale(x)))) %>%
    pivot_longer(cols = all_of(lifestyle_vars),
                 names_to = "Variable", values_to = "Value") %>%
    mutate(Variable = factor(Variable, levels = lifestyle_vars))

  ggplot(parallel_long, aes(x = Variable, y = Value, fill = Diagnosis)) +
    geom_violin(alpha = 0.6, position = position_dodge(width = 0.9)) +
    # Same dodge width as the violins so each box sits inside its violin.
    geom_boxplot(width = 0.2, position = position_dodge(width = 0.9),
                 alpha = 0.8, outlier.alpha = 0.3) +
    scale_fill_manual(values = c("No CKD" = "#2ecc71", "CKD" = "#e74c3c")) +
    labs(title = "Distribución de Variables de Estilo de Vida",
         subtitle = "Comparación completa del dataset (violin + boxplot)",
         y = "Valor Estandarizado (Z-score)",
         x = NULL) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      legend.position = "bottom"
    )
}
# Box plots of four biomarkers (GFR, HbA1c, SystolicBP, QualityOfLifeScore)
# broken down by socioeconomic status (x axis), gender (fill) and ethnicity
# (facet columns); each biomarker gets its own facet row with a free y scale.
#
# data: data frame with Gender, Ethnicity, SocioeconomicStatus and the four
#       biomarker columns. The demographic columns are relabelled in the
#       sorted order of their distinct values, so they must contain exactly
#       2, 4 and 3 distinct values respectively (presumably integer codes
#       starting at 0 -- confirm against the dataset coding).
# Returns a ggplot object. No complete theme is applied here, so the
# session's active theme is used.
plot_demographic_boxplots <- function(data) {
  # Step 1: turn the demographic codes into labelled factors.
  recoded <- mutate(
    data,
    Gender = factor(Gender, labels = c("Male", "Female")),
    Ethnicity = factor(
      Ethnicity,
      labels = c("Caucasian", "African American", "Asian", "Other")
    ),
    SocioeconomicStatus = factor(
      SocioeconomicStatus,
      labels = c("Low", "Middle", "High")
    )
  )

  # Step 2: keep only the columns that are plotted.
  trimmed <- select(
    recoded,
    Gender, Ethnicity, SocioeconomicStatus,
    GFR, HbA1c, SystolicBP, QualityOfLifeScore
  )

  # Step 3: one row per (patient, biomarker) so biomarkers can be facetted.
  long_values <- pivot_longer(
    trimmed,
    cols = c(GFR, HbA1c, SystolicBP, QualityOfLifeScore),
    names_to = "Biomarker",
    values_to = "Value"
  )

  figure_labels <- labs(
    title = "Biomarcadores por Demografía (Género, Etnicidad, Estatus Socioeconómico)",
    x = "Estatus Socioeconómico",
    y = "Valor del Biomarcador"
  )
  figure_theme <- theme(
    strip.text = element_text(face = "bold", size = 8),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 11)
  )

  ggplot(long_values, aes(x = SocioeconomicStatus, y = Value, fill = Gender)) +
    geom_boxplot(alpha = 0.7, outlier.size = 0.5) +
    facet_grid(Biomarker ~ Ethnicity, scales = "free_y") +
    scale_fill_brewer(palette = "Set2") +
    figure_labels +
    figure_theme
}
# Bubble chart of quality of life against fatigue, one panel per diagnosis.
#
# Point size is a severity score obtained by min-max rescaling
# SerumCreatinine to the 0-10 range over the whole input (before sampling);
# point colour is GFR. A dashed linear fit is drawn in each panel.
#
# data:        data frame with SerumCreatinine, FatigueLevels,
#              QualityOfLifeScore, GFR and Diagnosis. `Diagnosis` must have
#              exactly two distinct values; they are labelled "No CKD" / "CKD"
#              in sorted order (presumably 0 / 1 -- confirm against the
#              dataset coding).
# n_per_group: number of rows sampled at random from each diagnosis group.
#              A group with fewer rows contributes all of them, in which case
#              the count shown in the subtitle overstates that group's size.
# Returns a ggplot object.
plot_qol_bubble <- function(data, n_per_group = 50) {
  bubble_data <- data %>%
    mutate(
      # na.rm = TRUE so that one missing creatinine value does not turn every
      # severity score into NA; rows with a missing value simply stay NA.
      Severity_Score = (SerumCreatinine - min(SerumCreatinine, na.rm = TRUE)) /
        (max(SerumCreatinine, na.rm = TRUE) -
           min(SerumCreatinine, na.rm = TRUE)) * 10,
      Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))
    ) %>%
    group_by(Diagnosis) %>%
    slice_sample(n = n_per_group) %>%  # at most n_per_group rows per group
    ungroup()

  ggplot(bubble_data, aes(x = FatigueLevels, y = QualityOfLifeScore)) +
    geom_point(aes(size = Severity_Score, color = GFR),
               alpha = 0.7, shape = 16) +
    # direction = -1 reverses the palette.
    scale_color_viridis_c(option = "magma", direction = -1,
                          name = "GFR\n(mL/min/1.73m²)") +
    scale_size_continuous(range = c(2, 10),
                          name = "Severidad\n(Creatinina)") +
    geom_smooth(method = "lm", se = TRUE, color = "black",
                linetype = "dashed", linewidth = 1) +
    facet_wrap(~Diagnosis, scales = "free") +
    labs(title = "Calidad de Vida vs Fatiga por Diagnóstico",
         subtitle = sprintf("%d pacientes por grupo | Severidad en tamaño | GFR en color", n_per_group),
         x = "Nivel de Fatiga (0-10)",
         y = "Score de Calidad de Vida (0-100)") +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      strip.text = element_text(face = "bold", size = 11),
      legend.position = "right",
      panel.grid.minor = element_blank()
    )
}
# Scatter plot of a composite "healthy lifestyle" score against GFR, coloured
# by diagnosis, with point size showing serum creatinine and one linear fit
# per diagnosis group.
#
# The score is the unweighted mean of five components, each intended to be on
# a 0-10 scale where higher is healthier. The input ranges used for the
# rescaling (0-10 for activity and diet, 4-10 for sleep, 0-20 for alcohol,
# 0/1 for smoking) are assumptions about the dataset -- confirm against its
# documentation.
#
# data:        data frame with PhysicalActivity, DietQuality, SleepQuality,
#              Smoking, AlcoholConsumption, GFR, SerumCreatinine and
#              Diagnosis. `Diagnosis` must have exactly two distinct values;
#              they are labelled "No CKD" / "CKD" in sorted order (presumably
#              0 / 1 -- confirm against the dataset coding).
# n_per_group: number of rows sampled at random from each diagnosis group.
#              A group with fewer rows contributes all of them, in which case
#              the count shown in the subtitle overstates that group's size.
# Returns a ggplot object.
plot_lifestyle_vs_gfr <- function(data, n_per_group = 100) {
  lifestyle_data <- data %>%
    mutate(
      PA_score = PhysicalActivity,  # used as is (assumed already 0-10)
      Diet_score = DietQuality,     # used as is (assumed already 0-10)
      Sleep_score = ((SleepQuality - 4) / (10 - 4)) * 10,  # 4-10 -> 0-10
      NoSmoking_score = (1 - Smoking) * 10,  # inverted: non-smoker = 10
      # Inverted: lower consumption gives a higher score.
      LowAlcohol_score = ((20 - AlcoholConsumption) / 20) * 10,

      # Composite score: plain average of the five components.
      HealthyLifestyleScore = (PA_score + Diet_score + Sleep_score +
                                 NoSmoking_score + LowAlcohol_score) / 5,

      Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))
    ) %>%
    group_by(Diagnosis) %>%
    slice_sample(n = n_per_group) %>%  # at most n_per_group rows per group
    ungroup()

  ggplot(lifestyle_data, aes(x = HealthyLifestyleScore, y = GFR)) +
    geom_point(aes(color = Diagnosis, size = SerumCreatinine), alpha = 0.6) +
    geom_smooth(aes(color = Diagnosis), method = "lm", se = TRUE, linewidth = 1.2) +
    scale_color_manual(values = c("No CKD" = "#2ecc71", "CKD" = "#e74c3c")) +
    scale_size_continuous(range = c(1, 6), name = "Creatinina\n(mg/dL)") +
    labs(title = "Estilo de Vida Saludable vs Función Renal (GFR)",
         subtitle = sprintf("Muestra: %d pacientes por grupo", n_per_group),
         x = "Healthy Lifestyle Score (0-10)",
         y = "GFR (mL/min/1.73m²)") +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10, color = "gray30"),
      legend.position = "right",
      panel.grid.minor = element_blank()
    )
}
# Builds the seven multivariable figures in a fixed priority order. Before
# each figure a progress line is written to the console, then the figure is
# built and printed straight away.
#
# data: data frame passed unchanged to every plot_* function.
# Returns, invisibly, a named list of the plot objects with the elements
# renal_pairs, heatmap, lifestyle_gfr, gfr_3d, parallel, demo_box and bubble.
generate_all_plots <- function(data) {
  # Writes the progress line, then builds, prints and returns one figure.
  # `figure` is a lazily evaluated argument: the plot_* call passed in is only
  # run when `figure` is first used, i.e. after cat() has written the banner.
  render_step <- function(banner, figure) {
    cat(banner)
    print(figure)
    figure
  }

  cat("Generando visualizaciones multivariables priorizadas...\n\n")

  # list() evaluates its arguments left to right, which fixes the run order.
  plots <- list(
    renal_pairs = render_step(
      "1. Pairplot de Variables Renales (Análisis principal)...\n",
      plot_renal_pairs(data)
    ),
    heatmap = render_step(
      "2. Heatmap de Perfiles Clínicos (Clusters y patrones)...\n",
      plot_clinical_heatmap(data)
    ),
    lifestyle_gfr = render_step(
      "3. Lifestyle Score vs GFR (Impacto conductual)...\n",
      plot_lifestyle_vs_gfr(data)
    ),
    gfr_3d = render_step(
      "4. Scatterplot 4D (GFR-Creatinina-Edad-BMI)...\n",
      plot_gfr_3d(data)
    ),
    parallel = render_step(
      "5. Parallel Coordinates (Factores de Riesgo)...\n",
      plot_parallel_coordinates(data)
    ),
    demo_box = render_step(
      "6. Boxplots Demográficos Facetados...\n",
      plot_demographic_boxplots(data)
    ),
    bubble = render_step(
      "7. Bubble Chart (Calidad de Vida)...\n",
      plot_qol_bubble(data)
    )
  )

  cat("\n Todas las visualizaciones generadas y priorizadas correctamente.\n")

  invisible(plots)
}
# Build and display every figure; the plot objects are kept in `plots`.
plots <- generate_all_plots(ckd_data)
# Console output from a previous run, kept as comments because the bare text
# is not valid R and would stop the script from parsing:
# Generando visualizaciones multivariables priorizadas...
#
# 1. Pairplot de Variables Renales (Análisis principal)...
# 2. Heatmap de Perfiles Clínicos (Clusters y patrones)...
# 3. Lifestyle Score vs GFR (Impacto conductual)...
# 4. Scatterplot 4D (GFR-Creatinina-Edad-BMI)...
# 5. Parallel Coordinates (Factores de Riesgo)...
# 6. Boxplots Demográficos Facetados...
# 7. Bubble Chart (Calidad de Vida)...
#
#  Todas las visualizaciones generadas y priorizadas correctamente.

