library(ggplot2)
library(gridExtra)
library(GGally)
library(corrplot)
library(dplyr)
library(tidyr)
library(viridis)
library(reshape2)
# General configuration: make theme_minimal() with an 11 pt base size the
# default ggplot2 theme for the session.
theme_set(theme_minimal(base_size = 11))
# NOTE: replace 'ckd_data' with the actual name of your data frame.
# Interactive check of the working directory. The value printed by a previous
# session had been pasted below as bare text (a syntax error when the file is
# sourced); it is kept here as a comment:
# [1] "/Users/samircabrera/Development/Universidad/Inteligencia Artificial/Inteligencia-Artificial/Proyecto/Code/plots"
getwd()
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
ckd_data <- read.csv("/Users/samircabrera/Development/Universidad/Inteligencia Artificial/Inteligencia-Artificial/Proyecto/Dataset/Chronic_Kidney_Dsease_data.csv")
#' Pairs plot of the renal markers on a class-balanced random sample
#'
#' Draws up to `sample_size %/% 2` random rows from each `Diagnosis` group
#' (balanced sampling, not proportional to the group sizes) and plots GFR,
#' SerumCreatinine, BUNLevels, ProteinInUrine and ACR against each other with
#' `ggpairs()`, coloured by diagnosis.
#'
#' @param data Data frame containing the columns GFR, SerumCreatinine,
#'   BUNLevels, ProteinInUrine, ACR and Diagnosis. Diagnosis must have exactly
#'   two distinct values; they are labelled "No CKD" and "CKD" in sorted order
#'   (presumably coded 0/1 -- confirm against the dataset).
#' @param sample_size Total number of rows to sample across both groups.
#'   Odd values are rounded down to an even total.
#' @return The object returned by `ggpairs()` with the theme applied.
plot_renal_pairs <- function(data, sample_size = 400) {
  # slice_sample() expects a whole number; recent dplyr versions reject a
  # fractional `n`, which `sample_size / 2` produced for odd sizes.
  n_per_group <- sample_size %/% 2

  renal_data <- data %>%
    select(GFR, SerumCreatinine, BUNLevels, ProteinInUrine, ACR, Diagnosis) %>%
    mutate(Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))) %>%
    group_by(Diagnosis) %>%
    slice_sample(n = n_per_group) %>% # same number of rows from each group
    ungroup()

  ggpairs(
    renal_data,
    columns = 1:5, # the five markers; column 6 (Diagnosis) is used for colour only
    aes(color = Diagnosis, alpha = 0.6),
    upper = list(continuous = wrap("cor", size = 3.5, stars = FALSE)),
    lower = list(continuous = wrap("points", alpha = 0.4, size = 0.8)),
    diag = list(continuous = wrap("densityDiag", alpha = 0.6)),
    title = "Relaciones entre Marcadores Renales (Muestra Estratificada)"
  ) +
    theme_bw() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      strip.text = element_text(size = 9)
    )
}
#' GFR against serum creatinine, faceted by diagnosis
#'
#' Samples up to 250 random rows per `Diagnosis` group and draws a scatter plot
#' of GFR versus SerumCreatinine with Age mapped to colour and BMI to point
#' size, plus a loess smoother with its confidence band in each facet.
#'
#' @param data Data frame containing the columns SerumCreatinine, GFR, Age,
#'   BMI and Diagnosis. Diagnosis must have exactly two distinct values; they
#'   are labelled "No CKD" and "CKD" in sorted order.
#' @return A ggplot object.
plot_gfr_3d <- function(data) {
  group_size <- 250

  labelled <- mutate(
    data,
    Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))
  )
  # Same number of rows requested from each diagnosis group.
  sampled <- labelled %>%
    group_by(Diagnosis) %>%
    slice_sample(n = group_size) %>%
    ungroup()

  plot_labels <- labs(
    title = "GFR vs Creatinina Sérica por Diagnóstico",
    subtitle = sprintf(
      "%d pacientes por grupo | Edad (color) y BMI (tamaño)",
      group_size
    ),
    x = "Creatinina Sérica (mg/dL)",
    y = "GFR (mL/min/1.73m²)"
  )
  text_theme <- theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10),
    strip.text = element_text(face = "bold", size = 11),
    legend.position = "bottom"
  )

  ggplot(sampled, aes(x = SerumCreatinine, y = GFR)) +
    geom_point(aes(color = Age, size = BMI), alpha = 0.6) +
    scale_color_viridis_c(option = "plasma") +
    scale_size_continuous(range = c(2, 6)) +
    geom_smooth(method = "loess", se = TRUE, color = "black", linewidth = 1) +
    facet_wrap(~Diagnosis, scales = "free") + # independent axes per facet
    plot_labels +
    theme_minimal() +
    text_theme
}
#' Heatmap of mean clinical values per GFR-based CKD stage
#'
#' Assigns each row to one of five stages from its GFR value, averages eight
#' clinical variables within each stage, standardises those averages per
#' variable (z-score across stages) and draws a tile plot: fill shows the
#' z-score, the printed number is the unscaled mean.
#'
#' Rows with a missing GFR are dropped before staging; otherwise they would
#' fall through to the catch-all branch and be counted as kidney failure.
#'
#' @param data Data frame containing the columns GFR, BMI, SystolicBP, HbA1c,
#'   SerumCreatinine, HemoglobinLevels, ProteinInUrine, FatigueLevels and
#'   QualityOfLifeScore.
#' @return A ggplot object.
plot_clinical_heatmap <- function(data) {
  stage_levels <- c(
    "Normal (≥90)", "Mild (60-89)", "Moderate (30-59)",
    "Severe (15-29)", "Kidney Failure (<15)"
  )

  # Derive the CKD stage from GFR
  data_staged <- data %>%
    filter(!is.na(GFR)) %>%
    mutate(CKD_Stage = case_when(
      GFR >= 90 ~ "Normal (≥90)",
      GFR >= 60 ~ "Mild (60-89)",
      GFR >= 30 ~ "Moderate (30-59)",
      GFR >= 15 ~ "Severe (15-29)",
      TRUE ~ "Kidney Failure (<15)"
    )) %>%
    mutate(CKD_Stage = factor(CKD_Stage, levels = stage_levels))

  # Mean of each variable per stage, reshaped to one row per (stage, variable)
  heatmap_data <- data_staged %>%
    group_by(CKD_Stage) %>%
    summarise(
      BMI = mean(BMI, na.rm = TRUE),
      SystolicBP = mean(SystolicBP, na.rm = TRUE),
      HbA1c = mean(HbA1c, na.rm = TRUE),
      Creatinine = mean(SerumCreatinine, na.rm = TRUE),
      Hemoglobin = mean(HemoglobinLevels, na.rm = TRUE),
      ProteinUrine = mean(ProteinInUrine, na.rm = TRUE),
      Fatigue = mean(FatigueLevels, na.rm = TRUE),
      QoL = mean(QualityOfLifeScore, na.rm = TRUE)
    ) %>%
    pivot_longer(-CKD_Stage, names_to = "Variable", values_to = "Value") %>%
    group_by(Variable) %>%
    # Standardise within each variable so rows of the heatmap are comparable
    mutate(Value_scaled = scale(Value)[, 1]) %>%
    ungroup()

  ggplot(heatmap_data, aes(x = CKD_Stage, y = Variable, fill = Value_scaled)) +
    # `linewidth` replaces the deprecated `size` for the tile border
    geom_tile(color = "white", linewidth = 0.5) +
    geom_text(aes(label = round(Value, 1)), size = 3, color = "white") +
    scale_fill_gradient2(
      low = "#3B9AB2", mid = "#EBCC2A", high = "#F21A00",
      midpoint = 0, name = "Z-score"
    ) +
    labs(
      title = "Perfil Clínico Promedio por Estadio de CKD",
      x = "Estadio de Enfermedad Renal",
      y = "Variable Clínica",
      caption = "Valores estandarizados (Z-scores) - números = valor real promedio"
    ) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.5, face = "bold")
    )
}
#' Violin + box plots of standardised lifestyle variables by diagnosis
#'
#' Standardises BMI, PhysicalActivity, DietQuality, SleepQuality and
#' AlcoholConsumption over the whole data set (z-scores) and shows, for each
#' variable, the distribution in both diagnosis groups as dodged violins with
#' narrow box plots on top. Despite the function name, the figure is not a
#' parallel-coordinates plot.
#'
#' @param data Data frame containing the five lifestyle columns listed above
#'   and Diagnosis. Diagnosis must have exactly two distinct values; they are
#'   labelled "No CKD" and "CKD" in sorted order.
#' @return A ggplot object.
plot_parallel_coordinates <- function(data) {
  lifestyle_vars <- c(
    "BMI", "PhysicalActivity", "DietQuality",
    "SleepQuality", "AlcoholConsumption"
  )

  parallel_data <- data %>%
    select(all_of(c(lifestyle_vars, "Diagnosis"))) %>%
    mutate(Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))) %>%
    # scale() returns a one-column matrix; flatten it so the columns stay
    # plain numeric vectors when reshaped and plotted.
    mutate(across(all_of(lifestyle_vars), function(x) as.numeric(scale(x))))

  parallel_long <- parallel_data %>%
    pivot_longer(
      cols = all_of(lifestyle_vars),
      names_to = "Variable", values_to = "Value"
    ) %>%
    # Fix the x-axis order to the order the variables are listed in
    mutate(Variable = factor(Variable, levels = lifestyle_vars))

  ggplot(parallel_long, aes(x = Variable, y = Value, fill = Diagnosis)) +
    geom_violin(alpha = 0.6, position = position_dodge(width = 0.9)) +
    geom_boxplot(
      width = 0.2, position = position_dodge(width = 0.9),
      alpha = 0.8, outlier.alpha = 0.3
    ) +
    scale_fill_manual(values = c("No CKD" = "#2ecc71", "CKD" = "#e74c3c")) +
    labs(
      title = "Distribución de Variables de Estilo de Vida",
      subtitle = "Comparación completa del dataset (violin + boxplot)",
      y = "Valor Estandarizado (Z-score)",
      x = NULL
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      legend.position = "bottom"
    )
}
#' Box plots of four biomarkers split by demographic factors
#'
#' Shows GFR, HbA1c, SystolicBP and QualityOfLifeScore as box plots over
#' socioeconomic status, filled by gender, in a grid with one row per
#' biomarker (independent y scales) and one column per ethnicity.
#'
#' @param data Data frame containing the columns Gender, Ethnicity,
#'   SocioeconomicStatus, GFR, HbA1c, SystolicBP and QualityOfLifeScore.
#'   Labels are assigned to the sorted distinct values of each factor column,
#'   so Gender needs exactly 2, Ethnicity 4 and SocioeconomicStatus 3 distinct
#'   values. NOTE(review): the label order presumes a coding such as
#'   0 = Male / 1 = Female -- confirm against the dataset description.
#' @return A ggplot object.
plot_demographic_boxplots <- function(data) {
  data_demo <- data %>%
    mutate(
      Gender = factor(Gender, labels = c("Male", "Female")),
      Ethnicity = factor(
        Ethnicity,
        labels = c("Caucasian", "African American", "Asian", "Other")
      ),
      SocioeconomicStatus = factor(
        SocioeconomicStatus,
        labels = c("Low", "Middle", "High")
      )
    ) %>%
    select(
      Gender, Ethnicity, SocioeconomicStatus,
      GFR, HbA1c, SystolicBP, QualityOfLifeScore
    ) %>%
    # One row per (patient, biomarker) so the biomarkers can be faceted
    pivot_longer(
      cols = c(GFR, HbA1c, SystolicBP, QualityOfLifeScore),
      names_to = "Biomarker", values_to = "Value"
    )

  ggplot(data_demo, aes(x = SocioeconomicStatus, y = Value, fill = Gender)) +
    geom_boxplot(alpha = 0.7, outlier.size = 0.5) +
    facet_grid(Biomarker ~ Ethnicity, scales = "free_y") +
    scale_fill_brewer(palette = "Set2") +
    labs(
      title = "Biomarcadores por Demografía (Género, Etnicidad, Estatus Socioeconómico)",
      x = "Estatus Socioeconómico",
      y = "Valor del Biomarcador"
    ) +
    theme(
      strip.text = element_text(face = "bold", size = 8),
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.5, face = "bold", size = 11)
    )
}
#' Bubble chart of quality of life against fatigue, faceted by diagnosis
#'
#' Computes a 0-10 severity score by min-max scaling SerumCreatinine over the
#' full data set, samples up to 50 random rows per `Diagnosis` group and plots
#' QualityOfLifeScore versus FatigueLevels. Point size shows the severity
#' score, colour shows GFR, and each facet gets a dashed linear fit with its
#' confidence band.
#'
#' @param data Data frame containing the columns SerumCreatinine, GFR,
#'   FatigueLevels, QualityOfLifeScore and Diagnosis. Diagnosis must have
#'   exactly two distinct values; they are labelled "No CKD" and "CKD" in
#'   sorted order.
#' @return A ggplot object.
plot_qol_bubble <- function(data) {
  n_per_group <- 50

  bubble_data <- data %>%
    mutate(
      # na.rm = TRUE: without it a single missing creatinine value would turn
      # the score of every row into NA.
      Severity_Score = (SerumCreatinine - min(SerumCreatinine, na.rm = TRUE)) /
        (max(SerumCreatinine, na.rm = TRUE) -
          min(SerumCreatinine, na.rm = TRUE)) * 10,
      Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))
    ) %>%
    group_by(Diagnosis) %>%
    slice_sample(n = n_per_group) %>% # same number of rows from each group
    ungroup()

  ggplot(bubble_data, aes(x = FatigueLevels, y = QualityOfLifeScore)) +
    geom_point(
      aes(size = Severity_Score, color = GFR),
      alpha = 0.7, shape = 16
    ) +
    # direction = -1 reverses the magma palette
    scale_color_viridis_c(
      option = "magma", direction = -1,
      name = "GFR\n(mL/min/1.73m²)"
    ) +
    scale_size_continuous(
      range = c(2, 10),
      name = "Severidad\n(Creatinina)"
    ) +
    geom_smooth(
      method = "lm", se = TRUE, color = "black",
      linetype = "dashed", linewidth = 1
    ) +
    facet_wrap(~Diagnosis, scales = "free") +
    labs(
      title = "Calidad de Vida vs Fatiga por Diagnóstico",
      subtitle = sprintf("%d pacientes por grupo | Severidad en tamaño | GFR en color", n_per_group),
      x = "Nivel de Fatiga (0-10)",
      y = "Score de Calidad de Vida (0-100)"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      strip.text = element_text(face = "bold", size = 11),
      legend.position = "right",
      panel.grid.minor = element_blank()
    )
}
#' Composite healthy-lifestyle score against GFR, coloured by diagnosis
#'
#' Builds five component scores from PhysicalActivity, DietQuality,
#' SleepQuality, Smoking and AlcoholConsumption, averages them into
#' `HealthyLifestyleScore`, samples up to 100 random rows per `Diagnosis`
#' group and plots GFR against the score with one linear fit per group.
#' Point size shows SerumCreatinine.
#'
#' The rescaling formulas assume PhysicalActivity and DietQuality lie in 0-10,
#' SleepQuality in 4-10, Smoking is 0/1 and AlcoholConsumption lies in 0-20
#' (taken from the constants in the formulas; not checked at run time).
#'
#' @param data Data frame containing the columns named above plus GFR,
#'   SerumCreatinine and Diagnosis. Diagnosis must have exactly two distinct
#'   values; they are labelled "No CKD" and "CKD" in sorted order.
#' @return A ggplot object.
plot_lifestyle_vs_gfr <- function(data) {
  group_size <- 100

  # Component scores on a 0-10 scale, then their plain average
  scored <- mutate(
    data,
    PA_score = (PhysicalActivity / 10) * 10,
    Diet_score = DietQuality,
    Sleep_score = ((SleepQuality - 4) / (10 - 4)) * 10,
    NoSmoking_score = (1 - Smoking) * 10, # inverted: non-smoker scores 10
    LowAlcohol_score = ((20 - AlcoholConsumption) / 20) * 10, # inverted
    HealthyLifestyleScore = (PA_score + Diet_score + Sleep_score +
      NoSmoking_score + LowAlcohol_score) / 5,
    Diagnosis = factor(Diagnosis, labels = c("No CKD", "CKD"))
  )

  sampled <- scored %>%
    group_by(Diagnosis) %>%
    slice_sample(n = group_size) %>%
    ungroup()

  diagnosis_colours <- c("No CKD" = "#2ecc71", "CKD" = "#e74c3c")
  plot_labels <- labs(
    title = "Estilo de Vida Saludable vs Función Renal (GFR)",
    subtitle = sprintf("Muestra: %d pacientes por grupo", group_size),
    x = "Healthy Lifestyle Score (0-10)",
    y = "GFR (mL/min/1.73m²)"
  )
  text_theme <- theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 10, color = "gray30"),
    legend.position = "right",
    panel.grid.minor = element_blank()
  )

  ggplot(sampled, aes(x = HealthyLifestyleScore, y = GFR)) +
    geom_point(aes(color = Diagnosis, size = SerumCreatinine), alpha = 0.6) +
    geom_smooth(
      aes(color = Diagnosis),
      method = "lm", se = TRUE, linewidth = 1.2
    ) +
    scale_color_manual(values = diagnosis_colours) +
    scale_size_continuous(range = c(1, 6), name = "Creatinina\n(mg/dL)") +
    plot_labels +
    theme_minimal() +
    text_theme
}
#' Build and display all seven figures
#'
#' For each figure, in a fixed order: prints a progress message with `cat()`,
#' builds the plot from `data`, prints (renders) it and stores it in a list.
#'
#' @param data Data frame passed unchanged to every plotting function.
#' @return Invisibly, a named list of the plot objects with elements
#'   `renal_pairs`, `heatmap`, `lifestyle_gfr`, `gfr_3d`, `parallel`,
#'   `demo_box` and `bubble`.
generate_all_plots <- function(data) {
  # Announce, build and render one figure; the builder is called only after
  # the message has been written so the console shows progress in order.
  render <- function(message_text, builder) {
    cat(message_text)
    figure <- builder(data)
    print(figure)
    figure
  }

  plots <- list()
  cat("Generando visualizaciones multivariables priorizadas...\n\n")

  plots$renal_pairs <- render(
    "1. Pairplot de Variables Renales (Análisis principal)...\n",
    plot_renal_pairs
  )
  plots$heatmap <- render(
    "2. Heatmap de Perfiles Clínicos (Clusters y patrones)...\n",
    plot_clinical_heatmap
  )
  plots$lifestyle_gfr <- render(
    "3. Lifestyle Score vs GFR (Impacto conductual)...\n",
    plot_lifestyle_vs_gfr
  )
  plots$gfr_3d <- render(
    "4. Scatterplot 4D (GFR-Creatinina-Edad-BMI)...\n",
    plot_gfr_3d
  )
  plots$parallel <- render(
    "5. Parallel Coordinates (Factores de Riesgo)...\n",
    plot_parallel_coordinates
  )
  plots$demo_box <- render(
    "6. Boxplots Demográficos Facetados...\n",
    plot_demographic_boxplots
  )
  plots$bubble <- render(
    "7. Bubble Chart (Calidad de Vida)...\n",
    plot_qol_bubble
  )

  cat("\n Todas las visualizaciones generadas y priorizadas correctamente.\n")
  invisible(plots)
}
# Build and display every figure for the loaded data set; the named list of
# plot objects is kept in `plots`.
plots <- generate_all_plots(ckd_data)
# The lines that followed this call were the console transcript of a previous
# run (the progress messages written by generate_all_plots()), pasted in as
# bare text. They are not valid R and stopped the file from being sourced, so
# they were removed; running the call above prints them again.







