Librerías necesarias

library(readxl)
library(pcv)
library(mdatools)
library(knitr)
library(kableExtra)
library(tidyverse)
library(ggplot2)

Carga de base de datos

data_calib = read_excel("Original base.xlsx")
data_calib
data_valid = read_excel("original base valid.xlsx")
data_valid
X = as.matrix(data_calib[, 2:101])
y = as.matrix(data_calib[,102])
names = as.factor(as.matrix(data_calib[,1]))
colhead = as.matrix(data.frame(colnames(data_calib[,2:101])))

Y = as.matrix(data_valid[, 2:101])
x = as.matrix(data_valid[,102])
nam = as.factor(as.matrix(data_valid[,1]))
colc = as.matrix(data.frame(colnames(data_valid[,2:101])))

Modelo sin pre-procesamiento para datos de calibración

model = pls(X, y,x.test = Y, y.test = x, ncomp.selcrit = "min") 
summary(model)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.746      32.963 0.330 24.789 0.330 0.0000 1.22
## Test      99.597      49.784 0.411 17.580 0.378 5.5631 1.38
rcm = model$res$cal$r2[,3]
rtm = model$res$test$r2[,3]
ecm = model$res$cal$rmse[,3]
etm = model$res$test$rmse[,3]
model1 = pls(X, y,x.test = Y, y.test = x, ncomp.selcrit = "wold") 
summary(model1)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.746      32.963 0.330 24.789 0.330 0.0000 1.22
## Test      99.597      49.784 0.411 17.580 0.378 5.5631 1.38
rcw = model1$res$cal$r2[,3]
rtw = model1$res$test$r2[,3]
ecw = model1$res$cal$rmse[,3]
etw = model1$res$test$rmse[,3]

Pre-procesamiento: Correccón de linea de base espectral para datos de calibración

pspectra = prep.snv(X)
pv1 = pcvpls(pspectra, y, 7, cv = list("ven", 4))
Xpvm1 = pls(pspectra, y , ncomp.selcrit = "min", x.test = pv1, y.test = y)
summary(Xpvm1)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       97.368      70.981 0.710 16.310 0.710  0.0000 1.86
## Test      97.263      81.467 0.673 17.322 0.712 -0.1542 1.75
rcm1 = Xpvm1$res$cal$r2[,7]
rtm1 = Xpvm1$res$test$r2[,7]
ecm1 = Xpvm1$res$cal$rmse[,7]
etm1 = Xpvm1$res$test$rmse[,7]
Xpvw1 = pls(pspectra, y , ncomp.selcrit = "wold", x.test = pv1, y.test = y)
summary(Xpvw1)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 5
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       96.437      65.428 0.654 17.802 0.654  0.0000 1.70
## Test      96.526      79.126 0.631 18.384 0.659 -0.1115 1.65
rcw1 = Xpvw1$res$cal$r2[,5]
rtw1 = Xpvw1$res$test$r2[,5]
ecw1 = Xpvw1$res$cal$rmse[,5]
etw1 = Xpvw1$res$test$rmse[,5]

Pre-procesamiento: corrección de dispersión multiplicativa para datos de calibración

pspectra = prep.msc(X)
pv2 = pcvpls(pspectra, y, 7, cv = list("ven", 4))
Xpvm2 = pls(pspectra, y , ncomp.selcrit = "min", x.test = pv2, y.test = y)
summary(Xpvm2)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       97.411      70.201 0.702 16.527 0.702  0.0000 1.83
## Test      97.308      80.593 0.668 17.434 0.702 -0.1618 1.74
rcm2 = Xpvm2$res$cal$r2[,7]
rtm2 = Xpvm2$res$test$r2[,7]
ecm2 = Xpvm2$res$cal$rmse[,7]
etm2 = Xpvm2$res$test$rmse[,7]
Xpvw2 = pls(pspectra, y , ncomp.selcrit = "wold", x.test = pv2, y.test = y)
summary(Xpvw2)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 5
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       96.493      65.146 0.651 17.874 0.651  0.0000 1.69
## Test      96.582      78.337 0.630 18.420 0.656 -0.1087 1.64
rcw2 = Xpvw2$res$cal$r2[,5]
rtw2 = Xpvw2$res$test$r2[,5]
ecw2 = Xpvw2$res$cal$rmse[,5]
etw2 = Xpvw2$res$test$rmse[,5]

Pre-procesamiento: Corrección de línea de base con mínimos cuadrados asimétricos para datos de calibración

perturbations1 = rbind(dnorm(1:ncol(X), 750, 200) * 10000,
                       dnorm(1:ncol(X), 750, 100) * 10000,
                       dnorm(1:ncol(X), 500, 100) * 10000)
perturb1 = matrix(rep(perturbations1, each = nrow(X)/nrow(perturbations1)), 
                  nrow = nrow(X), byrow = TRUE)

Y.y1 = X + perturb1

y.new1 = prep.alsbasecorr(Y.y1, plambda = 5, p = 0.05)

pv3 = pcvpls(y.new1, y,7,cv = list("ven", 4))
Xpvm3 = pls(y.new1, y, x.test = pv3, y.test = y, ncomp.selcrit = "min")
summary(Xpvm3)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       96.838      41.073 0.411 23.241 0.411  0.000 1.30
## Test      96.715      78.120 0.371 24.021 0.417 -0.078 1.26
rcm3 = Xpvm3$res$cal$r2[,7]
rtm3 = Xpvm3$res$test$r2[,7]
ecm3 = Xpvm3$res$cal$rmse[,7]
etm3 = Xpvm3$res$test$rmse[,7]
Xpvw3 = pls(y.new1 ,y, x.test = pv3, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw3)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 2
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       89.914      28.000 0.280 25.690 0.280 0.0000 1.18
## Test      89.294      74.716 0.273 25.822 0.277 0.0528 1.17
rcw3 = Xpvw3$res$cal$r2[,2]
rtw3 = Xpvw3$res$test$r2[,2]
ecw3 = Xpvw3$res$cal$rmse[,2]
etw3 = Xpvw3$res$test$rmse[,2]

Pre procesamiento: Alisados y derivados para datos de calibración

# Alisado
nspectra = X + 0.025 * matrix(rnorm(length(X)), dim(X))
pv4 = pcvpls(nspectra, y,7,cv = list("ven", 4))
Xpvm4 = pls(nspectra, y, x.test = pv4, y.test = y, ncomp.selcrit = "min")
summary(Xpvm4)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       87.013      30.454 0.305 25.249 0.305 0.0000 1.20
## Test      88.294      40.331 0.268 25.906 0.287 0.0648 1.17
rcm4 = Xpvm4$res$cal$r2[,3]
rtm4 = Xpvm4$res$test$r2[,3]
ecm4 = Xpvm4$res$cal$rmse[,3]
etm4 = Xpvm4$res$test$rmse[,3]
Xpvw4 = pls(nspectra ,y, x.test = pv4, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw4)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       87.013      30.454 0.305 25.249 0.305 0.0000 1.20
## Test      88.294      40.331 0.268 25.906 0.287 0.0648 1.17
rcw4 = Xpvw4$res$cal$r2[,3]
rtw4 = Xpvw4$res$test$r2[,3]
ecw4 = Xpvw4$res$cal$rmse[,3]
etw4 = Xpvw4$res$test$rmse[,3]

# Suavizado
pspectra = prep.savgol(nspectra, width = 15, porder = 1)
pv5 = pcvpls(pspectra, y,7,cv = list("ven", 4))
Xpvm5 = pls(pspectra, y, x.test = pv5, y.test = y, ncomp.selcrit = "min")
summary(Xpvm5)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 4
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.326      29.760 0.298 25.375 0.298  0.0000 1.19
## Test      99.329      37.577 0.285 25.596 0.294 -0.0169 1.18
rcm5 = Xpvm5$res$cal$r2[,4]
rtm5 = Xpvm5$res$test$r2[,4]
ecm5 = Xpvm5$res$cal$rmse[,4]
etm5 = Xpvm5$res$test$rmse[,4]
Xpvw5 = pls(pspectra ,y, x.test = pv5, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw5)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.068      28.478 0.285 25.605 0.285  0.0000 1.18
## Test      99.104      36.773 0.276 25.760 0.283 -0.0136 1.18
rcw5 = Xpvw5$res$cal$r2[,3]
rtw5 = Xpvw5$res$test$r2[,3]
ecw5 = Xpvw5$res$cal$rmse[,3]
etw5 = Xpvw5$res$test$rmse[,3]



# Derivada
dpspectra = prep.savgol(nspectra, width = 15, porder = 1, dorder = 1)
pv6 = pcvpls(dpspectra, y,7,cv = list("ven", 4))
Xpvm6 = pls(dpspectra, y, x.test = pv6, y.test = y, ncomp.selcrit = "min")
summary(Xpvm6)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 5
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope  Bias  RPD
## Cal       63.547      31.024 0.310 25.145 0.310 0.000 1.20
## Test      62.300      34.901 0.249 26.230 0.285 0.123 1.15
rcm6 = Xpvm6$res$cal$r2[,5]
rtm6 = Xpvm6$res$test$r2[,5]
ecm6 = Xpvm6$res$cal$rmse[,5]
etm6 = Xpvm6$res$test$rmse[,5]
Xpvw6 = pls(dpspectra ,y, x.test = pv6, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw6)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 2
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       54.911      23.708 0.237 26.445 0.237 0.0000 1.15
## Test      53.403      32.344 0.220 26.740 0.229 0.0157 1.13
rcw6 = Xpvw6$res$cal$r2[,2]
rtw6 = Xpvw6$res$test$r2[,2]
ecw6 = Xpvw6$res$cal$rmse[,2]
etw6 = Xpvw6$res$test$rmse[,2]

Pre-procesado: Transformaciones por elemento para datos de calibración

# Logarítmo
Y1 = prep.transform(X, log)
pv7 = pcvpls(Y1, y,7,cv = list("ven", 4))
Xpvm7 = pls(Y1, y, x.test = pv7, y.test = y, ncomp.selcrit = "min")
summary(Xpvm7)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.969      72.872 0.729 15.769 0.729  0.0000 1.92
## Test      99.982      77.933 0.708 16.364 0.728 -0.1238 1.85
rcm7 = Xpvm7$res$cal$r2[,7]
rtm7 = Xpvm7$res$test$r2[,7]
ecm7 = Xpvm7$res$cal$rmse[,7]
etm7 = Xpvm7$res$test$rmse[,7]
Xpvw7 = pls(Y1 ,y, x.test = pv7, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw7)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.969      72.872 0.729 15.769 0.729  0.0000 1.92
## Test      99.982      77.933 0.708 16.364 0.728 -0.1238 1.85
rcw7 = Xpvw7$res$cal$r2[,7]
rtw7 = Xpvw7$res$test$r2[,7]
ecw7 = Xpvw7$res$cal$rmse[,7]
etw7 = Xpvw7$res$test$rmse[,7]


# Potencia
Y2 = prep.transform(X, function(x, p) x^p, p = 0.2)
pv8 = pcvpls(Y2, y,7,cv = list("ven", 4))
Xpvm8 = pls(Y2, y, x.test = pv8, y.test = y, ncomp.selcrit = "min")
summary(Xpvm8)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.968      72.824 0.728 15.783 0.728  0.0000 1.92
## Test      99.974      79.844 0.709 16.341 0.729 -0.1472 1.85
rcm8 = Xpvm8$res$cal$r2[,7]
rtm8 = Xpvm8$res$test$r2[,7]
ecm8 = Xpvm8$res$cal$rmse[,7]
etm8 = Xpvm8$res$test$rmse[,7]
Xpvw8 = pls(Y2,y, x.test = pv8, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw8)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.968      72.824 0.728 15.783 0.728  0.0000 1.92
## Test      99.974      79.844 0.709 16.341 0.729 -0.1472 1.85
rcw8 = Xpvw8$res$cal$r2[,7]
rtw8 = Xpvw8$res$test$r2[,7]
ecw8 = Xpvw8$res$cal$rmse[,7]
etw8 = Xpvw8$res$test$rmse[,7]

# Cuadrado
M = prep.transform(X, function(x) x^2)
pv9 = pcvpls(M, y,7,cv = list("ven", 4))
Xpvm9 = pls(M, y, x.test = pv9, y.test = y, ncomp.selcrit = "min")
summary(Xpvm9)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.963      70.607 0.706 16.415 0.706  0.0000 1.85
## Test      99.967      75.669 0.672 17.335 0.699 -0.0313 1.75
rcm9 = Xpvm9$res$cal$r2[,7]
rtm9 = Xpvm9$res$test$r2[,7]
ecm9 = Xpvm9$res$cal$rmse[,7]
etm9 = Xpvm9$res$test$rmse[,7]
Xpvw9 = pls(M,y, x.test = pv9, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw9)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.716      33.943 0.339 24.607 0.339  0.0000 1.23
## Test      99.729      50.087 0.328 24.828 0.338 -0.0275 1.22
rcw9 = Xpvw9$res$cal$r2[,3]
rtw9 = Xpvw9$res$test$r2[,3]
ecw9 = Xpvw9$res$cal$rmse[,3]
etw9 = Xpvw9$res$test$rmse[,3]

Pre-procesamiento: Selección de variables para datos de calibración

S = prep.varsel(X, seq(2, ncol(X), by = 2))

pv10 = pcvpls(S, y,7,cv = list("ven", 4))
Xpvm10 = pls(S, y, x.test = pv10, y.test = y, ncomp.selcrit = "min")
summary(Xpvm10)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.968      71.584 0.716 16.139 0.716  0.0000 1.88
## Test      99.971      74.786 0.697 16.675 0.715 -0.1102 1.82
rcm10 = Xpvm10$res$cal$r2[,7]
rtm10 = Xpvm10$res$test$r2[,7]
ecm10 = Xpvm10$res$cal$rmse[,7]
etm10 = Xpvm10$res$test$rmse[,7]
Xpvw10 = pls(S,y, x.test = pv10, y.test = y, ncomp.selcrit = "wold")
summary(Xpvw10)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y.Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.968      71.584 0.716 16.139 0.716  0.0000 1.88
## Test      99.971      74.786 0.697 16.675 0.715 -0.1102 1.82
rcw10 = Xpvw10$res$cal$r2[,7]
rtw10 = Xpvw10$res$test$r2[,7]
ecw10 = Xpvw10$res$cal$rmse[,7]
etw10 = Xpvw10$res$test$rmse[,7]

Gráficos de R2 y RMSE para datos de calibración

tabla1 = data.frame(Modelos = paste("Modelo", 1:11),
                    R2 = c(rcm, rcm1, rcm2, rcm3, rcm4, rcm5, rcm6, rcm7, rcm8,
                           rcm9, rcm10),
                    RMSE = c(ecm, ecm1, ecm2, ecm3, ecm4, ecm5, ecm6, ecm7, ecm8, 
                             ecm9, ecm10))


datos_largos1 <- reshape2::melt(tabla1, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos <- tabla1[order(-tabla1$R2), ]

ggplot(datos, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes minimizando el RMSE",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

tabla2 = data.frame(Modelos = paste("Modelo", 1:11), 
                    R2 = c(rtm, rtm1, rtm2, rtm3, rtm4, rtm5, rtm6, rtm7, rtm8,
                                    rtm9, rtm10), 
                    RMSE = c(etm, etm1, etm2, etm3, etm4, etm5, etm6, etm7, etm8, 
                                      etm9, etm10))


datos_largos2 <- reshape2::melt(tabla2, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos2 <- tabla2[order(-tabla2$R2), ]

ggplot(datos2, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes minimizando el RMSE",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

tabla3 = data.frame(Modelos = paste("Modelo", 1:11), 
                    R2 = c(rcw, rcw1, rcw2, rcw3, rcw4, rcw5, rcw6, rcw7, rcw8,
                           rcw9, rcw10), 
                    RMSE = c(ecw, ecw1, ecw2, ecw3, ecw4, ecw5, ecw6, ecw7, ecw8, 
                             ecw9, ecw10))


datos_largos3 <- reshape2::melt(tabla3, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos3 <- tabla3[order(-tabla3$R2), ]

ggplot(datos3, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes Would",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

tabla4 = data.frame(Modelos = paste("Modelo", 1:11), 
                    R2 = c(rtw, rtw1, rtw2, rtw3, rtw4, rtw5, rtw6, rtw7, rtw8,
                           rtw9, rtw10), 
                    RMSE = c(etw, etw1, etw2, etw3, etw4, etw5, etw6, etw7, etw8, 
                             etw9, etw10))


datos_largos4 <- reshape2::melt(tabla4, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos4 <- tabla4[order(-tabla4$R2), ]

ggplot(datos4, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes Would",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

Modelo sin pre-procesamiento para datos de validación

model = pls(Y, x,x.test = X, y.test = y, ncomp.selcrit = "min") 
summary(model)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 4
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.805      61.539 0.615 14.210 0.615  0.0000 1.61
## Test      99.554      47.027 0.215 26.822 0.316 -7.5124 1.18
rcm = model$res$cal$r2[,4]
rtm = model$res$test$r2[,4]
ecm = model$res$cal$rmse[,4]
etm = model$res$test$rmse[,4]
model1 = pls(Y, x,x.test = X, y.test = y, ncomp.selcrit = "wold") 
summary(model1)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.756      56.173 0.562 15.169 0.562  0.0000 1.51
## Test      99.536      44.926 0.184 27.349 0.285 -6.2842 1.14
rcw = model1$res$cal$r2[,3]
rtw = model1$res$test$r2[,3]
ecw = model1$res$cal$rmse[,3]
etw = model1$res$test$rmse[,3]

Pre-procesado: Corrección de línea de base espectral para datos de validación

pspectra = prep.snv(Y)
pv1 = pcvpls(pspectra, x, 7, cv = list("ven", 4))
Ypvm1 = pls(pspectra, x, ncomp.selcrit = "min", x.test = pv1, y.test = x)
summary(Ypvm1)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2  RMSE Slope    Bias  RPD
## Cal       97.089      84.388 0.844 9.053 0.844  0.0000 2.53
## Test      97.033      88.506 0.815 9.848 0.830 -0.0198 2.33
rcm1 = Ypvm1$res$cal$r2[,7]
rtm1 = Ypvm1$res$test$r2[,7]
ecm1 = Ypvm1$res$cal$rmse[,7]
etm1 = Ypvm1$res$test$rmse[,7]
Ypvw1 = pls(pspectra, x, ncomp.selcrit = "wold", x.test = pv1, y.test = x)
summary(Ypvw1)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2  RMSE Slope    Bias  RPD
## Cal       97.089      84.388 0.844 9.053 0.844  0.0000 2.53
## Test      97.033      88.506 0.815 9.848 0.830 -0.0198 2.33
rcw1 = Ypvw1$res$cal$r2[,7]
rtw1 = Ypvw1$res$test$r2[,7]
ecw1 = Ypvw1$res$cal$rmse[,7]
etw1 = Ypvw1$res$test$rmse[,7]

Pre-procesamiento: corrección de dispersión multiplicativa para datos de validación

pspectra = prep.msc(Y)
pv2 = pcvpls(pspectra, x, 7, cv = list("ven", 4))
Ypvm2 = pls(pspectra, x, ncomp.selcrit = "min", x.test = pv2, y.test = x)
summary(Ypvm2)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2  RMSE Slope   Bias  RPD
## Cal       97.108      84.317 0.843 9.074 0.843  0.000 2.53
## Test      97.054      86.705 0.814 9.871 0.830 -0.029 2.32
rcm2 = Ypvm2$res$cal$r2[,7]
rtm2 = Ypvm2$res$test$r2[,7]
ecm2 = Ypvm2$res$cal$rmse[,7]
etm2 = Ypvm2$res$test$rmse[,7]
Ypvw2 = pls(pspectra, x, ncomp.selcrit = "wold", x.test = pv2, y.test = x)
summary(Ypvw2)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2  RMSE Slope   Bias  RPD
## Cal       97.108      84.317 0.843 9.074 0.843  0.000 2.53
## Test      97.054      86.705 0.814 9.871 0.830 -0.029 2.32
rcw2 = Ypvw2$res$cal$r2[,7]
rtw2 = Ypvw2$res$test$r2[,7]
ecw2 = Ypvw2$res$cal$rmse[,7]
etw2 = Ypvw2$res$test$rmse[,7]

Pre-procesamiento: Corrección de línea de base con mínimos cuadrados asimétricos para datos de validación

perturbations1 = rbind(dnorm(1:ncol(Y), 750, 200) * 10000,
                       dnorm(1:ncol(Y), 750, 100) * 10000,
                       dnorm(1:ncol(Y), 500, 100) * 10000)
perturb1 = matrix(rep(perturbations1, each = nrow(Y)/nrow(perturbations1)), 
                  nrow = nrow(Y), byrow = TRUE)

Y.y1 = Y + perturb1

y.new1 = prep.alsbasecorr(Y.y1, plambda = 5, p = 0.05)

pv3 = pcvpls(y.new1, x, 7, cv = list("ven", 4))
Ypvm3 = pls(y.new1, x, x.test = pv3, y.test = x, ncomp.selcrit = "min")
summary(Ypvm3)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       98.374      61.720 0.617 14.176 0.617  0.0000 1.62
## Test      98.217      83.342 0.579 14.862 0.629 -0.2257 1.54
rcm3 = Ypvm3$res$cal$r2[,7]
rtm3 = Ypvm3$res$test$r2[,7]
ecm3 = Ypvm3$res$cal$rmse[,7]
etm3 = Ypvm3$res$test$rmse[,7]
Ypvw3 = pls(y.new1, x, x.test = pv3, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw3)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 4
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       87.970      56.797 0.568 15.060 0.568 0.0000 1.52
## Test      77.393      81.867 0.542 15.506 0.558 0.1276 1.48
rcw3 = Ypvw3$res$cal$r2[,4]
rtw3 = Ypvw3$res$test$r2[,4]
ecw3 = Ypvw3$res$cal$rmse[,4]
etw3 = Ypvw3$res$test$rmse[,4]

Pre procesamiento: Alisados y derivados para datos de validación

# Alisados
nspectra = Y + 0.025 * matrix(rnorm(length(Y)), dim(Y))
pv4 = pcvpls(nspectra, x, 7, cv = list("ven", 4))
Ypvm4 = pls(nspectra, x, x.test = pv4, y.test = x, ncomp.selcrit = "min")
summary(Ypvm4)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       82.211      61.266 0.613 14.260 0.613  0.0000 1.61
## Test      81.217      65.108 0.499 16.222 0.531 -0.0661 1.41
rcm4 = Ypvm4$res$cal$r2[,3]
rtm4 = Ypvm4$res$test$r2[,3]
ecm4 = Ypvm4$res$cal$rmse[,3]
etm4 = Ypvm4$res$test$rmse[,3]
Ypvw4 = pls(nspectra, x, x.test = pv4, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw4)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       82.211      61.266 0.613 14.260 0.613  0.0000 1.61
## Test      81.217      65.108 0.499 16.222 0.531 -0.0661 1.41
rcw4 = Ypvw4$res$cal$r2[,2]
rtw4 = Ypvw4$res$test$r2[,2]
ecw4 = Ypvw4$res$cal$rmse[,2]
etw4 = Ypvw4$res$test$rmse[,2]

# Suavizado
pspectra = prep.savgol(nspectra, width = 15, porder = 1)
pv5 = pcvpls(pspectra, x,7,cv = list("ven", 4))
Ypvm5 = pls(pspectra, x, x.test = pv5, y.test = x, ncomp.selcrit = "min")
summary(Ypvm5)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 5
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.138      53.286 0.533 15.660 0.533  0.000 1.47
## Test      98.933      65.761 0.508 16.071 0.524 -0.082 1.43
rcm5 = Ypvm5$res$cal$r2[,3]
rtm5 = Ypvm5$res$test$r2[,3]
ecm5 = Ypvm5$res$cal$rmse[,3]
etm5 = Ypvm5$res$test$rmse[,3]
Ypvw5 = pls(pspectra, x, x.test = pv5, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw5)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       98.810      51.152 0.512 16.014 0.512  0.0000 1.43
## Test      98.496      65.265 0.501 16.187 0.510 -0.0603 1.42
rcw5 = Ypvw5$res$cal$r2[,3]
rtw5 = Ypvw5$res$test$r2[,3]
ecw5 = Ypvw5$res$cal$rmse[,3]
etw5 = Ypvw5$res$test$rmse[,3]

# Derivada
dpspectra = prep.savgol(nspectra, width = 15, porder = 1, dorder = 1)
pv6 = pcvpls(dpspectra, x, 7,cv = list("ven", 4))
Ypvm6 = pls(dpspectra, x, x.test = pv6, y.test = x, ncomp.selcrit = "min")
summary(Ypvm6)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 5
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       54.542      47.936 0.479 16.533 0.479 0.0000 1.39
## Test      53.235      49.375 0.303 19.128 0.393 0.0249 1.20
rcm6 = Ypvm6$res$cal$r2[,6]
rtm6 = Ypvm6$res$test$r2[,6]
ecm6 = Ypvm6$res$cal$rmse[,6]
etm6 = Ypvm6$res$test$rmse[,6]
Ypvw6 = pls(dpspectra, x, x.test = pv6, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw6)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 2
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       43.770      30.644 0.306 19.082 0.306 0.0000 1.20
## Test      42.849      45.753 0.253 19.800 0.276 0.1213 1.16
rcw6 = Ypvw6$res$cal$r2[,2]
rtw6 = Ypvw6$res$test$r2[,2]
ecw6 = Ypvw6$res$cal$rmse[,2]
etw6 = Ypvw6$res$test$rmse[,2]

Pre-procesado: Transformaciones por elemento para datos de validación

# logarítmo
Y1 <- prep.transform(Y, log)
pv7 = pcvpls(Y1, x, 7,cv = list("ven", 4))
Ypvm7 = pls(Y1, x, x.test = pv7, y.test = x, ncomp.selcrit = "min")
summary(Ypvm7)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.966      83.226 0.832  9.384 0.832 0.0000 2.45
## Test      99.965      82.142 0.792 10.451 0.807 0.0108 2.20
rcm7 = Ypvm7$res$cal$r2[,7]
rtm7 = Ypvm7$res$test$r2[,7]
ecm7 = Ypvm7$res$cal$rmse[,7]
etm7 = Ypvm7$res$test$rmse[,7]
Ypvw7 = pls(Y1, x, x.test = pv7, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw7)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.966      83.226 0.832  9.384 0.832 0.0000 2.45
## Test      99.965      82.142 0.792 10.451 0.807 0.0108 2.20
rcw7 = Ypvw7$res$cal$r2[,7]
rtw7 = Ypvw7$res$test$r2[,7]
ecw7 = Ypvw7$res$cal$rmse[,7]
etw7 = Ypvw7$res$test$rmse[,7]


# Potencia
Y2 <- prep.transform(Y, function(x, p) x^p, p = 0.2)
pv8 = pcvpls(Y2, x, 7,cv = list("ven", 4))
Ypvm8 = pls(Y2, x, x.test = pv8, y.test = x, ncomp.selcrit = "min")
summary(Ypvm8)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope  Bias  RPD
## Cal       99.964      82.817 0.828  9.498 0.828 0.000 2.42
## Test      99.963      82.360 0.788 10.539 0.805 0.035 2.18
rcm8 = Ypvm8$res$cal$r2[,7]
rtm8 = Ypvm8$res$test$r2[,7]
ecm8 = Ypvm8$res$cal$rmse[,7]
etm8 = Ypvm8$res$test$rmse[,7]
Ypvw8 = pls(Y2, x, x.test = pv8, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw8)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope  Bias  RPD
## Cal       99.964      82.817 0.828  9.498 0.828 0.000 2.42
## Test      99.963      82.360 0.788 10.539 0.805 0.035 2.18
rcw8 = Ypvw8$res$cal$r2[,7]
rtw8 = Ypvw8$res$test$r2[,7]
ecw8 = Ypvw8$res$cal$rmse[,7]
etw8 = Ypvw8$res$test$rmse[,7]

# Cuadrado
M <- prep.transform(Y, function(x) x^2)
pv9 = pcvpls(M, x, 7,cv = list("ven", 4))
Ypvm9 = pls(M, x, x.test = pv9, y.test = x, ncomp.selcrit = "min")
summary(Ypvm9)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.952      76.348 0.763 11.143 0.763 0.0000 2.06
## Test      99.949      82.166 0.723 12.064 0.748 0.0604 1.90
rcm9 = Ypvm9$res$cal$r2[,7]
rtm9 = Ypvm9$res$test$r2[,7]
ecm9 = Ypvm9$res$cal$rmse[,7]
etm9 = Ypvm9$res$test$rmse[,7]
Ypvw9 = pls(M, x, x.test = pv9, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw9)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 3
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope    Bias  RPD
## Cal       99.682      56.832 0.568 15.054 0.568  0.0000 1.52
## Test      99.533      71.884 0.563 15.148 0.570 -0.0388 1.51
rcw9 = Ypvw9$res$cal$r2[,3]
rtw9 = Ypvw9$res$test$r2[,3]
ecw9 = Ypvw9$res$cal$rmse[,3]
etw9 = Ypvw9$res$test$rmse[,3]

Pre-procesamiento: Selección de variables para datos de validación

S <- prep.varsel(Y, seq(2, ncol(Y), by = 2))

pv10 = pcvpls(S, x, 7,cv = list("ven", 4))
Ypvm10 = pls(S, x, x.test = pv10, y.test = x, ncomp.selcrit = "min")
summary(Ypvm10)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 7
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.960      75.069 0.751 11.441 0.751 0.0000 2.01
## Test      99.958      79.830 0.729 11.920 0.739 0.0143 1.92
rcm10 = Ypvm10$res$cal$r2[,7]
rtm10 = Ypvm10$res$test$r2[,7]
ecm10 = Ypvm10$res$cal$rmse[,7]
etm10 = Ypvm10$res$test$rmse[,7]
Ypvw10 = pls(S, x, x.test = pv10, y.test = x, ncomp.selcrit = "wold")
summary(Ypvw10)
## 
## PLS model (class pls) summary
## -------------------------------
## Info: 
## Number of selected components: 5
## Cross-validation: none
## 
## Response variable: Y-Values
##      X cumexpvar Y cumexpvar    R2   RMSE Slope   Bias  RPD
## Cal       99.850      66.098 0.661 13.341 0.661 0.0000 1.72
## Test      99.791      73.999 0.651 13.534 0.656 0.0047 1.70
rcw10 = Ypvw10$res$cal$r2[,5]
rtw10 = Ypvw10$res$test$r2[,5]
ecw10 = Ypvw10$res$cal$rmse[,5]
etw10 = Ypvw10$res$test$rmse[,5]

Gráficos de R2 y RMSE para datos de validación

tabla1 = data.frame(Modelos = paste("Modelo", 1:11),
                    R2 = c(rcm, rcm1, rcm2, rcm3, rcm4, rcm5, rcm6, rcm7, rcm8,
                           rcm9, rcm10),
                    RMSE = c(ecm, ecm1, ecm2, ecm3, ecm4, ecm5, ecm6, ecm7, ecm8, 
                             ecm9, ecm10))


datos_largos1 <- reshape2::melt(tabla1, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos <- tabla1[order(-tabla1$R2), ]

ggplot(datos, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes minimizando el RMSE",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

tabla2 = data.frame(Modelos = paste("Modelo", 1:11), 
                    R2 = c(rtm, rtm1, rtm2, rtm3, rtm4, rtm5, rtm6, rtm7, rtm8,
                           rtm9, rtm10), 
                    RMSE = c(etm, etm1, etm2, etm3, etm4, etm5, etm6, etm7, etm8, 
                             etm9, etm10))


datos_largos2 <- reshape2::melt(tabla2, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos2 <- tabla2[order(-tabla2$R2), ]

ggplot(datos2, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes minimizando el RMSE",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

tabla3 = data.frame(Modelos = paste("Modelo", 1:11), 
                    R2 = c(rcw, rcw1, rcw2, rcw3, rcw4, rcw5, rcw6, rcw7, rcw8,
                           rcw9, rcw10), 
                    RMSE = c(ecw, ecw1, ecw2, ecw3, ecw4, ecw5, ecw6, ecw7, ecw8, 
                             ecw9, ecw10))


datos_largos3 <- reshape2::melt(tabla3, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos3 <- tabla3[order(-tabla3$R2), ]

ggplot(datos3, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes Would",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

tabla4 = data.frame(Modelos = paste("Modelo", 1:11), 
                    R2 = c(rtw, rtw1, rtw2, rtw3, rtw4, rtw5, rtw6, rtw7, rtw8,
                           rtw9, rtw10), 
                    RMSE = c(etw, etw1, etw2, etw3, etw4, etw5, etw6, etw7, etw8, 
                             etw9, etw10))


datos_largos4 <- reshape2::melt(tabla4, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos4 <- tabla4[order(-tabla4$R2), ]

ggplot(datos4, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), linewidth = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), linewidth = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE")
  ) +
  scale_color_manual(
    values = c("R2" = "#1f77b4", "RMSE" = "#ff7f0e"), 
    labels = c(expression(R^2), "RMSE")
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos",
    subtitle = "Con validación cruzada \nMetodología: selección de componentes Would",
    x = "Modelo",
    y = expression(R^2),
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray80"),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

Anéxos

tabla5 = data.frame(Modelos = paste("Modelo", 1:11), 
                     Preprocesamiento = c("Sin pre-procesamiento", "Corrección de linea base expectral (SNV)", 
                                          "Correción de dispersión multiplicativa (MSC)", 
                                          "Correción de linea de base con mínimos cuadrados asiméticos (ALS)",
                                          "Alisado", "Suavizado", "Derivado", "Tranformación logaritmica", "Transformación pontencia", 
                                          "Tranformación cuadrado", "Selección de variables"))
tabla5 |> 
  kable(
    format = "html",  # Cambia a "latex" si exportas a PDF
    caption = "Tabla 1. Resumen de Modelos y Preprocesamientos",
    col.names = c("Modelos", "Preprocesamiento"),  # Títulos de las columnas
    align = "c",  # Centrar las columnas
    booktabs = TRUE
  ) |> 
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center",
    font_size = 12
  ) |> 
  row_spec(0, bold = TRUE, font_size = 14, background = "#D3D3D3") |>   # Encabezado destacado
  column_spec(1, bold = TRUE, color = "black", width = "8em") |>   # Primera columna con ancho fijo
  column_spec(2, background = "#F7F7F7", width = "30em") |>   # Fondo gris para la descripción
  footnote(
    general = "Los preprocesamientos listados son opciones comunes para análisis estadísticos y machine learning.",
    general_title = "Nota:"
  )
Tabla 1. Resumen de Modelos y Preprocesamientos
Modelos Preprocesamiento
Modelo 1 Sin pre-procesamiento
Modelo 2 Corrección de linea base expectral (SNV)
Modelo 3 Correción de dispersión multiplicativa (MSC)
Modelo 4 Correción de linea de base con mínimos cuadrados asiméticos (ALS)
Modelo 5 Alisado
Modelo 6 Suavizado
Modelo 7 Derivado
Modelo 8 Tranformación logaritmica
Modelo 9 Transformación pontencia
Modelo 10 Tranformación cuadrado
Modelo 11 Selección de variables
Nota:
Los preprocesamientos listados son opciones comunes para análisis estadísticos y machine learning.