library(readxl)
library(pcv)
## Warning: package 'pcv' was built under R version 4.4.2
library(mdatools)
## Warning: package 'mdatools' was built under R version 4.4.2
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.2
library(kableExtra)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
data_calib = read_excel("PROMEDIOS_C_SAC_NIR.xlsx")
data_valid = read_excel("PROMEDIOS_V_SAC_NIR.xlsx")

X = as.matrix(data_calib[, 2:101])
y = as.matrix(data_calib[,102])
names = as.factor(as.matrix(data_calib[,1]))
colhead = as.matrix(data.frame(colnames(data_calib[,2:101])))

Y = as.matrix(data_valid[, 2:101])
x = as.matrix(data_valid[,102])
nam = as.factor(as.matrix(data_valid[,1]))
colc = as.matrix(data.frame(colnames(data_valid[,2:101])))

Se llevó a cabo un análisis de NIR [explicación cientifica de plantitas y nitrógeno]. Por lo que, se obtuvo una base de matriz de datos de 189x100, por esto se realizó un análisis multivariante, llevado a cabo con modelamiento lineal parcial. Esto es llevado a cabo con una base de datos de calibración (189x100) y otra de validación (72x100).

Se realizó un pre-procesado de los datos, esto consiste en realizar ciertas transformaciones matemáticas para así sobrellevar mejor el análisis.

En primer lugar, se analizaron las componentes principales asociadas a la base original, sin un pre-procesamiento.

Esto nor otorgó visualizar el comportamiento de dichas componentes.

m = mdatools::pca(X, 7, scale = TRUE, info = "People PCA model")
m = selectCompNum(m, 5)
res = predict(m, X)

1. Modelo sin pre-procesamiento

1.1 Modelo solo de validación cruzada

Modelo para datos de calibración

model0.1 = pls(X, y, cv = list("ven", 4), ncomp.selcrit = "min", cv.scope = "global") 

model0.2 = pls(X, y, cv = list("ven", 4), ncomp.selcrit = "wold", cv.scope = "global") 

Modelo para datos de validación

model0.3 = pls(Y, x, cv = list("ven", 4), ncomp.selcrit = "min", cv.scope = "global") 
show(model0.3$ncomp.selected)
## [1] 4
model0.4 = pls(Y, x, cv = list("ven", 4), ncomp.selcrit = "wold", cv.scope = "global") 
show(model0.4$ncomp.selected)
## [1] 4
tabla1 = data.frame(Modelos = c("Modelo 1","Modelo 2", "Modelo 3","Modelo 4"), 
                    R2 = c(model0.1$res$cal$r2[, 7], model0.2$res$cal$r2[, 3], model0.3$res$cal$r2[, 7], model0.4$res$cal$r2[, 3] )  |> round(3), 
                    RMSE = c(model0.1$res$cal$rmse[,7], model0.2$res$cal$rmse[,3], model0.3$res$cal$rmse[, 7], model0.4$res$cal$rmse[, 3]) |> round(3))

datos_largos <- reshape2::melt(tabla1, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos <- tabla1[order(-tabla1$R2), ]

ggplot(datos, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), size = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (normalizado a escala de R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), 
            size = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), 
             size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE"),
    limits = c(0, max(datos$R2) * 1.1)  # Espacio extra arriba
  ) +
  scale_color_manual(
    values = c("R2" = "#377eb8", "RMSE" = "#e41a1c"),  # Colores distintivos
    labels = c(expression(R^2), "RMSE")  # Etiquetas en la leyenda
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos de Datos de Calibración",
    subtitle = "Evaluación mediante validación cruzada y minimización del RMSE",
    x = "Modelos",
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray85"),
    panel.grid.minor = element_blank()
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

tabla2 = data.frame(Modelos = c("Modelo 1","Modelo 2", "Modelo 3","Modelo 4"), 
                    R2 = c(model0.1$res$cv$r2[, 7], model0.2$res$cv$r2[, 3], model0.3$res$cv$r2[,7], model0.4$res$cv$r2[,3])  |> round(3), 
                    RMSE = c(model0.1$res$cv$r2[,7], model0.2$res$cv$rmse[,3], model0.3$res$cv$rmse[, 7], model0.4$res$cv$rmse[, 3]) |> round(3))

datos_largos <- reshape2::melt(tabla1, id.vars = "Modelos", variable.name = "Tipo", value.name = "R2")

datos <- tabla2[order(-tabla2$R2), ]

ggplot(datos, aes(x = reorder(Modelos, -R2))) + 
  # Línea y puntos para R2
  geom_line(aes(y = R2, group = 1, color = "R2"), size = 1.2) +
  geom_point(aes(y = R2, color = "R2"), size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Línea y puntos para RMSE (escalado a R2)
  geom_line(aes(y = RMSE / max(RMSE) * max(R2), group = 1, color = "RMSE"), 
            size = 1.2, linetype = "dashed") +
  geom_point(aes(y = RMSE / max(RMSE) * max(R2), color = "RMSE"), 
             size = 3, shape = 21, fill = "white", stroke = 1.2) +
  # Escalas y ejes
  scale_y_continuous(
    name = expression(R^2), 
    sec.axis = sec_axis(~ . * max(datos$RMSE) / max(datos$R2), name = "RMSE"),
    limits = c(0, max(datos$R2) * 1.1)  # Espacio extra para que los puntos no queden al borde
  ) +
  scale_color_manual(
    values = c("R2" = "#4daf4a", "RMSE" = "#984ea3"),  # Colores personalizados
    labels = c(expression(R^2), "RMSE")  # Etiquetas claras
  ) +
  # Etiquetas
  labs(
    title = "Comparación de R² y RMSE entre Modelos de Validación",
    subtitle = "Evaluación de métricas en datos de validación",
    x = "Modelos",
    color = "Métrica"
  ) +
  # Tema estilizado
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 10)),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.y.right = element_text(face = "bold", size = 12),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(size = 0.5, color = "gray85"),
    panel.grid.minor = element_blank()
  )

2. Pre-procesado con escalado automático

2.1 Centralización con la media

2.1.1 Datos de calibración
data1.1 = as.matrix(prep.autoscale(X, center = TRUE, scale = FALSE))

pv.1 = pcvpls(data1.1, y, 7, cv = list("ven", 4))
Xpvm.1 = pls(data1.1, y, ncomp.selcrit = "min", x.test = pv.1, y.test = y)
Xpvw.1 = pls(data1.1, y, ncomp.selcrit = "wold", x.test = pv.1, y.test = y)
Xpvm.1$res$cal$r2[,7]
## [1] 0.8785249
Xpvm.1$res$cal$rmse[,7] 
## [1] 10.55235
Xpvm.1$res$test$r2[,7]
## [1] 0.8442886
Xpvm.1$res$test$rmse[,7] 
## [1] 11.9472
Xpvw.1$res$cal$rmse[,3]
## [1] 21.39817
Xpvw.1$res$cal$r2[,3]
## [1] 0.5004929
Xpvw.1$res$test$rmse[,3]
## [1] 21.64469
Xpvw.1$res$test$r2[,3]
## [1] 0.4889171
## 7 3 
2.1.2 Datos de Validación
data1.2 = as.matrix(prep.autoscale(Y, center = TRUE, scale = FALSE))

pv.2 = pcvpls(data1.2, x, 7, cv = list("ven", 4))
Xpvm.2 = pls(data1.2, x, ncomp.selcrit = "min", x.test = pv.2, y.test = x)
Xpvw.2 = pls(data1.2, x, ncomp.selcrit = "wold", x.test = pv.2, y.test = x)
show(Xpvm.2$ncomp.selected) ; show(Xpvw.2$ncomp.selected) ##4 
## [1] 4
## [1] 4
Xpvm.2$res$cal$r2[,4]
## [1] 0.8349013
Xpvm.2$res$cal$rmse[,4]
## [1] 9.310039
Xpvm.2$res$test$rmse[,4]
## [1] 9.612195
Xpvm.2$res$test$r2[,4]
## [1] 0.8240109
Xpvw.2$res$cal$r2[,4]
## [1] 0.8349013
Xpvw.2$res$cal$rmse[,4]
## [1] 9.310039
Xpvw.2$res$test$rmse[,4]
## [1] 9.612195
Xpvw.2$res$test$r2[,4]
## [1] 0.8240109

2.2 Estandarización sin centralizado

2.2.1 Datos de calibración
data2.1 = prep.autoscale(X, center = FALSE, scale = TRUE)

pv1.1 = pcvpls(data2.1, y, 7, cv = list("ven", 4))
Xpvm1.1 = pls(data2.1, y , ncomp.selcrit = "min", x.test = pv1.1, y.test = y)
Xpvw1.1 = pls(data2.1, y, ncomp.selcrit = "wold", x.test = pv1.1, y.test = y)
show(Xpvm1.1$ncomp.selected) ; show(Xpvw1.1$ncomp.selected) ## 7
## [1] 7
## [1] 7
Xpvm1.1$res$cal$r2[,7]
## [1] 0.8847073
Xpvm1.1$res$cal$rmse[,7]
## [1] 10.28032
Xpvm1.1$res$test$rmse[,7]
## [1] 11.67689
Xpvm1.1$res$test$r2[,7]
## [1] 0.8512547
Xpvw1.1$res$cal$r2[,7]
## [1] 0.8847073
Xpvw1.1$res$cal$rmse[,7]
## [1] 10.28032
Xpvw1.1$res$test$rmse[,7]
## [1] 11.67689
Xpvw1.1$res$test$r2[,7]
## [1] 0.8512547
2.2.2 Datos de validación
data2.2 = prep.autoscale(X, center = FALSE, scale = TRUE)

pv1.2 = pcvpls(data2.2, y, 7, cv = list("ven", 4))
Xpvm1.2 = pls(data2.2, y, ncomp.selcrit = "min", x.test = pv1.2, y.test = y)
Xpvw1.2 = pls(data2.2, y, ncomp.selcrit = "wold", x.test = pv1.2, y.test = y)
show(Xpvm1.2$ncomp.selected) ; show(Xpvw1.2$ncomp.selected) #7
## [1] 7
## [1] 7
Xpvm1.2$res$cal$r2[,7]
## [1] 0.8847073
Xpvm1.2$res$cal$rmse[,7]
## [1] 10.28032
Xpvm1.2$res$test$rmse[,7]
## [1] 11.67689
Xpvm1.2$res$test$r2[,7]
## [1] 0.8512547
Xpvw1.2$res$cal$r2[,7]
## [1] 0.8847073
Xpvw1.2$res$test$r2[,7]
## [1] 0.8512547
Xpvw1.2$res$test$rmse[,7]
## [1] 11.67689
Xpvw1.2$res$cal$rmse[,7]
## [1] 10.28032

2.3 Centralizado con la media y estandarizado

2.3.1 Datos de calibración
data3.1 = prep.autoscale(X, center = TRUE, scale = TRUE)

pv2.1 = pcvpls(data3.1, y, 7, cv = list("ven", 4))
Xpvm2.1 = pls(data3.1, y, ncomp.selcrit = "min", x.test = pv2.1, y.test = y)
Xpvw2.1 = pls(data3.1, y, ncomp.selcrit = "wold", x.test = pv2.1, y.test = y)
show(Xpvm2.1$ncomp.selected) ; show(Xpvw2.1$ncomp.selected) #7
## [1] 7
## [1] 7
Xpvm2.1$res$cal$r2[,7]
## [1] 0.8847073
Xpvm2.1$res$cal$rmse[,7]
## [1] 10.28032
Xpvm2.1$res$test$rmse[,7]
## [1] 11.67689
Xpvm2.1$res$test$r2[,7]
## [1] 0.8512547
Xpvw2.1$res$cal$r2[,7]
## [1] 0.8847073
Xpvw2.1$res$cal$rmse[,7]
## [1] 10.28032
Xpvw2.1$res$test$rmse[,7]
## [1] 11.67689
Xpvw2.1$res$test$r2[,7]
## [1] 0.8512547
2.3.2 Datos de validación
data3.2 = prep.autoscale(Y, center = TRUE, scale = TRUE)

pv2.2 = pcvpls(data3.2, x, 7, cv = list("ven", 4))
Xpvm2.2 = pls(data3.2, x, ncomp.selcrit = "min", x.test = pv2.2, y.test = x)
Xpvw2.2 = pls(data3.2, x, ncomp.selcrit = "wold", x.test = pv2.2, y.test = x)
show(Xpvm2.2$ncomp.selected) ; show(Xpvw2.2$ncomp.selected) #4
## [1] 4
## [1] 4
Xpvm2.2$res$cal$r2[,4]
## [1] 0.8348049
Xpvm2.2$res$cal$rmse[,4]
## [1] 9.312757
Xpvm2.2$res$test$rmse[,4]
## [1] 9.605713
Xpvm2.2$res$test$r2[,4]
## [1] 0.8242481
Xpvw2.2$res$cal$r2[,4]
## [1] 0.8348049
Xpvw2.2$res$cal$rmse[,4]
## [1] 9.312757
Xpvw2.2$res$test$rmse[,4]
## [1] 9.605713
Xpvw2.2$res$test$r2[,4]
## [1] 0.8242481

2.4 Centralizado en la mediana y estandarizado

2.4.1 Datos de calibración
data4.1 = prep.autoscale(X, center = apply(X, 2, median), scale = TRUE)
pv3.1 = pcvpls(data4.1, y, 7, cv = list("ven", 4))
Xpvm3.1 = pls(data4.1, y, ncomp.selcrit = "min", x.test = pv3.1, y.test = y)
Xpvw3.1= pls(data4.1, y, ncomp.selcrit = "wold", x.test = pv3.1, y.test = y)
show(Xpvm3.1$ncomp.selected) ; show(Xpvw3.1$ncomp.selected) #7
## [1] 7
## [1] 7
Xpvm3.1$res$cal$r2[,7]
## [1] 0.8847073
Xpvm3.1$res$cal$rmse[,7]
## [1] 10.28032
Xpvm3.1$res$test$rmse[,7]
## [1] 11.67689
Xpvm3.1$res$test$r2[,7]
## [1] 0.8512547
Xpvw3.1$res$cal$r2[,7]
## [1] 0.8847073
Xpvw3.1$res$cal$rmse[,7]
## [1] 10.28032
Xpvw3.1$res$test$rmse[,7]
## [1] 11.67689
Xpvw3.1$res$test$r2[,7]
## [1] 0.8512547
2.4.2 Datos de validación
data4.2 = prep.autoscale(Y, center = apply(Y, 2, median), scale = TRUE)
pv3.2 = pcvpls(data4.2, x, 7, cv = list("ven", 4))
Xpvm3.2 = pls(data4.2, x, ncomp.selcrit = "min", x.test = pv3.2, y.test = x)
Xpvw3.2 = pls(data4.2, x, ncomp.selcrit = "wold", x.test = pv3.2, y.test = x)
show(Xpvm3.2$ncomp.selected) ; show(Xpvw3.2$ncomp.selected) #4
## [1] 4
## [1] 4
Xpvm3.2$res$cal$r2[,4]
## [1] 0.8348049
Xpvm3.2$res$cal$rmse[,4]
## [1] 9.312757
Xpvm3.2$res$test$rmse[,4]
## [1] 9.605713
Xpvm3.2$res$test$r2[,4]
## [1] 0.8242481
Xpvw3.2$res$cal$r2[,4]
## [1] 0.8348049
Xpvw3.2$res$cal$rmse[,4]
## [1] 9.312757
Xpvw3.2$res$test$rmse[,4]
## [1] 9.605713
Xpvw3.2$res$test$r2[,4]
## [1] 0.8242481

3. Pre-procesamiento con corrección de linea base espectral

3.1 Variación normal estándar

3.1.1 Datos de calibración
pspectra1 = prep.snv(X)
pv4.1 = pcvpls(pspectra1, y, 7, cv = list("ven", 4))
Xpvm4.1 = pls(pspectra1, y , ncomp.selcrit = "min", x.test = pv4.1, y.test = y)
Xpvw4.1 = pls(pspectra1, y , ncomp.selcrit = "wold", x.test = pv4.1, y.test = y)
Xpv4.1 = pls(pspectra1, y , 3, x.test = pv4.1, y.test = y)
show(Xpvm4.1$ncomp.selected) ; show(Xpvw4.1$ncomp.selected) # 5 1 
## [1] 5
## [1] 1
Xpvm4.1$res$cal$r2[,5]
## [1] 0.8439183
Xpvm4.1$res$cal$rmse[,5]
## [1] 11.96139
Xpvm4.1$res$test$rmse[,5]
## [1] 12.91009
Xpvm4.1$res$test$r2[,5]
## [1] 0.8181778
Xpvw4.1$res$cal$r2[,1]
## [1] 0.3733491
Xpvw4.1$res$cal$rmse[,1]
## [1] 23.96727
Xpvw4.1$res$test$rmse[,1]
## [1] 24.11487
Xpvw4.1$res$test$r2[,1]
## [1] 0.3656072
3.1.2 Datos de validación
pspectra2 = prep.snv(Y)
pv4.2 = pcvpls(pspectra2, x, 7, cv = list("ven", 4))
Xpvm4.2 = pls(pspectra2, x, ncomp.selcrit = "min", x.test = pv4.2, y.test = x)
Xpvw4.2 = pls(pspectra2, x, ncomp.selcrit = "wold", x.test = pv4.2, y.test = x)
Xpv4.2 = pls(pspectra2, x, 3, x.test = pv4.2, y.test = x)
show(Xpvm4.2$ncomp.selected) ; show(Xpvw4.2$ncomp.selected) #4
## [1] 4
## [1] 4
Xpvm4.2$res$cal$r2[,4]
## [1] 0.8451403
Xpvm4.2$res$cal$rmse[,4]
## [1] 9.016727
Xpvm4.2$res$test$rmse[,4]
## [1] 10.11867
Xpvm4.2$res$test$r2[,4]
## [1] 0.8049763
Xpvw4.2$res$cal$r2[,4]
## [1] 0.8451403
Xpvw4.2$res$cal$rmse[,4]
## [1] 9.016727
Xpvw4.2$res$test$rmse[,4]
## [1] 10.11867
Xpvw4.2$res$test$r2[,4]
## [1] 0.8049763

3.2 Corrección de dispersión multiplicativa

3.2.1 Datos de calibración
pspectra2.1 = prep.msc(X)
pv5.1 = pcvpls(pspectra2.1, y, 7, cv = list("ven", 4))
Xpvm5.1 = pls(pspectra2.1, y , ncomp.selcrit = "min", x.test = pv5.1, y.test = y)
Xpvw5.1 = pls(pspectra2.1, y , ncomp.selcrit = "wold", x.test = pv5.1, y.test = y)
Xpv5.1 = pls(pspectra2.1, y , 4, x.test = pv5.1, y.test = y)
show(Xpvm5.1$ncomp.selected) ; show(Xpvw5.1$ncomp.selected) #5 1 
## [1] 5
## [1] 1
Xpvm5.1$res$cal$r2[,5]
## [1] 0.8433928
Xpvm5.1$res$cal$rmse[,5]
## [1] 11.98151
Xpvm5.1$res$test$rmse[,5]
## [1] 12.91808
Xpvm5.1$res$test$r2[,5]
## [1] 0.8179525
Xpvw5.1$res$cal$r2[,5]
## [1] 0.8433928
Xpvw5.1$res$cal$rmse[,5]
## [1] 11.98151
Xpvw5.1$res$test$rmse[,5]
## [1] 12.91808
Xpvw5.1$res$test$r2[,5]
## [1] 0.8179525
3.2.2 Datos de validación
pspectra2.2 = prep.msc(Y)
pv5.2 = pcvpls(pspectra2.2, x, 7, cv = list("ven", 4))
Xpvm5.2 = pls(pspectra2.2, x , ncomp.selcrit = "min", x.test = pv5.2, y.test = x)
Xpvw5.2 = pls(pspectra2.2, x , ncomp.selcrit = "wold", x.test = pv5.2, y.test = x)
Xpv5.2 = pls(pspectra2.2, x , 4, x.test = pv5.2, y.test = x)
show(Xpvm5.2$ncomp.selected) ; show(Xpvw5.2$ncomp.selected) #4
## [1] 4
## [1] 4
Xpvm5.2$res$cal$r2[,4]
## [1] 0.8450368
Xpvm5.2$res$cal$rmse[,4]
## [1] 9.019738
Xpvm5.2$res$test$rmse[,4]
## [1] 10.11989
Xpvm5.2$res$test$r2[,4]
## [1] 0.8049294
Xpvw5.2$res$cal$r2[,4]
## [1] 0.8450368
Xpvw5.2$res$cal$rmse[,4]
## [1] 9.019738
Xpvw5.2$res$test$rmse[,4]
## [1] 10.11989
Xpvw5.2$res$test$r2[,4]
## [1] 0.8049294

3.3 Corrección de linea de base con mínimos cuadrados asimétricos

3.3.1 Datos de calibración
perturbations1 = rbind(dnorm(1:ncol(X), 750, 200) * 10000,
                      dnorm(1:ncol(X), 750, 100) * 10000,
                      dnorm(1:ncol(X), 500, 100) * 10000)
perturb1 = matrix(rep(perturbations1, each = nrow(X)/nrow(perturbations1)), 
                                 nrow = nrow(X), byrow = TRUE)

Y.y1 = X + perturb1

y.new1 = prep.alsbasecorr(Y.y1, plambda = 5, p = 0.05)

pv6.1 = pcvpls(y.new1, y,7,cv = list("ven", 4))
Xpvm6.1 = pls(y.new1, y, x.test = pv6.1, y.test = y, ncomp.selcrit = "min")
Xpvw6.1 = pls(y.new1 ,y, x.test = pv6.1, y.test = y, ncomp.selcrit = "wold")

show(Xpvm6.1$ncomp.selected) ; show(Xpvw6.1$ncomp.selected) #8 6 
## [1] 7
## [1] 6
Xpvm6.1$res$cal$r2[,8]
## [1] 0.6356894
Xpvm6.1$res$cal$rmse[,8]
## [1] 18.27434
Xpvm6.1$res$test$rmse[,8]
## [1] 21.97193
Xpvm6.1$res$test$r2[,8]
## [1] 0.4733464
Xpvw6.1$res$cal$r2[,6]
## [1] 0.5711993
Xpvw6.1$res$cal$rmse[,6]
## [1] 19.82593
Xpvw6.1$res$test$rmse[,6]
## [1] 21.72441
Xpvw6.1$res$test$r2[,6]
## [1] 0.4851453
3.3.2 Datos de validación
perturbations2 = rbind(dnorm(1:ncol(Y), 750, 200) * 10000,
                      dnorm(1:ncol(Y), 750, 100) * 10000,
                      dnorm(1:ncol(Y), 500, 100) * 10000)
perturb2 = matrix(rep(perturbations2, each = nrow(Y)/nrow(perturbations2)), 
                                 nrow = nrow(Y), byrow = TRUE)

Y.y2 = Y + perturb2

y.new2 = prep.alsbasecorr(Y.y2, plambda = 5, p = 0.05)

pv6.2 = pcvpls(y.new2, x,7,cv = list("ven", 4))
Xpvm6.2 = pls(y.new2, x, x.test = pv6.2, y.test = x, ncomp.selcrit = "min")
Xpvw6.2 = pls(y.new2, x, x.test = pv6.2, y.test = x, ncomp.selcrit = "wold")
show(Xpvm6.2$ncomp.selected) ; show(Xpvw6.2$ncomp.selected) #7 6
## [1] 7
## [1] 6
Xpvm6.2$res$cal$r2[,7]
## [1] 0.8446763
Xpvm6.2$res$cal$rmse[,7]
## [1] 9.030223
Xpvm6.2$res$test$rmse[,7]
## [1] 13.13928
Xpvm6.2$res$test$r2[,7]
## [1] 0.6711605
Xpvw6.2$res$cal$r2[,6]
## [1] 0.7792002
Xpvw6.2$res$cal$rmse[,6]
## [1] 10.76661
Xpvw6.2$res$test$rmse[,6]
## [1] 13.36372
Xpvw6.2$res$test$r2[,6]
## [1] 0.6598303

5. Normalización

5.1 Área

5.1.1 Datos de calibración
X1 = prep.norm(X, "area")
pv7.1.1 = pcvpls(X1, y,7,cv = list("ven", 4))
Xm1.1 = pls(X1, y, x.test = pv7.1.1, y.test = y, ncomp.selcrit = "min")
Xw1.1 = pls(X1,y, x.test = pv7.1.1, y.test = y, ncomp.selcrit = "wold")
show(Xm1.1$ncomp.selected) ; show(Xw1.1$ncomp.selected) # 6 2
## [1] 6
## [1] 2
Xm1.1$res$cal$r2[,6]
## [1] 0.8547722
Xm1.1$res$cal$rmse[,6]
## [1] 11.538
Xm1.1$res$test$rmse[,6]
## [1] 12.63595
Xm1.1$res$test$r2[,6]
## [1] 0.8258176
Xw1.1$res$cal$r2[,2]
## [1] 0.4785141
Xw1.1$res$cal$rmse[,2]
## [1] 21.86387
Xw1.1$res$test$rmse[,2]
## [1] 22.05211
Xw1.1$res$test$r2[,2]
## [1] 0.469496
5.1.2 Datos de validación
Y1 = prep.norm(Y, "area")
pv7.1.2 = pcvpls(Y1, x, 7,cv = list("ven", 4))
Xm1.2 = pls(Y1, x, x.test = pv7.1.2, y.test = x, ncomp.selcrit = "min")
Xw1.2 = pls(Y1, x, x.test = pv7.1.2, y.test = x, ncomp.selcrit = "wold")
show(Xm1.2$ncomp.selected) ; show(Xw1.2$ncomp.selected) #5 3
## [1] 5
## [1] 3
Xm1.2$res$cal$r2[,5]
## [1] 0.8660459
Xm1.2$res$cal$rmse[,5]
## [1] 8.386053
Xm1.2$res$test$rmse[,5]
## [1] 10.09148
Xm1.2$res$test$r2[,5]
## [1] 0.8060231
Xw1.2$res$cal$r2[,3]
## [1] 0.8191605
Xw1.2$res$cal$rmse[,3]
## [1] 9.743753
Xw1.2$res$test$rmse[,3]
## [1] 10.30295
Xw1.2$res$test$r2[,3]
## [1] 0.7978082

5.2 Longitud euclediana

5.2.1 Datos de calibración
X2 = prep.norm(X, "length")
pv7.2.1 = pcvpls(X2, y,7,cv = list("ven", 4))
Xm2.1 = pls(X2, y, x.test = pv7.2.1, y.test = y, ncomp.selcrit = "min")
Xw2.1 = pls(X2 ,y, x.test = pv7.2.1, y.test = y, ncomp.selcrit = "wold")
show(Xm2.1$ncomp.selected) ; show(Xw2.1$ncomp.selected) #6 2 
## [1] 6
## [1] 2
Xm2.1$res$cal$r2[,6]
## [1] 0.8564715
Xm2.1$res$cal$rmse[,6]
## [1] 11.4703
Xm2.1$res$test$rmse[,6]
## [1] 12.5687
Xm2.1$res$test$r2[,6]
## [1] 0.8276667
Xw2.1$res$cal$r2[,2]
## [1] 0.4776096
Xw2.1$res$cal$rmse[,2]
## [1] 21.88282
Xw2.1$res$test$rmse[,2]
## [1] 22.0717
Xw2.1$res$test$r2[,2]
## [1] 0.4685528
5.2.2 Datos de validación
Y2 = prep.norm(Y, "length")
pv7.2.2 = pcvpls(Y2, x,7,cv = list("ven", 4))
Xm2.2 = pls(Y2, x, x.test = pv7.2.2, y.test = x, ncomp.selcrit = "min")
Xw2.2 = pls(Y2, x, x.test = pv7.2.2, y.test = x, ncomp.selcrit = "wold")
show(Xm2.2$ncomp.selected) ; show(Xw2.2$ncomp.selected) #9 3 
## [1] 8
## [1] 3
Xm2.2$res$cal$r2[,9]
## [1] 0.9683879
Xm2.2$res$cal$rmse[,9]
## [1] 4.073865
Xm2.2$res$test$rmse[,9]
## [1] 9.259544
Xm2.2$res$test$r2[,9]
## [1] 0.8366873
Xw2.2$res$cal$r2[,3]
## [1] 0.8192726
Xw2.2$res$cal$rmse[,3]
## [1] 9.740732
Xw2.2$res$test$rmse[,3]
## [1] 10.30496
Xw2.2$res$test$r2[,3]
## [1] 0.7977291

5.3 Suma

5.3.1 Datos de calibración
X3 = prep.norm(X, "sum")
pv7.3.1 = pcvpls(X3, y,7,cv = list("ven", 4))
Xm3.1 = pls(X3, y, x.test = pv7.3.1, y.test = y, ncomp.selcrit = "min")
Xw3.1 = pls(X3,y, x.test = pv7.3.1, y.test = y, ncomp.selcrit = "wold")
show(Xm3.1$ncomp.selected) ; show(Xw3.1$ncomp.selected) #6 2 
## [1] 6
## [1] 2
Xm3.1$res$cal$r2[,6]
## [1] 0.8547722
Xm3.1$res$cal$rmse[,6]
## [1] 11.538
Xm3.1$res$test$rmse[,6]
## [1] 12.63595
Xm3.1$res$test$r2[,6]
## [1] 0.8258176
Xw3.1$res$cal$r2[,2]
## [1] 0.4785141
Xw3.1$res$cal$rmse[,2]
## [1] 21.86387
Xw3.1$res$test$rmse[,2]
## [1] 22.05211
Xw3.1$res$test$r2[,2]
## [1] 0.469496
5.3.1 Datos de validación
Y3 = prep.norm(Y, "sum")
pv7.3.2 = pcvpls(Y3, x,7,cv = list("ven", 4))
Xm3.2 = pls(Y3, x, x.test = pv7.3.2, y.test = x, ncomp.selcrit = "min")
Xw3.2 = pls(Y3, x, x.test = pv7.3.2, y.test = x, ncomp.selcrit = "wold")
show(Xm3.2$ncomp.selected) ; show(Xw3.2$ncomp.selected) #5 3 
## [1] 5
## [1] 3
Xm3.2$res$cal$r2[,5]
## [1] 0.8660459
Xm3.2$res$cal$rmse[,5]
## [1] 8.386053
Xm3.2$res$test$rmse[,5]
## [1] 10.09148
Xm3.2$res$test$r2[,5]
## [1] 0.8060231
Xw3.2$res$cal$r2[,3]
## [1] 0.8191605
Xw3.2$res$cal$rmse[,3]
## [1] 9.743753
Xw3.2$res$test$rmse[,3]
## [1] 10.30295
Xw3.2$res$test$r2[,3]
## [1] 0.7978082

5.4 Variable normal estándar

5.4.1 Datos de calibraación
X4 = prep.norm(X, "snv")
pv7.4.1 = pcvpls(X4, y,7,cv = list("ven", 4))
Xm4.1 = pls(X4, y, x.test = pv7.4.1, y.test = y, ncomp.selcrit = "min")
Xw4.1 = pls(X4 ,y, x.test = pv7.4.1, y.test = y, ncomp.selcrit = "wold")
show(Xm4.1$ncomp.selected) ; show(Xw4.1$ncomp.selected) #5 1 
## [1] 5
## [1] 1
Xm4.1$res$cal$r2[,5]
## [1] 0.8439183
Xm4.1$res$cal$rmse[,5]
## [1] 11.96139
Xm4.1$res$test$rmse[,5]
## [1] 12.91009
Xm4.1$res$test$r2[,5]
## [1] 0.8181778
Xw4.1$res$cal$r2[,1]
## [1] 0.3733491
Xw4.1$res$cal$rmse[,1]
## [1] 23.96727
Xw4.1$res$test$rmse[,1]
## [1] 24.11487
Xw4.1$res$test$r2[,1]
## [1] 0.3656072
5.4.2 Datos de validación
Y4 = prep.norm(Y, "snv")
pv7.4.2 = pcvpls(Y4, x, 7, cv = list("ven", 4))
Xm4.2 = pls(Y4, x, x.test = pv7.4.2, y.test = x, ncomp.selcrit = "min")
Xw4.2 = pls(Y4 ,x, x.test = pv7.4.2, y.test = x, ncomp.selcrit = "wold")
show(Xm4.2$ncomp.selected) ; show(Xw4.2$ncomp.selected) #4
## [1] 4
## [1] 4
Xm4.2$res$cal$r2[,4]
## [1] 0.8451403
Xm4.2$res$cal$rmse[,4]
## [1] 9.016727
Xm4.2$res$test$rmse[,4]
## [1] 10.11867
Xm4.2$res$test$r2[,4]
## [1] 0.8049763
Xw4.2$res$cal$r2[,4]
## [1] 0.8451403
Xw4.2$res$cal$rmse[,4]
## [1] 9.016727
Xw4.2$res$test$rmse[,4]
## [1] 10.11867
Xw4.2$res$test$r2[,4]
## [1] 0.8049763

5.5 Normalización probabilística

5.5.1 Datos de calibración
X5 = prep.norm(X, "pqn")
pv7.5.1 = pcvpls(X5, y,7,cv = list("ven", 4))
Xm5.1 = pls(X5, y, x.test = pv7.5.1, y.test = y, ncomp.selcrit = "min")
Xw5.1 = pls(X5 ,y, x.test = pv7.5.1, y.test = y, ncomp.selcrit = "wold")
show(Xm5.1$ncomp.selected) ; show(Xw5.1$ncomp.selected) #7 2 
## [1] 7
## [1] 2
Xm5.1$res$cal$r2[,7]
## [1] 0.8629784
Xm5.1$res$cal$rmse[,7]
## [1] 11.20728
Xm5.1$res$test$rmse[,7]
## [1] 12.45263
Xm5.1$res$test$r2[,7]
## [1] 0.8308348
Xw5.1$res$cal$r2[,2]
## [1] 0.4854921
Xw5.1$res$cal$rmse[,2]
## [1] 21.7171
Xw5.1$res$test$rmse[,2]
## [1] 21.98711
Xw5.1$res$test$r2[,2]
## [1] 0.4726184
5.5.2 Datos de validación
Y5 = prep.norm(Y, "pqn")
pv7.5.2 = pcvpls(Y5, x,7,cv = list("ven", 4))
Xm5.2 = pls(Y5, x, x.test = pv7.5.2, y.test = x, ncomp.selcrit = "min")
Xw5.2 = pls(Y5, x, x.test = pv7.5.2, y.test = x, ncomp.selcrit = "wold")
show(Xm5.2$ncomp.selected) ; show(Xw5.2$ncomp.selected) #7 3
## [1] 7
## [1] 3
Xm5.2$res$cal$r2[,7]
## [1] 0.9225166
Xm5.2$res$cal$rmse[,7]
## [1] 6.377994
Xm5.2$res$test$rmse[,7]
## [1] 10.24577
Xm5.2$res$test$r2[,7]
## [1] 0.8000461
Xw5.2$res$cal$r2[,3]
## [1] 0.8132183
Xw5.2$res$cal$rmse[,3]
## [1] 9.902546
Xw5.2$res$test$rmse[,3]
## [1] 10.34647
Xw5.2$res$test$r2[,3]
## [1] 0.7960965

6. Alisados y derivados

6.1 Añadir ruido

6.1.1 Datos de calibración
nspectra1 = X + 0.025 * matrix(rnorm(length(X)), dim(X))
pv9.1 = pcvpls(nspectra1, y,7,cv = list("ven", 4))
Xpvm9.1 = pls(nspectra1, y, x.test = pv9.1, y.test = y, ncomp.selcrit = "min")
Xpvw9.1 = pls(nspectra1, y, x.test = pv9.1, y.test = y, ncomp.selcrit = "wold")
show(Xpvm9.1$ncomp.selected) ; show(Xpvw9.1$ncomp.selected) #3
## [1] 3
## [1] 3
Xpvm9.1$res$cal$r2[,3]
## [1] 0.551401
Xpvm9.1$res$cal$rmse[,3]
## [1] 20.27846
Xpvm9.1$res$test$rmse[,3]
## [1] 23.88884
Xpvm9.1$res$test$r2[,3]
## [1] 0.3774435
Xpvw9.1$res$cal$rmse[,3]
## [1] 20.27846
Xpvw9.1$res$cal$rmse[,3]
## [1] 20.27846
Xpvw9.1$res$test$r2[,3]
## [1] 0.3774435
Xpvw9.1$res$test$r2[,3]
## [1] 0.3774435
6.1.2 Datos de validación
nspectra2 = Y + 0.025 * matrix(rnorm(length(Y)), dim(Y))
pv9.2 = pcvpls(nspectra2, x,7,cv = list("ven", 4))
Xpvm9.2 = pls(nspectra2, x, x.test = pv9.2, y.test = x, ncomp.selcrit = "min")
Xpvw9.2 = pls(nspectra2, x, x.test = pv9.2, y.test = x, ncomp.selcrit = "wold")
show(Xpvm9.2$ncomp.selected) ; show(Xpvw9.2$ncomp.selected) #3 1
## [1] 3
## [1] 3
Xpvm9.2$res$cal$r2[,3]
## [1] 0.8563599
Xpvm9.2$res$cal$rmse[,3]
## [1] 8.683953
Xpvm9.2$res$test$rmse[,3]
## [1] 13.39126
Xpvm9.2$res$test$r2[,3]
## [1] 0.6584269
Xpvw9.2$res$cal$r2[,1]
## [1] 0.5147719
Xpvw9.2$res$cal$rmse[,1]
## [1] 15.96073
Xpvw9.2$res$test$rmse[,1]
## [1] 16.09826
Xpvw9.2$res$test$r2[,1]
## [1] 0.5063733

6.2 Suavizado

6.2.1 Datos de calibración
pspectra1 = prep.savgol(nspectra1, width = 15, porder = 1)
pv10.1 = pcvpls(pspectra1, y,7,cv = list("ven", 4))
Xpvm10.1 = pls(pspectra1, y, x.test = pv10.1, y.test = y, ncomp.selcrit = "min")
Xpvw10.1 = pls(pspectra1, y, x.test = pv10.1, y.test = y, ncomp.selcrit = "wold")
show(Xpvm10.1$ncomp.selected) ; show(Xpvw10.1$ncomp.selected) #5 3
## [1] 4
## [1] 3
Xpvm10.1$res$cal$r2[,5]
## [1] 0.4548223
Xpvm10.1$res$cal$rmse[,5]
## [1] 22.35501
Xpvm10.1$res$test$rmse[,5]
## [1] 23.78475
Xpvm10.1$res$test$r2[,5]
## [1] 0.3828572
Xpvw10.1$res$cal$r2[,3]
## [1] 0.4158706
Xpvw10.1$res$cal$rmse[,3]
## [1] 23.13983
Xpvw10.1$res$test$rmse[,3]
## [1] 23.69707
Xpvw10.1$res$test$r2[,3]
## [1] 0.3873989
6.2.2 Datos de validación
pspectra2 = prep.savgol(nspectra2, width = 15, porder = 1)
pv10.2 = pcvpls(pspectra2, x,7,cv = list("ven", 4))
Xpvm10.2 = pls(pspectra2, x, x.test = pv10.2, y.test = x, ncomp.selcrit = "min")
Xpvw10.2 = pls(pspectra2, x, x.test = pv10.2, y.test = x, ncomp.selcrit = "wold")
show(Xpvm10.2$ncomp.selected) ; show(Xpvw10.2$ncomp.selected) #3
## [1] 3
## [1] 3
Xpvm10.2$res$cal$r2[,3]
## [1] 0.6765629
Xpvm10.2$res$cal$rmse[,3]
## [1] 13.0309
Xpvm10.2$res$test$rmse[,3]
## [1] 13.65945
Xpvm10.2$res$test$r2[,3]
## [1] 0.6446086
Xpvw10.2$res$cal$r2[,3]
## [1] 0.6765629
Xpvw10.2$res$cal$rmse[,3]
## [1] 13.0309
Xpvw10.2$res$test$rmse[,3]
## [1] 13.65945
Xpvw10.2$res$test$r2[,3]
## [1] 0.6446086

6.3 Primera derivada

6.3.1 Datos de calibración
dpspectra1 = prep.savgol(nspectra1, width = 15, porder = 1, dorder = 1)
pv11.1 = pcvpls(dpspectra1, y, 20, cv = list("ven", 4))
Xpvm11.1 = pls(dpspectra1, y, x.test = pv11.1, y.test = y, ncomp.selcrit = "min")
Xpvw11.1 = pls(dpspectra1, y, x.test = pv11.1, y.test = y, ncomp.selcrit = "wold")
show(Xpvm11.1$ncomp.selected) ; show(Xpvw11.1$ncomp.selected) # 3 2
## [1] 2
## [1] 2
Xpvm11.1$res$cal$r2[,3]
## [1] 0.4581777
Xpvm11.1$res$cal$rmse[,3]
## [1] 22.2861
Xpvm11.1$res$test$rmse[,3]
## [1] 25.45329
Xpvm11.1$res$test$r2[,3]
## [1] 0.2932327
Xpvw11.1$res$cal$r2[,2]
## [1] 0.3999804
Xpvw11.1$res$cal$rmse[,2]
## [1] 23.45246
Xpvw11.1$res$test$rmse[,2]
## [1] 25.06214
Xpvw11.1$res$test$r2[,2]
## [1] 0.3147879
6.3.2 Datos de validación
dpspectra2 = prep.savgol(nspectra2, width = 15, porder = 1, dorder = 1)
pv11.2 = pcvpls(dpspectra2, x, 20, cv = list("ven", 4))
Xpvm11.2 = pls(dpspectra2, x, x.test = pv11.2, y.test = x, ncomp.selcrit = "min")
Xpvw11.2 = pls(dpspectra2, x, x.test = pv11.2, y.test = x, ncomp.selcrit = "wold")
show(Xpvm11.2$ncomp.selected) ; show(Xpvw11.2$ncomp.selected) #2
## [1] 1
## [1] 1
Xpvm11.2$res$cal$r2[,2]
## [1] 0.4455507
Xpvm11.2$res$cal$rmse[,2]
## [1] 17.06124
Xpvm11.2$res$test$rmse[,2]
## [1] 22.11862
Xpvm11.2$res$test$r2[,2]
## [1] 0.06812691
Xpvw11.2$res$cal$r2[,2]
## [1] 0.4455507
Xpvw11.2$res$cal$rmse[,2]
## [1] 17.06124
Xpvw11.2$res$test$rmse[,2]
## [1] 22.11862
Xpvw11.2$res$test$r2[,2]
## [1] 0.06812691

7 Transformaciones por elementos

7.1 Datos de calibración
S1.1 = prep.transform(X, log)
pv12.1 = pcvpls(S1.1, y,7,cv = list("ven", 4))
model2.1m = pls(S1.1, y, x.test = pv12.1, y.test = y, ncomp.selcrit = "min")
model2.1w = pls(S1.1, y, x.test = pv12.1, y.test = y, ncomp.selcrit = "wold")
model2.1 = pls(S1.1, y, x.test = pv12.1, y.test = y, 3)
show(model2.1m$ncomp.selected) ; show(model2.1w$ncomp.selected) #7
## [1] 7
## [1] 7
model2.1m$res$cal$r2[,7]
## [1] 0.8860505
model2.1m$res$cal$rmse[,7]
## [1] 10.22026
model2.1m$res$test$rmse[,7]
## [1] 11.72092
model2.1m$res$test$r2[,7]
## [1] 0.8501309
model2.1w$res$cal$r2[,7]
## [1] 0.8860505
model2.1w$res$cal$rmse[,7]
## [1] 10.22026
model2.1w$res$test$rmse[,7]
## [1] 11.72092
model2.1w$res$test$r2[,7]
## [1] 0.8501309
7.2 Datos de validación
S1.2 = prep.transform(Y, log)
pv12.2 = pcvpls(S1.2, x,7,cv = list("ven", 4))
model2.2m = pls(S1.2, x, x.test = pv12.2, y.test = x, ncomp.selcrit = "min")
model2.2w = pls(S1.2, x, x.test = pv12.2, y.test = x, ncomp.selcrit = "wold")
model2.2 = pls(S1.2, x, x.test = pv12.2, y.test = x, 3)
show(model2.2m$ncomp.selected) ; show(model2.2w$ncomp.selected) #4
## [1] 4
## [1] 4
model2.2m$res$cal$r2[,4]
## [1] 0.8416598
model2.2m$res$cal$rmse[,4]
## [1] 9.11749
model2.2m$res$test$rmse[,4]
## [1] 9.45002
model2.2m$res$test$r2[,4]
## [1] 0.8298993
model2.2w$res$cal$r2[,4]
## [1] 0.8416598
model2.2w$res$cal$rmse[,4]
## [1] 9.11749
model2.2w$res$test$rmse[,4]
## [1] 9.45002
model2.2w$res$test$r2[,4]
## [1] 0.8298993

7.3 Datos de calibración

S2.1 = prep.transform(X, function(x, p) x^p, p = 0.2)
pv13.1 = pcvpls(S2.1, y,7,cv = list("ven", 4))
model3.1m = pls(S2.1, y, x.test = pv13.1, y.test = y, ncomp.selcrit = "min")
model3.1w = pls(S2.1, y, x.test = pv13.1, y.test = y, ncomp.selcrit = "wold")
model3.1 = pls(S2.1, y, x.test = pv13.1, y.test = y, 3)
show(model3.1m$ncomp.selected) ; show(model3.1w$ncomp.selected) #7
## [1] 7
## [1] 7
model3.1m$res$cal$r2[,7]
## [1] 0.8861839
model3.1m$res$cal$rmse[,7]
## [1] 10.21428
model3.1m$res$test$rmse[,7]
## [1] 11.68395
model3.1m$res$test$r2[,7]
## [1] 0.8510748
model3.1w$res$cal$r2[,7]
## [1] 0.8861839
model3.1w$res$cal$rmse[,7]
## [1] 10.21428
model3.1w$res$test$rmse[,7]
## [1] 11.68395
model3.1w$res$test$r2[,7]
## [1] 0.8510748

7.5 Datos de validación

S2.2 = prep.transform(Y, function(x, p) x^p, p = 0.2)
pv13.2 = pcvpls(S2.2, x,7,cv = list("ven", 4))
model3.2m = pls(S2.2, x, x.test = pv13.2, y.test = x, ncomp.selcrit = "min")
model3.2w = pls(S2.2, x, x.test = pv13.2, y.test = x, ncomp.selcrit = "wold")
model3.2 = pls(S2.2, x, x.test = pv13.2, y.test = x, 3)
show(model3.2m$ncomp.selected) ; show(model3.2w$ncomp.selected) #4
## [1] 4
## [1] 4
model3.2m$res$cal$r2[,4]
## [1] 0.8404598
model3.2m$res$cal$rmse[,4]
## [1] 9.151974
model3.2m$res$test$rmse[,4]
## [1] 9.472726
model3.2m$res$test$r2[,4]
## [1] 0.8290809
model3.2w$res$cal$r2[,4]
## [1] 0.8404598
model3.2w$res$cal$rmse[,4]
## [1] 9.151974
model3.2w$res$test$rmse[,4]
## [1] 9.472726
model3.2w$res$test$r2[,4]
## [1] 0.8290809

8. Selección de variables como método de procesamiento

8.1 Datos de calibración

M1 = prep.varsel(X, seq(2, ncol(X), by = 2))
pv14.1 = pcvpls(M1, y,7,cv = list("ven", 4))
model4.1m = pls(M1, y, x.test = pv14.1, y.test = y, ncomp.selcrit = "min")
model4.1w = pls(M1, y, x.test = pv14.1, y.test = y, ncomp.selcrit = "wold")
show(model3.1m$ncomp.selected) ; show(model3.1w$ncomp.selected) #7 3 
## [1] 7
## [1] 7
model4.1m$res$cal$r2[,7]
## [1] 0.8672805
model4.1m$res$cal$rmse[,7]
## [1] 11.02994
model4.1m$res$test$rmse[,7]
## [1] 12.18782
model4.1m$res$test$r2[,7]
## [1] 0.837953
model4.1w$res$cal$r2[,3]
## [1] 0.5022592
model4.1w$res$cal$rmse[,3]
## [1] 21.3603
model4.1w$res$test$rmse[,3]
## [1] 21.60939
model4.1w$res$test$r2[,3]
## [1] 0.4905828

8.2 Datos de validación

M2 = prep.varsel(Y, seq(2, ncol(Y), by = 2))
pv14.2 = pcvpls(M2, x,7,cv = list("ven", 4))
model4.2m = pls(M2, x, x.test = pv14.2, y.test = x, ncomp.selcrit = "min")
model4.2w = pls(M2, x, x.test = pv14.2, y.test = x, ncomp.selcrit = "wold")
show(model3.2m$ncomp.selected) ; show(model3.2w$ncomp.selected) #4
## [1] 4
## [1] 4
model4.2m$res$cal$r2[,4]
## [1] 0.8345437
model4.2m$res$cal$rmse[,4]
## [1] 9.320117
model4.2m$res$test$rmse[,4]
## [1] 9.630362
model4.2m$res$test$r2[,4]
## [1] 0.823345
model4.2w$res$cal$r2[,4]
## [1] 0.8345437
model4.2w$res$cal$rmse[,4]
## [1] 9.320117
model4.2w$res$test$rmse[,4]
## [1] 9.630362
model4.2w$res$test$r2[,4]
## [1] 0.823345

Extracción \(R^2\)

Anexos

Tabla 1. Resumen de Modelos y Preprocesamientos
Modelos Preprocesamiento
Modelo 1 Escalado automático: centralizado en la media
Modelo 2 Escalado automático: estandarizado sin centralizar
Modelo 3 Escalado automático: centralizado en la media y estandarizado
Modelo 4 Escalado automático: centralizado con la mediana y estandarizado
Modelo 5 Variación normal estándar
Modelo 6 Corrección de dispersión multiplicativa
Modelo 7 Corrección de linea con mínimos cuadrados asimétricos
Modelo 8 Normalización: área
Modelo 9 Normalización: longitud euclediana
Modelo 10 Normalización: suma
Modelo 11 Normalización: variable normal estándar
Modelo 12 Normalización: probabilística
Modelo 13 Alisados y derivados: solo con ruido
Modelo 14 Alisados y derivados: con ruido y suavizado
Modelo 15 Alisados y derivados: con ruido y primera derivada
Modelo 16 Transformaciones por elemento: logaritmo
Modelo 17 Transformaciones por elemento: raíz cuadrática
Modelo 18 Selección de variables
Nota:
Los preprocesamientos listados son opciones comunes para análisis estadísticos y machine learning.