Supplementary information

R script for Multivariate analysis - Iron stress condition in Spinach

Data and packages

library(mixOmics)
library(readxl)
library(textshape)

database <- read.csv("C:/Users/cesar/Desktop/ESPINACA MAESTRÍA/TRATAMIENTOS/Hierro/Matriz Datos Espinaca Hierro.csv")

view <- database

database <- textshape::column_to_rownames(database, loc = 1)

spinach <- as.data.frame(database)

spinach <- subset(spinach, select= -c(Class))

View(spinach)

spinach
X <- spinach

Y <- database$Class

dim(X)
## [1] 30 26

“Sparse Principal Component Analysis” sPCA

explainedVariance <- tune.pca(X, ncomp = 10, center = TRUE, scale = TRUE)

plot(explainedVariance)

test.keepX <- c(seq(26))

tune.spca.res <- tune.spca(X, ncomp = 3,
                           nrepeat = 5,
                           folds = 10,
                           test.keepX = test.keepX)
plot(tune.spca.res)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the mixOmics package.
##   Please report the issue at
##   <https://github.com/mixOmicsTeam/mixOmics/issues/>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

spca <- spca(X, ncomp = 3,
             scale = TRUE,
             center = TRUE)

plotIndiv(spca, comp = c(1, 2), ind.names = TRUE,
          group = database$Class,
          ellipse = TRUE,
          cutoff = 0.5,
          size.title = 15,
          size.legend = 15,
          size.xlabel = 15,
          size.ylabel = 15,
          col = c("red", "orange", "blue", "green","black","purple"),
          legend = TRUE, title = 'Iron stress in Spinach')

plotVar(spca, comp = c(1, 2), var.names = TRUE,
        cutoff = 0,
        rad.in = 1,
        title = 'Iron stress in spinach')

biplot(spca, cex = 1,
       group = database$Class,
       pch.size = 5,
       cutoff = 0.5,
       size.legend = 20,
       size.xlabel = 20,
       size.ylabel = 20,
       col = c("red", "orange", "blue", "green","black","purple"),
       title = 'Iron stress in Spinach')

plotLoadings(spca, comp = 1,
             size.title = 1.5,
             size.name = 1,
             size.axis = 1,
             ncomp = 20)

plotLoadings(spca, comp = 2,
             size.title = 1.5,
             size.name = 1,
             size.axis = 1,
             ncomp = 20)

“Sparse Partial Least Squares-Discriminant Analysis” sPLS-DA

splsda <- splsda(X, Y, ncomp = 10, scale = TRUE)

set.seed(30)

plotIndiv(splsda, comp = c(1, 2), ind.names = TRUE,
          group = database$Class,
          ellipse = TRUE,
          cutoff = 0.5,
          size.title = 15,
          size.legend = 15,
          size.xlabel = 15,
          size.ylabel = 15,
          col = c("red", "orange", "blue", "green","black","purple"),
          legend = TRUE, title = 'Iron stress in Spinach')

perf.splsda <- perf(splsda, validation = "Mfold", 
                    folds = 3, nrepeat = 50,
                    progressBar = FALSE, auc = TRUE)

plot(perf.splsda, sd = TRUE, legend.position = "vertical")

perf.splsda$choice.ncomp
##         max.dist centroids.dist mahalanobis.dist
## overall        6              7                4
## BER            6              7                4
tune.splsda <- tune.splsda(X, Y, ncomp = 4, 
                           validation = 'Mfold',
                           folds = 3, nrepeat = 50, 
                           dist = 'max.dist',
                           test.keepX = c (5, 10, 15, 20),
                           measure = "BER")
plot(tune.splsda)

final.splsda <- splsda(X, Y, ncomp = 3, keepX = c(5, 10) , scale = TRUE)

plotIndiv(final.splsda, comp = c(1, 2), ind.names = TRUE,
          group = database$Class,
          ellipse = TRUE,
          cutoff = 0.5,
          size.title = 15,
          size.legend = 15,
          size.xlabel = 15,
          size.ylabel = 15,
          col = c("red", "orange", "blue", "green","black","purple"),
          legend = TRUE, title = 'Iron stress in Spinach')

plotVar(final.splsda, comp = c(1, 2), var.names = TRUE,
        cutoff = 0,
        rad.in = 1,
        title = 'Iron stress in spinach')

biplot(final.splsda, cex = 1,
       group = database$Class,
       pch.size = 5,
       cutoff = 0,
       size.legend = 20,
       size.xlabel = 20,
       size.ylabel = 20,
       col = c("red", "orange", "blue", "green","black","purple"),
       title = 'Iron stress Spinach')

plotLoadings(final.splsda, comp = 1,
             size.title = 1,
             size.name = 1)

plotLoadings(final.splsda, comp = 2,
             size.title = 1,
             size.name = 1)

sPLS-DA model evaluation

perf.res <- perf.assess(final.splsda, dist = "max.dist",
                        validation = "Mfold", 
                        folds = 5, 
                        nrepeat = 50)

perf.res$error.rate$overall[,'max.dist']
## [1] 0.3633333
perf.res$error.rate.class$max.dist
## Deficiency S4 Deficiency S5     Excess S4     Excess S5   Standard S4 
##         0.580         0.000         0.976         0.068         0.356 
##   Standard S5 
##         0.200
summary(Y)
##    Length     Class      Mode 
##        30 character character
perf.res$error.rate$BER[,'max.dist']
## [1] 0.3633333
auc.plsda <- auroc(final.splsda, roc.comp = 3, print = FALSE)