R script for Multivariate analysis - Iron stress condition in
Spinach
Data and packages
library(mixOmics)
library(readxl)
library(textshape)
database <- read.csv("C:/Users/cesar/Desktop/ESPINACA MAESTRÍA/TRATAMIENTOS/Hierro/Matriz Datos Espinaca Hierro.csv")
view <- database
database <- textshape::column_to_rownames(database, loc = 1)
spinach <- as.data.frame(database)
spinach <- subset(spinach, select= -c(Class))
View(spinach)
spinach
X <- spinach
Y <- database$Class
dim(X)
## [1] 30 26
“Sparse Principal Component Analysis” sPCA
explainedVariance <- tune.pca(X, ncomp = 10, center = TRUE, scale = TRUE)
plot(explainedVariance)

test.keepX <- c(seq(26))
tune.spca.res <- tune.spca(X, ncomp = 3,
nrepeat = 5,
folds = 10,
test.keepX = test.keepX)
plot(tune.spca.res)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the mixOmics package.
## Please report the issue at
## <https://github.com/mixOmicsTeam/mixOmics/issues/>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

spca <- spca(X, ncomp = 3,
scale = TRUE,
center = TRUE)
plotIndiv(spca, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "orange", "blue", "green","black","purple"),
legend = TRUE, title = 'Iron stress in Spinach')

plotVar(spca, comp = c(1, 2), var.names = TRUE,
cutoff = 0,
rad.in = 1,
title = 'Iron stress in spinach')

biplot(spca, cex = 1,
group = database$Class,
pch.size = 5,
cutoff = 0.5,
size.legend = 20,
size.xlabel = 20,
size.ylabel = 20,
col = c("red", "orange", "blue", "green","black","purple"),
title = 'Iron stress in Spinach')

plotLoadings(spca, comp = 1,
size.title = 1.5,
size.name = 1,
size.axis = 1,
ncomp = 20)

plotLoadings(spca, comp = 2,
size.title = 1.5,
size.name = 1,
size.axis = 1,
ncomp = 20)

“Sparse Partial Least Squares-Discriminant Analysis” sPLS-DA
splsda <- splsda(X, Y, ncomp = 10, scale = TRUE)
set.seed(30)
plotIndiv(splsda, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "orange", "blue", "green","black","purple"),
legend = TRUE, title = 'Iron stress in Spinach')

perf.splsda <- perf(splsda, validation = "Mfold",
folds = 3, nrepeat = 50,
progressBar = FALSE, auc = TRUE)
plot(perf.splsda, sd = TRUE, legend.position = "vertical")

perf.splsda$choice.ncomp
## max.dist centroids.dist mahalanobis.dist
## overall 6 7 4
## BER 6 7 4
tune.splsda <- tune.splsda(X, Y, ncomp = 4,
validation = 'Mfold',
folds = 3, nrepeat = 50,
dist = 'max.dist',
test.keepX = c (5, 10, 15, 20),
measure = "BER")
plot(tune.splsda)

final.splsda <- splsda(X, Y, ncomp = 3, keepX = c(5, 10) , scale = TRUE)
plotIndiv(final.splsda, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "orange", "blue", "green","black","purple"),
legend = TRUE, title = 'Iron stress in Spinach')

plotVar(final.splsda, comp = c(1, 2), var.names = TRUE,
cutoff = 0,
rad.in = 1,
title = 'Iron stress in spinach')

biplot(final.splsda, cex = 1,
group = database$Class,
pch.size = 5,
cutoff = 0,
size.legend = 20,
size.xlabel = 20,
size.ylabel = 20,
col = c("red", "orange", "blue", "green","black","purple"),
title = 'Iron stress Spinach')

plotLoadings(final.splsda, comp = 1,
size.title = 1,
size.name = 1)

plotLoadings(final.splsda, comp = 2,
size.title = 1,
size.name = 1)

sPLS-DA model evaluation
perf.res <- perf.assess(final.splsda, dist = "max.dist",
validation = "Mfold",
folds = 5,
nrepeat = 50)
perf.res$error.rate$overall[,'max.dist']
## [1] 0.3633333
perf.res$error.rate.class$max.dist
## Deficiency S4 Deficiency S5 Excess S4 Excess S5 Standard S4
## 0.580 0.000 0.976 0.068 0.356
## Standard S5
## 0.200
summary(Y)
## Length Class Mode
## 30 character character
perf.res$error.rate$BER[,'max.dist']
## [1] 0.3633333
auc.plsda <- auroc(final.splsda, roc.comp = 3, print = FALSE)
