R script for Multivariate analysis - Magnesium stress condition in
Spinach
Data and packages
library(mixOmics)
library(readxl)
library(textshape)
database <- read.csv("C:/Users/cesar/Desktop/ESPINACA MAESTRÍA/TRATAMIENTOS/Magnesio/Matriz Datos Espinaca Magnesio.csv")
view <- database
database <- textshape::column_to_rownames(database, loc = 1)
spinach <- as.data.frame(database)
spinach <- subset(spinach, select= -c(Class))
View(spinach)
spinach
## Valine Alanine GABA Glutamate Malate Succinate Citrate Aspartate Betaine
## 1 252.2 408.3 1506.8 1329.9 88.8 61.4 157.1 897.7 1845.8
## 2 48.9 88.1 145.9 401.0 36.0 8.1 47.1 192.4 373.5
## 3 58.7 87.8 237.2 589.7 143.9 28.6 211.7 142.7 686.8
## 4 93.5 158.8 371.1 531.5 106.0 17.2 171.2 322.4 761.7
## 5 84.8 146.3 271.8 491.6 42.9 14.5 57.6 342.0 556.9
## 6 125.6 188.4 538.6 772.2 197.3 106.4 243.1 456.2 1288.1
## 7 71.0 103.0 305.0 448.1 47.2 17.1 110.4 149.2 664.6
## 8 56.5 106.5 194.9 510.4 34.5 17.9 141.5 136.2 611.0
## 9 129.6 240.5 607.0 637.6 62.2 37.4 115.6 492.3 1031.3
## 10 129.3 241.4 607.5 724.3 74.2 37.0 114.2 477.4 1023.2
## 11 98.1 219.7 417.2 639.7 102.1 28.3 98.2 382.1 942.5
## 12 112.5 143.4 417.5 360.2 189.8 15.5 211.3 169.9 2524.3
## 13 435.2 511.2 1614.7 1505.2 404.8 20.8 678.9 926.9 11692.7
## 14 196.8 210.6 586.1 397.3 410.5 25.7 551.3 371.8 3977.2
## 15 186.1 207.3 692.0 511.4 268.3 13.1 249.0 337.1 4320.1
## 16 74.3 112.6 193.9 282.7 634.5 29.3 333.7 186.0 595.2
## 17 167.1 165.0 497.2 447.5 247.0 18.3 252.2 227.6 2965.2
## 18 259.3 298.8 860.0 641.8 187.1 20.1 370.5 306.8 4185.7
## 19 178.2 193.7 587.6 492.8 354.7 29.1 378.5 413.4 2820.8
## 20 302.7 336.2 962.3 856.9 732.6 36.0 792.1 540.0 4440.0
## 21 378.2 493.4 1680.6 1269.4 720.0 64.9 1742.0 1203.4 11791.9
## 22 303.7 310.5 813.5 721.7 986.5 61.5 1036.8 451.7 5417.0
## 23 262.7 292.7 700.1 678.4 1252.8 96.3 1374.7 489.5 4357.1
## 24 846.7 910.6 2271.4 2347.3 899.9 150.7 1330.3 994.9 13270.7
## 25 181.2 163.4 567.6 498.0 434.5 39.3 627.3 241.4 4168.5
## 26 292.9 335.4 875.5 868.0 214.8 26.8 534.4 422.6 6427.6
## 27 680.5 751.5 2210.4 2046.9 2362.9 185.5 1867.0 1069.0 17994.7
## 28 243.8 309.0 1004.8 1031.6 392.0 53.7 750.1 503.8 9519.8
## 29 221.3 195.7 459.9 751.7 319.1 32.1 404.0 272.3 4499.4
## 30 248.6 288.8 829.2 601.4 1400.2 36.9 642.4 463.5 4616.9
## Glucose Fructose Sucrose Ascorbate Uridine Adenosine Fumarate Tyrosine
## 1 250.7 275.7 45.2 876.7 330.7 385.0 37.3 101.0
## 2 89.8 67.6 40.7 76.5 58.4 52.7 18.6 31.9
## 3 161.1 162.6 42.8 123.9 95.1 85.2 35.3 32.7
## 4 167.5 145.2 29.0 45.2 132.9 96.1 35.5 59.0
## 5 150.7 106.2 21.6 113.7 114.4 99.9 21.6 49.1
## 6 269.3 166.4 120.7 277.9 148.3 120.4 58.3 65.9
## 7 145.0 144.5 17.2 116.9 118.8 113.0 17.4 37.1
## 8 129.5 111.3 26.6 80.6 104.7 101.4 16.2 32.2
## 9 197.3 169.0 13.8 254.6 222.3 193.8 30.0 60.9
## 10 155.8 165.8 13.7 261.9 214.6 186.9 29.2 60.0
## 11 113.4 96.9 13.0 222.9 113.0 123.3 20.0 44.3
## 12 423.2 213.8 31.7 22.3 120.9 139.8 58.1 53.4
## 13 5805.9 1433.1 2356.4 330.8 502.1 543.7 166.9 258.2
## 14 3941.0 1180.6 1362.8 70.1 157.0 163.1 129.8 118.5
## 15 2959.9 1227.5 1336.3 108.0 178.4 185.5 135.4 116.3
## 16 439.1 308.6 0.0 0.0 70.0 59.8 126.3 43.9
## 17 1955.0 798.1 1048.1 31.0 161.3 176.4 69.3 95.9
## 18 3242.9 1140.5 1624.4 76.9 250.5 221.8 63.6 173.5
## 19 1707.8 741.9 482.4 0.0 158.1 166.5 158.3 95.1
## 20 5337.7 1833.4 2993.5 0.0 245.5 234.1 239.4 223.2
## 21 3437.6 1894.8 816.4 1070.6 387.8 387.1 362.2 183.0
## 22 6457.4 1849.2 5057.2 224.5 154.3 169.9 256.8 193.7
## 23 3848.3 987.7 1418.2 121.5 121.1 129.6 396.6 120.6
## 24 9306.4 3152.6 4943.5 1009.3 390.8 454.7 177.2 464.6
## 25 4470.8 1182.8 1942.2 115.1 166.4 179.5 138.5 98.2
## 26 3125.0 934.4 978.0 327.2 213.2 237.9 96.3 157.1
## 27 14409.8 4909.8 5555.4 906.9 627.1 604.8 680.6 415.3
## 28 6236.6 1416.3 458.3 370.9 335.1 365.5 140.7 139.9
## 29 4873.5 1053.6 2754.3 142.1 156.0 175.3 72.3 152.9
## 30 10031.8 1911.9 6507.8 373.4 160.6 168.4 284.0 168.3
## Phenylalanine Guanosine Formate Choline Ferulate Glycerol Isoleucine Leucine
## 1 104.3 361.7 137.7 569.8 75.4 429.8 154.5 279.5
## 2 37.0 58.4 35.9 91.5 14.9 97.6 31.1 74.8
## 3 37.1 97.8 53.6 107.6 26.8 138.4 43.4 89.1
## 4 71.7 139.4 50.6 207.0 30.1 170.8 55.8 134.7
## 5 51.9 111.0 70.1 178.4 27.9 158.7 49.5 103.0
## 6 71.4 134.5 32.4 224.0 25.9 249.9 82.1 151.4
## 7 40.2 127.4 46.0 123.3 36.1 144.8 49.5 79.0
## 8 37.9 113.1 52.9 141.6 31.2 123.1 39.0 69.7
## 9 60.6 225.6 58.8 322.4 60.5 263.6 73.3 127.0
## 10 63.5 225.3 58.3 326.2 58.6 289.8 73.0 121.5
## 11 48.9 121.2 60.1 221.0 36.1 178.4 54.1 87.0
## 12 56.2 159.2 35.6 193.5 20.9 176.8 64.3 116.8
## 13 248.3 736.8 91.6 1474.7 68.8 684.7 277.7 523.4
## 14 105.0 209.7 56.3 346.9 31.2 364.4 123.9 251.2
## 15 95.1 246.5 69.8 383.5 31.0 312.7 124.5 232.0
## 16 56.8 80.6 40.7 144.7 13.5 170.8 44.3 110.0
## 17 79.0 238.1 36.1 344.6 28.8 290.5 107.3 210.8
## 18 153.3 317.1 44.8 483.5 35.4 365.2 158.4 326.4
## 19 91.8 210.0 46.2 386.6 27.4 322.9 114.7 209.3
## 20 181.8 323.7 56.5 646.8 40.8 558.5 211.3 403.1
## 21 175.2 501.0 82.1 716.0 110.9 852.5 214.8 386.8
## 22 135.6 194.1 34.1 482.8 41.7 383.5 194.0 366.9
## 23 115.6 179.9 28.6 288.8 31.2 310.0 155.8 305.3
## 24 386.0 493.1 56.4 657.0 76.4 753.6 544.7 1050.5
## 25 77.3 220.0 22.2 303.5 34.1 202.0 103.9 184.4
## 26 128.1 301.6 35.6 390.9 57.1 341.6 181.9 306.4
## 27 323.9 811.6 131.5 718.4 176.2 957.9 379.2 729.8
## 28 133.3 470.3 40.4 533.4 52.9 587.2 155.2 278.4
## 29 99.8 199.2 44.5 252.6 34.2 243.1 152.4 256.2
## 30 149.6 185.2 47.9 370.8 34.8 290.9 174.4 334.3
## p.Cumarate
## 1 64.0
## 2 5.5
## 3 9.2
## 4 12.0
## 5 19.5
## 6 20.2
## 7 19.0
## 8 15.7
## 9 37.8
## 10 31.0
## 11 8.5
## 12 46.1
## 13 11.8
## 14 18.6
## 15 13.9
## 16 11.8
## 17 9.7
## 18 16.1
## 19 11.5
## 20 23.9
## 21 46.6
## 22 17.9
## 23 18.2
## 24 39.6
## 25 15.0
## 26 15.9
## 27 62.0
## 28 22.3
## 29 13.5
## 30 26.8
X <- spinach
Y <- database$Class
dim(X)
## [1] 30 26
“Sparse Principal Component Analysis” sPCA
explainedVariance <- tune.pca(X, ncomp = 10, center = TRUE, scale = TRUE)
plot(explainedVariance)

test.keepX <- c(seq(26))
tune.spca.res <- tune.spca(X, ncomp = 3,
nrepeat = 5,
folds = 10,
test.keepX = test.keepX)
plot(tune.spca.res)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the mixOmics package.
## Please report the issue at
## <https://github.com/mixOmicsTeam/mixOmics/issues/>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

spca <- spca(X, ncomp = 3,
scale = TRUE,
center = TRUE)
plotIndiv(spca, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "green", "blue"),
legend = TRUE, title = 'Magnesium stress in Spinach')

plotVar(spca, comp = c(1, 2), var.names = TRUE,
cutoff = 0,
rad.in = 1,
title = 'Magnesium stress in spinach')

biplot(spca, cex = 1,
group = database$Class,
pch.size = 5,
cutoff = 0.5,
size.legend = 20,
size.xlabel = 20,
size.ylabel = 20,
col = c("red", "green", "blue"),
title = 'Magnesium stress Spinach')

plotLoadings(spca, comp = 1,
size.title = 1,
size.name = 1,
size.axis = 1,
ncomp = 26)

plotLoadings(spca, comp = 2,
size.title = 1,
size.name = 1,
size.axis = 1,
ncomp = 26)

“Sparse Partial Least Squares-Discriminant Analysis” sPLS-DA
splsda <- splsda(X, Y, ncomp = 10, scale = TRUE)
set.seed(30)
plotIndiv(splsda, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "green", "blue"),
legend = TRUE, title = 'Magnesium stress in Spinach')

perf.splsda <- perf(splsda, validation = "Mfold",
folds = 5, nrepeat = 50,
progressBar = FALSE, auc = TRUE)
plot(perf.splsda, sd = TRUE, legend.position = "vertical")

perf.splsda$choice.ncomp
## max.dist centroids.dist mahalanobis.dist
## overall 6 7 4
## BER 6 7 4
tune.splsda <- tune.splsda(X, Y, ncomp = 4,
validation = 'Mfold',
folds = 5, nrepeat = 50,
dist = 'max.dist',
test.keepX = c (5, 10, 15, 20, 26),
measure = "BER")
plot(tune.splsda)

final.splsda <- splsda(X, Y, ncomp = 4, keepX = c(5, 5) , scale = TRUE)
plotIndiv(final.splsda, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "green", "blue"),
legend = TRUE, title = 'Magnesium stress in Spinach')

plotVar(final.splsda, comp = c(1, 2), var.names = TRUE,
cutoff = 0,
rad.in = 1,
title = 'Magnesium stress in spinach')

biplot(final.splsda, cex = 1,
group = database$Class,
pch.size = 5,
cutoff = 0,
size.legend = 20,
size.xlabel = 20,
size.ylabel = 20,
col = c("red", "green", "blue"),
title = 'Magnesium stress in Spinach')

plotLoadings(final.splsda, comp = 1,
size.title = 1,
size.name = 1)

plotLoadings(final.splsda, comp = 2,
size.title = 1,
size.name = 1)

sPLS-DA model evaluation
perf.res <- perf.assess(final.splsda, dist = "max.dist",
validation = "Mfold",
folds = 5,
nrepeat = 50)
perf.res$error.rate$overall[,'max.dist']
## [1] 0.146
perf.res$error.rate.class$max.dist
## Deficiency Excess Standard
## 0.280 0.104 0.054
summary(Y)
## Length Class Mode
## 30 character character
perf.res$error.rate$BER[,'max.dist']
## [1] 0.146
auc.plsda <- auroc(final.splsda, roc.comp = 4, print = FALSE)
