R script for Multivariate analysis - Nitrogen stress condition in
Spinach
Data and packages
library(mixOmics)
library(readxl)
library(textshape)
database <- read.csv("C:/Users/cesar/Desktop/ESPINACA MAESTRÍA/TRATAMIENTOS/Nitrógeno/Matriz Datos Espinaca Nitrógeno.csv")
view <- database
database <- textshape::column_to_rownames(database, loc = 1)
spinach <- as.data.frame(database)
spinach <- subset(spinach, select= -c(Class))
View(spinach)
spinach
## Valine Alanine GABA Glutamate Malate Succinate Citrate Aspartate Betaine
## 1 144.7 285.5 1040.9 725.5 869.9 125.4 433.5 1037.3 1853.2
## 2 64.1 116.1 432.4 365.2 70.0 23.5 22.9 372.3 506.0
## 3 104.2 215.8 534.9 686.3 312.5 56.5 243.3 607.6 1645.5
## 4 146.0 244.1 1210.0 691.5 205.5 54.4 232.9 552.0 1612.2
## 5 72.0 91.9 558.0 564.7 278.1 57.3 691.5 303.5 928.3
## 6 78.7 100.4 763.6 437.3 187.0 46.1 431.7 290.4 1330.1
## 7 127.2 181.2 1117.0 426.1 319.2 56.5 184.5 447.3 1520.3
## 8 65.9 101.4 547.3 465.6 410.8 85.5 377.9 255.0 1101.6
## 9 86.5 149.7 619.8 527.4 248.4 65.9 456.8 508.0 1420.4
## 10 80.9 116.7 632.6 347.9 216.0 46.8 155.0 288.3 936.9
## 11 185.3 438.2 1167.0 759.9 62.1 28.4 601.1 715.8 4573.2
## 12 137.1 288.9 516.5 535.2 14.2 8.8 16.5 309.8 1184.3
## 13 136.2 168.8 737.4 386.4 0.0 13.9 121.1 273.9 1510.1
## 14 103.0 176.6 692.7 252.3 0.0 16.4 38.6 387.1 1114.2
## 15 90.3 150.2 583.8 296.7 0.0 17.4 72.2 336.1 898.1
## 16 134.5 311.6 640.4 406.1 26.0 15.0 133.5 473.7 1688.0
## 17 136.2 213.6 796.8 370.3 0.0 12.3 118.3 275.1 1591.7
## 18 115.1 192.3 544.2 341.6 0.0 7.6 20.8 268.7 1257.8
## 19 152.1 243.7 830.4 482.6 0.0 17.5 73.4 310.1 1493.3
## 20 108.5 169.3 684.1 163.1 96.4 21.4 128.2 313.9 1427.7
## 21 201.8 325.4 891.1 399.5 124.3 25.7 316.7 480.3 3828.2
## 22 187.1 317.5 1059.4 336.9 67.4 29.7 208.4 481.4 2002.6
## 23 188.4 276.1 965.4 601.0 270.2 52.1 449.9 391.7 2747.7
## 24 174.3 221.7 635.8 323.9 123.9 42.5 498.5 353.9 2641.7
## 25 111.5 183.0 797.0 480.3 302.9 37.8 362.1 352.4 1448.8
## 26 128.2 159.3 790.7 383.0 51.1 26.5 86.1 341.6 1005.3
## 27 84.7 162.9 575.7 159.0 53.6 13.2 143.5 287.1 802.0
## 28 115.1 235.4 885.4 257.9 72.0 25.9 131.6 374.4 565.5
## 29 114.2 173.1 715.9 639.0 238.0 42.9 466.7 408.7 1360.7
## 30 124.0 149.6 661.8 268.5 85.1 29.8 155.4 177.7 1441.5
## Glucose Fructose Sucrose Ascorbate Uridine Adenosine Fumarate Tyrosine
## 1 768.8 767.2 902.0 103.5 225.1 163.6 247.3 79.8
## 2 301.3 294.7 187.0 30.1 120.3 107.7 27.9 32.0
## 3 477.6 556.2 501.1 71.2 136.8 120.0 81.1 70.1
## 4 683.2 629.7 800.4 191.4 208.3 183.7 76.1 78.9
## 5 376.2 352.5 407.1 234.4 106.1 108.0 79.2 34.7
## 6 366.7 327.4 266.6 137.4 122.6 146.0 54.9 40.7
## 7 614.8 542.5 302.4 194.7 144.8 172.4 82.3 61.1
## 8 365.4 333.6 510.2 51.8 105.0 55.2 125.6 31.3
## 9 431.5 365.2 282.0 138.1 129.2 129.9 75.7 42.5
## 10 463.2 394.6 385.4 51.9 121.9 104.1 60.3 39.7
## 11 1521.9 1049.0 345.3 265.4 322.4 337.4 36.4 90.1
## 12 749.5 541.2 49.2 0.0 193.3 126.7 15.4 65.9
## 13 413.5 284.6 0.0 47.5 253.8 236.3 21.0 59.1
## 14 803.1 695.1 43.1 0.0 231.9 207.2 23.6 47.4
## 15 1140.6 889.8 69.8 0.0 199.6 175.9 20.9 42.2
## 16 1004.2 822.6 327.7 26.6 169.6 146.8 36.3 73.7
## 17 284.8 121.2 0.0 149.0 231.7 267.8 13.9 45.8
## 18 472.4 347.7 11.8 0.0 197.5 181.4 15.4 41.1
## 19 516.1 365.1 0.0 31.7 240.2 190.3 18.5 56.8
## 20 669.8 559.7 126.5 0.0 203.1 165.4 36.8 57.0
## 21 680.1 750.6 22.9 77.7 314.7 341.1 48.0 90.4
## 22 958.1 796.0 177.0 158.5 299.5 295.1 42.8 92.8
## 23 1296.0 1014.6 126.8 238.2 275.5 392.6 83.7 71.3
## 24 839.6 685.5 68.4 202.7 208.7 205.8 58.0 75.8
## 25 760.7 647.6 295.6 97.0 182.6 230.9 69.3 61.0
## 26 517.4 428.7 53.8 0.0 163.3 140.5 18.0 56.1
## 27 493.7 428.0 493.7 0.0 106.9 89.1 26.6 43.6
## 28 1043.1 842.7 207.3 0.0 211.6 185.6 36.6 54.8
## 29 669.1 592.6 343.1 51.1 161.0 171.8 72.7 63.5
## 30 342.4 230.5 32.8 170.3 169.7 196.9 31.4 60.0
## Phenylalanine Guanosine Formate Choline Ferulate Glycerol Isoleucine Leucine
## 1 82.5 192.6 51.6 285.9 35.0 494.4 92.3 156.8
## 2 34.0 117.2 33.9 183.0 29.6 209.7 37.4 72.0
## 3 67.6 136.1 65.1 227.5 11.7 353.1 70.5 145.3
## 4 85.0 203.1 32.1 278.1 36.7 386.0 86.5 163.5
## 5 37.8 108.9 24.7 180.6 23.2 215.7 46.1 75.6
## 6 37.2 130.1 26.2 176.4 29.2 234.0 48.3 79.9
## 7 58.0 150.9 51.0 266.4 47.1 375.6 70.0 120.4
## 8 38.3 98.8 29.1 136.9 12.7 211.6 44.0 75.3
## 9 45.1 133.9 37.2 211.8 21.0 319.1 48.5 88.5
## 10 44.0 110.5 24.3 175.2 23.1 224.8 48.5 83.5
## 11 91.8 391.7 37.1 723.1 64.1 507.5 88.3 164.6
## 12 66.2 174.0 48.0 233.3 26.0 211.3 72.2 121.5
## 13 52.8 320.3 24.0 224.2 38.2 256.1 68.1 120.1
## 14 47.8 244.0 26.0 210.5 35.5 268.3 57.8 99.0
## 15 39.2 216.7 22.9 181.2 45.0 206.9 49.5 82.7
## 16 78.6 181.1 47.3 249.4 37.0 307.7 91.1 190.3
## 17 48.2 278.7 41.0 197.5 30.6 256.7 60.8 108.3
## 18 50.6 237.2 50.3 151.9 25.9 237.1 57.9 99.9
## 19 52.4 236.5 32.1 252.5 31.2 304.9 66.8 121.1
## 20 64.6 225.8 22.5 215.7 23.0 302.8 60.6 112.3
## 21 86.1 373.6 73.6 309.9 37.0 469.3 110.5 202.4
## 22 92.6 336.9 26.9 327.7 41.6 447.9 100.4 182.8
## 23 101.7 337.7 45.0 231.7 51.3 409.7 96.7 158.0
## 24 91.9 225.7 28.6 235.0 29.6 304.6 100.4 175.8
## 25 47.3 206.1 38.3 190.4 41.2 265.3 58.5 104.1
## 26 55.1 172.4 21.3 160.4 35.1 206.6 70.7 120.1
## 27 50.4 117.2 20.1 153.3 19.4 189.8 46.9 94.9
## 28 62.4 242.1 32.8 245.8 53.9 295.3 67.1 122.3
## 29 57.2 163.0 19.9 196.4 32.8 282.4 67.6 121.8
## 30 61.0 210.8 29.0 185.1 23.8 263.0 71.5 126.2
## p.Cumarate
## 1 17.9
## 2 12.3
## 3 13.9
## 4 24.5
## 5 12.2
## 6 13.0
## 7 15.7
## 8 7.7
## 9 12.6
## 10 11.0
## 11 38.1
## 12 16.0
## 13 19.8
## 14 19.0
## 15 17.2
## 16 23.0
## 17 22.6
## 18 14.4
## 19 19.9
## 20 22.5
## 21 25.4
## 22 36.8
## 23 31.8
## 24 26.7
## 25 23.7
## 26 16.0
## 27 14.4
## 28 30.9
## 29 18.8
## 30 24.7
X <- spinach
Y <- database$Class
dim(X)
## [1] 30 26
“Sparse Principal Component Analysis” sPCA
explainedVariance <- tune.pca(X, ncomp = 10, center = TRUE, scale = TRUE)
plot(explainedVariance)

test.keepX <- c(seq(26))
tune.spca.res <- tune.spca(X, ncomp = 3,
nrepeat = 5,
folds = 10,
test.keepX = test.keepX)
plot(tune.spca.res)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the mixOmics package.
## Please report the issue at
## <https://github.com/mixOmicsTeam/mixOmics/issues/>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

spca <- spca(X, ncomp = 3,
scale = TRUE,
center = TRUE)
plotIndiv(spca, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "green", "blue"),
legend = TRUE, title = 'Nitrogen stress in Spinach')

plotVar(spca, comp = c(1, 2), var.names = TRUE,
cutoff = 0,
rad.in = 1,
title = 'Nitrogen stress in spinach')

biplot(spca, cex = 1,
group = database$Class,
pch.size = 5,
cutoff = 0.5,
size.legend = 20,
size.xlabel = 20,
size.ylabel = 20,
col = c("red", "green", "blue"),
title = 'Nitrogen stress in Spinach')
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

plotLoadings(spca, comp = 1,
size.title = 1,
size.name = 1,
size.axis = 1,
ncomp = 26)

plotLoadings(spca, comp = 2,
size.title = 1,
size.name = 1,
size.axis = 1,
ncomp = 26)

“Sparse Partial Least Squares-Discriminant Analysis” sPLS-DA
splsda <- splsda(X, Y, ncomp = 10, scale = TRUE)
set.seed(30)
plotIndiv(splsda, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "green", "blue"),
legend = TRUE, title = 'Nitrogen stress in Spinach')

perf.splsda <- perf(splsda, validation = "Mfold",
folds = 5, nrepeat = 50,
progressBar = FALSE, auc = TRUE)
plot(perf.splsda, sd = TRUE, legend.position = "vertical")

perf.splsda$choice.ncomp
## max.dist centroids.dist mahalanobis.dist
## overall 2 2 2
## BER 2 2 2
tune.splsda <- tune.splsda(X, Y, ncomp = 3,
validation = 'Mfold',
folds = 5, nrepeat = 50,
dist = 'max.dist',
test.keepX = c (5, 10, 15, 20, 26),
measure = "BER")
plot(tune.splsda)

final.splsda <- splsda(X, Y, ncomp = 2, keepX = c(16, 26) , scale = TRUE)
plotIndiv(final.splsda, comp = c(1, 2), ind.names = TRUE,
group = database$Class,
ellipse = TRUE,
cutoff = 0.5,
size.title = 15,
size.legend = 15,
size.xlabel = 15,
size.ylabel = 15,
col = c("red", "green", "blue"),
legend = TRUE, title = 'Nitrogen stress in Spinach')

plotVar(final.splsda, comp = c(1, 2), var.names = TRUE,
cutoff = 0,
rad.in = 1,
title = 'Nitrogen stress in spinach')

biplot(final.splsda, cex = 1,
group = database$Class,
pch.size = 5,
cutoff = 0,
size.legend = 20,
size.xlabel = 20,
size.ylabel = 20,
col = c("red", "green", "blue"),
title = 'Nitrogen stress in Spinach')

plotLoadings(final.splsda, comp = 1,
size.title = 1,
size.name = 1)

plotLoadings(final.splsda, comp = 2,
size.title = 1,
size.name = 1)

sPLS-DA model evaluation
perf.res <- perf.assess(final.splsda, dist = "max.dist",
validation = "Mfold",
folds = 5,
nrepeat = 50)
perf.res$error.rate$overall[,'max.dist']
## [1] 0.3286667
perf.res$error.rate.class$max.dist
## Deficiency Excess Standard
## 0.212 0.658 0.116
summary(Y)
## Length Class Mode
## 30 character character
perf.res$error.rate$BER[,'max.dist']
## [1] 0.3286667
auc.plsda <- auroc(final.splsda, roc.comp = 2, print = FALSE)
