Supplementary information

R script for Multivariate analysis - Nitrogen stress condition in Spinach

Data and packages

library(mixOmics)
library(readxl)
library(textshape)

database <- read.csv("C:/Users/cesar/Desktop/ESPINACA MAESTRÍA/TRATAMIENTOS/Nitrógeno/Matriz Datos Espinaca Nitrógeno.csv")

view <- database

database <- textshape::column_to_rownames(database, loc = 1)

spinach <- as.data.frame(database)

spinach <- subset(spinach, select= -c(Class))

View(spinach)

spinach
##    Valine Alanine   GABA Glutamate Malate Succinate Citrate Aspartate Betaine
## 1   144.7   285.5 1040.9     725.5  869.9     125.4   433.5    1037.3  1853.2
## 2    64.1   116.1  432.4     365.2   70.0      23.5    22.9     372.3   506.0
## 3   104.2   215.8  534.9     686.3  312.5      56.5   243.3     607.6  1645.5
## 4   146.0   244.1 1210.0     691.5  205.5      54.4   232.9     552.0  1612.2
## 5    72.0    91.9  558.0     564.7  278.1      57.3   691.5     303.5   928.3
## 6    78.7   100.4  763.6     437.3  187.0      46.1   431.7     290.4  1330.1
## 7   127.2   181.2 1117.0     426.1  319.2      56.5   184.5     447.3  1520.3
## 8    65.9   101.4  547.3     465.6  410.8      85.5   377.9     255.0  1101.6
## 9    86.5   149.7  619.8     527.4  248.4      65.9   456.8     508.0  1420.4
## 10   80.9   116.7  632.6     347.9  216.0      46.8   155.0     288.3   936.9
## 11  185.3   438.2 1167.0     759.9   62.1      28.4   601.1     715.8  4573.2
## 12  137.1   288.9  516.5     535.2   14.2       8.8    16.5     309.8  1184.3
## 13  136.2   168.8  737.4     386.4    0.0      13.9   121.1     273.9  1510.1
## 14  103.0   176.6  692.7     252.3    0.0      16.4    38.6     387.1  1114.2
## 15   90.3   150.2  583.8     296.7    0.0      17.4    72.2     336.1   898.1
## 16  134.5   311.6  640.4     406.1   26.0      15.0   133.5     473.7  1688.0
## 17  136.2   213.6  796.8     370.3    0.0      12.3   118.3     275.1  1591.7
## 18  115.1   192.3  544.2     341.6    0.0       7.6    20.8     268.7  1257.8
## 19  152.1   243.7  830.4     482.6    0.0      17.5    73.4     310.1  1493.3
## 20  108.5   169.3  684.1     163.1   96.4      21.4   128.2     313.9  1427.7
## 21  201.8   325.4  891.1     399.5  124.3      25.7   316.7     480.3  3828.2
## 22  187.1   317.5 1059.4     336.9   67.4      29.7   208.4     481.4  2002.6
## 23  188.4   276.1  965.4     601.0  270.2      52.1   449.9     391.7  2747.7
## 24  174.3   221.7  635.8     323.9  123.9      42.5   498.5     353.9  2641.7
## 25  111.5   183.0  797.0     480.3  302.9      37.8   362.1     352.4  1448.8
## 26  128.2   159.3  790.7     383.0   51.1      26.5    86.1     341.6  1005.3
## 27   84.7   162.9  575.7     159.0   53.6      13.2   143.5     287.1   802.0
## 28  115.1   235.4  885.4     257.9   72.0      25.9   131.6     374.4   565.5
## 29  114.2   173.1  715.9     639.0  238.0      42.9   466.7     408.7  1360.7
## 30  124.0   149.6  661.8     268.5   85.1      29.8   155.4     177.7  1441.5
##    Glucose Fructose Sucrose Ascorbate Uridine Adenosine Fumarate Tyrosine
## 1    768.8    767.2   902.0     103.5   225.1     163.6    247.3     79.8
## 2    301.3    294.7   187.0      30.1   120.3     107.7     27.9     32.0
## 3    477.6    556.2   501.1      71.2   136.8     120.0     81.1     70.1
## 4    683.2    629.7   800.4     191.4   208.3     183.7     76.1     78.9
## 5    376.2    352.5   407.1     234.4   106.1     108.0     79.2     34.7
## 6    366.7    327.4   266.6     137.4   122.6     146.0     54.9     40.7
## 7    614.8    542.5   302.4     194.7   144.8     172.4     82.3     61.1
## 8    365.4    333.6   510.2      51.8   105.0      55.2    125.6     31.3
## 9    431.5    365.2   282.0     138.1   129.2     129.9     75.7     42.5
## 10   463.2    394.6   385.4      51.9   121.9     104.1     60.3     39.7
## 11  1521.9   1049.0   345.3     265.4   322.4     337.4     36.4     90.1
## 12   749.5    541.2    49.2       0.0   193.3     126.7     15.4     65.9
## 13   413.5    284.6     0.0      47.5   253.8     236.3     21.0     59.1
## 14   803.1    695.1    43.1       0.0   231.9     207.2     23.6     47.4
## 15  1140.6    889.8    69.8       0.0   199.6     175.9     20.9     42.2
## 16  1004.2    822.6   327.7      26.6   169.6     146.8     36.3     73.7
## 17   284.8    121.2     0.0     149.0   231.7     267.8     13.9     45.8
## 18   472.4    347.7    11.8       0.0   197.5     181.4     15.4     41.1
## 19   516.1    365.1     0.0      31.7   240.2     190.3     18.5     56.8
## 20   669.8    559.7   126.5       0.0   203.1     165.4     36.8     57.0
## 21   680.1    750.6    22.9      77.7   314.7     341.1     48.0     90.4
## 22   958.1    796.0   177.0     158.5   299.5     295.1     42.8     92.8
## 23  1296.0   1014.6   126.8     238.2   275.5     392.6     83.7     71.3
## 24   839.6    685.5    68.4     202.7   208.7     205.8     58.0     75.8
## 25   760.7    647.6   295.6      97.0   182.6     230.9     69.3     61.0
## 26   517.4    428.7    53.8       0.0   163.3     140.5     18.0     56.1
## 27   493.7    428.0   493.7       0.0   106.9      89.1     26.6     43.6
## 28  1043.1    842.7   207.3       0.0   211.6     185.6     36.6     54.8
## 29   669.1    592.6   343.1      51.1   161.0     171.8     72.7     63.5
## 30   342.4    230.5    32.8     170.3   169.7     196.9     31.4     60.0
##    Phenylalanine Guanosine Formate Choline Ferulate Glycerol Isoleucine Leucine
## 1           82.5     192.6    51.6   285.9     35.0    494.4       92.3   156.8
## 2           34.0     117.2    33.9   183.0     29.6    209.7       37.4    72.0
## 3           67.6     136.1    65.1   227.5     11.7    353.1       70.5   145.3
## 4           85.0     203.1    32.1   278.1     36.7    386.0       86.5   163.5
## 5           37.8     108.9    24.7   180.6     23.2    215.7       46.1    75.6
## 6           37.2     130.1    26.2   176.4     29.2    234.0       48.3    79.9
## 7           58.0     150.9    51.0   266.4     47.1    375.6       70.0   120.4
## 8           38.3      98.8    29.1   136.9     12.7    211.6       44.0    75.3
## 9           45.1     133.9    37.2   211.8     21.0    319.1       48.5    88.5
## 10          44.0     110.5    24.3   175.2     23.1    224.8       48.5    83.5
## 11          91.8     391.7    37.1   723.1     64.1    507.5       88.3   164.6
## 12          66.2     174.0    48.0   233.3     26.0    211.3       72.2   121.5
## 13          52.8     320.3    24.0   224.2     38.2    256.1       68.1   120.1
## 14          47.8     244.0    26.0   210.5     35.5    268.3       57.8    99.0
## 15          39.2     216.7    22.9   181.2     45.0    206.9       49.5    82.7
## 16          78.6     181.1    47.3   249.4     37.0    307.7       91.1   190.3
## 17          48.2     278.7    41.0   197.5     30.6    256.7       60.8   108.3
## 18          50.6     237.2    50.3   151.9     25.9    237.1       57.9    99.9
## 19          52.4     236.5    32.1   252.5     31.2    304.9       66.8   121.1
## 20          64.6     225.8    22.5   215.7     23.0    302.8       60.6   112.3
## 21          86.1     373.6    73.6   309.9     37.0    469.3      110.5   202.4
## 22          92.6     336.9    26.9   327.7     41.6    447.9      100.4   182.8
## 23         101.7     337.7    45.0   231.7     51.3    409.7       96.7   158.0
## 24          91.9     225.7    28.6   235.0     29.6    304.6      100.4   175.8
## 25          47.3     206.1    38.3   190.4     41.2    265.3       58.5   104.1
## 26          55.1     172.4    21.3   160.4     35.1    206.6       70.7   120.1
## 27          50.4     117.2    20.1   153.3     19.4    189.8       46.9    94.9
## 28          62.4     242.1    32.8   245.8     53.9    295.3       67.1   122.3
## 29          57.2     163.0    19.9   196.4     32.8    282.4       67.6   121.8
## 30          61.0     210.8    29.0   185.1     23.8    263.0       71.5   126.2
##    p.Cumarate
## 1        17.9
## 2        12.3
## 3        13.9
## 4        24.5
## 5        12.2
## 6        13.0
## 7        15.7
## 8         7.7
## 9        12.6
## 10       11.0
## 11       38.1
## 12       16.0
## 13       19.8
## 14       19.0
## 15       17.2
## 16       23.0
## 17       22.6
## 18       14.4
## 19       19.9
## 20       22.5
## 21       25.4
## 22       36.8
## 23       31.8
## 24       26.7
## 25       23.7
## 26       16.0
## 27       14.4
## 28       30.9
## 29       18.8
## 30       24.7
X <- spinach

Y <- database$Class

dim(X)
## [1] 30 26

“Sparse Principal Component Analysis” sPCA

explainedVariance <- tune.pca(X, ncomp = 10, center = TRUE, scale = TRUE)

plot(explainedVariance)

test.keepX <- c(seq(26))

tune.spca.res <- tune.spca(X, ncomp = 3,
                           nrepeat = 5,
                           folds = 10,
                           test.keepX = test.keepX)

plot(tune.spca.res)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the mixOmics package.
##   Please report the issue at
##   <https://github.com/mixOmicsTeam/mixOmics/issues/>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

spca <- spca(X, ncomp = 3,
             scale = TRUE,
             center = TRUE)

plotIndiv(spca, comp = c(1, 2), ind.names = TRUE,
          group = database$Class,
          ellipse = TRUE,
          cutoff = 0.5,
          size.title = 15,
          size.legend = 15,
          size.xlabel = 15,
          size.ylabel = 15,
          col = c("red", "green", "blue"),
          legend = TRUE, title = 'Nitrogen stress in Spinach')

plotVar(spca, comp = c(1, 2), var.names = TRUE,
        cutoff = 0,
        rad.in = 1,
        title = 'Nitrogen stress in spinach')

biplot(spca, cex = 1,
       group = database$Class,
       pch.size = 5,
       cutoff = 0.5,
       size.legend = 20,
       size.xlabel = 20,
       size.ylabel = 20,
       col = c("red", "green", "blue"),
       title = 'Nitrogen stress in Spinach')
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

plotLoadings(spca, comp = 1,
             size.title = 1,
             size.name = 1,
             size.axis = 1,
             ncomp = 26)

plotLoadings(spca, comp = 2,
             size.title = 1,
             size.name = 1,
             size.axis = 1,
             ncomp = 26)

“Sparse Partial Least Squares-Discriminant Analysis” sPLS-DA

splsda <- splsda(X, Y, ncomp = 10, scale = TRUE)

set.seed(30)

plotIndiv(splsda, comp = c(1, 2), ind.names = TRUE,
          group = database$Class,
          ellipse = TRUE,
          cutoff = 0.5,
          size.title = 15,
          size.legend = 15,
          size.xlabel = 15,
          size.ylabel = 15,
          col = c("red", "green", "blue"),
          legend = TRUE, title = 'Nitrogen stress in Spinach')

perf.splsda <- perf(splsda, validation = "Mfold", 
                    folds = 5, nrepeat = 50,
                    progressBar = FALSE, auc = TRUE)

plot(perf.splsda, sd = TRUE, legend.position = "vertical")

perf.splsda$choice.ncomp
##         max.dist centroids.dist mahalanobis.dist
## overall        2              2                2
## BER            2              2                2
tune.splsda <- tune.splsda(X, Y, ncomp = 3, 
                           validation = 'Mfold',
                           folds = 5, nrepeat = 50, 
                           dist = 'max.dist',
                           test.keepX = c (5, 10, 15, 20, 26),
                           measure = "BER")
plot(tune.splsda)

final.splsda <- splsda(X, Y, ncomp = 2, keepX = c(16, 26) , scale = TRUE)

plotIndiv(final.splsda, comp = c(1, 2), ind.names = TRUE,
          group = database$Class,
          ellipse = TRUE,
          cutoff = 0.5,
          size.title = 15,
          size.legend = 15,
          size.xlabel = 15,
          size.ylabel = 15,
          col = c("red", "green", "blue"),
          legend = TRUE, title = 'Nitrogen stress in Spinach')

plotVar(final.splsda, comp = c(1, 2), var.names = TRUE,
        cutoff = 0,
        rad.in = 1,
        title = 'Nitrogen stress in spinach')

biplot(final.splsda, cex = 1,
       group = database$Class,
       pch.size = 5,
       cutoff = 0,
       size.legend = 20,
       size.xlabel = 20,
       size.ylabel = 20,
       col = c("red", "green", "blue"),
       title = 'Nitrogen stress in Spinach')

plotLoadings(final.splsda, comp = 1,
             size.title = 1,
             size.name = 1)

plotLoadings(final.splsda, comp = 2,
             size.title = 1,
             size.name = 1)

sPLS-DA model evaluation

perf.res <- perf.assess(final.splsda, dist = "max.dist",
                        validation = "Mfold", 
                        folds = 5, 
                        nrepeat = 50)

perf.res$error.rate$overall[,'max.dist']
## [1] 0.3286667
perf.res$error.rate.class$max.dist
## Deficiency     Excess   Standard 
##      0.212      0.658      0.116
summary(Y)
##    Length     Class      Mode 
##        30 character character
perf.res$error.rate$BER[,'max.dist']
## [1] 0.3286667
auc.plsda <- auroc(final.splsda, roc.comp = 2, print = FALSE)