Analysis of NMR Data

Import and process NMR Data: bin=0.01

Import data

# Read the tab-delimited export without treating its first line as a header.
Ultrafiltrato.HCl.ns128.replicati.maggio2015.norm.bin001 <- read.delim(
  "./Ultrafiltrato-HCl-ns128-replicati-maggio2015-norm-bin001.txt",
  header = FALSE
)

# Transpose the table so that the rows of the file become columns.
data.001 <- as.data.frame(
  t(Ultrafiltrato.HCl.ns128.replicati.maggio2015.norm.bin001)
)

# The first transposed row supplies the column names (presumably the bin
# labels -- confirm against the input file); rows 2 to 24 are kept as data.
names(data.001) <- as.character(data.001[1, ])
data.001 <- data.001[2:24, ]

Remove Variables with 0 values

# Flag the columns whose values are all exactly zero. Every value is tested
# (instead of the column sum) so that a column whose positive and negative
# entries cancel out is not discarded by mistake. isTRUE() keeps a column
# that contains NA rather than producing an NA selector.
remCol <- vapply(data.001, function(x) isTRUE(all(x == 0)), logical(1))

# drop = FALSE keeps a data frame even if only one column survives.
data.001 <- data.001[, !remCol, drop = FALSE]

Patient no. 3 is not present in this data set, so its row is inserted using the values from the previous analysis (May 2015).

# Old data: read the earlier (already transposed) data set.
data.001_old <- read.csv("./UF-HCl_ns128_feb15_normTA_bin001_Transposed.txt")

# The old table is relabelled by position, which is only meaningful when both
# tables have the same number of columns -- fail early if they do not.
stopifnot(ncol(data.001_old) == ncol(data.001))
names(data.001_old) <- names(data.001)

# Add the line for patient 3: row 3 of the old data is inserted between
# rows 2 and 3 of the current data.
data.001 <- rbind(
  data.001[1:2, ],
  data.001_old[3, ],
  data.001[3:nrow(data.001), ]
)

# Correct the row names: P1, P2, ... for however many rows are present.
row.names(data.001) <- paste0("P", seq_len(nrow(data.001)))
dim(data.001)
## [1]  24 870

Negative values are now transformed into positive values.

Data Analysis

Plot boxplots of every bin to look at variance

# One box per column of data.001 (entire spectrum); outline = FALSE hides the
# outlier points. FALSE is spelled out because F can be reassigned.
boxplot(data.001, outline = FALSE)

ChemoSpec analysis

ChemoSpec is an R package for the analysis of spectra.

Form .csv files for ChemoSpec use (grouped as PD vs HF (Ultrafiltration))

Build ChemoSpec data

# Attach the packages with library() so that a missing package stops the
# script here; require() would only return FALSE and let later calls fail.
library("ChemoSpec")
library("R.utils")
library("RColorBrewer")

# Build the spectra object with two groups ("PD" and "HF"), coloured with the
# first two colours of the "Set1" palette, writing the result under the name
# "NMR_001".
files2SpectraObject(
  gr.crit = c("PD", "HF"),
  gr.cols = brewer.pal(3, "Set1")[1:2],
  freq.unit = "ppm",
  int.unit = "peak intensity",
  descrip = "PD Dialysate Study",
  out.file = "NMR_001"
)

# Reload the saved object from disk.
PD_NMR.001 <- loadObject("NMR_001.RData")

Analyse Spectra Plots

# Title prefix shared by every spectra plot below.
myTitle <- "PD NMR Dialysate Spectra"

# IQR summary of the spectra, with all groups pooled (by.gr = FALSE).
# NOTE(review): PD_noHF_NMR.001 is not created by any code shown before this
# point -- confirm it exists when this call runs.
surveySpectra(
  PD_noHF_NMR.001,
  method = "iqr",
  main = paste(myTitle, "- IQR summary"),
  by.gr = FALSE
)

# Five selected spectra, stacked with a vertical offset.
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- pts 12, 16, 20, 3 and 9"),
  which = c(1, 3, 12, 18, 24),
  yrange = c(0, 5),
  offset = 0.4,
  lab.pos = 8
)

# The region plots below suppress the default x axis (xaxt = "n") and then
# draw their own axis with a tick every 0.025.

# Region [-0.15, 2], all 24 spectra, with explicit tick labels.
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [-0.15,2]"),
  which = 1:24,
  xlim = c(-0.15, 2),
  yrange = c(0, 12),
  offset = 0.25,
  lab.pos = .25,
  xaxt = "n"
)
axis(
  1,
  at = seq(-0.15, 2, .025),
  labels = as.character(seq(-0.15, 2, .025))
)

# Region [2, 3.25], all 24 spectra.
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [2,3.25]"),
  which = 1:24,
  xlim = c(2, 3.25),
  yrange = c(0, 6),
  offset = 0.15,
  lab.pos = 2.3,
  xaxt = "n"
)
axis(1, at = seq(2, 3.25, .025))

# Region [3.25, 6], all 24 spectra.
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [3.25,6]"),
  which = 1:24,
  xlim = c(3.25, 6),
  yrange = c(0, 7),
  offset = 0.15,
  lab.pos = 5.95,
  xaxt = "n"
)
axis(1, at = seq(3.25, 6, .025))

# Region [6, 9], all 24 spectra (note the much smaller intensity range).
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [6,9]"),
  which = 1:24,
  xlim = c(6, 9),
  yrange = c(0, .6),
  offset = 0.025,
  lab.pos = 8.5,
  xaxt = "n"
)
axis(1, at = seq(6, 9, .025))

Remove HF group from spectra

Remove Glucose region 4.7-5.25 (?)

Check Gaps

# Look for gaps along the frequency axis, using the first spectrum as the
# intensity trace, and plot the result.
check4Gaps(
  PD_Final_NMR.001$freq,
  PD_Final_NMR.001$data[1, ],
  plot = TRUE
)

##   beg.freq end.freq    size beg.indx end.indx
## 1 -0.15000  4.65525 4.80525        1      481
## 2  5.20585  9.00000 3.79415      482      861

HCA

# Hierarchical cluster analysis of the final spectra set; the result is kept
# in HCA.001.
HCA.001 <- hcaSpectra(
  PD_Final_NMR.001,
  main = "PD NMR Spectra"
)

PCA

# Classical PCA of the final spectra set with Pareto scaling.
PCA.001 <- c_pcaSpectra(PD_Final_NMR.001, choice = "Pareto")

# Score plot of PC1 vs PC2 with both kinds of ellipse drawn.
plotScores(
  PD_Final_NMR.001,
  PCA.001,
  main = "PD Dialysate NMR Spectra",
  pcs = c(1, 2),
  ellipse = "both",
  tol = 1.0
)

PCA Diagnostic

# PCA diagnostics on two components: first the "OD" plot ...
diagnostics <- pcaDiag(
  PD_Final_NMR.001,
  PCA.001,
  pcs = 2,
  plot = "OD"
)

# ... then the "SD" plot. `diagnostics` is overwritten by this second call.
diagnostics <- pcaDiag(
  PD_Final_NMR.001,
  PCA.001,
  pcs = 2,
  plot = "SD"
)

Remove Outliers

# Drop the sample named "PD13_Patient15" and summarise what is left.
PD_Outlier_NMR.001 <- removeSample(
  PD_Final_NMR.001,
  rem.sam = c("PD13_Patient15")
)
sumSpectra(PD_Outlier_NMR.001)
# Disabled check for remaining sample names matching "NMR_PD_13":
#grep("NMR_PD_13", PD_Outlier_NMR.001$names)

PCA without outliers

# Repeat the Pareto-scaled classical PCA on the outlier-free spectra set.
PCA.001_out <- c_pcaSpectra(PD_Outlier_NMR.001, choice = "Pareto")

# Score plot of PC1 vs PC2 with both kinds of ellipse drawn.
plotScores(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  pcs = c(1, 2),
  ellipse = "both",
  tol = 1.0
)

Number of PCs needed

# Two scree-plot variants for the outlier-free PCA.
plotScree(
  pca = PCA.001_out,
  main = "PD Dialysate NMR Spectra"
)

plotScree2(
  PCA.001_out,
  main = "PD Dialysate NMR Spectra"
)

# Cross-validated PCA with up to 10 components, Pareto scaling.
out <- cv_pcaSpectra(
  PD_Outlier_NMR.001,
  pcs = 10,
  choice = "Pareto"
)

3D score plot

# 3D score plot of the outlier-free PCA, without ellipses. FALSE is spelled
# out because the shorthand F can be reassigned.
plotScores3D(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  ellipse = FALSE,
  ptsize = 1.75
)

Loading Plot

# Loadings 1 to 3 of the outlier-free PCA, with spectrum 1 as the reference.
plotLoadings(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  loads = 1:3,
  ref = 1
)

Plotting One Loading vs. Another

# Loading 1 plotted against loading 2 for the outlier-free PCA.
plot2Loadings(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  loads = 1:2,
  tol = 0.02
)

Plot to Identify Influential Frequencies as in [1]

# S-plot for PC1 over the full range. `spt` is overwritten by each call below.
spt <- sPlotSpectra(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  pc = 1,
  tol = 0.02
)

# Detail of the positive corner of the S-plot.
spt <- sPlotSpectra(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "Detail of PD Dialysate NMR Spectra",
  pc = 1,
  tol = 0.05,
  xlim = c(2, 6.5),
  ylim = c(.5, 1.05)
)

# Detail of the negative corner of the S-plot. The limits are given in
# ascending order: a descending pair (e.g. c(-3, -6.5)) reverses the axis in
# R graphics, which would mirror this view relative to the plots above.
spt <- sPlotSpectra(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "Detail of PD Dialysate NMR Spectra",
  pc = 1,
  tol = 0.05,
  xlim = c(-6.5, -3),
  ylim = c(-1.05, -.5)
)

HCA with PCA results

# Hierarchical cluster analysis on the first five PCA scores of the
# outlier-free set. This reuses (and overwrites) the name HCA.001.
HCA.001 <- hcaScores(
  PD_Outlier_NMR.001,
  PCA.001_out,
  scores = 1:5,
  main = "PD Dialysate NMR Spectra"
)

References

[1.] S. Wiklund, E. Johansson, L. Sjostrom, E. J. Mellerowicz, U. Edlund, J. P. Shockcor, J. Gottfries, T. Moritz, and J. Trygg, "Visualization of gc/tof-ms-based metabolomics data for identification of biochemically interesting compounds using opls class models," Analytical Chemistry, vol. 80, no. 1, pp. 115-122, 2008.