Import data
# Load the header-less, tab-delimited input table.
Ultrafiltrato.HCl.ns128.replicati.maggio2015.norm.bin001 <- read.delim(
  "./Ultrafiltrato-HCl-ns128-replicati-maggio2015-norm-bin001.txt",
  header = FALSE
)

# Transpose the table so that its former columns become rows.
data.001 <- as.data.frame(
  t(Ultrafiltrato.HCl.ns128.replicati.maggio2015.norm.bin001)
)

# The first transposed row supplies the column names; rows 2 to 24 are kept
# as the data.
# NOTE(review): presumably row 1 holds the bin labels and rows 2-24 are the
# 23 samples -- confirm against the input file.
names(data.001) <- as.character(data.001[1, ])
data.001 <- data.001[seq(2, 24), ]
Remove Variables with 0 values
# Flag the variables (columns) whose value is 0 in every row. Each value is
# tested, rather than the column sum, so a column whose positive and negative
# entries cancel out is not flagged. isTRUE() makes a column containing NA
# count as "not all zero" instead of producing an NA flag.
remCol <- vapply(data.001, function(x) isTRUE(all(x == 0)), logical(1))
# Drop the flagged columns; drop = FALSE keeps a data frame even when a
# single column remains.
data.001 <- data.001[, !remCol, drop = FALSE]
Using values from the previous analysis (May 2015), insert Patient no. 3, who is not present in this data set
# old data
data.001_old <- read.csv("./UF-HCl_ns128_feb15_normTA_bin001_Transposed.txt")
# The old table is relabelled positionally with the current column names, so
# both tables must have the same number of columns; fail early with a clear
# message otherwise.
# NOTE(review): this also assumes the old columns are in the same order as
# the current ones -- confirm against the old file.
stopifnot(ncol(data.001_old) == ncol(data.001))
names(data.001_old) <- names(data.001)
# add line pz 3: row 3 of the old table is inserted after the first two rows
data.001 <- rbind(
  data.001[1:2, ],
  data.001_old[3, ],
  data.001[3:nrow(data.001), ]
)
# correct row names: P1, P2, ... for however many rows are present
row.names(data.001) <- paste0("P", seq_len(nrow(data.001)))
dim(data.001)
## [1] 24 870
Negative values are now transformed into positive values
Plot boxplots of every bin to look at variance
# One box per column of data.001; outlying points are not drawn.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
boxplot(data.001, outline = FALSE) # entire spectrum
ChemoSpec is an R package for the analysis of spectra.
Form .csv files for ChemoSpec use (grouped as PD vs HF (Ultrafiltration))
Build ChemoSpec data
# Attach the packages used below. library() stops with an error when a
# package is missing, whereas require() only returns FALSE and lets the
# script fail later at the first call into the missing package.
library("ChemoSpec")
library("R.utils")
library("RColorBrewer")

# Build the Spectra object for the two groups "PD" and "HF", coloured with
# the first two colours of the "Set1" palette.
# NOTE(review): presumably this reads the .csv files in the working directory
# and writes "NMR_001.RData" -- confirm in the ChemoSpec documentation.
files2SpectraObject(
  gr.crit = c("PD", "HF"),
  gr.cols = brewer.pal(3, "Set1")[1:2],
  freq.unit = "ppm",
  int.unit = "peak intensity",
  descrip = "PD Dialysate Study",
  out.file = "NMR_001"
)

# Load the saved object back into the session.
PD_NMR.001 <- loadObject("NMR_001.RData")
Analyse Spectra Plots
# Common prefix for the plot titles in this section.
myTitle <- "PD NMR Dialysate Spectra"

# Survey of the spectra using the "iqr" method, not split by group.
# NOTE(review): PD_noHF_NMR.001 is not created in the visible part of this
# file -- confirm where it is defined.
surveySpectra(
  PD_noHF_NMR.001,
  method = "iqr",
  main = paste(myTitle, "- IQR summary"),
  by.gr = FALSE
)

# Five selected spectra, stacked with a vertical offset.
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- pts 12, 16, 20, 3 and 9"),
  which = c(1, 3, 12, 18, 24),
  yrange = c(0, 5),
  offset = 0.4,
  lab.pos = 8
)

# Spectra 1 to 24, region by region. The default x axis is suppressed
# (xaxt = "n") and redrawn with a tick every 0.025.
plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [-0.15,2]"),
  which = 1:24,
  xlim = c(-0.15, 2),
  yrange = c(0, 12),
  offset = 0.25,
  lab.pos = 0.25,
  xaxt = "n"
)
axis(
  1,
  at = seq(-0.15, 2, 0.025),
  labels = as.character(seq(-0.15, 2, 0.025))
)

plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [2,3.25]"),
  which = 1:24,
  xlim = c(2, 3.25),
  yrange = c(0, 6),
  offset = 0.15,
  lab.pos = 2.3,
  xaxt = "n"
)
axis(1, at = seq(2, 3.25, 0.025))

plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [3.25,6]"),
  which = 1:24,
  xlim = c(3.25, 6),
  yrange = c(0, 7),
  offset = 0.15,
  lab.pos = 5.95,
  xaxt = "n"
)
axis(1, at = seq(3.25, 6, 0.025))

plotSpectra(
  PD_NMR.001,
  main = paste(myTitle, "- region [6,9]"),
  which = 1:24,
  xlim = c(6, 9),
  yrange = c(0, 0.6),
  offset = 0.025,
  lab.pos = 8.5,
  xaxt = "n"
)
axis(1, at = seq(6, 9, 0.025))
Remove HF group from spectra
Remove Glucose region 4.7-5.25 (?)
Check Gaps
# Check the frequency axis for gaps, passing the first row of the data
# matrix alongside it, and plot the result.
# NOTE(review): PD_Final_NMR.001 is not created in the visible part of this
# file -- confirm where it is defined.
check4Gaps(
  PD_Final_NMR.001$freq,
  PD_Final_NMR.001$data[1, ],
  plot = TRUE
)
## beg.freq end.freq size beg.indx end.indx
## 1 -0.15000 4.65525 4.80525 1 481
## 2 5.20585 9.00000 3.79415 482 861
HCA
# Hierarchical cluster analysis of the final spectra set; the result is kept
# in HCA.001.
HCA.001 <- hcaSpectra(
  PD_Final_NMR.001,
  main = "PD NMR Spectra"
)
PCA
# PCA of the final spectra set with the "Pareto" scaling choice.
PCA.001 <- c_pcaSpectra(
  PD_Final_NMR.001,
  choice = "Pareto"
)

# Score plot of PC1 against PC2 with both kinds of ellipse.
plotScores(
  PD_Final_NMR.001,
  PCA.001,
  main = "PD Dialysate NMR Spectra",
  pcs = c(1, 2),
  ellipse = "both",
  tol = 1.0
)
PCA Diagnostic
# PCA diagnostic plots: first the "OD" plot, then the "SD" plot.
# The second assignment overwrites the first, so only the "SD" result
# remains in `diagnostics` afterwards.
diagnostics <- pcaDiag(
  PD_Final_NMR.001,
  PCA.001,
  pcs = 2,
  plot = "OD"
)
diagnostics <- pcaDiag(
  PD_Final_NMR.001,
  PCA.001,
  pcs = 2,
  plot = "SD"
)
Remove Outliers
# Drop the sample named "PD13_Patient15" and summarise what remains.
PD_Outlier_NMR.001 <- removeSample(
  PD_Final_NMR.001,
  rem.sam = "PD13_Patient15"
)
sumSpectra(PD_Outlier_NMR.001)
#grep("NMR_PD_13", PD_Outlier_NMR.001$names)
PCA without outliers
# Repeat the "Pareto" PCA on the spectra set with the outlier removed.
PCA.001_out <- c_pcaSpectra(
  PD_Outlier_NMR.001,
  choice = "Pareto"
)

# Score plot of PC1 against PC2 with both kinds of ellipse.
plotScores(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  pcs = c(1, 2),
  ellipse = "both",
  tol = 1.0
)
Number of PCs needed
# Two scree plots of the outlier-free PCA.
plotScree(
  pca = PCA.001_out,
  main = "PD Dialysate NMR Spectra"
)
plotScree2(
  PCA.001_out,
  main = "PD Dialysate NMR Spectra"
)

# Cross-validated PCA with 10 components and "Pareto" scaling; the result is
# stored in `out`.
out <- cv_pcaSpectra(
  PD_Outlier_NMR.001,
  pcs = 10,
  choice = "Pareto"
)
3D score plot
# 3D score plot of the outlier-free PCA, drawn without ellipses.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
plotScores3D(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  ellipse = FALSE,
  ptsize = 1.75
)
Loading Plot
# Loadings 1 to 3 of the outlier-free PCA, with spectrum 1 as the reference.
plotLoadings(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  loads = 1:3,
  ref = 1
)
Plotting One Loading vs. Another
# Loading 1 plotted against loading 2 for the outlier-free PCA.
plot2Loadings(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  loads = 1:2,
  tol = 0.02
)
Plot to Identify Influential Frequencies as in [1]
# S-plot for PC1 over the full range, followed by two detail views.
# Each call overwrites `spt`, so only the last result is retained.
spt <- sPlotSpectra(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "PD Dialysate NMR Spectra",
  pc = 1,
  tol = 0.02
)

# Detail view with positive axis limits.
spt <- sPlotSpectra(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "Detail of PD Dialysate NMR Spectra",
  pc = 1,
  tol = 0.05,
  xlim = c(2, 6.5),
  ylim = c(0.5, 1.05)
)

# Detail view with negative axis limits.
# NOTE(review): both limits are given in descending order and x starts at -3
# rather than -2 -- confirm this is intended.
spt <- sPlotSpectra(
  PD_Outlier_NMR.001,
  PCA.001_out,
  main = "Detail of PD Dialysate NMR Spectra",
  pc = 1,
  tol = 0.05,
  xlim = c(-3, -6.5),
  ylim = c(-0.5, -1.05)
)
HCA with PCA results
# Hierarchical cluster analysis on scores 1 to 5 of the outlier-free PCA.
# This reuses the name HCA.001, replacing any value it held before.
HCA.001 <- hcaScores(
  PD_Outlier_NMR.001,
  PCA.001_out,
  scores = 1:5,
  main = "PD Dialysate NMR Spectra"
)
[1] S. Wiklund, E. Johansson, L. Sjöström, E. J. Mellerowicz, U. Edlund, J. P. Shockcor, J. Gottfries, T. Moritz, and J. Trygg, "Visualization of GC/TOF-MS-based metabolomics data for identification of biochemically interesting compounds using OPLS class models," Analytical Chemistry, vol. 80, no. 1, pp. 115-122, 2008.