#/////////////////////////////////////////////////////////////////////////////
#   Libraries
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\


library("gplots")
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
library("car")
library(limma)
library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from '.env':
## 
##     %nin%
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(VIM)
## Loading required package: colorspace
## VIM is ready to use. 
##  Since version 4.0.0 the GUI is in its own package VIMGUI.
## 
##           Please use the package to use the new (and old) GUI.
## 
## 
## Attaching package: 'VIM'
## 
## The following object is masked from 'package:datasets':
## 
##     sleep
library(Amelia)
## Loading required package: foreign
## Loading required package: Rcpp
## Loading required package: RcppArmadillo
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.2, built: 2013-04-03)
## ## Copyright (C) 2005-2014 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(ggplot2)
library(RColorBrewer)
library(car)
library(ggplot2)
library(reshape)
library(scales)


#/////////////////////////////////////////////////////////////////////////////
#   Read data
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\


setwd("~/ANDIS/2014-01_Metabolomics/Analysis/")

# metadata
MetaData <- read.csv("../RawData//ANDIS_Matrix_for_metabolomics.forAnalysis.20140121.csv",row.names=1)
MetaData$ID <- paste("ANDIS",MetaData$INDEX,sep="_")
rownames(MetaData) <- MetaData$ID

# metabolomics

RawDataIn <- read.csv("../RawData/ANDIS_RP_ESIn_140318.txt",row.names=1,comment.char = "#",sep="\t")

# Fic colnames and remove non-metabolites from dataframe
colnames(RawDataIn) <- sapply(strsplit(colnames(RawDataIn), "\\."), `[`, 1)
RawDataInS <- as.matrix(RawDataIn[,c(1:99)])
#colnames(RawDataIn) <- sapply(strsplit(colnames(RawDataIn), "\\."), `[`, 1)
#RawDataInS <- as.matrix(RawDataIn[,c(2:50,59:156)])

setdiff(MetaData$ID,colnames(RawDataInS))
##  [1] "ANDIS_5"   "ANDIS_6"   "ANDIS_7"   "ANDIS_9"   "ANDIS_11" 
##  [6] "ANDIS_17"  "ANDIS_18"  "ANDIS_20"  "ANDIS_22"  "ANDIS_31" 
## [11] "ANDIS_37"  "ANDIS_42"  "ANDIS_45"  "ANDIS_47"  "ANDIS_52" 
## [16] "ANDIS_55"  "ANDIS_56"  "ANDIS_58"  "ANDIS_59"  "ANDIS_60" 
## [21] "ANDIS_68"  "ANDIS_69"  "ANDIS_73"  "ANDIS_74"  "ANDIS_75" 
## [26] "ANDIS_76"  "ANDIS_77"  "ANDIS_80"  "ANDIS_90"  "ANDIS_93" 
## [31] "ANDIS_95"  "ANDIS_96"  "ANDIS_98"  "ANDIS_101" "ANDIS_103"
## [36] "ANDIS_104" "ANDIS_109" "ANDIS_110" "ANDIS_111" "ANDIS_113"
## [41] "ANDIS_114" "ANDIS_117" "ANDIS_122" "ANDIS_123" "ANDIS_129"
## [46] "ANDIS_133" "ANDIS_136" "ANDIS_137" "ANDIS_138" "ANDIS_139"
## [51] "ANDIS_140"

MetaDataS <- MetaData[colnames(RawDataInS),]

RawDataInS[RawDataInS==0] <- NA

png("missingness.png")
barplot(100*colSums(apply(RawDataInS,2,is.na))/nrow(RawDataInS),las=2,main="NA per samle",ylab="% of metabolites with NA")
dev.off()
## pdf 
##   2

#/////////////////////////////////////////////////////////////////////////////
#  Imputera missing data
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\


# Missing data



missmap(as.data.frame(RawDataInS),col=c("blue","grey"))

plot of chunk unnamed-chunk-1


RawDataInS_imputed<-apply(RawDataInS,1,impute)

missmap(as.data.frame(t(RawDataInS_imputed)),col=c("blue","grey"))

plot of chunk unnamed-chunk-1


RawData_complet <- RawDataInS[complete.cases(RawDataInS),order(MetaDataS$patid)]

RawDataInS <- RawDataInS[,order(MetaDataS$patid)]







RDClog3 <- RawDataInS
MetaData3 <- MetaDataS[colnames(RDClog3),]


MetaData3$DiaCol <- as.character(recode(MetaData3$diagresult_id, "'LDA'='pink';'T2DCLC'='black';'T1DABS'='steelblue'"))

#/////////////////////////////////////////////////////////////////////////////
#  QC
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

png("intensity_dist.png",w=1200,h=800,pointsize=18)
par(mfrow=c(2,1))
boxplot(RDClog3,las=2,col=MetaData3$DiaCol)

plotDensities(RDClog3,group=MetaData3$diagresult_id)
## Error: 'x' contains missing values
dev.off()
## pdf 
##   2


par(mfrow=c(1,1))
SampleCor<-cor(RDClog3,method="spearman", use = "pairwise.complete.obs")

#qqPlot(colMeans(SampleCor))
hmcol<-brewer.pal(12,"RdBu")
## Warning: n too large, allowed maximum for palette RdBu is 11
## Returning the palette you asked for with that many colors

pdf("Correlation_matrix_samples.pdf")

heatmap.2(SampleCor, col=bluered(101), scale="none", ColSideColors=as.character(MetaData3$DiaCol),
          RowSideColors=redgreen(ncol(SampleCor)),
          key=TRUE, symkey=FALSE, density.info="none", trace="none", cexRow=0.5)

dev.off()
## pdf 
##   2
# 
# SampleCorMetabo<-cor(t(RDClog3),method="pearson")
# 
# rownames(SampleCorMetabo) <- 1:2010
# 
# pdf("Correlation_matrix_metabolites.pdf")
# 
# heatmap.2(SampleCorMetabo, col=redgreen(75), scale="none", ColSideColors=as.character(MetaData3$DiaCol),
#           key=TRUE, symkey=FALSE, density.info="none", trace="none", cexRow=0.5)
# 
# dev.off()
# 
# ##

d <- dist(t(RDClog3),method="euclidean") # euclidean distances between the rows
fit <- cmdscale(d,eig=TRUE, k=3) # k is the number of dim
fit # view results
## $points
##              [,1]     [,2]     [,3]
## ANDIS_1   -45.764   5.5263 -62.4639
## ANDIS_2   -37.789  -3.6748 -63.2096
## ANDIS_3   -41.584  -9.7425 -52.3843
## ANDIS_4   -37.896 -14.6059 -46.6289
## ANDIS_8   -36.653   3.8569 -33.0596
## ANDIS_10  -35.595 -27.0872 -24.6627
## ANDIS_12  -47.516  18.6994 -16.9712
## ANDIS_13  -32.985 -26.7723 -24.7268
## ANDIS_14  -29.846  -3.1798 -32.6195
## ANDIS_15  -35.260 -23.1269 -16.4639
## ANDIS_16  -40.945   2.9511 -15.4338
## ANDIS_19  -48.411  15.6938  -1.7897
## ANDIS_21  -37.668  26.6617 -10.1342
## ANDIS_23  -34.738 -23.8295  -7.7259
## ANDIS_24  -34.509 -21.6500   0.5451
## ANDIS_25  -36.817 -26.5163  -8.6594
## ANDIS_26  -36.248 -17.2154  -7.2717
## ANDIS_27  -38.883  10.0633 -11.3839
## ANDIS_28  -29.119 -27.6402  -8.9357
## ANDIS_29   31.629 -13.7619 -28.9312
## ANDIS_30   33.772   4.0906 -26.4355
## ANDIS_32  -29.674  27.5048  -0.2679
## ANDIS_33  -32.756  17.6122  -8.7398
## ANDIS_34  -41.537  15.9313   8.2326
## ANDIS_35  -39.772  27.4407   7.2881
## ANDIS_36  -20.060  34.1900  -2.7112
## ANDIS_38  -29.762   1.3044  -6.7610
## ANDIS_39  -27.288 -34.5455   5.4264
## ANDIS_40  -25.282  -2.3324   6.6179
## ANDIS_41  -23.055 -34.7235  19.4354
## ANDIS_43  -36.396  19.7258  -1.9264
## ANDIS_44   30.758  14.4963 -15.2753
## ANDIS_46  -21.994 -37.7419  29.7101
## ANDIS_48  -30.590 -21.8136  24.0630
## ANDIS_49  -27.331 -28.8581  20.7602
## ANDIS_50  -35.850  10.0555  15.8428
## ANDIS_51  -29.649 -29.3407  29.8859
## ANDIS_53  -44.984  16.8512  15.2327
## ANDIS_54  -26.741   3.1967  11.7616
## ANDIS_57  -23.539 -20.5833  19.7389
## ANDIS_61  -24.726   2.7680  16.7486
## ANDIS_62  -16.101 -25.4160  17.8480
## ANDIS_63  -29.683  20.9933  32.2268
## ANDIS_64  -15.870 -21.8401  26.7700
## ANDIS_65  -23.925  20.2941  17.4473
## ANDIS_66  -30.786  14.4006  23.4236
## ANDIS_67  -21.452  19.0860  29.9095
## ANDIS_70  -22.021  16.1926  22.6290
## ANDIS_71  -33.702  22.0563  29.2051
## ANDIS_72  -27.660  14.5569  30.5046
## ANDIS_78  -11.728  -9.4941   7.6881
## ANDIS_79  -18.014  25.1393  33.9927
## ANDIS_81  -21.933   7.5746  11.1364
## ANDIS_82  -20.672  -8.5396  23.9248
## ANDIS_83   -9.283  30.6374   9.6777
## ANDIS_84   -4.259  21.8721   0.8192
## ANDIS_85    1.617 -19.4523   6.7297
## ANDIS_86   10.200 -12.2512   3.0065
## ANDIS_87   -3.199  31.6892   5.9020
## ANDIS_88   19.583 -12.6825   6.5198
## ANDIS_89   46.990 -17.4296  10.1076
## ANDIS_91   17.303  23.3418  -2.3238
## ANDIS_92   44.678 -33.8446   9.7912
## ANDIS_94   42.339 -11.7350 -10.7137
## ANDIS_97   21.170 -24.9704  -3.1676
## ANDIS_99   25.956 -21.7453   2.6806
## ANDIS_100  37.846  -8.2020   0.1471
## ANDIS_102  17.800 -26.0764   1.7465
## ANDIS_105  30.368   5.9976  -8.0591
## ANDIS_106   8.542   5.2075   4.8238
## ANDIS_107  19.852  -0.8222   1.7633
## ANDIS_108  14.002  25.2722   8.7423
## ANDIS_112  34.706 -12.5473   3.3853
## ANDIS_115  28.347  37.5974  -8.5489
## ANDIS_116  35.804  17.9753  -2.9616
## ANDIS_118  20.764 -10.5092   7.6915
## ANDIS_119  38.269 -16.9829   3.6248
## ANDIS_120  40.691  15.3636   4.7765
## ANDIS_121  43.224   3.6077   2.4006
## ANDIS_124  44.731 -21.7613   6.8732
## ANDIS_125  49.778 -30.9040   0.6342
## ANDIS_126  40.136  18.9279  -4.7107
## ANDIS_127  40.496  16.1832 -13.4429
## ANDIS_128  35.063  41.7729  -5.2699
## ANDIS_130  50.657  -5.2572   7.4464
## ANDIS_131  39.899  15.3226  -7.9795
## ANDIS_132  43.496 -19.4761  -2.4218
## ANDIS_134  49.031 -19.5626  -2.8370
## ANDIS_135  49.318 -35.2502  -1.6002
## ANDIS_141  49.445  19.5222  -0.2347
## ANDIS_142  49.438 -15.5573  -8.2656
## ANDIS_143  46.942  19.1849   7.9759
## ANDIS_144  45.068  15.3169   3.7564
## ANDIS_145  42.492   4.7176 -11.1533
## ANDIS_146  50.173   7.2063   5.7567
## ANDIS_147  48.841  -7.3803   0.3245
## ANDIS_148  43.286  19.9334 -12.8224
## ANDIS_149  49.614  14.9724  -7.7180
## ANDIS_150  35.386  47.8952 -15.2949
## 
## $eig
##  [1]  1.172e+05  4.248e+04  3.417e+04  2.363e+04  1.692e+04  1.300e+04
##  [7]  1.059e+04  9.422e+03  8.795e+03  7.762e+03  7.515e+03  7.255e+03
## [13]  7.078e+03  6.850e+03  6.201e+03  6.046e+03  5.862e+03  5.726e+03
## [19]  5.384e+03  5.338e+03  5.112e+03  5.012e+03  4.920e+03  4.840e+03
## [25]  4.711e+03  4.679e+03  4.511e+03  4.382e+03  4.319e+03  4.181e+03
## [31]  4.168e+03  4.020e+03  3.996e+03  3.903e+03  3.867e+03  3.783e+03
## [37]  3.770e+03  3.713e+03  3.656e+03  3.609e+03  3.551e+03  3.473e+03
## [43]  3.400e+03  3.309e+03  3.275e+03  3.242e+03  3.192e+03  3.184e+03
## [49]  3.130e+03  3.101e+03  3.028e+03  2.999e+03  2.952e+03  2.926e+03
## [55]  2.875e+03  2.822e+03  2.777e+03  2.761e+03  2.664e+03  2.639e+03
## [61]  2.633e+03  2.603e+03  2.574e+03  2.556e+03  2.550e+03  2.518e+03
## [67]  2.448e+03  2.425e+03  2.403e+03  2.350e+03  2.338e+03  2.273e+03
## [73]  2.246e+03  2.187e+03  2.158e+03  2.139e+03  2.103e+03  2.077e+03
## [79]  2.052e+03  2.014e+03  2.006e+03  1.953e+03  1.926e+03  1.903e+03
## [85]  1.895e+03  1.829e+03  1.788e+03  1.749e+03  1.673e+03  1.661e+03
## [91]  1.626e+03  1.581e+03  1.553e+03  1.538e+03  1.513e+03  1.424e+03
## [97]  1.400e+03  1.305e+03 -4.313e-13
## 
## $x
## NULL
## 
## $ac
## [1] 0
## 
## $GOF
## [1] 0.3397 0.3397

x <- fit$points[,1]
y <- fit$points[,2]
z <- fit$points[,3]

Comp <- data.frame(PC1=x,PC2=y,PC3=z)

PropoVarExplained <- fit$eig/sum(fit$eig)*100

qplot(PC1,PC2,data=Comp,colour=MetaData3$INDEX,shape=MetaData3$diagresult_id,size=2)+
  xlab(paste("PC1 (",prettyNum(PropoVarExplained[1],digits=3)," %)",sep=""))+
  ylab(paste("PC2 (",prettyNum(PropoVarExplained[2],digits=3)," %)",sep=""))

plot of chunk unnamed-chunk-1


qplot(PC1,PC3,data=Comp,colour=MetaData3$diagresult_id,size=2)+
  xlab(paste("PC2 (",prettyNum(PropoVarExplained[2],digits=3)," %)",sep=""))+
  ylab(paste("PC3 (",prettyNum(PropoVarExplained[3],digits=3)," %)",sep=""))

plot of chunk unnamed-chunk-1


qplot(PC2,PC3,data=Comp,colour=MetaData3$INDEX,shape=MetaData3$diagresult_id,size=2)+
  xlab(paste("PC1 (",prettyNum(PropoVarExplained[1],digits=3)," %)",sep=""))+
  ylab(paste("PC3 (",prettyNum(PropoVarExplained[3],digits=3)," %)",sep=""))

plot of chunk unnamed-chunk-1





#/////////////////////////////////////////////////////////////////////////////
# Batch effect removal
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# 
# 
# library(sva)
# 
# # Use imputed or complete
# batchdata <- t(RawDataIpS_imputed)
# 
# 
# n.sv = num.sv(batchdata,model.matrix(~MetaData3$diagresult_id),method="leek")
# 
# RDClog3.batchrm <- removeBatchEffect(x=batchdata,batch=MetaData3$INDEX)
# 
# 
# 
# png("intensity_dist.png")
# par(mfrow=c(1,2))
# boxplot(RDClog3.batchrm,las=2,col=MetaData3$DiaCol)
# 
# plotDensities(RDClog3.batchrm,group=MetaData3$diagresult_id)
# dev.off()
# 
# 
# par(mfrow=c(1,1))
# SampleCor<-cor(RDClog3.batchrm,method="spearman")
# 
# qqPlot(colMeans(SampleCor))
# 
# pdf("Correlation_matrix_raw.pdf")
# 
# heatmap.2(SampleCor, col=redgreen(75), scale="none", ColSideColors=as.character(MetaData3$DiaCol),
#           key=TRUE, symkey=FALSE, density.info="none", trace="none", cexRow=0.5)
# 
# dev.off()
# 
# 
# 
# 
# ##





#/////////////////////////////////////////////////////////////////////////////
# Inference
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\



MetaData3$DiagNum <- as.numeric(recode(MetaData3$diagresult_id, "'LDA'='1';'T2DCLC'='2';'T1DABS'='0'"))



# select Model

Diagnosis <- MetaData3$DiagNum
Index <- MetaData3$INDEX

designlist <- list(
  None=cbind(Int=rep(1,length(Diagnosis))),
  Diagnosis=cbind(Int=1,Diagnosis=Diagnosis),
  Index=cbind(Int=1,Index=Index),
  Both=cbind(Int=1,DiagnosisIndex=Diagnosis*Index),
  Add=cbind(Int=1,Diagnosis=Diagnosis,Index=Index),
  Full=cbind(Int=1,Diagnosis=Diagnosis,Index=Index,DiagnosisIndex=Diagnosis*Index)
)

out <- selectModel(RawData_complet,designlist)

table(out$pref)
## 
##      None Diagnosis     Index      Both       Add      Full 
##       147       258       286       134       387       568



#  extra covariate

design <- model.matrix(~MetaData3$DiagNum+MetaData3$INDEX)
fit <- lmFit(RawDataInS[,],design)
## Warning: Partial NA coefficients for 95 probe(s)
fit <- eBayes(fit)

tt<-topTable(fit, coef="MetaData3$DiagNum", number=10000,adjust="BH")
head(tt)
##                      logFC AveExpr       t   P.Value adj.P.Val     B
## 676.229@0.89185095   1.750   16.30  10.522 1.002e-17 6.211e-14 29.86
## 426.407@11.354644   -1.615   17.24 -10.026 1.173e-16 2.657e-13 27.44
## 440.3855@10.9604025 -1.197   17.84  -9.987 1.286e-16 2.657e-13 27.34
## 496.4129@10.920985  -2.016   21.93  -9.844 2.906e-16 3.830e-13 26.55
## 965.3308@2.760915    1.423   19.57   9.852 3.088e-16 3.830e-13 26.49
## 717.5305@12.563882   1.389   20.35   9.643 7.203e-16 6.961e-13 25.65
table(tt$adj.P.Val < .05)
## 
## FALSE  TRUE 
##  4071  2130

par(mfrow=c(1,1))
qqPlot(fit$t[,2])

plot of chunk unnamed-chunk-1



volcanoplot(fit,coef=2,highlight=8,names=rownames(fit$lods),cex=1)

plot of chunk unnamed-chunk-1



plotMA(fit,array=2,main="MA-plot, marked = top 30 lowest p-value",ylab="M / log2 FC",
       xlab="A/Average Intensity")
abline(h=0,col="red")
top30 <- head(tt,n=30)
points(top30$AveExpr,top30$logFC,cex=0.8,col="blue")

plot of chunk unnamed-chunk-1



#write.csv(file="topMetabolites_high_vs_low.csv",tt)

MetaData3$diagresult_id <- relevel(MetaData3$diagresult_id,ref="T1DABS")

par(mfrow=c(3,3))
for (i in 1:9) {
  boxplot(RawDataInS[rownames(tt)[i],]~MetaData3$diagresult_id,ylab=rownames(tt)[i],
          col=unique(MetaData3$DiaCol),main=prettyNum(tt$adj.P.Val[i],digits=2))
}

plot of chunk unnamed-chunk-1



write.csv(tt,file="topTable_anova_ESIp.csv")

# 
# 
# ClusterData <- RawData_complet[,order(MetaData3$diagresult_id)]
# 
# 
# cl <- kmeans(ClusterData , 6)
# 
# 
# #At this stage I also make a new variable indicating cluster membership as below.
# # I have a good #idea of what my clusters will be called so 
# #I gave them those names in the second line of the code. 
# #Then I put it together and put the data in a form that is good for graphing.
# 
# cl.cluster<-as.factor(cl$cluster)
# 
# #cl.cluster<-as.vector(recode (cluster, "1='FC'; 2='FV'; 3='SO'; 4= 'OS' ", 
# #                              as.numeric.result=FALSE) )
# 
# clustT1WIN2<- data.frame (ClusterData , cl.cluster) 
# clustT1WIN2$.row=rownames(ClusterData)
# molten2 <- melt(clustT1WIN2, id = c(".row", "cl.cluster") )
# 
# #OK set up the graph background. 
# #Following the ggplot book I also create a jit parameter cause it is
# #much easier to alter this and type it in than the full code over and over again.
# 
# pcp_cl <- ggplot(molten2,   aes(variable, value, group = .row, colour = cl.cluster) )          
# jit<- position_jitter (width = .08, height = .08)
# 
# #Ok first graph the cluster means.
# 
# pcp_cl + stat_summary(aes(group = cl.cluster), fun.y = mean,   geom = "line")
# 
# #Then we produce a colourful but uninformative parallel coordinates 
# #plot with a bit of alpha blending and jitter.
# 
# pcp_cl + geom_line(position = jit, alpha = 1/5)
# 
# #All code up to this point is as per Wickham but 
# #I also add the cluster means graph that we
# #first produced as well as changing the angle of the x axis text so it is readable.
# 
# pcp_cl + geom_line(position = jit, colour =  alpha("black", 1/4))+
#   stat_summary(aes(group = cl.cluster), fun.y = mean,   geom = "line", size = 1.5 )+
#   facet_wrap(~ cl.cluster)+ opts(axis.text.x=theme_text(angle=-45, hjust=0) )+
#   theme(axis.text.x=element_text(rep(c("red","blue"),each=18)))
# 
# 
# 
#