hgdata.R

kelsi — Feb 20, 2014, 3:50 PM

# Work from the project folder so the relative CSV file names passed to
# read.csv() below resolve.
# NOTE(review): this Dropbox path is user-specific; adjust it before running
# the script on another machine.
setwd("~/Dropbox/Rappaport/R-Tools/hasmik data analysis")
# One-time setup: uncomment to install the metabolomics package loaded below.
#install.packages("metabolomics")
require(RUnit)
Loading required package: RUnit
require(metabolomics)
Loading required package: metabolomics
Loading required package: limma
Loading required package: crmn
require(reshape2)
Loading required package: reshape2
require(ggplot2)
Loading required package: ggplot2
# Load the housekeep-575.31 export and list its column names.
hk <- read.csv(file = "housekeep-575.31.csv", header = TRUE)
names(hk)
 [1] "Filename"    "Sample.Type" "Sample.Name" "Integ..Type" "Area"       
 [6] "ISTD.Area"   "Area.Ratio"  "Amount"      "Amount.1"    "X.Diff"     
[11] "X.RSD.AMT"   "Peak.Status" "Level"       "Units"       "RT"         
[16] "Sample.ID"   "X"          
# Preview the first rows of hk to eyeball the import (note the all-NA row 4).
head(hk)
        Filename    Sample.Type Sample.Name   Integ..Type      Area
1  ORB32217_1101 Unknown Sample        1101 User Settings 471070908
2 ORB32218_1101a Unknown Sample       1101a User Settings 566415514
3  ORB32219_1102 Unknown Sample        1102 User Settings 705771525
4                                                                NA
5  ORB32221_1103 Unknown Sample        1103 User Settings 625237884
6 ORB32222_1103a Unknown Sample       1103a User Settings 871484589
  ISTD.Area Area.Ratio Amount Amount.1 X.Diff X.RSD.AMT Peak.Status Level
1        NA         NA     NA       NA     NA        NA          NA    NA
2        NA         NA     NA       NA     NA        NA          NA    NA
3        NA         NA     NA       NA     NA        NA          NA    NA
4        NA         NA     NA       NA     NA        NA          NA    NA
5        NA         NA     NA       NA     NA        NA          NA    NA
6        NA         NA     NA       NA     NA        NA          NA    NA
  Units    RT Sample.ID  X
1    NA 14.39        NA NA
2    NA 14.32        NA NA
3    NA 14.36        NA NA
4    NA    NA        NA NA
5    NA 14.38        NA NA
6    NA 14.31        NA NA
# Load the exports for the remaining three features.
cys <- read.csv(file = "cys-851.42.csv", header = TRUE)
t3 <- read.csv(file = "T3-811.76.csv", header = TRUE)
iAA <- read.csv(file = "IAAT3-834.77.csv", header = TRUE)

# The tables are combined column-wise by row position further down, so their
# Filename columns must be identical; iAA is used as the reference.
checkIdentical(iAA$Filename, cys$Filename)
[1] TRUE
# Confirm hk has exactly the same Filename column as iAA.
checkIdentical(iAA$Filename,hk$Filename)
[1] TRUE
# Confirm t3 has exactly the same Filename column as iAA.
checkIdentical(iAA$Filename,t3$Filename)
[1] TRUE

# Assemble one table for the whole run: file name, sample name, and the Area
# column of each of the four feature tables (iAA, t3, cys, hk), matched by row
# position.
# The text columns and the numeric columns are built separately so the areas
# stay numeric. Passing everything through a single cbind() coerces the areas
# to character, which then needs a (potentially lossy) conversion back to
# numbers and a hard-coded row count.
# The resulting columns carry the default names X1, X2, 1, 2, 3, 4.
orbRun <- cbind(
  data.frame(cbind(as.character(iAA$Filename), as.character(iAA$Sample.Name))),
  cbind(as.numeric(iAA$Area), as.numeric(t3$Area),
        as.numeric(cys$Area), as.numeric(hk$Area))
)
orbRun
                 X1      X2         1        2         3         4
1     ORB32217_1101    1101 101929070  5617949 309574005 4.711e+08
2    ORB32218_1101a   1101a  90088054  7518351 290260141 5.664e+08
3     ORB32219_1102    1102  92351819  8661665 258140612 7.058e+08
4                                  NA       NA        NA        NA
5     ORB32221_1103    1103 105999201  8652884 287604042 6.252e+08
6    ORB32222_1103a   1103a 120186537 10281860 340869134 8.715e+08
7     ORB32223_1104    1104 110317295  7637618 288385755 5.379e+08
8                                  NA       NA        NA        NA
9     ORB32225_1111    1111 102337541  5605722 183366115 7.897e+08
10   ORB32226_1111a   1111a 114578442  5492095 154080227 5.862e+08
11    ORB32227_1112    1112 109585121 13926395 282391514 1.231e+09
12                                 NA       NA        NA        NA
13    ORB32229_1113    1113 101441847  7686414 225706638 7.074e+08
14   ORB32230_1113a   1113a  93717880  6636125 191690828 8.471e+08
15    ORB32231_1114    1114 119165603  9297563 287634206 9.519e+08
16                                 NA       NA        NA        NA
17    ORB32233_1115    1115 110983482  2129704  86253234 5.179e+08
18   ORB32234_1115a   1115a  97113262  2284732  77083663 5.580e+08
19    ORB32235_2101    2101  95324466  4032647  81953618 8.787e+08
20                                 NA       NA        NA        NA
21    ORB32237_2102    2102 111233917  5438698 164735096 5.251e+08
22   ORB32238_2102a   2102a 102989324  7179717 165119135 1.249e+09
23    ORB32239_2103    2103 123328599  7241691 204783244 7.061e+08
24                                 NA       NA        NA        NA
25    ORB32241_2104    2104 120012257  5900653 151785523 1.152e+09
26   ORB32242_2104a   2104a 108634300  6641041 223532961 7.967e+08
27    ORB32243_2111    2111  94049905  2404952  67792251 6.925e+08
28                                 NA       NA        NA        NA
29    ORB32245_2112    2112 101163735  3870674  78698295 6.349e+08
30   ORB32246_2112a   2112a  94595150  3076947  63907590 7.275e+08
31    ORB32247_2113    2113  92464900  7360943 153500906 1.460e+09
32                                 NA       NA        NA        NA
33    ORB32249_2114    2114 113488327  5666168 149031810 1.267e+09
34   ORB32250_2114a   2114a  81855516  7529929 237256741 1.053e+09
35    ORB32251_2115    2115  80950181  8015940 159992425 1.453e+09
36                                 NA       NA        NA        NA
37 ORB32253_1102_01 1102_01  95449889  8188514 281203126 8.201e+08
38 ORB32254_1112_01 1112_01 122106032  9575280 300508470 1.406e+09
39 ORB32255_2101_01 2101_01 116021253  4059972  88011008 9.486e+08
40 ORB32256_2111_01 2111_01 111645523  3087159  65367274 8.150e+08
# Give the columns meaningful names: run file, sample label, then one area
# column per feature.
names(orbRun) <- c("file", "sample", "iAA", "t3", "cys", "hk")

# Pull out the four samples that were run twice, interleaved as
# (first run, repeat) pairs: rows 3/37, 11/38, 19/39 and 27/40.
# The row index is a plain vector built with c(); cbind() would create a
# 1 x 8 matrix that `[` has to flatten back into a vector.
duplicates <- orbRun[c(3, 37, 11, 38, 19, 39, 27, 40), ]
# Row gap between the two runs of sample 1102 (rows 3 and 37).
37-3
[1] 34
# Row gap between the two runs of sample 1112 (rows 11 and 38).
38-11
[1] 27
# Row gap between the two runs of sample 2101 (rows 19 and 39).
39-19
[1] 20
# Row gap between the two runs of sample 2111 (rows 27 and 40).
40-27
[1] 13
# Print the paired rows: each first run followed by its repeat.
duplicates
               file  sample       iAA       t3       cys        hk
3     ORB32219_1102    1102  92351819  8661665 258140612 7.058e+08
37 ORB32253_1102_01 1102_01  95449889  8188514 281203126 8.201e+08
11    ORB32227_1112    1112 109585121 13926395 282391514 1.231e+09
38 ORB32254_1112_01 1112_01 122106032  9575280 300508470 1.406e+09
19    ORB32235_2101    2101  95324466  4032647  81953618 8.787e+08
39 ORB32255_2101_01 2101_01 116021253  4059972  88011008 9.486e+08
27    ORB32243_2111    2111  94049905  2404952  67792251 6.925e+08
40 ORB32256_2111_01 2111_01 111645523  3087159  65367274 8.150e+08
# Keep an untouched copy that still contains the rows with missing areas.
orbRunWNA <- orbRun
# Drop every row that contains an NA.
orbRun <- na.omit(orbRun)
# Rebuild the four area columns as a plain numeric matrix. The row count is
# taken from the data instead of being hard-coded, so this keeps working when
# the number of complete rows changes. This cbind() resets the area column
# names; they are restored further down.
orbRun <- cbind(orbRun[, 1:2],
                matrix(as.numeric(as.matrix(orbRun[, 3:6])), nrow(orbRun), 4))
# Split the interleaved pairs: odd rows are first runs, even rows the repeats.
dup1 <- duplicates[c(1, 3, 5, 7), ]
dup2 <- duplicates[c(2, 4, 6, 8), ]

# Change of each feature from the first run to its repeat.
diffDups <- dup2[, 3:6] - dup1[, 3:6]

# First-run areas flattened column by column; dividing the data frame by this
# vector scales every difference by its own first-run value.
dup1Matrx <- as.numeric(as.matrix(dup1[, 3:6]))
percentDups <- cbind(dup1[, 1:2], diffDups / dup1Matrx)
names(orbRun) <- c("file", "sample", "iAA", "t3", "cys", "hk")

# NOTE: despite the name, the values are fractions of the first run
# (0.03 means 3 %), not percentages.
names(percentDups) <- colnames(orbRun)
percentDups
            file sample     iAA        t3      cys      hk
3  ORB32219_1102   1102 0.03355 -0.054626  0.08934 0.16199
11 ORB32227_1112   1112 0.11426 -0.312437  0.06416 0.14222
19 ORB32235_2101   2101 0.21712  0.006776  0.07391 0.07953
27 ORB32243_2111   2111 0.18709  0.283668 -0.03577 0.17696
# Relabel each duplicated sample with its id plus the row gap between its two
# runs.
percentDups[["sample"]] <- c("1102 34inj", "1112 27inj", "2101 20inj", "2111 13inj")
# Reshape to long format, one row per sample/feature pair; the id columns are
# left for melt() to detect.
percentDups <- melt(data = percentDups)
Using file, sample as id variables

# Scatter of the change between duplicate runs, one coloured point per feature.
# The stored values are fractions of the first run, so they are scaled by 100
# to agree with the "% change" axis label.
ggplot(percentDups, aes(x = sample, y = 100 * value, color = variable)) +
  geom_point(size = 3) +
  xlab("Sample Number") +
  ylab("% change of 2nd Duplicate from 1st") +
  ggtitle("How the Duplicates Vary")

plot of chunk unnamed-chunk-1


# Box plot of the same changes, pooling the four features within each sample.
# The stored values are fractions of the first run, so they are scaled by 100
# to agree with the "% change" axis label.
ggplot(percentDups, aes(x = sample, y = 100 * value)) +
  geom_boxplot() +
  xlab("Sample Number") +
  ylab("% change of 4 features from Duplicate") +
  ggtitle("How the Duplicates Vary")

plot of chunk unnamed-chunk-1


# Long format of the NA-free run table: one row per sample/feature pair.
# `id.vars` is written out in full rather than relying on partial matching of
# the abbreviated `id` argument.
orbData <- melt(orbRun[, 2:6], id.vars = "sample")
# Make sure the melted values are numeric before plotting.
orbData$value <- as.numeric(orbData$value)
# Plot every feature's area for every sample, coloured by feature.
ggplot(orbData, aes(x = sample, y = value, color = variable)) +
  geom_point(size = 3) +
  xlab("Sample Number") +
  ylab("absolute absorbance") +
  ggtitle("orbitrap data features")

plot of chunk unnamed-chunk-1