kelsi — Feb 20, 2014, 3:50 PM
# Session setup.
# NOTE(review): this working directory is hard-coded to one user's Dropbox
# layout. The relative read.csv() paths used later resolve against it, so the
# script only runs unchanged on a machine with the same folder structure.
setwd("~/Dropbox/Rappaport/R-Tools/hasmik data analysis")
# One-time installation of the metabolomics package (kept commented out).
#install.packages("metabolomics")
# library() stops with an error when the package is missing; require() only
# returns FALSE, which would postpone the failure to the first RUnit call.
library(RUnit)
Loading required package: RUnit
# library() fails immediately if metabolomics is not installed; require()
# would only return FALSE and let the script continue.
library(metabolomics)
Loading required package: metabolomics
Loading required package: limma
Loading required package: crmn
# reshape2 supplies melt(), used further down. library() fails immediately if
# the package is missing, whereas require() would only return FALSE.
library(reshape2)
Loading required package: reshape2
# ggplot2 supplies the plotting calls at the end of the script. library()
# fails immediately if the package is missing; require() only returns FALSE.
library(ggplot2)
Loading required package: ggplot2
# Load the table stored in housekeep-575.31.csv (first row holds the column
# names) and list the columns that were read.
hk <- read.csv(file = "housekeep-575.31.csv", header = TRUE)
colnames(hk)
[1] "Filename" "Sample.Type" "Sample.Name" "Integ..Type" "Area"
[6] "ISTD.Area" "Area.Ratio" "Amount" "Amount.1" "X.Diff"
[11] "X.RSD.AMT" "Peak.Status" "Level" "Units" "RT"
[16] "Sample.ID" "X"
# Preview the first six rows of hk.
head(hk, n = 6L)
Filename Sample.Type Sample.Name Integ..Type Area
1 ORB32217_1101 Unknown Sample 1101 User Settings 471070908
2 ORB32218_1101a Unknown Sample 1101a User Settings 566415514
3 ORB32219_1102 Unknown Sample 1102 User Settings 705771525
4 NA
5 ORB32221_1103 Unknown Sample 1103 User Settings 625237884
6 ORB32222_1103a Unknown Sample 1103a User Settings 871484589
ISTD.Area Area.Ratio Amount Amount.1 X.Diff X.RSD.AMT Peak.Status Level
1 NA NA NA NA NA NA NA NA
2 NA NA NA NA NA NA NA NA
3 NA NA NA NA NA NA NA NA
4 NA NA NA NA NA NA NA NA
5 NA NA NA NA NA NA NA NA
6 NA NA NA NA NA NA NA NA
Units RT Sample.ID X
1 NA 14.39 NA NA
2 NA 14.32 NA NA
3 NA 14.36 NA NA
4 NA NA NA NA
5 NA 14.38 NA NA
6 NA 14.31 NA NA
# Load the other three feature tables; each file has a header row.
cys <- read.csv(file = "cys-851.42.csv", header = TRUE)
t3 <- read.csv(file = "T3-811.76.csv", header = TRUE)
iAA <- read.csv(file = "IAAT3-834.77.csv", header = TRUE)
# The tables are combined row by row later on, so confirm (RUnit) that iAA and
# cys list the same files in the same order.
checkIdentical(iAA[["Filename"]], cys[["Filename"]])
[1] TRUE
# Same row-order check, iAA against hk.
checkIdentical(iAA[["Filename"]], hk[["Filename"]])
[1] TRUE
# Same row-order check, iAA against t3.
checkIdentical(iAA[["Filename"]], t3[["Filename"]])
[1] TRUE
# Assemble one table with a row per file: the file and sample identifiers
# (taken from iAA; the Filename columns were checked to be identical across
# the four tables) plus the Area column of each feature table.
#
# The columns are built directly with their final types and names. Binding
# text and numbers together with cbind() first would coerce everything to
# character, forcing a second pass through a matrix with hard-coded
# dimensions to recover the numbers.
orbRun <- data.frame(
  file = as.character(iAA$Filename),
  sample = as.character(iAA$Sample.Name),
  iAA = as.numeric(iAA$Area),
  t3 = as.numeric(t3$Area),
  cys = as.numeric(cys$Area),
  hk = as.numeric(hk$Area),
  stringsAsFactors = FALSE
)
# Print the assembled table.
orbRun
X1 X2 1 2 3 4
1 ORB32217_1101 1101 101929070 5617949 309574005 4.711e+08
2 ORB32218_1101a 1101a 90088054 7518351 290260141 5.664e+08
3 ORB32219_1102 1102 92351819 8661665 258140612 7.058e+08
4 NA NA NA NA
5 ORB32221_1103 1103 105999201 8652884 287604042 6.252e+08
6 ORB32222_1103a 1103a 120186537 10281860 340869134 8.715e+08
7 ORB32223_1104 1104 110317295 7637618 288385755 5.379e+08
8 NA NA NA NA
9 ORB32225_1111 1111 102337541 5605722 183366115 7.897e+08
10 ORB32226_1111a 1111a 114578442 5492095 154080227 5.862e+08
11 ORB32227_1112 1112 109585121 13926395 282391514 1.231e+09
12 NA NA NA NA
13 ORB32229_1113 1113 101441847 7686414 225706638 7.074e+08
14 ORB32230_1113a 1113a 93717880 6636125 191690828 8.471e+08
15 ORB32231_1114 1114 119165603 9297563 287634206 9.519e+08
16 NA NA NA NA
17 ORB32233_1115 1115 110983482 2129704 86253234 5.179e+08
18 ORB32234_1115a 1115a 97113262 2284732 77083663 5.580e+08
19 ORB32235_2101 2101 95324466 4032647 81953618 8.787e+08
20 NA NA NA NA
21 ORB32237_2102 2102 111233917 5438698 164735096 5.251e+08
22 ORB32238_2102a 2102a 102989324 7179717 165119135 1.249e+09
23 ORB32239_2103 2103 123328599 7241691 204783244 7.061e+08
24 NA NA NA NA
25 ORB32241_2104 2104 120012257 5900653 151785523 1.152e+09
26 ORB32242_2104a 2104a 108634300 6641041 223532961 7.967e+08
27 ORB32243_2111 2111 94049905 2404952 67792251 6.925e+08
28 NA NA NA NA
29 ORB32245_2112 2112 101163735 3870674 78698295 6.349e+08
30 ORB32246_2112a 2112a 94595150 3076947 63907590 7.275e+08
31 ORB32247_2113 2113 92464900 7360943 153500906 1.460e+09
32 NA NA NA NA
33 ORB32249_2114 2114 113488327 5666168 149031810 1.267e+09
34 ORB32250_2114a 2114a 81855516 7529929 237256741 1.053e+09
35 ORB32251_2115 2115 80950181 8015940 159992425 1.453e+09
36 NA NA NA NA
37 ORB32253_1102_01 1102_01 95449889 8188514 281203126 8.201e+08
38 ORB32254_1112_01 1112_01 122106032 9575280 300508470 1.406e+09
39 ORB32255_2101_01 2101_01 116021253 4059972 88011008 9.486e+08
40 ORB32256_2111_01 2111_01 111645523 3087159 65367274 8.150e+08
# Column names: file and sample identifiers, then one area column per feature
# table (iAA, t3, cys, hk).
names(orbRun) <- c("file", "sample", "iAA", "t3", "cys", "hk")
# Pull out the samples that appear twice in the run (e.g. "1102" and
# "1102_01"). Rows are listed as (first, repeat) pairs: 3/37, 11/38, 19/39,
# 27/40; later code relies on this alternating order.
# c() builds a plain index vector; cbind() would create a 1 x 8 matrix where a
# vector of row numbers is expected.
duplicates <- orbRun[c(3, 37, 11, 38, 19, 39, 27, 40), ]
# Number of rows separating the two entries for sample 1102 (rows 3 and 37).
37 - 3
[1] 34
# Number of rows separating the two entries for sample 1112 (rows 11 and 38).
diff(c(11, 38))
[1] 27
# Number of rows separating the two entries for sample 2101 (rows 19 and 39).
diff(c(19, 39))
[1] 20
# Number of rows separating the two entries for sample 2111 (rows 27 and 40).
diff(c(27, 40))
[1] 13
# Show the eight rows selected as duplicate pairs.
print(duplicates)
file sample iAA t3 cys hk
3 ORB32219_1102 1102 92351819 8661665 258140612 7.058e+08
37 ORB32253_1102_01 1102_01 95449889 8188514 281203126 8.201e+08
11 ORB32227_1112 1112 109585121 13926395 282391514 1.231e+09
38 ORB32254_1112_01 1112_01 122106032 9575280 300508470 1.406e+09
19 ORB32235_2101 2101 95324466 4032647 81953618 8.787e+08
39 ORB32255_2101_01 2101_01 116021253 4059972 88011008 9.486e+08
27 ORB32243_2111 2111 94049905 2404952 67792251 6.925e+08
40 ORB32256_2111_01 2111_01 111645523 3087159 65367274 8.150e+08
# Keep a copy that still contains the rows with missing values, then drop
# those rows from the working table.
orbRunWNA <- orbRun
orbRun <- na.omit(orbRun)
# No re-coercion through a fixed-size matrix is needed here: columns 3:6 are
# already numeric, and na.omit() keeps both their types and their names. A
# hard-coded row count would silently misalign values if the number of
# complete rows ever changed.

# Split the duplicate pairs: odd positions hold the first entry of each pair,
# even positions the repeat.
dup1 <- duplicates[c(1, 3, 5, 7), ]
dup2 <- duplicates[c(2, 4, 6, 8), ]
# Change of each feature from the first entry to the repeat.
diffDups <- dup2[, 3:6] - dup1[, 3:6]
# First-entry areas flattened column by column into a numeric vector.
# NOTE(review): not needed for the division below; kept in case code outside
# this section still reads it.
dup1Matrx <- as.numeric(as.matrix(dup1[, 3:6]))
# Relative change (a fraction, not yet multiplied by 100) next to the
# identifiers of the first entry.
percentDups <- cbind(dup1[, 1:2], diffDups / dup1[, 3:6])
names(orbRun) <- c("file", "sample", "iAA", "t3", "cys", "hk")
names(percentDups) <- colnames(orbRun)
percentDups
file sample iAA t3 cys hk
3 ORB32219_1102 1102 0.03355 -0.054626 0.08934 0.16199
11 ORB32227_1112 1112 0.11426 -0.312437 0.06416 0.14222
19 ORB32235_2101 2101 0.21712 0.006776 0.07391 0.07953
27 ORB32243_2111 2111 0.18709 0.283668 -0.03577 0.17696
# Replace the sample labels with "<sample> <gap>inj", where the gap is the row
# distance between the two entries of each pair (34, 27, 20, 13).
# NOTE(review): the labels are hard-coded and assume the row order
# 1102, 1112, 2101, 2111.
percentDups$sample <- c(
  "1102 34inj",
  "1112 27inj",
  "2101 20inj",
  "2111 13inj"
)
# Reshape to long format: one row per (file, sample, feature). The id columns
# are named explicitly so melt() does not have to guess them from the column
# types.
percentDups <- melt(percentDups, id.vars = c("file", "sample"))
Using file, sample as id variables
# percentDups$value holds relative changes as fractions (e.g. 0.03 for 3 %).
# Both axes are labelled as percentages, so the values are scaled by 100 at
# plot time to make the numbers agree with the labels.

# One point per feature and duplicated sample.
ggplot(percentDups, aes(x = sample, y = 100 * value, color = variable)) +
  geom_point(size = 3) +
  xlab("Sample Number") +
  ylab("% change of 2nd Duplicate from 1st") +
  ggtitle("How the Duplicates Vary")

# Spread of the four features within each duplicated sample.
ggplot(percentDups, aes(x = sample, y = 100 * value)) +
  geom_boxplot() +
  xlab("Sample Number") +
  ylab("% change of 4 features from Duplicate") +
  ggtitle("How the Duplicates Vary")
# Long format of the complete rows: one row per (sample, feature) with the
# area in `value`. The argument is spelled out as id.vars; the short form
# `id` only worked through partial argument matching.
orbData <- melt(orbRun[, 2:6], id.vars = "sample")
# Guard: make sure the plotted values are numeric (a no-op when they already
# are).
orbData$value <- as.numeric(orbData$value)
# All four features for every sample, coloured by feature.
ggplot(orbData, aes(x = sample, y = value, color = variable)) +
  geom_point(size = 3) +
  xlab("Sample Number") +
  ylab("absolute absorbance") +
  ggtitle("orbitrap data features")