samplefile <- "ptr/ptr20171106/flowers_TOF.csv"
s <- read.table(samplefile, header=TRUE, sep="\t", colClasses=list(Start="POSIXct"))
s$Sample <- toupper(s$Filename)
s$fullname <- trimws(paste0(s$Species, s$Population, s$Plant, s$Photoperiod, substr(s$Filename,13,15)))
s$Samp <- s$Species %in% c("kaalae","hookeri")
s[s$Species=="standard",]$Species <- "blank" #thisstandard did not seem to work
datafile <- "ptr/ptr20171106/AMDIS_NIST_PTR_171106.csv"
data <- read.table(datafile, header=TRUE, sep="\t")
data$Sample <- factor(do.call(rbind, strsplit(as.character(data$Sample)," "))[,1])
data <- data[data$Sample %in% s$Sample,]
data$CAS <- factor(data$CAS)
data$Name <- factor(data$Name)
vol.all <- dcast(data, Sample~CAS, sum, value.var="Area")
s <- s[match(vol.all[,1], s$Sample),]
rownames(vol.all) <- s$fullname
vol.all[,1] <- NULL
chems <- data.frame(CAS=factor(colnames(vol.all)), Name=data$Name[match(colnames(vol.all),data$CAS)])
chems$RT <- sapply(chems$CAS, function(x) {median(data$RT[data$CAS==x])})
chems$Match <- sapply(chems$CAS, function(x) {median(data$Match[data$CAS==x])})
chems$RTV <- sapply(chems$CAS, function(x) {var(data$RT[data$CAS==x])})
chems$MatchV <- sapply(chems$CAS, function(x) {var(data$Match[data$CAS==x])})
chems$Siloxane <- grepl("sil", chems$Name, ignore.case=TRUE)
cont <- c("Benzene","Caprolactam","2-Pentanone, 4-hydroxy-4-methyl-","2-Bromo dodecane","Ethanol, 2-butoxy-","Styrene","Ethanol, 2-(2-ethoxyethoxy)-","3-Penten-2-one, 4-methyl-","Nonanal","Decanal", "5-Hepten-2-one, 6-methyl-", "Homosalate", "Oxybenzone", "Toluene", "Disulfide, dimethyl")
chems$Cont <- chems$Name %in% cont
chems$Blank <- colSums(vol.all[s$Species=="blank",])>0
chems$Ambient <- colSums(vol.all[s$Species=="ambient",])>0
chems$Type <- factor(with(chems, ifelse(chems$Siloxane, "silo", paste0(ifelse(Blank, "blank",ifelse(Ambient, "ambi","vol"))))))
chems$Sum <- colSums(vol.all)
chems$Cont <- chems$Name %in% cont
chems$SampMax <- sapply(vol.all[s$Samp,], max)
chems$SampC <- sapply(vol.all[s$Samp,], function(x) {sum(x>0)})
chems$AmbiC <- sapply(vol.all[s$Species=="ambient",], function(x) {sum(x>0)})
####Filtering############
#sfilter <- !s$Species %in% c("standard")
sfilter <- s$Samp
cfilter <- chems$Type=="vol" & chems$SampC > 1 & chems$SampMax > 10000
nTypes <- nlevels(chems$Type)
colnames(vol.all) <- chems$Name[match(colnames(vol.all), chems$CAS)]
vol <- vol.all[sfilter,cfilter]
svol <- as.data.frame(prop.table(as.matrix(vol),1)) #make samples sum to 1
ssvol <- as.data.frame(scale(svol,center=FALSE))#AND THEN scale each compound 0-1 so compounds are equally weighted
pavol <- vol #convert to presence/absence
pavol[pavol>0] <-1
ccol <- sample(rainbow(length(vol)))beanplot(chems$Sum~chems$Type, las=2) log="y" selected
Key: triangle = day, circle=night, day/night=square
vol.nmds <- metaMDS(sqrt(vol.all[sfilter,cfilter]), k=2, autotransform = F, trace=0)
plot(vol.nmds)
text(vol.nmds, display="species", col="blue")
text(vol.nmds, labels=s[sfilter,]$fullname, col=as.integer(factor(s$Species[sfilter])), adj=c(1,0.5))
points(vol.nmds, display = "sites", col=factor(s$Species[sfilter]), pch=c(15,17,15,19)[s[sfilter,]$Photoperiod], cex=2)
points(vol.nmds, display= "species", col=rainbow(nTypes)[chems[cfilter,]$Type], pch=19, cex=0.5)
legend("topleft", as.character(unique(chems[cfilter,]$Type)), col=rainbow(8), pch=19)
legend("topleft", as.character(unique(s$Species[sfilter])), col=1:5, pch=19)par(mar=c(15,4,4,2))
barplot(t(vol.all), col=as.integer(chems$Type)+1, cex.names=0.9, axes=TRUE, las=2)
legend("topleft", levels(chems$Type), col=2:5, pch=19)vol.h <- vol.all[sfilter,cfilter][order(s[sfilter,]$fullname),rev(order(colSums(vol.all[sfilter,cfilter])))]
ramp <- colorRampPalette(c("white", "darkblue"))(512)
sideramp <- colorRampPalette(c("white", "darkgreen"))(512)
colMax <- function(data) sapply(data, max, na.rm = TRUE)
range01 <- function(x){(x-min(x))/(max(x)-min(x))}
colside <- range01(log10(colSums(vol.h)+1))
heatmap(as.matrix(log10(vol.h+1)), margins=c(38,1), col=ramp, scale="none", ColSideColors=sideramp[round(colside*511)+1], cexCol=2, cexRow=1.8, Rowv=NA, Colv=NA)