# Import metadata
metadata <-read.csv("~/Documents/gbru_fy18_rice_methane/march2020/metadata_biomassCH4_noRIL9.csv", header=TRUE)
#Define the geometric mean for all values greater than 0
geomean <- function(x) { exp(mean(log(x[x>0]))) }
# Create a function to filter rows by the proportion of nonzero samples and the geometric mean of the nonzero samples
prefilter <- function(x, zcutoff, mcutoff){
nzvect <- apply(x, 1, function(y){
((sum(y>0)/length(y))> zcutoff) & (geomean(y)> mcutoff)
})
lowcounts <- t(as.data.frame(apply(x[!nzvect,],2,sum)))
rownames(lowcounts)<-c("lowconts")
df1 <-rbind(lowcounts, x[nzvect,])
return(df1)
}
# Import OTU counts
eggnogdata<-read.table("~/Documents/gbru_fy18_rice_methane/march2020/gbru_fy20_rice_methane.eggnog.tsv", sep = "\t", header=TRUE)
row.names(eggnogdata)<-eggnogdata$EGGNOG_ID
eggnogdata2<-eggnogdata[,5:100]
eggnogdata2[is.na(eggnogdata2)]<-0
seeddata<-read.table("~/Documents/gbru_fy18_rice_methane/march2020/gbru_fy20_rice_methane.seed.tsv", sep = "\t", header=TRUE)
seeddata2<-seeddata[,5:100]
seeddata2[is.na(seeddata2)]<-0
interpro2godata<-read.table("~/Documents/gbru_fy18_rice_methane/march2020/gbru_fy20_rice_methane.interpro2go.tsv", sep = "\t", header=TRUE)
row.names(interpro2godata)<-interpro2godata$INTERPRO2GO_ID
interpro2godata2<-interpro2godata[,5:100]
interpro2godata2[is.na(interpro2godata2)]<-0
# Move feature ID to rownames
#Prefilter data
e_filt<-prefilter(eggnogdata2, 0.8, 200)
s_filt<-prefilter(seeddata2, 0.8, 200)
i_filt<-prefilter(interpro2godata2, 0.8, 200)
library("ALDEx2")
# Construct covariates matrix
covariates1 <- data.frame(genotype=metadata$genotype, sumch4=log10(metadata$sum_ch4), dev=metadata$dev)
# Process
#select D3 developmental stage
d3cov<- covariates1[covariates1$dev=="D3",]
d3cov<-d3cov[1:30,]
d3rows <- as.numeric(rownames(d3cov))
# create model matrix for log10 of total methane
mm1 <- model.matrix(~ sumch4, d3cov)
# run CLR transformations and sampling
i.clr<- aldex.clr(i_filt[,d3rows], mm1, mc.samples = 128, denom="all", verbose=TRUE, useMC=TRUE)
multicore environment is is OK -- using the BiocParallel package
removed rows with sums equal to zero
computing center with all features
data format is OK
'package:stats' may not be available when loading'package:stats' may not be available when loadingdirichlet samples complete
'package:stats' may not be available when loading'package:stats' may not be available when loadingtransformation complete
s.clr<- aldex.clr(s_filt[,d3rows], mm1, mc.samples = 128, denom="all", verbose=TRUE, useMC=TRUE)
multicore environment is is OK -- using the BiocParallel package
removed rows with sums equal to zero
computing center with all features
data format is OK
'package:stats' may not be available when loading'package:stats' may not be available when loadingdirichlet samples complete
'package:stats' may not be available when loading'package:stats' may not be available when loadingtransformation complete
e.clr<- aldex.clr(e_filt[,d3rows], mm1, mc.samples = 128, denom="all", verbose=TRUE, useMC=TRUE)
multicore environment is is OK -- using the BiocParallel package
removed rows with sums equal to zero
computing center with all features
data format is OK
'package:stats' may not be available when loading'package:stats' may not be available when loadingdirichlet samples complete
'package:stats' may not be available when loading'package:stats' may not be available when loadingtransformation complete
# run CLR transformations and sampling
i.glm<- aldex.glm(i.clr, verbose=TRUE)
running tests for each MC instance:
|------------(25%)----------(50%)----------(75%)----------|
e.glm<- aldex.glm(e.clr, verbose=TRUE)
running tests for each MC instance:
|------------(25%)----------(50%)--------
Because there are no significant taxa after BH correction. The challenge of regressing this data is that each sample does not have its own menthane measurment whic reduces the degrees of freedom
i.corr <- aldex.corr(i.clr, d3cov$sumch4)
s.corr <- aldex.corr(s.clr, d3cov$sumch4)
e.corr <- aldex.corr(e.clr, d3cov$sumch4)
No genes were signifigant in any of the three ontologies at Stage D2