source('Functions/Dist_Functs.R')
source('Functions/Sim_Functs.R')
source('Functions/Predict_Functs.R')
source('Functions/Illus_Functs.R')
source('Functions/SubComp_Functs.R')
k <- c(10E8, 2.5E8, 2E8, 1E8, .2E8, 1E8, 1E8)
l <- c(8E8, 2.5E8, 2E8, 1E8, .2E8, 1E8, 1E8)
** FOR SURE the 0 removal isssue **
ATsi <- AT[,-1]
RatioMatrixes <- lapply( 1:nrow(ATsi), function(i) {
Mat <- sapply(1:ncol(ATsi), function(x) ATsi[i,x]/ATsi[,x])
colnames(Mat) <- names(ATsi)
rownames(Mat) <- AT$OTU
return(Mat)
}
)
names(RatioMatrixes) <- AT$OTU
# Matrixes saying if bigger or smaller than median compared to each OTU
BinMatrixes <- lapply( 1:length(RatioMatrixes), function(i) {
Bin <- sapply(1:nrow(ATsi), function(x) {
Mat <- RatioMatrixes[[i]]
Mat[x,] >= .999*median(Mat[x,])})
Bin <- t(Bin)
colnames(Bin) <- names(ATsi)
rownames(Bin) <- AT$OTU
return(Bin)
})
names(BinMatrixes) <- AT$OTU
# Now create one Matrix, again the columns represent the samples,
# Let's call the OTU of a row again the Owner OTU.
# The number is the percentage of OTUs for which the ratio Owner OTU/ OTU was
# above median in this sample
AssessMatrix <- sapply(1:length(BinMatrixes), function(x) colMeans(BinMatrixes[[x]]))
AssessMatrix <- t(AssessMatrix)
rownames(AssessMatrix) <- AT$OTU
ATsi
## k l
## 1 0.56497175 0.50955414
## 2 0.14124294 0.15923567
## 3 0.11299435 0.12738854
## 4 0.05649718 0.06369427
## 5 0.01129944 0.01273885
## 6 0.05649718 0.06369427
## 7 0.05649718 0.06369427
AssessMatrix
## k l
## A 1.0000000 0.1428571
## B 0.8571429 1.0000000
## C 0.8571429 1.0000000
## D 0.8571429 1.0000000
## E 0.8571429 1.0000000
## F 0.8571429 1.0000000
## G 0.8571429 1.0000000
## tried the Zeller Data Before, and maybe again t
# load("Zeller2014AbundTable_ATA.RData")
# AT <- Zeller2014AbundTable_ATA[,-2:-1]
# AT <- t(AT)
# AT <- as.data.frame(AT)
## if we wanted species level ########
if(file.exists("Data/mOTU.species.profile.tsv")){
AT <- read.table("Data/mOTU.species.profile.tsv", header = TRUE)
}
# ### if we wanted genus level, weirdly even more OTUs ####
# if(file.exists("Data/mOTU.genus.profile.tsv")){
# AT <- read.table("Data/mOTU.genus.profile.tsv", header = TRUE)
# }
# Data before and 3 months after Gastric Bypass, from the bypass_read_information
# file, I chose the following 10 samples
Before <- c('X9437', 'X9439', 'X9441', 'X9460', 'X9464')
After <- c('X9438', 'X9440', 'X9442', 'X9462', 'X9465')
names(AT)[1] <- "OTU"
# select the 10 samples as our Abundance Table to work with
AT_sub <- select(AT, OTU, one_of(Before), one_of(After))
# test if the relative abundances of each sample sum up to 1
summarise_each(AT_sub[,-1], funs(sum))
## X9437 X9439 X9441 X9460 X9464 X9438 X9440 X9442 X9462
## 1 0.9999997 1 0.9999995 1 1 0.9999998 1 1 1
## X9465
## 1 1.000001
# yes they practically do
# are there no unmapped, what is the max?
summarise_each(AT_sub[,-1], funs(max))
## X9437 X9439 X9441 X9460 X9464 X9438 X9440
## 1 0.2279863 0.1519103 0.1243271 0.2340772 0.2739951 0.224817 0.3986044
## X9442 X9462 X9465
## 1 0.1911199 0.1719057 0.1127358
# ranging from 11 to 40%
# what is the max OTU in each case?
WhichMax <- function(x) { AT_sub$OTU[which(x == max(x))]}
summarise_each(AT_sub[,-1], funs(WhichMax))
## X9437 X9439 X9441
## 1 motu_linkage_group_585 Prevotella_copri Prevotella_copri
## X9460 X9464 X9438
## 1 Bacteroides_dorei/vulgatus Bacteroides_dorei/vulgatus Prevotella_copri
## X9440 X9442 X9462
## 1 Prevotella_copri Prevotella_copri motu_linkage_group_127
## X9465
## 1 Escherichia_coli
# so different, interesting
## remove all OTUs that are 0 in all conditions, i.e. rowSum != 0
AT_sub <- AT_sub[which(rowSums(AT_sub[,-1]) != 0),]
## replace all other 0 with Pseudocount
# change all 0 to NA
AT_sub[AT_sub == 0] <- NA
# find the minimum of all other values
Pseudocount <- min(AT_sub[,-1], na.rm = TRUE)
# set all NA to this min = Psuedocount Value
AT_sub[is.na(AT_sub)] <- Pseudocount
# how did the pseudocount affect the colsums?
summarise_each(AT_sub[,-1], funs(sum))
## X9437 X9439 X9441 X9460 X9464 X9438 X9440 X9442
## 1 1.010041 1.010408 1.0091 1.012604 1.014226 1.0091 1.011192 1.008421
## X9462 X9465
## 1 1.009623 1.011455
# since they went a bit up, clear, I renormalise
Norm <- function(x) { x/sum(x)}
AT_sub <- mutate_each(AT_sub, funs(Norm), -OTU)
summarise_each(AT_sub, funs(sum), -OTU)
## X9437 X9439 X9441 X9460 X9464 X9438 X9440 X9442 X9462 X9465
## 1 1 1 1 1 1 1 1 1 1 1
# ok all 1, AT_sub is our final AT to work with
ATsi <- AT_sub[,-1] # remove the OTU column
dim(ATsi)
## [1] 358 10
## the following produces a list with RatioMatrixes, for each OTU one RatioMatrix.
# let's call this OTU, the Owner OTU of the Matrix.
# each column of a RatioMatrix contains the relative abundance ratios of the
# Owner OTU to all other OTUs in this sample.
# Consequently, each row of a RatioMatrix contains the ratios of the Owner OTU to
# the OTU of that row over all samples
RatioMatrixes <- lapply( 1:nrow(ATsi), function(i) {
Mat <- sapply(1:ncol(ATsi), function(x) ATsi[i,x]/ATsi[,x])
colnames(Mat) <- names(ATsi)
#rownames(Mat) <- AT$OTU
return(Mat)})
# names(RatioMatrixes) <- AT$OTU
### The Big question is how to use these ratio distributions (they are
## independent of compositional effects, that is clear)
## The current idea: For decide based on the ratios for each OTU, in which
## samples the Owner OTU is above median or below >> BinMatrixes: Each row
## consists of 1 and 0 (T and F) showing if the Ratio Owner OTU/row OTU was
## above median in this sample.
# So for each Owner OTU and sample you have a vector showing whether the Ratio
# Owner OTU/ row OTU was above median
BinMatrixes <- lapply( 1:length(RatioMatrixes), function(i) {
Bin <- sapply(1:nrow(ATsi), function(x) {
Mat <- RatioMatrixes[[i]]
Mat[x,] >= .999*median(Mat[x,])})
Bin <- t(Bin)
colnames(Bin) <- names(ATsi)
#rownames(Bin) <- AT$OTU
return(Bin)})
# names(BinMatrixes) <- AT$OTU
# Now create one Matrix in which each entry reflects the percentage of the ratios
# of the OTU that is above Median for this sample
AssessMatrix <- sapply(1:length(BinMatrixes), function(x) colMeans(BinMatrixes[[x]]))
AssessMatrix <- t(AssessMatrix)
rownames(AssessMatrix) <- AT_sub$OTUs
colnames(AssessMatrix) <- names(ATsi)
head(ATsi)
## X9437 X9439 X9441 X9460 X9464
## 1 6.900708e-05 1.840294e-03 4.169122e-04 0.0005068051 4.341115e-04
## 2 6.039357e-05 6.491894e-04 7.087506e-04 0.1215882638 1.973230e-04
## 3 5.178006e-05 5.176128e-05 5.182838e-05 0.0000516490 5.156644e-05
## 4 2.223939e-04 3.394882e-03 4.169122e-04 0.0002472002 1.243137e-03
## 5 1.683189e-04 1.551187e-02 2.855849e-03 0.0018270898 1.267802e-02
## 6 5.178006e-05 2.496883e-04 2.501477e-04 0.0001036644 5.156644e-05
## X9438 X9440 X9442 X9462 X9465
## 1 3.070082e-04 1.254707e-04 3.265532e-04 2.179937e-03 3.801704e-03
## 2 2.570681e-03 1.380182e-03 3.158821e-04 2.797351e-02 5.170771e-05
## 3 5.182836e-05 5.172113e-05 5.186328e-05 7.032326e-05 5.170771e-05
## 4 8.488505e-04 2.174829e-04 5.817592e-04 9.067649e-04 5.689627e-04
## 5 4.502785e-04 3.677872e-03 2.266665e-03 7.032326e-05 3.168091e-04
## 6 6.666460e-03 5.172113e-05 8.259887e-04 1.617375e-04 5.340487e-04
head(AssessMatrix)
## X9437 X9439 X9441 X9460 X9464 X9438
## [1,] 0.02793296 0.8575419 0.4022346 0.7486034 0.8128492 0.1703911
## [2,] 0.02793296 0.4162011 0.6312849 0.9944134 0.2094972 0.7821229
## [3,] 0.72346369 0.6480447 0.6312849 0.7178771 0.8016760 0.5782123
## [4,] 0.12849162 0.8966480 0.3156425 0.2039106 0.8407821 0.6675978
## [5,] 0.01955307 0.9441341 0.7290503 0.5335196 0.9413408 0.1648045
## [6,] 0.12569832 0.7262570 0.6592179 0.3407821 0.1871508 0.9469274
## X9440 X9442 X9462 X9465
## [1,] 0.1201117 0.2094972 0.79888268 0.87150838
## [2,] 0.8463687 0.1229050 0.95530726 0.03351955
## [3,] 0.7709497 0.5670391 0.63687151 0.64245810
## [4,] 0.1284916 0.5837989 0.68156425 0.56703911
## [5,] 0.8631285 0.6340782 0.01955307 0.16480447
## [6,] 0.1480447 0.7737430 0.45251397 0.77932961
# Let's look at the Escherichias
apply(AssessMatrix[which(grepl('Escher', AT_sub$OTU)),], 1, range)
## [,1] [,2]
## [1,] 0.002793296 0.5670391
## [2,] 0.960893855 0.8016760
AssessMatrix[which(grepl('Escher', AT_sub$OTU)),]
## X9437 X9439 X9441 X9460 X9464 X9438
## [1,] 0.002793296 0.002793296 0.008379888 0.8044693 0.1312849 0.8938547
## [2,] 0.723463687 0.648044693 0.631284916 0.7178771 0.8016760 0.5782123
## X9440 X9442 X9462 X9465
## [1,] 0.9413408 0.3854749 0.8826816 0.9608939
## [2,] 0.7709497 0.5670391 0.6368715 0.6424581
ATsi[which(grepl('Escher', AT_sub$OTU)),]
## X9437 X9439 X9441 X9460 X9464
## 108 6.900708e-05 5.176128e-05 1.459192e-04 0.006483753 3.946469e-04
## 111 5.178006e-05 5.176128e-05 5.182838e-05 0.000051649 5.156644e-05
## X9438 X9440 X9442 X9462 X9465
## 108 4.258816e-02 4.739956e-02 4.663953e-03 3.975924e-02 1.114591e-01
## 111 5.182836e-05 5.172113e-05 5.186328e-05 7.032326e-05 5.170771e-05