require(vegan)
require(dplyr)
require(tidyr)
require(ggplot2)
require(stringr)
require(reshape2)
require(manipulate)
# require(gridExtra) # you could use with grid.arrange if you want to show several ggplot plots in one Figure,

Function definitions

########################################################
###### the color palette                         #####
########################################################
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

########################################################
###### show changes in OTUs between two conditions #####
########################################################

plotOTUChanges <- function(LongDataFrame){
        LongDataFrame$OTU <- as.character(LongDataFrame$OTU)
        LongDataFrame$Condition <- as.character(LongDataFrame$Condition)
        Tr <- ggplot(LongDataFrame, aes(x = OTU, y = Value, 
                                colour = Condition, 
                                size = Condition, 
                                shape = Condition))

        Tr <- Tr + geom_point() +
        facet_grid(AbundType ~., scales = "free") +
        scale_size_discrete(range = c(6,4)) +
        scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
        scale_shape_manual(values = c(16,17)) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("OTU") +
        ylab("Abundance")        
}

###############################################################
###### show changes in composition between two conditions #####
###############################################################

plotConditionChanges <- function(LongDataFrame){
        LongDataFrame$OTU <- as.character(LongDataFrame$OTU)
        LongDataFrame$Condition <- as.character(LongDataFrame$Condition)
        Tr <- ggplot(LongDataFrame, aes(x = Condition, y = Value, 
                                colour = OTU))

        Tr <- Tr + geom_point(size = 5, alpha = .7) +
        facet_grid(AbundType ~., scales = "free") +
        scale_colour_manual(values = cbPalette[1:length(unique(LongDataFrame$OTU))]) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("Condition") +
        ylab("Abundance")        
}


###############################################################
###### function to show how well one OTU predicts the others #####
###############################################################
# predictor is just the OTU, so "A", "B" and so on.
# could be combined with manipulate but not in Markdown,see below and in text



plotPaulPredict <- function(WideDataFrame, Predictor){
        WideDataFrame <- select(WideDataFrame, Condition_k, Condition_l, matches(Predictor))
        names(WideDataFrame)[3] <- paste("Pred._from:",Predictor)
        WideDataFrame$OTU <- LETTERS[1:nrow(WideDataFrame)]
        #WideDataFrame <- melt(WideDataFrame, id.vars = "OTU")
        WideDataFrame <- gather(WideDataFrame, Condition, Value, -OTU)
        
        Tr <- ggplot(WideDataFrame, aes(x = OTU, y = Value, 
                                colour = Condition, 
                                size = Condition, 
                                shape = Condition))

        Tr <- Tr + geom_point()  +
        scale_size_discrete(range = c(6,4,7)) +
        scale_colour_manual(values = c(cbPalette[1],cbPalette[2],cbPalette[3])) +
        scale_shape_manual(values = c(16,17,4)) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("OTU") +
        ylab("Rel. Abundance")
        
        print(Tr)        
}
## you can now do manipulate, but it does not work with this markdown

# manipulate(
# plotPaulPredict(PredAbund,Predictor),
# Predictor = picker("A","B","C","D","E"))

###############################################################
###### Functions to illustrate the prediction precision   #####
###############################################################

illustratePrecision <- function(DF){
    Tr <- ggplot(DF, aes(x = OTU, y = Ratio))
        Tr <- Tr +
        geom_hline(aes(yintercept = 1), colour = cbPalette[1], size = 1.5) +
        geom_point(size = 6, colour = cbPalette[2]) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("predictor OTU") +
            ylab("Ratio_PredictedToRealDifferences")
    print(Tr)
}

###############################################################
###### Functions to illustrate the power measures      #####
###############################################################

illustratePowers <- function(DF){
    Tr <- ggplot(DF, aes(x = OTU, y = value, color = Measure,
                           shape = Measure, size = Measure))
        Tr <- Tr +
        geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.5) +
        geom_point() +
        scale_size_discrete(range = c(6,4)) +
        scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
        scale_shape_manual(values = c(16,17)) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("predicted OTU")   
    print(Tr)
}



###############################################################
###### function to predict the relative abundance of OTU j in 
# condition l from the relative abundances of OTU i in condition l
# and condtion k                                           #####
###############################################################
# so k and l are the vectors of relative abundances in these conditions
# i and j are the indices of the OTUs of interst.


predictAbundance <- function( k, l, i, j ){
        if( i == j ){
                l[i]
              }
        else {
               k[j]*(1 - l[i])/(1 - k[i]) 
        }
}



###################################################
##### Jensen Shannon Distance #####
###################################################

dist.JSD <- function(inMatrix, pseudocount=0.000001, ...) {
  KLD <- function(x,y) sum(x *log(x/y))
  JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
  matrixColSize <- length(colnames(inMatrix))
  matrixRowSize <- length(rownames(inMatrix))
  colnames <- colnames(inMatrix)
  resultsMatrix <- matrix(0, matrixColSize, matrixColSize)
  
  inMatrix = apply(inMatrix,1:2,function(x) ifelse (x==0,pseudocount,x))
  
  for(i in 1:matrixColSize) {
    for(j in 1:matrixColSize) { 
      resultsMatrix[i,j]=JSD(as.vector(inMatrix[,i]),
                             as.vector(inMatrix[,j]))
    }
  }
  colnames -> colnames(resultsMatrix) -> rownames(resultsMatrix)
  as.dist(resultsMatrix)->resultsMatrix
  attr(resultsMatrix, "method") <- "dist"
  return(resultsMatrix) 
}

Paul’s system to test if changes in relative abundances might be driven by compositional effects

The system relies on the idea, that as little ‘real’ absolute abundance changes can explain the observed relative abundance changes (occam’s razor)
- Since relative abundances sum up to 1, it is clear that a change in relative abundance of an OTU has to be compensated by the others.
- For examples see 150512_HowTWorkWRelativeAbundances.

The equation to calculate the expected change in relative abundances of the other OTUs assuming that only one OTU really changed.

Always a comparison between two conditions. k is the index of the first condition and l of the second. The predictor OTU is i, the predicted OTU is j. Then the predicted rel abundance of j in condition l is (knowing the rel abundance of i in both k and l)

\[ \bar{x}_{jl} = x_{jk} \frac{1-x_{il}}{1-x_{ik}} \]

The equation is directly intuitiv: You assume that no OTU except the predictor changes. That means the relations among the other OTUs must remain the same (i.e. they are proportional). Thus, you just multiply the relative abundances of the predicted OTUs in condition k with the fraction they together make up in the new condition l, divided by the fraction they made up in condition k.

The next step is to compare the predicted relative abundances of condition l with the real relative abundances of condition l. I.e. does the prediction from the one OTU bring the composition closer to the real compositon. Closer, so there is a distance measure, we should test bray-curtis, Jensen-Shannon distance

In an iterative method it would be: - you have the two distributions of condition k and l - calculate for each OTU the prediction for condition l - find the OTU whose prediction gets closest to the composition of condition l - if there is a significant improvement, use the prediction as the new condition k

Further Definitions:

relativeAbundances: the real relative abundances of the 5 OTUs in condition k and l in a 5x2 matrix, so OTUs are rows, conditions are columns
pred_relativeAbundances: the predicted abundances for condition l using each OTU as predictor, consequently a 5x5 matrix, each column the relative abundances predicted from one OTU.
Real_RA_Differences: Relative abundances of condition l - relative abundances of condition k (so vector of length number of samples)
Predicted_RA_Differences: Predicted relative abundances of condition l - relative abundances of condition k (5x5 matrix)
PredicToReal_RA_Differences: Relative abundances of condition l - Predicted relative abundances of condition l (5x5 matrix)
Sum_PredToReal_RA_Differences: a measure of how well each OTU predicted the real relative abundances in condition_l (The smaller the better, so vector of length number of samples)
PredictionToReal_Ratio: The current precision measure: a ratio of the Sum_PredToReal_RA_Differences over the sum of the real differences between condition l and k (leaving the change of the predictor OTU out)
Predicted_RA_Differences2: same as Predicted_RA_Differences but diagonal set to 0.
OTUChange_PredictedFromAllOthers: Is the sum of all the changes of a OTU predicted from all the other OTUs. So let’s say half predict it to go up, half to go down, you sum this up.
Diff_RealToPredictionFromAllOthers: is the difference between the real change of an OTU compared to the OTUChange_PredictedFromAllOthers. I visualise this besides the Real_RA_Differences as power measures
Power_Real_RA_Differences: a power measure dividing the Real_RA_Difference by 1-relativeAbundances. I divide because the same change in absolute abundance causes a less dramatic effect in relative abundance for OTUs with already high relative abundance. This power measure totally ignores compositon effects.
Power_RealtoPrediction_RA_Differences: Similar power measure but with composition effects, i.e. Diff_RealToPredictionFromAllOthers divided by 1-relativeAbundances.

A simulated example: Total absolute abundance stays constant. Two change.

# Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
#                             absolute = c(5E8, 3.5E8, 2E8, 1E8, .2E8))
# Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
#                             absolute = c(5E8, 2E8, 3.5E8, 1E8, .2E8))

Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2E8, 1E8, .2E8))
Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(10E8, 2.5E8, 2E8, 1E8, .2E8))

Tr

##            A          B          C          D          E Condition_k
## A 0.63694268 0.51266118 0.50150084 0.48263182 0.47012436  0.46728972
## B 0.15923567 0.15923567 0.25075042 0.24131591 0.23506218  0.23364486
## C 0.12738854 0.20506447 0.12738854 0.19305273 0.18804974  0.18691589
## D 0.06369427 0.10253224 0.10030017 0.06369427 0.09402487  0.09345794
## E 0.01273885 0.02050645 0.02006003 0.01930527 0.01273885  0.01869159
##   Condition_l
## A  0.63694268
## B  0.15923567
## C  0.12738854
## D  0.06369427
## E  0.01273885

plotPaulPredict(PredAbund,Predictor)

In this case, there are no compositional effects here (B goes really down, C really up, as their changes balance, no compositional effects). So all predictors should be bad.

Calculating the distances between the predicted and real compositions.

I tested several methods here:

simply the difference (note difference from distance)
the euclidean distance (maybe)
the Bray-Curtis distance
the Jenson-Shannon distance

Simply the difference

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Euclidean Distance

Both for Euclidean and Bray Curtis Distance I fist use the vegan package. The problem with the vegdist() function is, that it calculates pairwise distances. I want to only calculate the distance to condition l and maybe k.

T_RA_Abundances <- t(RA_Abundances)
# the transformed data.frame where rows are samples and colums the OTUs
Matrix_RA_Abundances <- data.matrix(T_RA_Abundances)

DistEucl <- vegdist(Matrix_RA_Abundances, method = 'euclidean')
DistEucl <- as.matrix(DistEucl)

DistTo_k <- DistEucl[nrow(DistEucl)-1,]
DistTo_l <- DistEucl[nrow(DistEucl),]

# Since I want the data later in a nice data frame, I first
PredAReal_Dists <- data.frame(OTU = names(DistTo_l), Pred_Dists =
                                      DistTo_l)
PredAReal_Dists <- PredAReal_Dists[-nrow(PredAReal_Dists),]
# removes the condition_l (distances always 0)

# Note again, you should not compare these distances to the real distance, but to the real distance with the distance from the predicting strain out.

# you can do this as follows

MatrixAllk <- t(replicate(5,Matrix_RA_Abundances[nrow(Matrix_RA_Abundances)-1,]))

diag(MatrixAllk) <- Matrix_RA_Abundances[nrow(Matrix_RA_Abundances),]

Matrix_RA_Abundances_ToComp <- Matrix_RA_Abundances
Matrix_RA_Abundances_ToComp[1:(nrow(Matrix_RA_Abundances)-2),] <- MatrixAllk

DistEucl_TC <- vegdist(Matrix_RA_Abundances_ToComp, method = 'euclidean')
DistEucl_TC <- as.matrix(DistEucl_TC)

DistTo_l_TC <- DistEucl_TC[nrow(DistEucl_TC),] 

PredAReal_Dists$Real_Dists <- DistTo_l_TC[1:(length(DistTo_l_TC)-1)]

#The precision measure is the ratio of the Pred_Diffs to the Real_Diffs

PredAReal_Dists <- transform(PredAReal_Dists, PredRealRatio = Pred_Dists/Real_Dists) 

PredAReal_Dists

##                     OTU Pred_Dists Real_Dists PredRealRatio
## A                     A  0.0000000  0.1000077     0.0000000
## B                     B  0.1518162  0.1823374     0.8326113
## C                     C  0.1676694  0.1877236     0.8931715
## D                     D  0.1868259  0.1946736     0.9596880
## E                     E  0.1953912  0.1968457     0.9926111
## Condition_k Condition_k  0.1969357  0.1969357     1.0000000

# ########################################
# #Calculate te distance on your own ######
# ########################################
# 
# DistTo_k_S <- sqrt(colSums((RA_Abundances-RA_Abundances$Condition_k)^2))
# DistTo_l_S <- sqrt(colSums((RA_Abundances-RA_Abundances$Condition_l)^2))
# #all.equal(DistTo_k, DistTo_k_S)
# #all.equal(DistTo_l, DistTo_l_S)
# 
# # Since I want the data later in a nice data frame, I first
# PredAReal_DistsS <- data.frame(OTU = names(DistTo_l_S), Pred_Dists =
#                                       DistTo_l_S)
# PredAReal_DistsS <- PredAReal_DistsS[-nrow(PredAReal_DistsS),]
# 
# Allk_TC <- replicate(nrow(RA_Abundances), RA_Abundances$Condition_k)
# colnames(Allk_TC) <- LETTERS[1:nrow(relativeAbundances)]
# 
# diag(Allk_TC) <- RA_Abundances$Condition_l
# 
# DistTo_l_TCS <- sqrt(colSums((Allk_TC-RA_Abundances$Condition_l)^2))
# 
# PredAReal_DistsS$Real_Dists <- c(DistTo_l_TCS, DistTo_l_S[length(DistTo_l_S)-1])
# 
# #The precision measure is the ratio of the Pred_Diffs to the Real_Diffs
# 
# PredAReal_DistsS <- transform(PredAReal_DistsS, PredRealRatio = Pred_Dists/Real_Dists)

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Bray Curtis Distance

T_RA_Abundances <- t(RA_Abundances)
# the transformed data.frame where rows are samples and colums the OTUs
Matrix_RA_Abundances <- data.matrix(T_RA_Abundances)

DistBray <- vegdist(Matrix_RA_Abundances, method = 'bray')
DistBray <- as.matrix(DistBray)

DistTo_k <- DistBray[nrow(DistBray)-1,]
DistTo_l <- DistBray[nrow(DistBray),]

# Since I want the data later in a nice data frame, I first
PredAReal_Dists <- data.frame(OTU = names(DistTo_l), Pred_Dists =
                                      DistTo_l)
PredAReal_Dists <- PredAReal_Dists[-nrow(PredAReal_Dists),]
# removes the condition_l (distances always 0)

# Note again, you should not compare these distances to the real distance, but to the real distance with the distance from the predicting strain out.

# you can do this as follows

MatrixAllk <- t(replicate(nrow(relativeAbundances),relativeAbundances[,1]))

diag(MatrixAllk) <- relativeAbundances[,2]

Matrix_RA_Abundances_ToComp <- Matrix_RA_Abundances
Matrix_RA_Abundances_ToComp[1:nrow(relativeAbundances),] <- MatrixAllk

DistBray_TC <- vegdist(Matrix_RA_Abundances_ToComp, method = 'bray')
DistBray_TC <- as.matrix(DistBray_TC)

DistTo_l_TC <- DistBray_TC[nrow(DistBray_TC),] 

PredAReal_Dists$Real_Dists <- DistTo_l_TC[1:(length(DistTo_l_TC)-1)]

#The precision measure is the ratio of the Pred_Diffs to the Real_Diffs

PredAReal_Dists <- transform(PredAReal_Dists, PredRealRatio = Pred_Dists/Real_Dists) 


########################################
#Calculate the distance on your own ######
########################################
# DistTo_k_S <- colSums(abs(RA_Abundances-RA_Abundances$Condition_k))/colSums(
#         RA_Abundances+RA_Abundances$Condition_k)
# DistTo_l_S <- colSums(abs(RA_Abundances-RA_Abundances$Condition_l))/colSums(
#         RA_Abundances+RA_Abundances$Condition_l)
# 
# #all.equal(DistTo_k, DistTo_k_S)
# #all.equal(DistTo_l, DistTo_l_S)
# 
# # Since I want the data later in a nice data frame, I first
# PredAReal_DistsS <- data.frame(OTU = names(DistTo_l_S), Pred_Dists =
#                                       DistTo_l_S)
# PredAReal_DistsS <- PredAReal_DistsS[-nrow(PredAReal_DistsS),]
# 
# Allk_TC <- replicate(nrow(RA_Abundances), RA_Abundances$Condition_k)
# colnames(Allk_TC) <- LETTERS[1:nrow(relativeAbundances)]
# 
# diag(Allk_TC) <- RA_Abundances$Condition_l
# 
# DistTo_l_TCS <- colSums(abs(Allk_TC-RA_Abundances$Condition_l))/colSums(
#         Allk_TC+RA_Abundances$Condition_l)
# 
# PredAReal_DistsS$Real_Dists <- c(DistTo_l_TCS, DistTo_l_S[length(DistTo_l_S)-1])
# 
# #The precision measure is the ratio of the Pred_Diffs to the Real_Diffs
# 
# PredAReal_DistsS <- transform(PredAReal_DistsS, PredRealRatio = Pred_Dists/Real_Dists)

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Jensen-Shannon Distance

(http://stackoverflow.com/questions/11226627/jensen-shannon-divergence-in-r) and of course (http://enterotype.embl.de/enterotypes.html) This gives the function dist.JSD, that I copied, but I also did my own again

# # Since dist.JSD works with a matrix where the samples are columns I do not have to transform here.
# Matrix_RA_Abundances <- data.matrix(RA_Abundances)
# 
# DistJS <- dist.JSD(Matrix_RA_Abundances)
# # I was surprised it produced some NaNs, but I could not find a mistake in the
# # function
# # note the function directly produces an object of class dist
# DistJS <- as.matrix(DistJS)
# 
# DistTo_l <- DistJS[nrow(DistJS),]
# 
# 
# # Since I want the data later in a nice data frame, I first
# PredAReal_Dists <- data.frame(OTU = names(DistTo_l), Pred_Dists =
#                                       DistTo_l)
# PredAReal_Dists <- PredAReal_Dists[-nrow(PredAReal_Dists),]
# # removes the condition_l (distances always 0)
# 
# # Note again, you should not compare these distances to the real distance, but to the real distance with the distance from the predicting strain out.
# 
# 
# MatrixAllk <- replicate(nrow(relativeAbundances),relativeAbundances[,1])
# 
# diag(MatrixAllk) <- relativeAbundances[,2]
# 
# Matrix_RA_Abundances_ToComp <- Matrix_RA_Abundances
# Matrix_RA_Abundances_ToComp[,1:nrow(relativeAbundances)] <- MatrixAllk
# 
# DistJS_TC <- dist.JSD(Matrix_RA_Abundances_ToComp)
# DistJS_TC <- as.matrix(DistJS_TC)
# 
# DistTo_l_TC <- DistJS_TC[nrow(DistJS_TC),] 
# 
# PredAReal_Dists$Real_Dists <- DistTo_l_TC[1:(length(DistTo_l_TC)-1)]
# 
# #The precision measure is the ratio of the Pred_Diffs to the Real_Diffs
# 
# PredAReal_Dists <- transform(PredAReal_Dists, PredRealRatio = Pred_Dists/Real_Dists) 


########################################
#Calculate the distance on your own ######
########################################

# m <- 0.5 * (p + q)
# my p is the predictions, my q is condition l
Ms <- .5*(RA_Abundances+RA_Abundances$Condition_l)

JS <- 0.5 * (colSums(RA_Abundances * log(RA_Abundances / Ms)) + colSums(RA_Abundances$Condition_l * log(RA_Abundances$Condition_l / Ms)))

JS = ifelse(JS<0,0,JS) # Sometimes 0 is given as -3...e-17
DistTo_l_S <- sqrt(JS)

#all.equal(DistTo_l, DistTo_l_S)
# So this method really works

# Since I want the data later in a nice data frame, I first
PredAReal_DistsS <- data.frame(OTU = names(DistTo_l_S), Pred_Dists =
                                      DistTo_l_S)
PredAReal_DistsS <- PredAReal_DistsS[-nrow(PredAReal_DistsS),]

Allk_TC <- replicate(nrow(RA_Abundances), RA_Abundances$Condition_k)
colnames(Allk_TC) <- LETTERS[1:nrow(relativeAbundances)]

diag(Allk_TC) <- RA_Abundances$Condition_l

Ms_TC <- .5*(Allk_TC+RA_Abundances$Condition_l)

JS_TC <- 0.5 * (colSums(Allk_TC * log(Allk_TC / Ms_TC)) + colSums(RA_Abundances$Condition_l * log(RA_Abundances$Condition_l / Ms_TC)))


DistTo_l_TCS <- sqrt(JS_TC) 

PredAReal_DistsS$Real_Dists <- c(DistTo_l_TCS, DistTo_l_S[length(DistTo_l_S)-1])

#The precision measure is the ratio of the Pred_Diffs to the Real_Diffs

PredAReal_DistsS <- transform(PredAReal_DistsS, PredRealRatio = Pred_Dists/Real_Dists)

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Then do the same thing completely with the Zeller Matrix.

if(file.exists("Zeller.Rda")){
        load("Zeller.Rda")
}

# I decide from now on I define an Abundance Table with the OTUs as rows and so
# samples as columns. Since Zeller is currently the other way around, I call it
# T_AT
T_AT <- Zeller2014AbundTable
Samples <- T_AT$Sample
Groups <- T_AT$Groups
T_AT <- T_AT[-2:-1]
AT <- as.data.frame(t(T_AT))
colnames(AT) <- Samples

# choosing the two samples to compare
k <- 5
l <- 14
AT_sub <- select(AT, k, l)
# I chose these because they are control and cancer and because they are very different in UNMAPPED
# renaming the samples and OTUs
colnames(AT_sub) <- c('k', 'l')
rownames(AT_sub) <- c('UM', paste('OTU_', 1:300, sep= ''))

# calculating the predicted relative abundances for each predictor.
pred_RAs <- sapply(seq(nrow(AT_sub)), function(i) sapply(seq(nrow(AT_sub)),
                function(j) predictAbundance(AT_sub[,1], AT_sub[,2], i, j)))
# with 301 OTUs, pred_RAs is a 301x301 matrix, each column is the predicted composition predicted from one OTU

colnames(pred_RAs) <- rownames(AT_sub)
# to test pred_RAs-AT_sub$l should be 0 on the diagonal
# diag(pred_RAs-AT_sub$l) # indeed all 0

# turn into data frame and add the k and l compositions
pred_RAs <- as.data.frame(pred_RAs)
# names(PredAbund) <- LETTERS[1:ncol(PredAbund)]
pred_RAs$k <- AT_sub$k
pred_RAs$l <- AT_sub$l

# pred_RAs is now a data.frame, columns 1-301 are the predicted compositons predicted from each OTU, then there is the k and l compositions.
# This data frame will be used to calculate the distances

Simply the differences

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

150526_Paul???sMethodOnGastricBypassData

Thorsten Brach

26 May 2015

Function definitions

Paul’s system to test if changes in relative abundances might be driven by compositional effects

The equation to calculate the expected change in relative abundances of the other OTUs assuming that only one OTU really changed.

A simulated example: Total absolute abundance stays constant. Two change.

Calculating the distances between the predicted and real compositions.

Simply the difference

Euclidean Distance

Bray Curtis Distance

Jensen-Shannon Distance

Simply the differences