require(dplyr)
require(tidyr)
require(ggplot2)
require(stringr)
require(reshape2)
require(manipulate)
# require(gridExtra) # you could use with grid.arrange if you want to show several ggplot plots in one Figure,

Function definitions

########################################################
###### the color palette                         #####
########################################################
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

########################################################
###### show changes in OTUs between two conditions #####
########################################################

plotOTUChanges <- function(LongDataFrame){
        LongDataFrame$OTU <- as.character(LongDataFrame$OTU)
        LongDataFrame$Condition <- as.character(LongDataFrame$Condition)
        Tr <- ggplot(LongDataFrame, aes(x = OTU, y = Value, 
                                colour = Condition, 
                                size = Condition, 
                                shape = Condition))

        Tr <- Tr + geom_point() +
        facet_grid(AbundType ~., scales = "free") +
        scale_size_discrete(range = c(6,4)) +
        scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
        scale_shape_manual(values = c(16,17)) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("OTU") +
        ylab("Abundance")        
}

###############################################################
###### show changes in composition between two conditions #####
###############################################################

plotConditionChanges <- function(LongDataFrame){
        LongDataFrame$OTU <- as.character(LongDataFrame$OTU)
        LongDataFrame$Condition <- as.character(LongDataFrame$Condition)
        Tr <- ggplot(LongDataFrame, aes(x = Condition, y = Value, 
                                colour = OTU))

        Tr <- Tr + geom_point(size = 5, alpha = .7) +
        facet_grid(AbundType ~., scales = "free") +
        scale_colour_manual(values = cbPalette[1:length(unique(LongDataFrame$OTU))]) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("Condition") +
        ylab("Abundance")        
}


###############################################################
###### function to show how well one OTU predicts the others #####
###############################################################
# predictor is just the OTU, so "A", "B" and so on.
# could be combined with manipulate but not in Markdown,see below and in text



plotPaulPredict <- function(WideDataFrame, Predictor){
        WideDataFrame <- select(WideDataFrame, Condition_k, Condition_l, matches(Predictor))
        names(WideDataFrame)[3] <- paste("Pred._from:",Predictor)
        WideDataFrame$OTU <- LETTERS[1:nrow(WideDataFrame)]
        #WideDataFrame <- melt(WideDataFrame, id.vars = "OTU")
        WideDataFrame <- gather(WideDataFrame, Condition, Value, -OTU)
        
        Tr <- ggplot(WideDataFrame, aes(x = OTU, y = Value, 
                                colour = Condition, 
                                size = Condition, 
                                shape = Condition))

        Tr <- Tr + geom_point()  +
        scale_size_discrete(range = c(6,4,7)) +
        scale_colour_manual(values = c(cbPalette[1],cbPalette[2],cbPalette[3])) +
        scale_shape_manual(values = c(16,17,4)) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("OTU") +
        ylab("Rel. Abundance")
        
        print(Tr)        
}
## you can now do manipulate, but it does not work with this markdown

# manipulate(
# plotPaulPredict(PredAbund,Predictor),
# Predictor = picker("A","B","C","D","E"))

###############################################################
###### Functions to illustrate the prediction precision   #####
###############################################################

illustratePrecision <- function(DF){
    Tr <- ggplot(DF, aes(x = OTU, y = Ratio))
        Tr <- Tr +
        geom_hline(aes(yintercept = 1), colour = cbPalette[1], size = 1.5) +
        geom_point(size = 6, colour = cbPalette[2]) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("predictor OTU") +
            ylab("Ratio_PredictedToRealDifferences")
    print(Tr)
}

###############################################################
###### Functions to illustrate the power measures      #####
###############################################################

illustratePowers <- function(DF){
    Tr <- ggplot(DF, aes(x = OTU, y = value, color = Measure,
                           shape = Measure, size = Measure))
        Tr <- Tr +
        geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.5) +
        geom_point() +
        scale_size_discrete(range = c(6,4)) +
        scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
        scale_shape_manual(values = c(16,17)) +
        theme_bw(base_size = 18) +
        theme(panel.grid.minor = element_blank()) +
        # removes all minor gridlines
        theme(panel.grid.major = element_line(colour = cbPalette[1],
                                              size = 0.4)) +
        # sets the grid lines thicker and in a new colour
        theme(panel.background = element_rect(colour = cbPalette[1],
                                          size = 2)) +
        # adjust colour and size of the surrounding box
        theme(panel.grid.major.y = element_blank()) +
        # removes the horisontal major grid lines +
        xlab("predicted OTU")   
    print(Tr)
}



###############################################################
###### function to predict the relative abundance of OTU j in 
# condition l from the relative abundances of OTU i in condition l
# and condtion k                                           #####
###############################################################
# so k and l are the vectors of relative abundances in these conditions
# i and j are the indices of the OTUs of interst.


predictAbundance <- function( k, l, i, j ){
        if( i == j ){
                l[i]
              }
        else {
               k[j]*(1 - l[i])/(1 - k[i]) 
        }
}

Background and Future Questions

Currently I think that the microbial composition (relative abundances) in the feces should reflect the composition in the colon
- I have to think about the next steps from feces to reads, where are biases that might affect the composition (e.g. what if we take always 2~g DNA but one microbe has more DNA per cell than another)
- It would also be interesting to decide whether the absolute/total abundance of microbes in the feces might correlate with that in the colon
- how is the chance for a microbe to end up in feces, dependend on the distribution along the gut?
With this assumption, that we get a good idea of the rel. abundances of microbes in the gut without the slightes idea of the total abundance, we have to see what we can make out of the relative abundances
Paul’s method relies in a way on Occam’s razor: we assume that as little OTUs are really different (absolute abundances) between the conditions. Can we predict the obtained composition by changing only a few.

One example how misleading relative abundances can be for associations between OTUs if we know nothing about the absolute abundances (from Lowell et al. (2014))

Obviously the number of microbes in the gut is not fixed, so if we have no clue about absolute/total abundances, the following is a reasonable problem.

This example illustrates two points

for the interpretation of differential expression/abundance, i.e. where you only look at one OTU. Say OTU_A is less abundant in sample 1 than in sample 2. If you have no clue about the total abundance, the relative abundance does not allow you to make a conclusion about the absolute abundance.
differential abundance/expression is hard to interpret, but correlations between relative abundances (two OTUs) are just wrong if you have no further information or assumptions that the total abundance is similar in the samples.

I think we are mainly on the first point, Paul obviously also on the second.

In this example there are only two OTUs. BOTH change, and in the same direction, but because the one changes faster, the relative abundance of the one OTU actually goes up. So there is a direct correlation between the two OTUs in total abundance, but a negative correlation in relative abundance.

dfOpposite <- data.frame(Sample = c(1,2), OTU1_absolute = c(53571.43, 30000),
                         OTU2_absolute = c(71428.57, 12500))
dfOpposite

##   Sample OTU1_absolute OTU2_absolute
## 1      1      53571.43      71428.57
## 2      2      30000.00      12500.00

dfOpposite <- mutate(dfOpposite, Total = OTU1_absolute+OTU2_absolute,
                     OTU1_relative = OTU1_absolute/Total,
                     OTU2_relative = OTU2_absolute/Total)
dfOpposite

##   Sample OTU1_absolute OTU2_absolute  Total OTU1_relative OTU2_relative
## 1      1      53571.43      71428.57 125000     0.4285714     0.5714286
## 2      2      30000.00      12500.00  42500     0.7058824     0.2941176

So you see the absolute abundances correlate directly, the relative abundances correlate inverse. The simple reason, both OTUs dropped in numbers but OTU2 did so much stronger.

cor(dfOpposite$OTU1_absolute,dfOpposite$OTU2_absolute)

## [1] 1

cor(dfOpposite$OTU1_relative, dfOpposite$OTU2_relative)

## [1] -1

The Visual representation

# to get the tidy data.frame, here a combination of dplyr, reshape2, and stringr
# the select command from dplyr
PlotdfOpposite <- select(dfOpposite, Sample, OTU1_absolute,
                         OTU2_absolute, OTU1_relative, OTU2_relative)
# melt from reshape2
PlotdfOpposite <- melt(PlotdfOpposite, id.vars = "Sample")

## Here you use a command from the stringr package
Splitted <- str_split_fixed(PlotdfOpposite$variable, "_", 2)

PlotdfOpposite$AbundType <- Splitted[,2]
PlotdfOpposite$OTU <- Splitted[,1]
PlotdfOpposite$Sample <- as.character(PlotdfOpposite$Sample)


PlotdfOpposite <- select(PlotdfOpposite, OTU, Condition = Sample, AbundType, Value = value)

Tr <- plotOTUChanges(PlotdfOpposite)
Trr <- plotConditionChanges(PlotdfOpposite)

Tr

Trr

Paul’s system to test if changes in relative abundances are likely real or simply driven by changes in another OTU

again this system relies on the idea, that a change in as little OTUs as possible explains all the changes by compositional effects (occam’s razor)
Since relative abundances sum up to 1, it is clear that a change in relative abundance of an OTU has to compensated by the others.
see the just studied problem example: there were only 2 OTUs so if you know the relativ abundance of the one OTU you also know the one of the other.
Using 5 OTUs, when only one changes in absolute abundance, all change in relative abundance. Namely the 4 compensate for the one that actually changed.

The equation to calculate the expected change of the other OTUs knowing the change of one OTU from one condtion to the other.

Always a comparison between two conditions. k is the index of the first condition and l of the second. The predictor OTU is i, the predicted OTU is j. Then the predicted rel abundance of j in condition l is (knowing the rel abundance of i in both k and l)

\[ \bar{x}_{jl} = x_{jk} \frac{1-x_{il}}{1-x_{ik}} \]

The equation is directly intuitiv: You assume that no OTU except the predictor changes. That means the relations among the other OTUs must remain the same (chance for proportionality). Thus, you just multiply the proportions of the predicted OTUs in condition k with the fraction they together make up in the new condition l, divided by the fraction they made up in the old k.

In the following I simulate examples to see whether this knowledge (equation) can help us to better understand relative abundance data. I use examples with always five OTUs, and of course the “real” relative abundances of condition 1 and 2 (k and l are known)

Further Definitions:

relativeAbundances: the real relative abundances of the 5 OTUs in condition k and l in a 5x2 matrix
pred_relativeAbundances: the predicted abundances for condition l using each OTU as predictor, consequently a 5x5 matrix
Real_RA_Differences: Relative abundances of condition l - relative abundances of condition k (so vector of length number of samples)
Predicted_RA_Differences: Predicted relative abundances of condition l - relative abundances of condition k (5x5 matrix)
PredicToReal_RA_Differences: Relative abundances of condition l - Predicted relative abundances of condition l (5x5 matrix)
Sum_PredToReal_RA_Differences: a measure of how well each OTU predicted the real relative abundances in condition_l (The smaller the better, so vector of length number of samples)
PredictionToReal_Ratio: The current precision measure: a ratio of the Sum_PredToReal_RA_Differences over the sum of the real differences between condition l and k (leaving the change of the predictor OTU out)
Predicted_RA_Differences2: same as Predicted_RA_Differences but diagonal set to 0.
OTUChange_PredictedFromAllOthers: Is the sum of all the changes of a OTU predicted from all the other OTUs. So let’s say half predict it to go up, half to go down, you sum this up.
Diff_RealToPredictionFromAllOthers: is the difference between the real change of an OTU compared to the OTUChange_PredictedFromAllOthers. I visualise this besides the Real_RA_Differences as power measures
Power_Real_RA_Differences: a power measure dividing the Real_RA_Difference by 1-relativeAbundances. I divide because the same change in absolute abundance causes a less dramatic effect in relative abundance for OTUs with already high relative abundance. This power measure totally ignores compositon effects.
Power_RealtoPrediction_RA_Differences: Similar power measure but with composition effects, i.e. Diff_RealToPredictionFromAllOthers divided by 1-relativeAbundances.

General Questions are:

How to assess the prediction performance of the different OTUs (Precision)?
- Currently I think PredictionToReal_Ratio is a good measure
How to estimate which OTUs might be main effectors (Power)?
- Here I’m not yet sure if I found a good measure
- Power_Real_RA_Differences and Power_RealtoPrediction_RA_Differences rely on Real_RA_Differences and Diff_RealToPredictionFromAllOthers. The higher the more power for the OTU is the idea. Let’s test if it holds different examples.
How reciprocal are your measures, i.e. change l and k.
- see Lowell, CoDA (?) theory: Permutation invariance: the conclusions of analyses must not depend on the order of the components.

Problems/considerations with the current measures

Example 2 shows a possible problem of including the predictions into the power measures. You would assume that if one OTU B goes up 10% and another OTU C goes down 10%, these two would level out in their prediction of the others. This is however not the case, see this simple example

# three OTUs
relAb <- data.frame(OTU = c("A","B","C"), Condi_k = c(.5, .1, .4), Condi_l = c(.5, .4, .1) )
relAb

##   OTU Condi_k Condi_l
## 1   A     0.5     0.5
## 2   B     0.1     0.4
## 3   C     0.4     0.1

APredFromB <- .5*.6/.9
APredFromC <- .5*.9/.6
(APredFromB-.5) + (APredFromC-.5)

## [1] 0.08333333

So surprisingly, the OTUChange_PredictedFromAllOthers for A would be 8.33% up although B goes 30% up and C 30% down. C has the stronger effect on A than B, because it has higher abundance in the starting condition. The entire procedure is based on the assumption that as little as possible changes in the absolute abundances. If only C changed, to reach the 30% you needed more cells than for B because C had higher absolute abundance in condition k. But of course it puts a doubt on summing up these values and comparing it with the real change of A.

In other words: even if A does not change, the predictions from all the others will not sum up to the real A (nor is the mean of these predictions real A), in fact it seems they will always predict something too high.

Example 1: Indeed only one OTU changes in absolute abundance.

Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2E8, 1E8, .2E8))
Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(10E8, 2.5E8, 2E8, 1E8, .2E8))

Condition1 <- mutate(Condition1, relative = absolute/sum(absolute), Condition = "k" )
Condition2 <- mutate(Condition2, relative = absolute/sum(absolute), Condition = "l" )

Exampledf <- rbind(Condition1, Condition2)

Exampledf <- gather(Exampledf, AbundType, Value, -OTU, -Condition)

Tr <- plotOTUChanges(Exampledf)
Trr <- plotConditionChanges(Exampledf)

Tr

Trr

Only OTU_A changed in absolute abundance, but all changed in relative abundance. Does Paul’s equation help?

relativeAbundances <- cbind(Condition1$relative, Condition2$relative)
colnames(relativeAbundances) <- c("k", "l")
rownames(relativeAbundances) <- c("A", "B", "C", "D", "E")

# Paul's fancy way to use sapply to calculate the predicted abundances from the
# change of each OTU

pred_relativeAbundances <- sapply(seq(nrow(relativeAbundances)),
                    function(i) sapply(seq(nrow(relativeAbundances)),
                                       function(j) predictAbundance(relativeAbundances[,1], relativeAbundances[,2], i, j)))

# To understand what Paul does run this
# sapply(seq(2), function(i) sapply(6:10, function(j) paste(i,j)))

# so what he does is just a fancy way of doing this loop:

# pred_relativeAbundances <- matrix(0, nrow =5 , ncol = 5)
# for (i in 1:nrow(relativeAbundances)) {
#         for (j in 1:nrow(relativeAbundances)){
#                 PAbund[j,i] <- predictAbundance(X2Rela[,1], X2Rela[,2], i, j)
#         }
# }


colnames(pred_relativeAbundances) <- c("A", "B", "C", "D", "E")
relativeAbundances

##            k          l
## A 0.46728972 0.63694268
## B 0.23364486 0.15923567
## C 0.18691589 0.12738854
## D 0.09345794 0.06369427
## E 0.01869159 0.01273885

pred_relativeAbundances

##            A          B          C          D          E
## A 0.63694268 0.51266118 0.50150084 0.48263182 0.47012436
## B 0.15923567 0.15923567 0.25075042 0.24131591 0.23506218
## C 0.12738854 0.20506447 0.12738854 0.19305273 0.18804974
## D 0.06369427 0.10253224 0.10030017 0.06369427 0.09402487
## E 0.01273885 0.02050645 0.02006003 0.01930527 0.01273885

As you can see OTU_A is the only that changes and it is the perfect predictor. To visualise the prediction I use the function “plotPaulPredict” that in principle could be nicely combined with manipulate.

PredAbund <- as.data.frame(pred_relativeAbundances)
# names(PredAbund) <- LETTERS[1:ncol(PredAbund)]
PredAbund$Condition_k <- relativeAbundances[,1]
PredAbund$Condition_l <- relativeAbundances[,2]

# NOTE, here you could use manipulate in the console

# manipulate(
# plotPaulPredict(PredAbund,Predictor),
# Predictor = picker("A","B","C","D","E"))

# I can only plot what is best
Predictor = "A"

plotPaulPredict(PredAbund,Predictor)

In this case A is perfect, because the situation is exactly as our prediction assumes, that only one OTU really changes, the rest is just compositional effects.

Here all the further definitions come in the game.

Real_RA_Differences <- relativeAbundances[,2] - relativeAbundances[,1]
Predicted_RA_Differences <- pred_relativeAbundances - relativeAbundances[,1]


PredicToReal_RA_Differences <- relativeAbundances[,2] - pred_relativeAbundances
# equal to: Predicted_RA_Differences-Real_RA_Differences
Sum_PredToReal_RA_Differences <- colSums(abs(PredicToReal_RA_Differences))
# I take the absolute here because an OTU that goes down shifts in the predictio

# To test if the prediction from each OTU brought the rel. abundances of the 
# others closer to the real rel. abundances, I divide this Sum by the sum of the real differences (taking the change of the predicting OTU out)

Real_RA_Differences_MinusPredictor <- replicate(nrow(relativeAbundances),
                                                Real_RA_Differences)
colnames(Real_RA_Differences_MinusPredictor) <- LETTERS[1:nrow(relativeAbundances)]

diag(Real_RA_Differences_MinusPredictor) <- 0

Sum_Real_RA_Differences_MinusPredictor <- colSums(abs(Real_RA_Differences_MinusPredictor))

PredictionToReal_Ratio <-Sum_PredToReal_RA_Differences/Sum_Real_RA_Differences_MinusPredictor

And the visualisation of this prescision measure:

Ratiodf <- melt(PredictionToReal_Ratio, value.name = "Ratio")
Ratiodf <- mutate(Ratiodf, OTU = rownames(Ratiodf))

illustratePrecision(Ratiodf)

# Tr <- ggplot(Ratiodf, aes(x = OTU, y = Ratio))
# Tr <- Tr + 
#         geom_hline(aes(yintercept = c(1)), color = cbPalette[1]) +
#         geom_point(size = 6, colour = cbPalette[2])
# ggplot(Ratiodf, aes(x = OTU, y = Ratio)) + geom_bar(position="dodge", stat = "identity", width = .5 ) + theme_bw(14) + ylab("Ratio_PredictedToRealDifferences") + xlab("Predictor OTU")

Towards the power measures.

Power_Real_RA_Differences <- abs(Real_RA_Differences/(1-relativeAbundances[,1]))
# a first simple power measure: because the same absolute abundance change causes more relative abundance change on an OTU with small relative abundance,
# I balance this with the division.

# The entire method is based on the assumpiton that a lot of the changes are
# compositon effects, but this power measure completely ingnores the composition
# effects, therefore more

Predicted_RA_Differences2 <- Predicted_RA_Differences
diag(Predicted_RA_Differences2) <- 0
# each row of Predicted_RA_Differences says how much the OTU of that row should
# have been changed if only the others had changed (one in each case)

OTUChange_PredictedFromAllOthers <- rowSums(Predicted_RA_Differences2)
# so this sum shows for each OTU the sum of changes that would have been predicted from changes of all the others independently.
# NOTE: OF COURSE THIS IS ALSO A BIT PROBLEMATIC with occam's razor, becuase
# you consider changes of all the others (albeit assuming as little change
# as possible.)


Diff_RealToPredictionFromAllOthers <- Real_RA_Differences-OTUChange_PredictedFromAllOthers
# E.g. when A went up (positive 
# difference) but the others predict A to go down, then this difference is 
# even bigger. If it is negative it means it went more down than predicted. Or less up than predicted.

# this is the sedond power measure. 

# INITIALLY I THOUGHT:
# To have a ratiometric measurement I divide this difference by the predicted difference. 

# Ratio_RealChangeTOPredictedChange <- abs(Diff_RealToPredictionFromAllOthers)/
#         abs(OTUChange_PredictedFromAllOthers)
# HOWERVER, this made no sense, because a very small prediction in the right direction will in the end lead to a bigger ratio than a somewhat bigger prediction in the wrong direction (draw it)

# General Note: an OTU with small rel abundance in k, is less affected by compositional effects. See simply:
#  Predicted_RA_Differences/relativeAbundances[,1]
# dividing by the relativeAbundances of k equals the compositional effect.
# On the other hand, a change in the same absolute number has a more pronounced
# effect on the relative abundance of small OTUs than big ones.
# see for example 100, 50, 25, 25 as absolute abundances, then add 10 to OTU_A 
# and OTU_D, OTU_D changes 4,1% OTU_A only 2.38%

# So OTUs with high relative Abundance in k, have it harder to get real RA changes from absolute changes. They have it though easier to be effected by compositional effects. See the NOTE before, taking all the compositional effects
# into accoutn anyway clashes a bit with occam's razor. 
# so also the compositional effect is actually affected opposite to the real_RA_difference, I divide for the power measure again just with 1-relativeAbundances in k.

Power_RealtoPrediction_RA_Differences <- abs(Diff_RealToPredictionFromAllOthers/(1-relativeAbundances[,1]))

Visualisation of the power measures, whose task it is to find those OTUs that really changed independent of composition effects:

Ratiodf <- melt(Power_Real_RA_Differences)
Ratiodf <- mutate(Ratiodf, OTU = rownames(Ratiodf), Measure = "Power_Real")
Ratiodf1 <- melt(Power_RealtoPrediction_RA_Differences)
Ratiodf1 <- mutate(Ratiodf1, OTU = rownames(Ratiodf1), Measure = "Power_Real-Predict")
RatiodfA <- rbind(Ratiodf, Ratiodf1)

Ratiodf2 <- melt(Real_RA_Differences)
Ratiodf2 <- mutate(Ratiodf2, OTU = rownames(Ratiodf2), Measure = 'Diff_Real')
Ratiodf3 <- melt(Diff_RealToPredictionFromAllOthers)
Ratiodf3 <- mutate(Ratiodf3, OTU = rownames(Ratiodf3), Measure = 'Diff_Real-Predict')
RatiodfB <- rbind(Ratiodf2, Ratiodf3)

illustratePowers(RatiodfA)

illustratePowers(RatiodfB)

Example 1b: The reciprocal of example 1.

Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2E8, 1E8, .2E8))
Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(10E8, 2.5E8, 2E8, 1E8, .2E8))

Tr

Trr

relativeAbundances

##            k          l
## A 0.63694268 0.46728972
## B 0.15923567 0.23364486
## C 0.12738854 0.18691589
## D 0.06369427 0.09345794
## E 0.01273885 0.01869159

pred_relativeAbundances

##            A          B          C          D          E
## A 0.46728972 0.58057208 0.59349205 0.61669528 0.63310220
## B 0.23364486 0.23364486 0.14837301 0.15417382 0.15827555
## C 0.18691589 0.11611442 0.18691589 0.12333906 0.12662044
## D 0.09345794 0.05805721 0.05934921 0.09345794 0.06331022
## E 0.01869159 0.01161144 0.01186984 0.01233391 0.01869159

The three OTUs that do not change obviously would not predict any change.

plotPaulPredict(PredAbund,Predictor)

In this case, there are no compositional effects here. So all predictors should be bad.

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Indeed no predictor below 1

Towards the power measures.

Visualisation of the power measures.

illustratePowers(RatiodfA)

illustratePowers(RatiodfB)

Example 2: Total absolute abundance stays constant. Two change.

Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2E8, 1E8, .2E8))
Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2E8, 2.5E8, 1E8, .2E8))

Tr

Trr

relativeAbundances

##            k          l
## A 0.46728972 0.46728972
## B 0.23364486 0.18691589
## C 0.18691589 0.23364486
## D 0.09345794 0.09345794
## E 0.01869159 0.01869159

pred_relativeAbundances

##            A          B          C          D          E
## A 0.46728972 0.49578300 0.44043399 0.46728972 0.46728972
## B 0.23364486 0.18691589 0.22021699 0.23364486 0.23364486
## C 0.18691589 0.19831320 0.23364486 0.18691589 0.18691589
## D 0.09345794 0.09915660 0.08808680 0.09345794 0.09345794
## E 0.01869159 0.01983132 0.01761736 0.01869159 0.01869159

The three OTUs that do not change obviously would not predict any change.

plotPaulPredict(PredAbund,Predictor)

In this case, there are no compositional effects here. So all predictors should be bad.

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Indeed no predictor below 1

Towards the power measures.

Visualisation of the power measures.

illustratePowers(RatiodfA)

illustratePowers(RatiodfB)

Example 3: Only one OTU really changes but a smaller one

Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2.5E8, 1E8, .2E8))
Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2.5E8, 2E8, .2E8))

Tr

Trr

relativeAbundances

##            k          l
## A 0.44642857 0.40983607
## B 0.22321429 0.20491803
## C 0.22321429 0.20491803
## D 0.08928571 0.16393443
## E 0.01785714 0.01639344

pred_relativeAbundances

##            A          B          C          D          E
## A 0.40983607 0.45694366 0.45694366 0.40983607 0.44709389
## B 0.23796933 0.20491803 0.22847183 0.20491803 0.22354694
## C 0.23796933 0.22847183 0.20491803 0.20491803 0.22354694
## D 0.09518773 0.09138873 0.09138873 0.16393443 0.08941878
## E 0.01903755 0.01827775 0.01827775 0.01639344 0.01639344

The three OTUs that do not change obviously would not predict any change.

plotPaulPredict(PredAbund,Predictor)

In this case, there are no compositional effects here. So all predictors should be bad.

And the visualisation of this prescision measure:

illustratePrecision(Ratiodf)

Indeed no predictor below 1

Towards the power measures.

Visualisation of the power measures.

illustratePowers(RatiodfA)

illustratePowers(RatiodfB)

Example 4: In principle the same as example 3 but simulated after 30 samples each

The idea is see if you would get significant differences of the others. If so, would the analysis here help to figure out the really significant one?

Condition1 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2.5E8, 1E8, .2E8))
Condition2 <- data.frame(OTU = c("A", "B", "C", "D", "E"),
                            absolute = c(5E8, 2.5E8, 2.5E8, 2E8, .2E8))

nosim <- 30
set.seed(123)
RandomMatrix1 <- matrix(rnorm(5 * nosim, mean =1, sd=0.125), nrow=5)
RandomMatrix2 <- matrix(rnorm(5 * nosim, mean =1, sd=0.125), nrow=5)
Conditions1 <- RandomMatrix1*Condition1$absolute
Conditions2 <- RandomMatrix2*Condition2$absolute

RA_Conditions1 <- Conditions1/colSums(Conditions1)
RA_Conditions2 <- Conditions2/colSums(Conditions2)

How To Work With Relative Abundances

Thorsten Brach

12 May 2015

Function definitions

Background and Future Questions

One example how misleading relative abundances can be for associations between OTUs if we know nothing about the absolute abundances (from Lowell et al. (2014))

Paul’s system to test if changes in relative abundances are likely real or simply driven by changes in another OTU

The equation to calculate the expected change of the other OTUs knowing the change of one OTU from one condtion to the other.

Problems/considerations with the current measures

Example 1: Indeed only one OTU changes in absolute abundance.

Example 1b: The reciprocal of example 1.

Example 2: Total absolute abundance stays constant. Two change.

Example 3: Only one OTU really changes but a smaller one

Example 4: In principle the same as example 3 but simulated after 30 samples each