source('Functions/Dist_Functs.R')
source('Functions/Sim_Functs.R')
source('Functions/Predict_Functs.R')
source('Functions/Illus_Functs.R')
source('Functions/SubComp_Functs.R')

FUNCTIONS

Sim_Functs

#########################################
#########################################
###### To Make DFs of simulated data ###
#########################################
#########################################


#################################################
### Generate data.frames from simulated examples##
##################################################
# INPUT: the vectors k and l containing the absolute abundances
# of condition k and l
# OUTPUT: SimDFs, the three data frames WideDF (absolute and realtive 
# abundances), AT_Sim the relative Abundance Table of the two conditions,
# LongDF, the long version of WideDF for plotting

GenerateSimDFs <- function(k,l){
        WideDF <- data.frame(OTU = LETTERS[1:length(k)], absolute_k = k, absolute_l = l, relative_k = k/sum(k), relative_l =l/sum(l))
        AT_Sim <- select(WideDF, OTU, k = relative_k, l = relative_l)
        LongDF <- gather(WideDF, Condition, Value, -OTU)
        LongDF <- separate(LongDF, Condition, 
                           into = c('AbundType', 'Condition'))
        SimDFs <- list(WideDF, AT_Sim, LongDF)
        return(SimDFs)
}

Illus_Functs

########################################################
###### the color palette                         #####
########################################################
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")


################################################
################################################
##### For Simulations from abs abundance #######
################################################
################################################

###############################################################
###### Show Changes Relative vs Absolute for simulations #####
###############################################################
# INPUT: SimDFs[[3]]

plotSimAbsRel <- function(LongDataFrame){
        LongDataFrame$OTU <- as.character(LongDataFrame$OTU)
        LongDataFrame$Condition <- as.character(LongDataFrame$Condition)
        # do I need this?
        Tr <- ggplot(LongDataFrame, aes(x = OTU, y = Value, 
                                        colour = Condition, 
                                        size = Condition, 
                                        shape = Condition))
        
        Tr <- Tr + geom_point() +
                facet_grid(AbundType ~., scales = "free") +
                scale_size_discrete(range = c(6,4)) +
                scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
                scale_shape_manual(values = c(16,17)) +
                theme_bw(base_size = 25) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.4)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                xlab("") +
                ylab("Abundance") +
                labs(colour = "", size = "", shape = "")
        return(Tr)
}



################################################
################################################
##### For the Predict Method             #######
################################################
################################################

###############################################################
###### Plot Predictions dependent on Predictor            #####
###############################################################
# INPUT: The data frame where each column is a prediction, last two columns are
# k and l. E.g. pred_RAs, so the outcome of calc_pred_RAs, or Predicts[[1]],
# being the outcome of the predictions function. Further you have to give the
# predictor, e.g. "A" or "OTU1"
# ATTENTION: This plot is nicest when combiend with manipulate

plotPaulPredict <- function(pred_RAs, Predictor){
        
        rownames(pred_RAs) <- colnames(pred_RAs)[1:(ncol(pred_RAs)-2)]
        DF <- select(pred_RAs, k, l, matches(Predictor))
        names(DF)[3] <- paste("from:",Predictor)
        DF$OTU <- rownames(DF)
        
        if (nrow(DF) < 20) {
                size = c(6,4,6)
                sizetextx = 18
        } else if((nrow(AbTable_sub) > 19) & (nrow(AbTable_sub) < 100)) {
                size = c(4,2,4)
                sizetextx = 10
        } else if (nrow(AbTable_sub) > 99) {
                size = c(3,1.5, 3)
                sizetextx = 6
        }
        
        DF <- gather(DF, Condition, Value, -OTU)
        
        Tr <- ggplot(DF, aes(x = OTU, y = Value, 
                             colour = Condition, 
                             size = Condition, 
                             shape = Condition))
        
        Tr <- Tr + geom_point()  +
                scale_size_discrete(range = size) +
                scale_colour_manual(values = c(cbPalette[1:3])) +
                scale_shape_manual(values = c(16,17,18)) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                xlab("") +
                ylab("Relative Abundance") +
                labs(colour = "", size = "", shape = "")
        return(Tr)        
}

# NOTE use with manipulate possible
# manipulate(
# plotPaulPredict(PredictsSim[[1]],Predictor),
# Predictor = picker(as.list(rownames(PredictsSim[[1]]))))

###############################################################
###### Plot Predictions dependent on Predictor LOG           #####
###############################################################

plotPaulPredictLog <- function(pred_RAs, Predictor){
        Tr <- plotPaulPredict(pred_RAs, Predictor)
        Tr <- Tr + scale_y_log10()
        return(Tr)
}

#NOTE Use with manipulate possible
# manipulate(
# plotPaulPredictLog(PredictsSim[[1]],Predictor),
# Predictor = picker(as.list(rownames(PredictsSim[[1]]))))


###############################################################
###### Barplot Illustration mainly for Simulations        #####
###############################################################
# INPUT: The data frame where each column is a prediction, last two columns are
# k and l. E.g. pred_RAs, so the outcome of calc_pred_RAs, or Predicts[[1]],
# being the outcome of the predictions function. Further you have to give the
# predictor, e.g. "A" or "OTU1"
# ATTENTION: This plot is nicest when combiend with manipulate

BarplotIllu <- function(pred_RAs, Predictor){
        
        rownames(pred_RAs) <- colnames(pred_RAs)[1:(ncol(pred_RAs)-2)]
        DF <- select(pred_RAs, k, l, matches(Predictor))
        names(DF)[3] <- paste("from:",Predictor)
        DF$OTU <- rownames(DF)
        
        if (nrow(DF) < 20) {
                size = c(6,4,6)
                sizetextx = 18
        } else if((nrow(AbTable_sub) > 19) & (nrow(AbTable_sub) < 100)) {
                size = c(4,2,4)
                sizetextx = 10
        } else if (nrow(AbTable_sub) > 99) {
                size = c(3,1.5, 3)
                sizetextx = 6
        }
        
        DF <- gather(DF, Condition, Value, -OTU)
        
        Tr <- ggplot(DF, aes(x = Condition, y = Value, fill = OTU))
        
        Tr <- Tr + geom_bar(stat = 'identity')  +
                scale_fill_manual(values 
                                  = c(cbPalette[1:length(levels(factor(DF$OTU)))])) +
                theme_bw(base_size = 25) +
                theme(panel.grid = element_blank()) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                xlab("") +
                ylab("Relative Abundance") 
        return(Tr)        
}

# NOTE use with manipulate possible
# manipulate(
# BarplotIllu(PredictsSim[[1]],Predictor),
# Predictor = picker(as.list(rownames(PredictsSim[[1]]))))



###############################################################
###### Illustrate the Percentage Explained               #####
###############################################################
# INPUT: usually PredReal_DistComp, 


IlluPercExpl <- function(DF){
        
        DF <- DF[-nrow(DF),] # remove the k row
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = PercExpl*100))
        Tr <- Tr +
                geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab("Percentage of k-l Distance Explained")
        return(Tr)
}

###############################################################
###### Illustrate the Percentage Explained per Effect Size #####
###############################################################
# INPUT: usually PredReal_DistComp, 


IlluPercExplperES <- function(DF){
        
        DF <- DF[-nrow(DF),] # remove the k row
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = PercExplperES))
        Tr <- Tr +
                geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab("k-l Distance Explained per Effect Size")
        return(Tr)
}


###############################################################
###### Illustrate the Percentage Explained ES1 #####
###############################################################
# INPUT: usually PredReal_DistComp, 


IlluPercExplES1 <- function(DF){
        
        DF <- DF[-nrow(DF),] # remove the k row
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = PercExplES1))
        Tr <- Tr +
                geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab("k-l Distance Explained times ES1")
        return(Tr)
}

###############################################################
###### Illustrate the Percentage Explained ES2 #####
###############################################################
# INPUT: usually PredReal_DistComp, 


IlluPercExplES2 <- function(DF){
        
        DF <- DF[-nrow(DF),] # remove the k row
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = PercExplES2))
        Tr <- Tr +
                geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab("k-l Distance Explained times ES2")
        return(Tr)
}

###############################################################
###### Illustrate the Percentage Explained ES3 #####
###############################################################
# INPUT: usually PredReal_DistComp, 


IlluPercExplES3 <- function(DF){
        
        DF <- DF[-nrow(DF),] # remove the k row
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = PercExplES3))
        Tr <- Tr +
                geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab("k-l Distance Explained times ES3")
        return(Tr)
}


################################################
################################################
##### For the SubComp Method             #######
################################################
################################################

###############################################################
###### Illustrate the Distances                           #####
###############################################################
# INPUT: just a Distance Vector, and as labY the Method used
# Output: just the Trellis to plot


IlluDistances <- function(Dist, labY = "Distance"){
        
        DF <- data.frame(OTU = names(Dist), Dist = Dist)
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = Dist))
        Tr <- Tr +
                geom_hline(aes(yintercept = Dist[length(Dist)]), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab(labY)
        return(Tr)
}




################################################
################################################
##### For real Data                     #######
################################################
################################################





###############################################################
###### Illustrate the OTU changes between condition k and l #####
###############################################################
# INPUT: AbTable_sub, so the subsetted Abundance Table, a data frame with 
# the columns k and l

plotOTUChanges <- function(AbTable_sub){
        
        # make sure you have the OTUs as variable and not just as rownames
        if (! 'OTU' %in% colnames(AbTable_sub)) { 
                AbTable_sub$OTU <- rownames(AbTable_sub)       
        }
        
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(AbTable_sub) < 20) {
                size = c(6,4)
                sizetextx = 18
        } else if((nrow(AbTable_sub) > 19) & (nrow(AbTable_sub) < 100)) {
                size = c(4,2)
                sizetextx = 10
        } else if (nrow(AbTable_sub) > 99) {
                size = c(3,1.5)
                sizetextx = 6
        }
        
        AbTable_sub$OTU <- factor(AbTable_sub$OTU)
        LevelsWant <- rev(as.character(AbTable_sub$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                AbTable_sub$OTU <- relevel(AbTable_sub$OTU, ref = LevelsWant[i])
        }
        
        AbTable_sub_long <- gather(AbTable_sub, Condition, RA, -OTU)
        
        #AbTable_sub_long$Condition <- as.character(AbTable_sub_long$Condition)
        Tr <- ggplot(AbTable_sub_long, aes(x = OTU, y = RA, 
                                           colour = Condition, 
                                           size = Condition, 
                                           shape = Condition))
        
        Tr <- Tr + geom_point()  +
                scale_size_discrete(range = size) +
                scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
                scale_shape_manual(values = c(16,17)) +
                #scale_x_discrete(expand = c(0.01,.01)) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = .5)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = 0.5,
                                                 size = sizetextx)) +
                xlab("") +
                ylab("relative Abundance") +
                labs(colour = "", size = "", shape = "")
        return(Tr)
}




################################################
################################################
##### Old Plots           #######
################################################
################################################



###############################################################
###### Illustrate the prediction precision               #####
###############################################################
# INPUT: usually PredReal_DistComp, so just a data frame with
# the columns OTU and PredReal_Ratio


illustratePrecision <- function(DF){
        
        DF <- DF[-nrow(DF),] # remove the k row
        
        # the size of the dots and the writing depends on how many OTUs there
        # are
        if (nrow(DF) < 20) {
                size = 6
                sizetextx = 18
        } else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
                size = 4
                sizetextx = 14
        } else if (nrow(DF) > 99) {
                size = 3
                sizetextx = 6
        }
        
        # get the order of the OTU right. OTU is used as a factor and levels 
        # in R are alphabetically. But you want the OTUs in the same order as
        # they are in DF (rownames)
        LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
        for (i in 1:length(LevelsWant)) {
                DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
        }
        
        Tr <- ggplot(DF, aes(x = OTU, y = PredReal_Ratio))
        Tr <- Tr +
                geom_hline(aes(yintercept = 1), colour = cbPalette[1], size = 1.2) +
                geom_point(size = size, colour = cbPalette[2]) +
                theme_bw(base_size = 18) +
                theme(panel.grid.minor = element_blank()) +
                # removes all minor gridlines
                theme(panel.grid.major = element_line(colour = cbPalette[1],
                                                      size = 0.2)) +
                # sets the grid lines thicker and in a new colour
                theme(panel.background = element_rect(colour = cbPalette[1],
                                                      size = 2)) +
                # adjust colour and size of the surrounding box
                theme(panel.grid.major.y = element_blank()) +
                # removes the horisontal major grid lines +
                theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
                xlab("") +
                ylab("PredReal_DistanceRatio")
        return(Tr)
}

SubComp_Functs

########################################################
########################################################
############### For method S1 ######################### 
######simple renormalised subcompositions #############
########################################################
########################################################

#################################################
### Generate List with Subcompositions##
##################################################
# INPUT: Abundance Table, e.g. SimDFs[[2]], It is a data frame, columns:
# OTU, k, and l
# OUTPUT: SubATs is a list with the ATs in which one OTU is missing each, and
# the compositions have been renormalised. The last entry is the original AT,
# so the input SimDFs[[2]]

SubCompis <- function(AT) {
        
        # remove one OTU in each case
        SubATs <- lapply(1:nrow(AT), function(x) AT[-x,])
        # normalise the remaining composition to 1
        SubATs <- lapply(1:length(SubATs), 
                         function(x) mutate(SubATs[[x]], k = k/sum(k), 
                                            l = l/sum(l)))
        # a proof that now all columms sum up to 1
        # sapply(1:length(SubATs), function(x) summarise(SubATs[[x]], 
        # k = sum(k), l = sum(l)))
        
        names(SubATs) <- paste("miOTU_", AT$OTU, sep = '')
        SubATs$AT <- AT # add the original AT as an entry to the list
        return(SubATs)
        
        
}


########################################################
########################################################
############### For method S2 ######################### 
######subcompositions with average OTU   #############
########################################################
########################################################

################### For Method S2, "subcompositions" where one OTU
# is set to the average of k and l via "absolute" abundances ######


#################################################
### Generate List with Subcompositions##
##################################################
# INPUT: Abundance Table, e.g. SimDFs[[2]], It is a data frame, columns:
# OTU, k, and l
# OUTPUT: SubATs is a list with the ATs in which one OTU is missing each, and
# the compositions have been renormalised. The last entry is the original AT,
# so the input SimDFs[[2]]

SubCompisAV <- function(AT, Total = 1000) {
        
        AT$Order <- 1:nrow(AT) # used later to reorder the data frames
        OTUMeans <- rowMeans(select(AT, k, l))
        TotalAddFMeans <-  Total*OTUMeans
        TotalAddDF <- data.frame(OTU = AT$OTU,
                                 k = TotalAddFMeans,
                                 l = TotalAddFMeans,
                                 Order = AT$Order)
        TotalForRest <- Total*(1-OTUMeans)
        # remove one OTU in each case
        SubATs <- lapply(1:nrow(AT), function(x) AT[-x,])
        # normalise the remaining composition to 1
        SubATs <- lapply(1:length(SubATs), 
                         function(x) mutate(SubATs[[x]], k = k/sum(k), 
                                            l = l/sum(l)))
        
        SubATsAbs <- lapply(1:length(SubATs), function(x) 
                mutate(SubATs[[x]], k = k*TotalForRest[x],
                       l = l*TotalForRest[x]))
        
        SubATsAbs <- lapply(1:length(SubATsAbs), function(x){
                rbind(SubATsAbs[[x]], TotalAddDF[x,])
        })
        SubATsAbs <- lapply(1:length(SubATsAbs), function(x){
                arrange(SubATsAbs[[x]], Order)
        })
        
        # a proof that now all columms sum up to Total
        # sapply(1:length(SubATsAbs), function(x) summarise(SubATsAbs[[x]], 
        # k = sum(k), l = sum(l)))
        
        # remove the Order column
        SubATsAbs <- lapply(1:length(SubATsAbs), function(x){
                select(SubATsAbs[[x]], -Order)
        })
        
        SubATs <- lapply(1:length(SubATsAbs), function(x) {
                mutate(SubATsAbs[[x]], k = k/sum(k), l = l/sum(l))
        })
        
        names(SubATs) <- paste("miOTU_", AT$OTU, sep = '')
        SubATs$AT <- AT # add the original AT as an entry to the list
        return(SubATs)    
        
}

Predict_Functs

#########################################
#########################################
###### The actual prediction functions###
#########################################
#########################################


###############################################################
###### function to predict the relative abundance of OTU j in 
# condition l from the relative abundances of OTU i in condition l
# and condtion k                                           #####
###############################################################
# INPUT:  k and l are the vectors of relative abundances at two conditions.
# i and j are the indices of the OTUs of interest.
# OUTPUT: the predicted abundance of OTU j in condition
# l


predictAbundance <- function( k, l, i, j ){
        if( i == j ){
                l[i]
        }
        else {
                k[j]*(1 - l[i])/(1 - k[i]) 
        }
}


###############################################################
###### function to calculate all predicted abundances, predicted
# from each OTU for each OTU >> matrix/data frame         #####
###############################################################
# INPUT: AbTable_sub is an abundance table subset of two conditions, i.e.
# a data.frame with two variables k and l (the two compositions), rownames(AbTable_sup) should be the OTUs, e.g. OTU_1 to OTU_n, alternatively for the
# simulations, there can be an OTU column
# OUTPUT: pred_RAs is a data.frame. The columns are the predicted compositions
# using each OTU as a predictor. In addition, the last two colums are k and l
# from AbTable_sub. pred_RAs can be used as input of the distance functions.

calc_pred_RAs <- function(AbTable_sub) {
        
        # to make sure OTUs are rownames and not the extra column
        if ('OTU' %in% colnames(AbTable_sub)) {
                rownames(AbTable_sub) <- AbTable_sub$OTU
                AbTable_sub <- select(AbTable_sub, k, l)
        }
        
        pred_RAs <- sapply(seq(nrow(AbTable_sub)), function(i) 
                sapply(seq(nrow(AbTable_sub)),
                       function(j) predictAbundance(AbTable_sub$k, AbTable_sub$l, i, j)))
        # calculates the predicted relative Abundance for each OTU as predictor.
        # for n OTUs you get a nxn matrix
        colnames(pred_RAs) <- rownames(AbTable_sub)
        rownames(pred_RAs) <- rownames(AbTable_sub)
        # turn into data frame and add the k and l compositions
        pred_RAs <- as.data.frame(pred_RAs)
        pred_RAs$k <- AbTable_sub$k
        pred_RAs$l <- AbTable_sub$l
        # Thus, for nOTUs pred_RAs is a data.frame with n+2 variables.
        return(pred_RAs)
} 


##################################
##################################
###### not prediction function####
###### but for prediction method##
##################################
##################################


#################################################
### Generate the 5 plots for each item of a list##
### of dist comparisons ###########################
##################################################
# INPUT: PredReal_DistComp
# OUTPUT: a list of the Trellis objects for the 5 plots
# to compare the predicted with the real distance based on different
# effect sizes and so on

getPlotList <- function(PredReal_DistComp) {
        
        Tr <- IlluPercExpl(PredReal_DistComp)
        Tr2 <- IlluPercExplperES(PredReal_DistComp)
        Tr3 <- IlluPercExplES1(PredReal_DistComp)
        Tr4 <- IlluPercExplES2(PredReal_DistComp)
        Tr5 <- IlluPercExplES3(PredReal_DistComp)

        TrList <- list(Tr, Tr2, Tr3, Tr4, Tr5)
        
}



##################################
##################################
###### OLD not Used anymore#######
##################################
##################################

###############################################################
###### function to produce a data frame where each column is 
# composition k only the abundance of the "predictor OTU" is
# from composition l. This to calculate the real distances 
# between composition k and l without contribuition from the
# predictor OTU                                          #####
###############################################################
# INPUT: AbTable_sub is an abundance table subset of two conditions, i.e.
# a data.frame with two variables k and l (the two compositions), rownames(AbTable_sup) should be the OTUs, e.g. OTU_1 to OTU_n, alternatively for the
# simulations, there can be an OTU column
# OUTPUT: All_k_Predictor_l is a similar data.frame as pred_RAs. Each column is
# the composition k but the predictor is l. k and l are the last two columns.
# can be used as input of the distance functions to calculate the real distances
# between composition k and l with the contribution of the predictor removed

make_AllkPredictorl <- function(AbTable_sub) {
        
        # to make sure OTUs are rownames and not the extra column
        if ('OTU' %in% colnames(AbTable_sub)) {
                rownames(AbTable_sub) <- AbTable_sub$OTU
                AbTable_sub <- select(AbTable_sub, k, l)
        }
        
        All_k_Predictor_l <- replicate(nrow(AbTable_sub), AbTable_sub$k)
        # a square matrix, each column is composition k
        colnames(All_k_Predictor_l) <- rownames(AbTable_sub)
        rownames(All_k_Predictor_l) <- rownames(AbTable_sub)
        diag(All_k_Predictor_l) <- AbTable_sub$l 
        # changes the "predictor" to the value of condition l
        All_k_Predictor_l <- as.data.frame(All_k_Predictor_l)
        All_k_Predictor_l$k <- AbTable_sub$k
        All_k_Predictor_l$l <- AbTable_sub$l
        return(All_k_Predictor_l)
} 

###############################################################
###### combine calc_pred_RAs and make_AllkPredictorl       #####
###############################################################
# INPUT: AbTable_sub is an abundance table subset of two conditions, i.e.
# a data.frame with two variables k and l (the two compositions), rownames(AbTable_sup) should be the OTUs, e.g. OTU_1 to OTU_n
# OUTPUT: Predicts is a list with the data.frames pred_RAs and
# All_k_Predictor_l

predictions <- function(AbTable_sub) {
        
        pred_RAs <- calc_pred_RAs(AbTable_sub)
        All_k_Predictor_l <- make_AllkPredictorl(AbTable_sub)
        Predicts <- list(pred_RAs, All_k_Predictor_l)
        return(Predicts)
        
}

Dist_Functs

################################################
################################################
##### For Prediction Method ####################
################################################
################################################

# ATTENTION: for the prediction method, the input to the Distance
# functions is always a data frame with the predictions (pred_RAs)

##### ATTENTION, INPUT and OUTPUT for the following 
# INPUT:  for the basic the df such as pred_RAs, each column is
# a prediction, last two columns is k and l. rownames are OTU1 to OTUn
# OUTPUT: The Distance vector, giving the distance for each column to l

### All these distances are used in 


###############################################################
######   sum of absolute differences for each OTU        #####
###############################################################

absoluteDiffPred <- function(df){
        
        DistsTo_l <- colSums(abs(df-df$l))
        return(DistsTo_l)
}

########################################
######   Euclidean Distance       #####
#######################################


EuclDistPred <- function(df){
        
        DistsTo_l <- sqrt(colSums((df-df$l)^2))
        return(DistsTo_l)
}


########################################
######   Bray-Curtis Distance      #####
#######################################


BrayCurtisDistPred <- function(df){
        
        DistsTo_l <- colSums(abs(df-df$l))/colSums(df+df$l)
        return(DistsTo_l)
}

########################################
######   Jensen-Shannon Distance      #####
#######################################
# ATTENTION: For the equation see:
# http://stackoverflow.com/questions/11226627/jensen-shannon-divergence-in-r
# the equation the guy came self up with plus the link to the enterotypes EMBL.
# wikipedia

JSDistPred <- function(df){
        
        # since it does not work with 0, I add a pseudocount, that was also 
        # added in the enterotype paper
        pseudocount <- 0.000001
        # df <- df + pseudocount
        df <- apply(df,1:2,function(x) ifelse (x==0,pseudocount,x))
        # looked cooler, and only changes the ones that are actually 0.
        # but changes df to matrix, so I need:
        df <- as.data.frame(df)
        Ms <- .5*(df+df$l)
        JS <- 0.5 * (colSums(df * log(df/Ms)) + colSums(df$l * log(df$l/Ms)))
        JS = ifelse(JS<0,0,JS) # Sometimes 0 is given as -3...e-17, then
        # the following sqrt would give a NaN warning message
        DistsTo_l <- sqrt(JS)
        return(DistsTo_l)
}


######################################################
### dist.JSD from Enterotype paper ###################
#######################################################
# ATTENTION, not used here, just as a comparison
# ATTENTION, the function sometimes produces weird NaN, the reason 
# is usually that you got a -1....e-18 instead of 0. sqrt(negative)
# results then in the NaN.
# ATTENTION: the results are the same as for my own JSDist function


dist.JSD <- function(inMatrix, pseudocount=0.000001, ...) {
        #inMatrix <- inMatrix + pseudocount
        KLD <- function(x,y) sum(x *log(x/y))
        JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
        matrixColSize <- length(colnames(inMatrix))
        # matrixRowSize <- length(rownames(inMatrix)) ## NEVER USED
        colnames <- colnames(inMatrix)
        resultsMatrix <- matrix(0, matrixColSize, matrixColSize)
        
        inMatrix = apply(inMatrix,1:2,function(x) ifelse (x==0,pseudocount,x))
        
        for(i in 1:matrixColSize) {
                for(j in 1:matrixColSize) { 
                        resultsMatrix[i,j]=JSD(as.vector(inMatrix[,i]),
                                               as.vector(inMatrix[,j]))
                }
        }
        colnames -> colnames(resultsMatrix) -> rownames(resultsMatrix)
        as.dist(resultsMatrix)->resultsMatrix
        attr(resultsMatrix, "method") <- "dist"
        return(resultsMatrix) 
}




#################################################################
######   Calculate Distances and Effect Size Corrections     #####
##################################################################
# INPUT:  for the basic the df such as pred_RAs, each column is
# a prediction, last two columns is k and l. rownames(are OTU1...2)
# OUTPUT: PredReal_DistComp 

CalcDistances_Pred <- function(pred_RAs, Method = 'JS'){
        
        #######################################
        #4 Effect Size (ES) measures for the different OTUs
        # all are supposed to value changes in relative abundance 
        # in relation to the size of the relative abundance
        ES <- abs(pred_RAs$l - pred_RAs$k) # the one I think is not
        # sufficient since it favours those that change almost not at all
        
        ES1 <- pred_RAs$l/pred_RAs$k
        ES1 <- ifelse(ES1<1, 1/ES1, ES1)
        
        ES2 <- abs(pred_RAs$l - pred_RAs$k)/
                rowMeans(cbind(pred_RAs$k,pred_RAs$l))
        
        ES3 <- (1-pred_RAs$l)/(1-pred_RAs$k)
        ES3 <- ifelse(ES3<1, 1/ES3, ES3)
        ES3 <- ES3/rowMeans(cbind(pred_RAs$k,pred_RAs$l))
        
        # ES3 might value the low relative abundance too much
        #############################################
        
        ####### calculate the distance on the basis of the method ###
        
        if (Method %in% c('JS', 'J', 'Jensen')) {
                
                DistsPred <- JSDistPred(pred_RAs)    
                
        } else if (Method %in% c('BC', 'B', 'BrayCurtis')) {
                
                DistsPred <- BrayCurtisDistPred(pred_RAs)
                
        } else if (Method %in% c('E', 'Euclidean', 'Eucl')) {
                
                DistsPred <- EuclDistPred(pred_RAs)
                
        } else if (Method %in% c('A', 'Abs', 'Absolute')) {
                
                DistsPred <- absoluteDiffPred(pred_RAs)
                
        } else {
                
                stop('Do not know the method')
        }
        
        
        PredReal_DistComp <- data.frame(OTU = names(DistsPred), 
                                        Pred_Dists = DistsPred, 
                                        Real_Dists = DistsPred["k"])
        
        if (!isTRUE(all.equal(0, PredReal_DistComp$Pred_Dists[
                nrow(PredReal_DistComp)]))) {
                stop("predicted or real distance to l was not zero")
        }
        # just a little check that the distance to l was indeed 0
        
        PredReal_DistComp <- PredReal_DistComp[-nrow(PredReal_DistComp),]
        # removes the l row because it is always 0 (tested above)
        
        PredReal_DistComp <- mutate(PredReal_DistComp,
                                    PredReal_Ratio = Pred_Dists/Real_Dists,
                                    PercExpl = (Real_Dists-Pred_Dists)/
                                            Real_Dists,
                                    ES = c(ES/sum(ES), NaN), #so I norm it
                                    ES1 = c(ES1/sum(ES1), NaN),
                                    ES2 = c(ES2/sum(ES2), NaN),
                                    ES3 = c(ES3/sum(ES3), NaN),
                                    PercExplperES = PercExpl/ES,
                                    PercExplES1 = PercExpl*ES1,
                                    PercExplES2 = PercExpl*ES2,
                                    PercExplES3 = PercExpl*ES3)
        
        PredReal_DistComp[is.na(PredReal_DistComp)] = 0 # removes the NaN
        PredReal_DistComp$PercExplperES[
                is.infinite(PredReal_DistComp$PercExplperES)] = 0
        return(PredReal_DistComp) 
        
}


################################################
################################################
##### For Subcomposition Method ####################
################################################
################################################


# ATTENTION: for the subcomposition method, the input to the Distance
# functions is always a data frame which is the abundance table

##### ATTENTION, INPUT and OUTPUT for the following distance functions
# INPUT:  An abundance Table as data frame with the two columns
# k and l, and the
# OUTPUT: the distance, so a one number vector



###############################################################
######   sum of absolute differences for each OTU        #####
###############################################################

absoluteDiff <- function(df){
        
        DistsTo_l <- sum(abs(df$k-df$l))
        return(DistsTo_l)
}

########################################
######   Euclidean Distance       #####
#######################################


EuclDist <- function(df){
        
        DistsTo_l <- sqrt(sum((df$k-df$l)^2))
        return(DistsTo_l)
}

########################################
######   Bray-Curtis Distance      #####
#######################################

### BASIC

BrayCurtisDist <- function(df){
        
        DistsTo_l <- sum(abs(df$k-df$l))/sum(df$k+df$l)
        return(DistsTo_l)
}



########################################
######   Jensen-Shannon Distance   #####
#######################################

JSDist <- function(df){
        # since it does not work with 0, I add a pseudocount, that was also 
        # added in the enterotype paper
        pseudocount <- 0.000001
        # df <- df + pseudocount
        df1 <- apply(df[,-1],1:2,function(x) ifelse (x==0,pseudocount,x))
        # looked cooler, and only changes the ones that are actually 0.
        # but changes df to matrix, so I need:
        df1 <- as.data.frame(df)
        df$k <- df1$k
        df$l <- df1$l
        Ms <- .5*(df$k+df$l)
        JS <- 0.5 * (sum(df$k * log(df$k/Ms)) + sum(df$l * log(df$l/Ms)))
        JS = ifelse(JS<0,0,JS) # Sometimes 0 is given as -3...e-17, then
        # the following sqrt would give a NaN warning message
        DistsTo_l <- sqrt(JS)
        return(DistsTo_l)
}

Simulations

k <- c(10E8, 2.5E8, 2E8, 1E8, .2E8)
l <- c(10E8, 3E8, 1E8, 1E8, .2E8)

# generate AT and DataFrames for plotting
SimDFs <- GenerateSimDFs(k,l)
AT <- SimDFs[[2]]

# plot relative and absolute abundances under each other
Tr <- plotSimAbsRel(SimDFs[[3]])
Tr

Prediction Method

Absolute Diffs

Abs

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Eucledian Distance

Eucl

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Bray Curtis Distance

BC

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Jensen-Shannon Distance

JS

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Subcomposition Methods

Method S1: k-l distances when one OTU is missing

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Method S2: Same with the Average Method

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

150602_SimulationsPredPSubC

Thorsten Brach

2 Jun 2015

FUNCTIONS

Sim_Functs

Illus_Functs

SubComp_Functs

Predict_Functs

Dist_Functs

Simulations

Prediction Method

Absolute Diffs

Eucledian Distance

Bray Curtis Distance

Jensen-Shannon Distance

Subcomposition Methods

Method S1: k-l distances when one OTU is missing

Method S2: Same with the Average Method