FUNCTIONS
Sim_Functs
#########################################
#########################################
###### To Make DFs of simulated data ###
#########################################
#########################################
#################################################
### Generate data.frames from simulated examples##
##################################################
# INPUT: the vectors k and l containing the absolute abundances
# of condition k and l
# OUTPUT: SimDFs, the three data frames WideDF (absolute and realtive
# abundances), AT_Sim the relative Abundance Table of the two conditions,
# LongDF, the long version of WideDF for plotting
GenerateSimDFs <- function(k,l){
WideDF <- data.frame(OTU = LETTERS[1:length(k)], absolute_k = k, absolute_l = l, relative_k = k/sum(k), relative_l =l/sum(l))
AT_Sim <- select(WideDF, OTU, k = relative_k, l = relative_l)
LongDF <- gather(WideDF, Condition, Value, -OTU)
LongDF <- separate(LongDF, Condition,
into = c('AbundType', 'Condition'))
SimDFs <- list(WideDF, AT_Sim, LongDF)
return(SimDFs)
}
Illus_Functs
########################################################
###### the color palette #####
########################################################
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7")
################################################
################################################
##### For Simulations from abs abundance #######
################################################
################################################
###############################################################
###### Show Changes Relative vs Absolute for simulations #####
###############################################################
# INPUT: SimDFs[[3]]
plotSimAbsRel <- function(LongDataFrame){
LongDataFrame$OTU <- as.character(LongDataFrame$OTU)
LongDataFrame$Condition <- as.character(LongDataFrame$Condition)
# do I need this?
Tr <- ggplot(LongDataFrame, aes(x = OTU, y = Value,
colour = Condition,
size = Condition,
shape = Condition))
Tr <- Tr + geom_point() +
facet_grid(AbundType ~., scales = "free") +
scale_size_discrete(range = c(6,4)) +
scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
scale_shape_manual(values = c(16,17)) +
theme_bw(base_size = 25) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.4)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
xlab("") +
ylab("Abundance") +
labs(colour = "", size = "", shape = "")
return(Tr)
}
################################################
################################################
##### For the Predict Method #######
################################################
################################################
###############################################################
###### Plot Predictions dependent on Predictor #####
###############################################################
# INPUT: The data frame where each column is a prediction, last two columns are
# k and l. E.g. pred_RAs, so the outcome of calc_pred_RAs, or Predicts[[1]],
# being the outcome of the predictions function. Further you have to give the
# predictor, e.g. "A" or "OTU1"
# ATTENTION: This plot is nicest when combiend with manipulate
plotPaulPredict <- function(pred_RAs, Predictor){
rownames(pred_RAs) <- colnames(pred_RAs)[1:(ncol(pred_RAs)-2)]
DF <- select(pred_RAs, k, l, matches(Predictor))
names(DF)[3] <- paste("from:",Predictor)
DF$OTU <- rownames(DF)
if (nrow(DF) < 20) {
size = c(6,4,6)
sizetextx = 18
} else if((nrow(AbTable_sub) > 19) & (nrow(AbTable_sub) < 100)) {
size = c(4,2,4)
sizetextx = 10
} else if (nrow(AbTable_sub) > 99) {
size = c(3,1.5, 3)
sizetextx = 6
}
DF <- gather(DF, Condition, Value, -OTU)
Tr <- ggplot(DF, aes(x = OTU, y = Value,
colour = Condition,
size = Condition,
shape = Condition))
Tr <- Tr + geom_point() +
scale_size_discrete(range = size) +
scale_colour_manual(values = c(cbPalette[1:3])) +
scale_shape_manual(values = c(16,17,18)) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
xlab("") +
ylab("Relative Abundance") +
labs(colour = "", size = "", shape = "")
return(Tr)
}
# NOTE use with manipulate possible
# manipulate(
# plotPaulPredict(PredictsSim[[1]],Predictor),
# Predictor = picker(as.list(rownames(PredictsSim[[1]]))))
###############################################################
###### Plot Predictions dependent on Predictor LOG #####
###############################################################
plotPaulPredictLog <- function(pred_RAs, Predictor){
Tr <- plotPaulPredict(pred_RAs, Predictor)
Tr <- Tr + scale_y_log10()
return(Tr)
}
#NOTE Use with manipulate possible
# manipulate(
# plotPaulPredictLog(PredictsSim[[1]],Predictor),
# Predictor = picker(as.list(rownames(PredictsSim[[1]]))))
###############################################################
###### Barplot Illustration mainly for Simulations #####
###############################################################
# INPUT: The data frame where each column is a prediction, last two columns are
# k and l. E.g. pred_RAs, so the outcome of calc_pred_RAs, or Predicts[[1]],
# being the outcome of the predictions function. Further you have to give the
# predictor, e.g. "A" or "OTU1"
# ATTENTION: This plot is nicest when combiend with manipulate
BarplotIllu <- function(pred_RAs, Predictor){
rownames(pred_RAs) <- colnames(pred_RAs)[1:(ncol(pred_RAs)-2)]
DF <- select(pred_RAs, k, l, matches(Predictor))
names(DF)[3] <- paste("from:",Predictor)
DF$OTU <- rownames(DF)
if (nrow(DF) < 20) {
size = c(6,4,6)
sizetextx = 18
} else if((nrow(AbTable_sub) > 19) & (nrow(AbTable_sub) < 100)) {
size = c(4,2,4)
sizetextx = 10
} else if (nrow(AbTable_sub) > 99) {
size = c(3,1.5, 3)
sizetextx = 6
}
DF <- gather(DF, Condition, Value, -OTU)
Tr <- ggplot(DF, aes(x = Condition, y = Value, fill = OTU))
Tr <- Tr + geom_bar(stat = 'identity') +
scale_fill_manual(values
= c(cbPalette[1:length(levels(factor(DF$OTU)))])) +
theme_bw(base_size = 25) +
theme(panel.grid = element_blank()) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
xlab("") +
ylab("Relative Abundance")
return(Tr)
}
# NOTE use with manipulate possible
# manipulate(
# BarplotIllu(PredictsSim[[1]],Predictor),
# Predictor = picker(as.list(rownames(PredictsSim[[1]]))))
###############################################################
###### Illustrate the Percentage Explained #####
###############################################################
# INPUT: usually PredReal_DistComp,
IlluPercExpl <- function(DF){
DF <- DF[-nrow(DF),] # remove the k row
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = PercExpl*100))
Tr <- Tr +
geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab("Percentage of k-l Distance Explained")
return(Tr)
}
###############################################################
###### Illustrate the Percentage Explained per Effect Size #####
###############################################################
# INPUT: usually PredReal_DistComp,
IlluPercExplperES <- function(DF){
DF <- DF[-nrow(DF),] # remove the k row
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = PercExplperES))
Tr <- Tr +
geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab("k-l Distance Explained per Effect Size")
return(Tr)
}
###############################################################
###### Illustrate the Percentage Explained ES1 #####
###############################################################
# INPUT: usually PredReal_DistComp,
IlluPercExplES1 <- function(DF){
DF <- DF[-nrow(DF),] # remove the k row
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = PercExplES1))
Tr <- Tr +
geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab("k-l Distance Explained times ES1")
return(Tr)
}
###############################################################
###### Illustrate the Percentage Explained ES2 #####
###############################################################
# INPUT: usually PredReal_DistComp,
IlluPercExplES2 <- function(DF){
DF <- DF[-nrow(DF),] # remove the k row
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = PercExplES2))
Tr <- Tr +
geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab("k-l Distance Explained times ES2")
return(Tr)
}
###############################################################
###### Illustrate the Percentage Explained ES3 #####
###############################################################
# INPUT: usually PredReal_DistComp,
IlluPercExplES3 <- function(DF){
DF <- DF[-nrow(DF),] # remove the k row
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = PercExplES3))
Tr <- Tr +
geom_hline(aes(yintercept = 0), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab("k-l Distance Explained times ES3")
return(Tr)
}
################################################
################################################
##### For the SubComp Method #######
################################################
################################################
###############################################################
###### Illustrate the Distances #####
###############################################################
# INPUT: just a Distance Vector, and as labY the Method used
# Output: just the Trellis to plot
IlluDistances <- function(Dist, labY = "Distance"){
DF <- data.frame(OTU = names(Dist), Dist = Dist)
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = Dist))
Tr <- Tr +
geom_hline(aes(yintercept = Dist[length(Dist)]), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab(labY)
return(Tr)
}
################################################
################################################
##### For real Data #######
################################################
################################################
###############################################################
###### Illustrate the OTU changes between condition k and l #####
###############################################################
# INPUT: AbTable_sub, so the subsetted Abundance Table, a data frame with
# the columns k and l
plotOTUChanges <- function(AbTable_sub){
# make sure you have the OTUs as variable and not just as rownames
if (! 'OTU' %in% colnames(AbTable_sub)) {
AbTable_sub$OTU <- rownames(AbTable_sub)
}
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(AbTable_sub) < 20) {
size = c(6,4)
sizetextx = 18
} else if((nrow(AbTable_sub) > 19) & (nrow(AbTable_sub) < 100)) {
size = c(4,2)
sizetextx = 10
} else if (nrow(AbTable_sub) > 99) {
size = c(3,1.5)
sizetextx = 6
}
AbTable_sub$OTU <- factor(AbTable_sub$OTU)
LevelsWant <- rev(as.character(AbTable_sub$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
AbTable_sub$OTU <- relevel(AbTable_sub$OTU, ref = LevelsWant[i])
}
AbTable_sub_long <- gather(AbTable_sub, Condition, RA, -OTU)
#AbTable_sub_long$Condition <- as.character(AbTable_sub_long$Condition)
Tr <- ggplot(AbTable_sub_long, aes(x = OTU, y = RA,
colour = Condition,
size = Condition,
shape = Condition))
Tr <- Tr + geom_point() +
scale_size_discrete(range = size) +
scale_colour_manual(values = c(cbPalette[1],cbPalette[2])) +
scale_shape_manual(values = c(16,17)) +
#scale_x_discrete(expand = c(0.01,.01)) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = .5)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5,
size = sizetextx)) +
xlab("") +
ylab("relative Abundance") +
labs(colour = "", size = "", shape = "")
return(Tr)
}
################################################
################################################
##### Old Plots #######
################################################
################################################
###############################################################
###### Illustrate the prediction precision #####
###############################################################
# INPUT: usually PredReal_DistComp, so just a data frame with
# the columns OTU and PredReal_Ratio
illustratePrecision <- function(DF){
DF <- DF[-nrow(DF),] # remove the k row
# the size of the dots and the writing depends on how many OTUs there
# are
if (nrow(DF) < 20) {
size = 6
sizetextx = 18
} else if((nrow(DF) > 19) & (nrow(DF) < 100)) {
size = 4
sizetextx = 14
} else if (nrow(DF) > 99) {
size = 3
sizetextx = 6
}
# get the order of the OTU right. OTU is used as a factor and levels
# in R are alphabetically. But you want the OTUs in the same order as
# they are in DF (rownames)
LevelsWant <- rev(as.character(DF$OTU)) #you had to reverse
for (i in 1:length(LevelsWant)) {
DF$OTU <- relevel(DF$OTU, ref = LevelsWant[i])
}
Tr <- ggplot(DF, aes(x = OTU, y = PredReal_Ratio))
Tr <- Tr +
geom_hline(aes(yintercept = 1), colour = cbPalette[1], size = 1.2) +
geom_point(size = size, colour = cbPalette[2]) +
theme_bw(base_size = 18) +
theme(panel.grid.minor = element_blank()) +
# removes all minor gridlines
theme(panel.grid.major = element_line(colour = cbPalette[1],
size = 0.2)) +
# sets the grid lines thicker and in a new colour
theme(panel.background = element_rect(colour = cbPalette[1],
size = 2)) +
# adjust colour and size of the surrounding box
theme(panel.grid.major.y = element_blank()) +
# removes the horisontal major grid lines +
theme(axis.text.x = element_text(angle = 90, vjust = .5, size = sizetextx)) +
xlab("") +
ylab("PredReal_DistanceRatio")
return(Tr)
}
SubComp_Functs
########################################################
########################################################
############### For method S1 #########################
######simple renormalised subcompositions #############
########################################################
########################################################
#################################################
### Generate List with Subcompositions##
##################################################
# INPUT: Abundance Table, e.g. SimDFs[[2]], It is a data frame, columns:
# OTU, k, and l
# OUTPUT: SubATs is a list with the ATs in which one OTU is missing each, and
# the compositions have been renormalised. The last entry is the original AT,
# so the input SimDFs[[2]]
SubCompis <- function(AT) {
# remove one OTU in each case
SubATs <- lapply(1:nrow(AT), function(x) AT[-x,])
# normalise the remaining composition to 1
SubATs <- lapply(1:length(SubATs),
function(x) mutate(SubATs[[x]], k = k/sum(k),
l = l/sum(l)))
# a proof that now all columms sum up to 1
# sapply(1:length(SubATs), function(x) summarise(SubATs[[x]],
# k = sum(k), l = sum(l)))
names(SubATs) <- paste("miOTU_", AT$OTU, sep = '')
SubATs$AT <- AT # add the original AT as an entry to the list
return(SubATs)
}
########################################################
########################################################
############### For method S2 #########################
######subcompositions with average OTU #############
########################################################
########################################################
################### For Method S2, "subcompositions" where one OTU
# is set to the average of k and l via "absolute" abundances ######
#################################################
### Generate List with Subcompositions##
##################################################
# INPUT: Abundance Table, e.g. SimDFs[[2]], It is a data frame, columns:
# OTU, k, and l
# OUTPUT: SubATs is a list with the ATs in which one OTU is missing each, and
# the compositions have been renormalised. The last entry is the original AT,
# so the input SimDFs[[2]]
SubCompisAV <- function(AT, Total = 1000) {
AT$Order <- 1:nrow(AT) # used later to reorder the data frames
OTUMeans <- rowMeans(select(AT, k, l))
TotalAddFMeans <- Total*OTUMeans
TotalAddDF <- data.frame(OTU = AT$OTU,
k = TotalAddFMeans,
l = TotalAddFMeans,
Order = AT$Order)
TotalForRest <- Total*(1-OTUMeans)
# remove one OTU in each case
SubATs <- lapply(1:nrow(AT), function(x) AT[-x,])
# normalise the remaining composition to 1
SubATs <- lapply(1:length(SubATs),
function(x) mutate(SubATs[[x]], k = k/sum(k),
l = l/sum(l)))
SubATsAbs <- lapply(1:length(SubATs), function(x)
mutate(SubATs[[x]], k = k*TotalForRest[x],
l = l*TotalForRest[x]))
SubATsAbs <- lapply(1:length(SubATsAbs), function(x){
rbind(SubATsAbs[[x]], TotalAddDF[x,])
})
SubATsAbs <- lapply(1:length(SubATsAbs), function(x){
arrange(SubATsAbs[[x]], Order)
})
# a proof that now all columms sum up to Total
# sapply(1:length(SubATsAbs), function(x) summarise(SubATsAbs[[x]],
# k = sum(k), l = sum(l)))
# remove the Order column
SubATsAbs <- lapply(1:length(SubATsAbs), function(x){
select(SubATsAbs[[x]], -Order)
})
SubATs <- lapply(1:length(SubATsAbs), function(x) {
mutate(SubATsAbs[[x]], k = k/sum(k), l = l/sum(l))
})
names(SubATs) <- paste("miOTU_", AT$OTU, sep = '')
SubATs$AT <- AT # add the original AT as an entry to the list
return(SubATs)
}
Predict_Functs
#########################################
#########################################
###### The actual prediction functions###
#########################################
#########################################
###############################################################
###### function to predict the relative abundance of OTU j in
# condition l from the relative abundances of OTU i in condition l
# and condtion k #####
###############################################################
# INPUT: k and l are the vectors of relative abundances at two conditions.
# i and j are the indices of the OTUs of interest.
# OUTPUT: the predicted abundance of OTU j in condition
# l
predictAbundance <- function( k, l, i, j ){
if( i == j ){
l[i]
}
else {
k[j]*(1 - l[i])/(1 - k[i])
}
}
###############################################################
###### function to calculate all predicted abundances, predicted
# from each OTU for each OTU >> matrix/data frame #####
###############################################################
# INPUT: AbTable_sub is an abundance table subset of two conditions, i.e.
# a data.frame with two variables k and l (the two compositions), rownames(AbTable_sup) should be the OTUs, e.g. OTU_1 to OTU_n, alternatively for the
# simulations, there can be an OTU column
# OUTPUT: pred_RAs is a data.frame. The columns are the predicted compositions
# using each OTU as a predictor. In addition, the last two colums are k and l
# from AbTable_sub. pred_RAs can be used as input of the distance functions.
calc_pred_RAs <- function(AbTable_sub) {
# to make sure OTUs are rownames and not the extra column
if ('OTU' %in% colnames(AbTable_sub)) {
rownames(AbTable_sub) <- AbTable_sub$OTU
AbTable_sub <- select(AbTable_sub, k, l)
}
pred_RAs <- sapply(seq(nrow(AbTable_sub)), function(i)
sapply(seq(nrow(AbTable_sub)),
function(j) predictAbundance(AbTable_sub$k, AbTable_sub$l, i, j)))
# calculates the predicted relative Abundance for each OTU as predictor.
# for n OTUs you get a nxn matrix
colnames(pred_RAs) <- rownames(AbTable_sub)
rownames(pred_RAs) <- rownames(AbTable_sub)
# turn into data frame and add the k and l compositions
pred_RAs <- as.data.frame(pred_RAs)
pred_RAs$k <- AbTable_sub$k
pred_RAs$l <- AbTable_sub$l
# Thus, for nOTUs pred_RAs is a data.frame with n+2 variables.
return(pred_RAs)
}
##################################
##################################
###### not prediction function####
###### but for prediction method##
##################################
##################################
#################################################
### Generate the 5 plots for each item of a list##
### of dist comparisons ###########################
##################################################
# INPUT: PredReal_DistComp
# OUTPUT: a list of the Trellis objects for the 5 plots
# to compare the predicted with the real distance based on different
# effect sizes and so on
getPlotList <- function(PredReal_DistComp) {
Tr <- IlluPercExpl(PredReal_DistComp)
Tr2 <- IlluPercExplperES(PredReal_DistComp)
Tr3 <- IlluPercExplES1(PredReal_DistComp)
Tr4 <- IlluPercExplES2(PredReal_DistComp)
Tr5 <- IlluPercExplES3(PredReal_DistComp)
TrList <- list(Tr, Tr2, Tr3, Tr4, Tr5)
}
##################################
##################################
###### OLD not Used anymore#######
##################################
##################################
###############################################################
###### function to produce a data frame where each column is
# composition k only the abundance of the "predictor OTU" is
# from composition l. This to calculate the real distances
# between composition k and l without contribuition from the
# predictor OTU #####
###############################################################
# INPUT: AbTable_sub is an abundance table subset of two conditions, i.e.
# a data.frame with two variables k and l (the two compositions), rownames(AbTable_sup) should be the OTUs, e.g. OTU_1 to OTU_n, alternatively for the
# simulations, there can be an OTU column
# OUTPUT: All_k_Predictor_l is a similar data.frame as pred_RAs. Each column is
# the composition k but the predictor is l. k and l are the last two columns.
# can be used as input of the distance functions to calculate the real distances
# between composition k and l with the contribution of the predictor removed
make_AllkPredictorl <- function(AbTable_sub) {
# to make sure OTUs are rownames and not the extra column
if ('OTU' %in% colnames(AbTable_sub)) {
rownames(AbTable_sub) <- AbTable_sub$OTU
AbTable_sub <- select(AbTable_sub, k, l)
}
All_k_Predictor_l <- replicate(nrow(AbTable_sub), AbTable_sub$k)
# a square matrix, each column is composition k
colnames(All_k_Predictor_l) <- rownames(AbTable_sub)
rownames(All_k_Predictor_l) <- rownames(AbTable_sub)
diag(All_k_Predictor_l) <- AbTable_sub$l
# changes the "predictor" to the value of condition l
All_k_Predictor_l <- as.data.frame(All_k_Predictor_l)
All_k_Predictor_l$k <- AbTable_sub$k
All_k_Predictor_l$l <- AbTable_sub$l
return(All_k_Predictor_l)
}
###############################################################
###### combine calc_pred_RAs and make_AllkPredictorl #####
###############################################################
# INPUT: AbTable_sub is an abundance table subset of two conditions, i.e.
# a data.frame with two variables k and l (the two compositions), rownames(AbTable_sup) should be the OTUs, e.g. OTU_1 to OTU_n
# OUTPUT: Predicts is a list with the data.frames pred_RAs and
# All_k_Predictor_l
predictions <- function(AbTable_sub) {
pred_RAs <- calc_pred_RAs(AbTable_sub)
All_k_Predictor_l <- make_AllkPredictorl(AbTable_sub)
Predicts <- list(pred_RAs, All_k_Predictor_l)
return(Predicts)
}
Dist_Functs
################################################
################################################
##### For Prediction Method ####################
################################################
################################################
# ATTENTION: for the prediction method, the input to the Distance
# functions is always a data frame with the predictions (pred_RAs)
##### ATTENTION, INPUT and OUTPUT for the following
# INPUT: for the basic the df such as pred_RAs, each column is
# a prediction, last two columns is k and l. rownames are OTU1 to OTUn
# OUTPUT: The Distance vector, giving the distance for each column to l
### All these distances are used in
###############################################################
###### sum of absolute differences for each OTU #####
###############################################################
absoluteDiffPred <- function(df){
DistsTo_l <- colSums(abs(df-df$l))
return(DistsTo_l)
}
########################################
###### Euclidean Distance #####
#######################################
EuclDistPred <- function(df){
DistsTo_l <- sqrt(colSums((df-df$l)^2))
return(DistsTo_l)
}
########################################
###### Bray-Curtis Distance #####
#######################################
BrayCurtisDistPred <- function(df){
DistsTo_l <- colSums(abs(df-df$l))/colSums(df+df$l)
return(DistsTo_l)
}
########################################
###### Jensen-Shannon Distance #####
#######################################
# ATTENTION: For the equation see:
# http://stackoverflow.com/questions/11226627/jensen-shannon-divergence-in-r
# the equation the guy came self up with plus the link to the enterotypes EMBL.
# wikipedia
JSDistPred <- function(df){
# since it does not work with 0, I add a pseudocount, that was also
# added in the enterotype paper
pseudocount <- 0.000001
# df <- df + pseudocount
df <- apply(df,1:2,function(x) ifelse (x==0,pseudocount,x))
# looked cooler, and only changes the ones that are actually 0.
# but changes df to matrix, so I need:
df <- as.data.frame(df)
Ms <- .5*(df+df$l)
JS <- 0.5 * (colSums(df * log(df/Ms)) + colSums(df$l * log(df$l/Ms)))
JS = ifelse(JS<0,0,JS) # Sometimes 0 is given as -3...e-17, then
# the following sqrt would give a NaN warning message
DistsTo_l <- sqrt(JS)
return(DistsTo_l)
}
######################################################
### dist.JSD from Enterotype paper ###################
#######################################################
# ATTENTION, not used here, just as a comparison
# ATTENTION, the function sometimes produces weird NaN, the reason
# is usually that you got a -1....e-18 instead of 0. sqrt(negative)
# results then in the NaN.
# ATTENTION: the results are the same as for my own JSDist function
dist.JSD <- function(inMatrix, pseudocount=0.000001, ...) {
#inMatrix <- inMatrix + pseudocount
KLD <- function(x,y) sum(x *log(x/y))
JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
matrixColSize <- length(colnames(inMatrix))
# matrixRowSize <- length(rownames(inMatrix)) ## NEVER USED
colnames <- colnames(inMatrix)
resultsMatrix <- matrix(0, matrixColSize, matrixColSize)
inMatrix = apply(inMatrix,1:2,function(x) ifelse (x==0,pseudocount,x))
for(i in 1:matrixColSize) {
for(j in 1:matrixColSize) {
resultsMatrix[i,j]=JSD(as.vector(inMatrix[,i]),
as.vector(inMatrix[,j]))
}
}
colnames -> colnames(resultsMatrix) -> rownames(resultsMatrix)
as.dist(resultsMatrix)->resultsMatrix
attr(resultsMatrix, "method") <- "dist"
return(resultsMatrix)
}
#################################################################
###### Calculate Distances and Effect Size Corrections #####
##################################################################
# INPUT: for the basic the df such as pred_RAs, each column is
# a prediction, last two columns is k and l. rownames(are OTU1...2)
# OUTPUT: PredReal_DistComp
CalcDistances_Pred <- function(pred_RAs, Method = 'JS'){
#######################################
#4 Effect Size (ES) measures for the different OTUs
# all are supposed to value changes in relative abundance
# in relation to the size of the relative abundance
ES <- abs(pred_RAs$l - pred_RAs$k) # the one I think is not
# sufficient since it favours those that change almost not at all
ES1 <- pred_RAs$l/pred_RAs$k
ES1 <- ifelse(ES1<1, 1/ES1, ES1)
ES2 <- abs(pred_RAs$l - pred_RAs$k)/
rowMeans(cbind(pred_RAs$k,pred_RAs$l))
ES3 <- (1-pred_RAs$l)/(1-pred_RAs$k)
ES3 <- ifelse(ES3<1, 1/ES3, ES3)
ES3 <- ES3/rowMeans(cbind(pred_RAs$k,pred_RAs$l))
# ES3 might value the low relative abundance too much
#############################################
####### calculate the distance on the basis of the method ###
if (Method %in% c('JS', 'J', 'Jensen')) {
DistsPred <- JSDistPred(pred_RAs)
} else if (Method %in% c('BC', 'B', 'BrayCurtis')) {
DistsPred <- BrayCurtisDistPred(pred_RAs)
} else if (Method %in% c('E', 'Euclidean', 'Eucl')) {
DistsPred <- EuclDistPred(pred_RAs)
} else if (Method %in% c('A', 'Abs', 'Absolute')) {
DistsPred <- absoluteDiffPred(pred_RAs)
} else {
stop('Do not know the method')
}
PredReal_DistComp <- data.frame(OTU = names(DistsPred),
Pred_Dists = DistsPred,
Real_Dists = DistsPred["k"])
if (!isTRUE(all.equal(0, PredReal_DistComp$Pred_Dists[
nrow(PredReal_DistComp)]))) {
stop("predicted or real distance to l was not zero")
}
# just a little check that the distance to l was indeed 0
PredReal_DistComp <- PredReal_DistComp[-nrow(PredReal_DistComp),]
# removes the l row because it is always 0 (tested above)
PredReal_DistComp <- mutate(PredReal_DistComp,
PredReal_Ratio = Pred_Dists/Real_Dists,
PercExpl = (Real_Dists-Pred_Dists)/
Real_Dists,
ES = c(ES/sum(ES), NaN), #so I norm it
ES1 = c(ES1/sum(ES1), NaN),
ES2 = c(ES2/sum(ES2), NaN),
ES3 = c(ES3/sum(ES3), NaN),
PercExplperES = PercExpl/ES,
PercExplES1 = PercExpl*ES1,
PercExplES2 = PercExpl*ES2,
PercExplES3 = PercExpl*ES3)
PredReal_DistComp[is.na(PredReal_DistComp)] = 0 # removes the NaN
PredReal_DistComp$PercExplperES[
is.infinite(PredReal_DistComp$PercExplperES)] = 0
return(PredReal_DistComp)
}
################################################
################################################
##### For Subcomposition Method ####################
################################################
################################################
# ATTENTION: for the subcomposition method, the input to the Distance
# functions is always a data frame which is the abundance table
##### ATTENTION, INPUT and OUTPUT for the following distance functions
# INPUT: An abundance Table as data frame with the two columns
# k and l, and the
# OUTPUT: the distance, so a one number vector
###############################################################
###### sum of absolute differences for each OTU #####
###############################################################
absoluteDiff <- function(df){
DistsTo_l <- sum(abs(df$k-df$l))
return(DistsTo_l)
}
########################################
###### Euclidean Distance #####
#######################################
EuclDist <- function(df){
DistsTo_l <- sqrt(sum((df$k-df$l)^2))
return(DistsTo_l)
}
########################################
###### Bray-Curtis Distance #####
#######################################
### BASIC
BrayCurtisDist <- function(df){
DistsTo_l <- sum(abs(df$k-df$l))/sum(df$k+df$l)
return(DistsTo_l)
}
########################################
###### Jensen-Shannon Distance #####
#######################################
JSDist <- function(df){
# since it does not work with 0, I add a pseudocount, that was also
# added in the enterotype paper
pseudocount <- 0.000001
# df <- df + pseudocount
df1 <- apply(df[,-1],1:2,function(x) ifelse (x==0,pseudocount,x))
# looked cooler, and only changes the ones that are actually 0.
# but changes df to matrix, so I need:
df1 <- as.data.frame(df)
df$k <- df1$k
df$l <- df1$l
Ms <- .5*(df$k+df$l)
JS <- 0.5 * (sum(df$k * log(df$k/Ms)) + sum(df$l * log(df$l/Ms)))
JS = ifelse(JS<0,0,JS) # Sometimes 0 is given as -3...e-17, then
# the following sqrt would give a NaN warning message
DistsTo_l <- sqrt(JS)
return(DistsTo_l)
}