source('Functions/Dist_Functs.R')
source('Functions/Sim_Functs.R')
source('Functions/Predict_Functs.R')
source('Functions/Illus_Functs.R')
source('Functions/SubComp_Functs.R')

Loading Data and AT_sub preparation

## if we wanted species level ########
if(file.exists("Data/mOTU.species.profile.tsv")){
        AT <- read.table("Data/mOTU.species.profile.tsv", header = TRUE)
}

# ### if we wanted genus level, weirdly even more OTUs ####
# if(file.exists("Data/mOTU.genus.profile.tsv")){
#         AT <- read.table("Data/mOTU.genus.profile.tsv", header = TRUE)
# }

From the from the bypass_read_information Excel file with the meta data I can see that Id 9437 and 9438 is the same individual pre and 3 month after GB

# choosing the two or more samples to compare, and generating the subsetted 
# abundance table
Before <- c('X9437')
After <- c('X9438')

names(AT)[1] <- "OTU"

AT_sub <- select(AT, OTU, one_of(Before), one_of(After))

names(AT_sub)[2:3] <- c('k', 'l')

# sanity checks
# check if the abundances sum up to 1
summarise_each(AT_sub[,-1], funs(sum))
##           k         l
## 1 0.9999997 0.9999998
# check the OTU with the highest abundance
WhichMax <- function(x) { AT_sub$OTU[which(x == max(x))]}
summarise_each(AT_sub[,-1], funs(WhichMax))
##                        k                l
## 1 motu_linkage_group_585 Prevotella_copri

Replacing all zero rows and replacing other zeros with pseudocount

#### remove OTUs that are zero in both conditions
AT_sub <- AT_sub[which(rowSums(AT_sub[,-1]) != 0),]

# filter would have worked too, but have to work out how to do it with really 
# many
# AT_sub <- filter(AT_sub, (k != 0) | (l != 0))

### replace all other 0 with pseudocount

# calculate the pseudocount, being the next lowest relative abundance after 0
AT_sub[AT_sub == 0] <- NA
# find the minimum of all other values
Pseudocount <- min(AT_sub[,-1], na.rm = TRUE)
# set all NA to this min = Psuedocount Value
AT_sub[is.na(AT_sub)] <- Pseudocount

# how did the pseudocount affect the colsums?
summarise_each(AT_sub[,-1], funs(sum))
##          k        l
## 1 1.001987 1.001046
# since they went a bit up, clear, I renormalise
Norm <- function(x) { x/sum(x)}
AT_sub <- mutate_each(AT_sub, funs(Norm), -OTU)

summarise_each(AT_sub, funs(sum), -OTU)
##   k l
## 1 1 1
Tr <- plotOTUChanges(AT_sub)
Tr

Tr + scale_y_log10()

The Prediction Method

Absolute Diffs

# Abs

Eucledian Distance

#Eucl

Bray Curtis Distance

#BC

Jensen-Shannon Distance

JS
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

The Subcomposition Methods

Method S1: k-l distances when one OTU is missing

## [[1]]

Method S2: Same with the Average Method

## [[1]]

## 
## [[2]]

Compare the two methods

PredictJS <- DistCompares$JS
SubCompJS <- DFList$JS
SubCompJSAV <- DFListAV$JSAV
CompareJS <- data.frame(OTU_P = PredictJS$OTU, PercExpl_P = PredictJS$PercExpl,  PercExpl_SC = SubCompJS$PercExpl, PercExpl_SCAV = SubCompJSAV$PercExpl, ES_Abs = PredictJS$ES)
CompareJS <- CompareJS[-nrow(CompareJS),]
CompareJS$ES <- AT_sub$l - AT_sub$k
CompareJS$Mean <- rowMeans(cbind(AT_sub$l, AT_sub$k))
head(arrange(CompareJS, desc(PercExpl_P)), 10)
##                     OTU_P PercExpl_P  PercExpl_SC PercExpl_SCAV     ES_Abs
## 1        Prevotella_copri 0.10834721 -0.012576546   0.062141465 0.16489420
## 2  motu_linkage_group_297 0.05319722  0.053148381   0.077817802 0.10265267
## 3        Escherichia_coli 0.04209337  0.020844853   0.031428056 0.04286125
## 4     Eubacterium_eligens 0.03668509  0.014547276   0.026024282 0.04253468
## 5  motu_linkage_group_230 0.02932411  0.029298749   0.043758208 0.05903529
## 6  motu_linkage_group_104 0.02399934  0.008891406   0.016814002 0.02866228
## 7  motu_linkage_group_585 0.02139209 -0.007519415   0.066779794 0.17096616
## 8  motu_linkage_group_260 0.02099135  0.008705205   0.015024421 0.02385136
## 9  motu_linkage_group_837 0.01613224  0.007785810   0.011963214 0.01670089
## 10 motu_linkage_group_525 0.01281798 -0.006298373   0.006580875 0.02439815
##             ES       Mean
## 1   0.16489420 0.14213501
## 2  -0.10265267 0.05142949
## 3   0.04286125 0.02150019
## 4   0.04253468 0.02315722
## 5  -0.05903529 0.02956989
## 6   0.02866228 0.01592344
## 7  -0.17096616 0.14205110
## 8   0.02385136 0.01270878
## 9   0.01670089 0.00840264
## 10  0.02439815 0.02543347
head(arrange(CompareJS, desc(PercExpl_SC)), 10)
##                     OTU_P  PercExpl_P PercExpl_SC PercExpl_SCAV
## 1  motu_linkage_group_297 0.053197221 0.053148381   0.077817802
## 2  motu_linkage_group_230 0.029324107 0.029298749   0.043758208
## 3        Escherichia_coli 0.042093371 0.020844853   0.031428056
## 4     Eubacterium_eligens 0.036685085 0.014547276   0.026024282
## 5  motu_linkage_group_104 0.023999344 0.008891406   0.016814002
## 6  motu_linkage_group_260 0.020991349 0.008705205   0.015024421
## 7  motu_linkage_group_837 0.016132239 0.007785810   0.011963214
## 8  motu_linkage_group_638 0.007278156 0.004811185   0.015658831
## 9      Veillonella_dispar 0.008144304 0.003842458   0.006004081
## 10      Roseburia_hominis 0.008625814 0.003422568   0.006119902
##         ES_Abs           ES        Mean
## 1  0.102652671 -0.102652671 0.051429495
## 2  0.059035294 -0.059035294 0.029569892
## 3  0.042861249  0.042861249 0.021500186
## 4  0.042534676  0.042534676 0.023157221
## 5  0.028662281  0.028662281 0.015923444
## 6  0.023851360  0.023851360 0.012708781
## 7  0.016700888  0.016700888 0.008402640
## 8  0.033459426 -0.033459426 0.021681364
## 9  0.008566034  0.008566034 0.004335213
## 10 0.010018201  0.010018201 0.005405871
head(arrange(CompareJS, desc(PercExpl_SCAV)), 10)
##                                  OTU_P  PercExpl_P  PercExpl_SC
## 1               motu_linkage_group_297 0.053197221  0.053148381
## 2               motu_linkage_group_585 0.021392095 -0.007519415
## 3                     Prevotella_copri 0.108347212 -0.012576546
## 4               motu_linkage_group_230 0.029324107  0.029298749
## 5                     Escherichia_coli 0.042093371  0.020844853
## 6                  Eubacterium_eligens 0.036685085  0.014547276
## 7               motu_linkage_group_104 0.023999344  0.008891406
## 8               motu_linkage_group_638 0.007278156  0.004811185
## 9               motu_linkage_group_260 0.020991349  0.008705205
## 10 Phascolarctobacterium_succinatutens 0.002965919 -0.003533422
##    PercExpl_SCAV     ES_Abs          ES       Mean
## 1     0.07781780 0.10265267 -0.10265267 0.05142949
## 2     0.06677979 0.17096616 -0.17096616 0.14205110
## 3     0.06214146 0.16489420  0.16489420 0.14213501
## 4     0.04375821 0.05903529 -0.05903529 0.02956989
## 5     0.03142806 0.04286125  0.04286125 0.02150019
## 6     0.02602428 0.04253468  0.04253468 0.02315722
## 7     0.01681400 0.02866228  0.02866228 0.01592344
## 8     0.01565883 0.03345943 -0.03345943 0.02168136
## 9     0.01502442 0.02385136  0.02385136 0.01270878
## 10    0.01216961 0.03627934 -0.03627934 0.03105064