Fungi bacteria and virus community composition

Author
Published

September 10, 2024

Code
# print link to github repo if any
if (file.exists("./.git/config")){
  config <- readLines("./.git/config")
  url <- grep("url",  config, value = TRUE)
  url <- gsub("\\turl = |.git$", "", url)
  cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
  }

Source code and data found at

Code
# options to customize chunk outputs
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 65), 
  tidy = TRUE,
  message = FALSE
 )

1 load packages

Code
# Load required libraries
library(ape)  # For phylogenetic tree manipulation
library(MASS)  # For multivariate normal simulation
library(vegan)
library(phytools)

reps <- 10000

2 read data

Code
# all data
bac <- read.csv("./data/raw/Bacterial-phylogeneticSignal/bacteria-counts-new-t.csv")

# balanced groups
bal_bac <- read.csv("./data/raw/balanced-bacteria-counts-t.csv")

# all group phylo
phy_mat <- read.csv("./data/raw/concatenated_mldist.csv", header = FALSE)

# balanced group phylo
bal_phy_mat <- read.csv("./data/raw/balanced_mldist.csv", header = FALSE)

# read tree
tree <- ape::read.tree("./data/raw/Bacterial-phylogeneticSignal/concatenated.fas.newick")

3 Stats on entire data

Code
bac <- bac[order(match(bac$Sample, phy_mat$V1)), ]

phy_mat$V1 <- NULL

diag(as.matrix(phy_mat))
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Code
bin_bac <- bac
bin_bac[bin_bac > 0] <- 1

bin_bac$Sample <- NULL

dim(bin_bac)
[1]   19 1434
Code
community_dist_mat <- vegdist(bin_bac, method = "jaccard", binary = TRUE)

mantel_ape <- mantel.test(phy_mat, as.matrix(community_dist_mat),
    nperm = 10000)

mantel_ape
$z.stat
[1] 88.95141

$p
[1] 0.00729927

$alternative
[1] "two.sided"
Code
mantel_vegan <- mantel(phy_mat, as.matrix(community_dist_mat), permutations = 10000)

mantel_vegan

Mantel statistic based on Pearson's product-moment correlation 

Call:
mantel(xdis = phy_mat, ydis = as.matrix(community_dist_mat),      permutations = 10000) 

Mantel statistic r: 0.4786 
      Significance: 0.0022998 

Upper quantiles of permutations (null model):
  90%   95% 97.5%   99% 
0.229 0.297 0.357 0.422 
Permutation: free
Number of permutations: 10000

4 Stats on balanced groups

Code
bal_bac <- bal_bac[order(match(bal_bac$Sample, bal_phy_mat$V1)), ]

bal_phy_mat$V1 <- NULL

diag(as.matrix(bal_phy_mat))
 [1] 0 0 0 0 0 0 0 0 0 0 0 0
Code
bin_bal_bac <- bal_bac
bin_bal_bac[bin_bal_bac > 0] <- 1

bin_bal_bac$Sample <- NULL

bal_community_dist_mat <- vegdist(bin_bal_bac, method = "jaccard",
    binary = TRUE)

mantel_ape <- mantel.test(bal_phy_mat, as.matrix(bal_community_dist_mat),
    nperm = 10000)

mantel_ape
$z.stat
[1] 49.19362

$p
[1] 0.08109189

$alternative
[1] "two.sided"
Code
mantel_vegan <- mantel(bal_phy_mat, as.matrix(bal_community_dist_mat),
    permutations = 10000)

mantel_vegan

Mantel statistic based on Pearson's product-moment correlation 

Call:
mantel(xdis = bal_phy_mat, ydis = as.matrix(bal_community_dist_mat),      permutations = 10000) 

Mantel statistic r: 0.1812 
      Significance: 0.036096 

Upper quantiles of permutations (null model):
  90%   95% 97.5%   99% 
0.128 0.165 0.198 0.231 
Permutation: free
Number of permutations: 10000

5 Simulations

Code
phy_sig_communities <- function(tree, num_parasites, probability_of_occurrence = 0.9) {

    po <- probability_of_occurrence
    # po <- 0.9
    Q <- matrix(c(-po, po, po, -po), nrow = 2, byrow = TRUE)
    rownames(Q) <- colnames(Q) <- c("0", "1")

    # Step 3: Simulate the binary trait evolution The root state
    # can be specified, or it can be randomly assigned

    binary_trait <- sim.history(tree = tree, Q = Q, anc = NULL, nsim = num_parasites)  # Start with state '0' at the root

    mat <- do.call(cbind, lapply(binary_trait, function(x) as.numeric(x$node.states[1:Ntip(tree),
        1])))
    return(mat)

}

random_communities <- function(num_hosts, num_parasites, probability_of_occurrence = 0.3) {

    # Simulate the binary occurrence matrix
    occurrence_matrix <- matrix(rbinom(num_hosts * num_parasites,
        1, probability_of_occurrence), nrow = num_hosts, ncol = num_parasites)

    # Name the rows and columns
    rownames(occurrence_matrix) <- paste("Host", 1:num_hosts, sep = "_")
    colnames(occurrence_matrix) <- paste("Parasite", 1:num_parasites,
        sep = "_")

    return(occurrence_matrix)
}

# Set parameters
num_hosts <- nrow(as.matrix(community_dist_mat))  # Number of host species
num_parasites <- ncol(as.matrix(community_dist_mat))  # Number of parasite species

random_communities(num_hosts, num_parasites)
        Parasite_1 Parasite_2 Parasite_3 Parasite_4 Parasite_5 Parasite_6
Host_1           0          0          0          0          1          1
Host_2           0          0          1          0          0          0
Host_3           0          0          0          0          1          0
Host_4           0          0          0          0          1          1
Host_5           0          1          0          0          0          0
Host_6           0          1          0          0          0          0
Host_7           0          0          1          0          0          0
Host_8           0          0          1          1          0          0
Host_9           1          1          1          1          1          1
Host_10          0          0          0          1          0          0
Host_11          0          0          1          1          0          0
Host_12          0          0          1          0          0          0
Host_13          0          0          0          0          1          0
Host_14          0          0          0          0          0          0
Host_15          0          0          0          0          0          0
Host_16          0          1          1          0          0          0
Host_17          0          1          1          0          0          0
Host_18          0          0          1          0          1          0
Host_19          0          0          0          0          0          0
        Parasite_7 Parasite_8 Parasite_9 Parasite_10 Parasite_11 Parasite_12
Host_1           0          0          0           0           0           1
Host_2           0          0          1           0           0           0
Host_3           0          0          1           0           0           1
Host_4           0          0          1           0           1           0
Host_5           0          0          1           0           1           1
Host_6           1          1          1           0           0           1
Host_7           1          0          1           0           0           0
Host_8           0          0          0           0           0           0
Host_9           0          1          1           1           1           0
Host_10          1          0          0           0           0           0
Host_11          1          1          0           0           0           0
Host_12          0          1          0           1           0           0
Host_13          0          1          1           0           1           0
Host_14          1          0          0           0           1           1
Host_15          0          1          0           0           0           0
Host_16          1          0          1           1           0           0
Host_17          0          0          0           0           0           0
Host_18          0          0          0           1           1           0
Host_19          0          1          0           0           0           1
        Parasite_13 Parasite_14 Parasite_15 Parasite_16 Parasite_17 Parasite_18
Host_1            0           0           0           0           0           0
Host_2            1           0           0           0           0           0
Host_3            0           0           1           0           0           0
Host_4            1           0           0           0           0           0
Host_5            0           0           1           1           0           0
Host_6            1           0           0           1           0           1
Host_7            1           0           0           1           0           0
Host_8            0           0           0           0           0           1
Host_9            0           0           0           1           1           0
Host_10           0           0           0           0           1           0
Host_11           0           1           0           0           1           1
Host_12           1           0           1           0           0           0
Host_13           0           0           0           0           0           0
Host_14           0           0           1           0           0           0
Host_15           0           0           0           0           1           0
Host_16           1           0           0           1           0           0
Host_17           0           0           1           0           0           0
Host_18           1           0           0           1           0           0
Host_19           0           0           1           0           1           1
        Parasite_19
Host_1            1
Host_2            0
Host_3            0
Host_4            1
Host_5            0
Host_6            0
Host_7            1
Host_8            1
Host_9            0
Host_10           0
Host_11           1
Host_12           0
Host_13           0
Host_14           0
Host_15           0
Host_16           0
Host_17           0
Host_18           1
Host_19           0
Code
random_comm_dist <- function(num_hosts, num_parasites, probability_of_occurrence = 0.3,
    phy_sig = TRUE, tree = NULL) {

    if (phy_sig)
        comm_mat <- phy_sig_communities(tree, num_parasites, probability_of_occurrence = probability_of_occurrence) else comm_mat <- random_communities(num_hosts, num_parasites,
        probability_of_occurrence = probability_of_occurrence)

    comm_mat[is.nan(comm_mat)] <- 1

    comm_dist <- vegdist(comm_mat, method = "jaccard", binary = TRUE)

    comm_dist[is.nan(comm_dist)] <- 1
    return(comm_dist)
}


random_comm_dist(num_hosts = 19, num_parasites = 30, probability_of_occurrence = 0.5,
    phy_sig = TRUE, tree = tree)
Done simulation(s).
            1          2          3          4          5          6          7
2  0.00000000                                                                  
3  0.12500000 0.12500000                                                       
4  0.12500000 0.12500000 0.00000000                                            
5  0.12500000 0.12500000 0.00000000 0.00000000                                 
6  0.12500000 0.12500000 0.00000000 0.00000000 0.00000000                      
7  0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667           
8  0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667 0.00000000
9  0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667 0.00000000
10 0.26315789 0.26315789 0.17647059 0.17647059 0.17647059 0.17647059 0.11764706
11 0.26315789 0.26315789 0.17647059 0.17647059 0.17647059 0.17647059 0.11764706
12 0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667 0.00000000
13 0.23529412 0.23529412 0.13333333 0.13333333 0.13333333 0.13333333 0.06666667
14 0.23529412 0.23529412 0.13333333 0.13333333 0.13333333 0.13333333 0.06666667
15 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
16 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
17 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
18 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
19 0.23529412 0.23529412 0.13333333 0.13333333 0.13333333 0.13333333 0.06666667
            8          9         10         11         12         13         14
2                                                                              
3                                                                              
4                                                                              
5                                                                              
6                                                                              
7                                                                              
8                                                                              
9  0.00000000                                                                  
10 0.11764706 0.11764706                                                       
11 0.11764706 0.11764706 0.00000000                                            
12 0.00000000 0.00000000 0.11764706 0.11764706                                 
13 0.06666667 0.06666667 0.17647059 0.17647059 0.06666667                      
14 0.06666667 0.06666667 0.17647059 0.17647059 0.06666667 0.00000000           
15 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
16 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
17 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
18 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
19 0.06666667 0.06666667 0.17647059 0.17647059 0.06666667 0.00000000 0.00000000
           15         16         17         18
2                                             
3                                             
4                                             
5                                             
6                                             
7                                             
8                                             
9                                             
10                                            
11                                            
12                                            
13                                            
14                                            
15                                            
16 0.00000000                                 
17 0.00000000 0.00000000                      
18 0.00000000 0.00000000 0.00000000           
19 0.06666667 0.06666667 0.06666667 0.06666667
Code
random_comm_dist(num_hosts = 19, num_parasites = 30, probability_of_occurrence = 0.5,
    phy_sig = FALSE)
           Host_1    Host_2    Host_3    Host_4    Host_5    Host_6    Host_7
Host_2  0.8695652                                                            
Host_3  0.8846154 0.5789474                                                  
Host_4  0.6363636 0.5263158 0.6521739                                        
Host_5  0.5217391 0.6086957 0.7037037 0.5600000                              
Host_6  0.8333333 0.7000000 0.7391304 0.6956522 0.6400000                    
Host_7  0.6190476 0.7142857 0.6956522 0.7083333 0.5416667 0.7391304          
Host_8  0.6250000 0.5909091 0.5217391 0.6000000 0.4400000 0.7307692 0.5833333
Host_9  0.8333333 0.6315789 0.6818182 0.8000000 0.5833333 0.5263158 0.7391304
Host_10 0.7000000 0.7368421 0.8260870 0.7272727 0.7200000 0.7619048 0.6500000
Host_11 0.6956522 0.7826087 0.5238095 0.6666667 0.6666667 0.7500000 0.7083333
Host_12 0.6521739 0.6818182 0.7200000 0.6800000 0.5200000 0.5909091 0.7200000
Host_13 0.7200000 0.5714286 0.6800000 0.6400000 0.4800000 0.5454545 0.6800000
Host_14 0.7200000 0.6363636 0.5652174 0.5217391 0.6428571 0.7200000 0.5652174
Host_15 0.5000000 0.8800000 0.7083333 0.6666667 0.6153846 0.6956522 0.6521739
Host_16 0.5454545 0.6956522 0.6800000 0.6400000 0.5384615 0.6666667 0.5652174
Host_17 0.7500000 0.6000000 0.5238095 0.6086957 0.5600000 0.6956522 0.6521739
Host_18 0.6666667 0.7619048 0.7916667 0.8000000 0.5217391 0.6000000 0.6818182
Host_19 0.7272727 0.8181818 0.7391304 0.8000000 0.5833333 0.6666667 0.6190476
           Host_8    Host_9   Host_10   Host_11   Host_12   Host_13   Host_14
Host_2                                                                       
Host_3                                                                       
Host_4                                                                       
Host_5                                                                       
Host_6                                                                       
Host_7                                                                       
Host_8                                                                       
Host_9  0.6250000                                                            
Host_10 0.6521739 0.6315789                                                  
Host_11 0.5416667 0.6956522 0.6666667                                        
Host_12 0.6153846 0.3684211 0.6818182 0.6800000                              
Host_13 0.5200000 0.5454545 0.6956522 0.7407407 0.4090909                    
Host_14 0.6296296 0.7200000 0.6363636 0.5833333 0.7037037 0.6153846          
Host_15 0.6000000 0.6363636 0.6666667 0.5454545 0.5652174 0.6400000 0.6400000
Host_16 0.5769231 0.6666667 0.5000000 0.6923077 0.5416667 0.5000000 0.6666667
Host_17 0.5416667 0.6363636 0.8333333 0.6666667 0.6800000 0.5217391 0.5217391
Host_18 0.6800000 0.5263158 0.7000000 0.6363636 0.6521739 0.6666667 0.7692308
Host_19 0.5000000 0.6000000 0.7000000 0.6956522 0.5909091 0.6086957 0.6666667
          Host_15   Host_16   Host_17   Host_18
Host_2                                         
Host_3                                         
Host_4                                         
Host_5                                         
Host_6                                         
Host_7                                         
Host_8                                         
Host_9                                         
Host_10                                        
Host_11                                        
Host_12                                        
Host_13                                        
Host_14                                        
Host_15                                        
Host_16 0.5833333                              
Host_17 0.6666667 0.6400000                    
Host_18 0.6956522 0.7200000 0.6363636          
Host_19 0.6956522 0.7200000 0.6956522 0.6000000
Code
mantel_sim <- function(probability_of_occurrence = 0.5, num_parasites,
    num_hosts, type = "ape", phy_sig = FALSE, tree = tree) {
    m1 <- random_comm_dist(num_parasites = num_parasites, num_hosts = num_hosts,
        probability_of_occurrence = probability_of_occurrence, tree = tree,
        phy_sig = phy_sig)

    if (type == "ape")
        res <- mantel.test(phy_mat, as.matrix(m1), nperm = 1000)$p

    if (type == "vegan")
        res <- mantel(phy_mat, as.matrix(m1), permutations = 1000)$signif

    return(res)
}

5.1 Entire data

5.1.1 Type I error

Code
# mantel_sim(num_hosts = 19, num_parasites = 1434, type = 'ape')

out_vegan_unsig <- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
    num_parasites = 1434, type = "vegan", tree = tree), cl = 20)

hist(out_vegan_unsig)

Code
sum(out_vegan_unsig > 0.05)/length(out_vegan_unsig)
[1] 0.9512
Code
out_ape_unsig <- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
    num_parasites = 1434, type = "ape", tree = tree), cl = 20)

hist(out_ape_unsig)

Code
sum(out_ape_unsig > 0.05)/length(out_ape_unsig)
[1] 0.9527

5.1.2 Statistical power

Code
out_vegan_sig <- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
    num_parasites = 1434, type = "vegan", phy_sig = TRUE, probability_of_occurrence = 0.2,
    tree = tree), cl = 24))

hist(out_vegan_sig)

Code
sum(out_vegan_sig < 0.05)/length(out_vegan_sig)
[1] 0
Code
out_ape_sig <- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
    num_parasites = 1434, type = "ape", phy_sig = TRUE, probability_of_occurrence = 0.2,
    tree = tree), cl = 24))

sum(out_ape_sig < 0.05)/length(out_ape_sig)
[1] 0
Code
hist(out_ape_sig)

5.2 Balanced data

5.2.1 Type I error

Code
# mantel_sim(num_hosts = 19, num_parasites = 1434, type = 'ape')

# read tree
bal_tree <- drop.tip(tree, tree$tip.label[!tree$tip.label %in% bal_bac$Sample])

out_vegan_unsig <- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
    num_parasites = 1434, type = "vegan", tree = bal_tree), cl = 20)

hist(out_vegan_unsig)

Code
sum(out_vegan_unsig > 0.05)/length(out_vegan_unsig)
[1] 0.9491
Code
out_ape_unsig <- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
    num_parasites = 1434, type = "ape", tree = bal_tree), cl = 20)

hist(out_ape_unsig)

Code
sum(out_ape_unsig > 0.05)/length(out_ape_unsig)
[1] 0.9508

5.2.2 Statistical power

Code
out_vegan_sig <- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 12,
    num_parasites = 1000, type = "vegan", phy_sig = TRUE, probability_of_occurrence = 0.2,
    tree = tree), cl = 24))

hist(out_vegan_sig)

Code
sum(out_vegan_sig < 0.05)/length(out_vegan_sig)
[1] 0
Code
out_ape_sig <- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 12,
    num_parasites = 1000, type = "ape", phy_sig = TRUE, probability_of_occurrence = 0.2,
    tree = tree), cl = 24))

sum(out_ape_sig < 0.05)/length(out_ape_sig)
[1] 0
Code
hist(out_ape_sig)

Takeaways

 


 

Session information

R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] phytools_2.3-0 maps_3.4.2     vegan_2.6-8    lattice_0.22-6 permute_0.9-7 
[6] MASS_7.3-61    ape_5.8       

loaded via a namespace (and not attached):
 [1] Matrix_1.7-0            expm_0.999-9            jsonlite_1.8.8         
 [4] compiler_4.4.1          Rcpp_1.0.13             parallel_4.4.1         
 [7] cluster_2.1.6           splines_4.4.1           optimParallel_1.0-2    
[10] yaml_2.3.10             fastmap_1.2.0           coda_0.19-4.1          
[13] generics_0.1.3          igraph_2.0.3            DEoptim_2.2-8          
[16] knitr_1.48              iterators_1.0.14        htmlwidgets_1.6.4      
[19] rlang_1.1.4             fastmatch_1.1-4         xfun_0.47              
[22] quadprog_1.5-8          doParallel_1.0.17       cli_3.6.3              
[25] magrittr_2.0.3          phangorn_2.11.1         mgcv_1.9-1             
[28] formatR_1.14            combinat_0.0-8          digest_0.6.37          
[31] foreach_1.5.2           grid_4.4.1              pbapply_1.7-2          
[34] rstudioapi_0.16.0       lifecycle_1.0.4         nlme_3.1-165           
[37] clusterGeneration_1.3.8 scatterplot3d_0.3-44    mnormt_2.1.1           
[40] evaluate_0.24.0         numDeriv_2016.8-1.1     codetools_0.2-20       
[43] rmarkdown_2.28          pkgconfig_2.0.3         tools_4.4.1            
[46] htmltools_0.5.8.1