Code
# options to customize chunk outputs
::opts_chunk$set(
knitrtidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)
Fungi bacteria and virus community composition
# print link to github repo if any
if (file.exists("./.git/config")){
<- readLines("./.git/config")
config <- grep("url", config, value = TRUE)
url <- gsub("\\turl = |.git$", "", url)
url cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
}
# options to customize chunk outputs
::opts_chunk$set(
knitrtidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)
# Load required libraries
library(ape) # For phylogenetic tree manipulation
library(MASS) # For multivariate normal simulation
library(vegan)
library(phytools)
<- 10000 reps
# all data
<- read.csv("./data/raw/Bacterial-phylogeneticSignal/bacteria-counts-new-t.csv")
bac
# balanced groups
<- read.csv("./data/raw/balanced-bacteria-counts-t.csv")
bal_bac
# all group phylo
<- read.csv("./data/raw/concatenated_mldist.csv", header = FALSE)
phy_mat
# balanced group phylo
<- read.csv("./data/raw/balanced_mldist.csv", header = FALSE)
bal_phy_mat
# read tree
<- ape::read.tree("./data/raw/Bacterial-phylogeneticSignal/concatenated.fas.newick") tree
<- bac[order(match(bac$Sample, phy_mat$V1)), ]
bac
$V1 <- NULL
phy_mat
diag(as.matrix(phy_mat))
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
<- bac
bin_bac > 0] <- 1
bin_bac[bin_bac
$Sample <- NULL
bin_bac
dim(bin_bac)
[1] 19 1434
<- vegdist(bin_bac, method = "jaccard", binary = TRUE)
community_dist_mat
<- mantel.test(phy_mat, as.matrix(community_dist_mat),
mantel_ape nperm = 10000)
mantel_ape
$z.stat
[1] 88.95141
$p
[1] 0.00729927
$alternative
[1] "two.sided"
<- mantel(phy_mat, as.matrix(community_dist_mat), permutations = 10000)
mantel_vegan
mantel_vegan
Mantel statistic based on Pearson's product-moment correlation
Call:
mantel(xdis = phy_mat, ydis = as.matrix(community_dist_mat), permutations = 10000)
Mantel statistic r: 0.4786
Significance: 0.0022998
Upper quantiles of permutations (null model):
90% 95% 97.5% 99%
0.229 0.297 0.357 0.422
Permutation: free
Number of permutations: 10000
<- bal_bac[order(match(bal_bac$Sample, bal_phy_mat$V1)), ]
bal_bac
$V1 <- NULL
bal_phy_mat
diag(as.matrix(bal_phy_mat))
[1] 0 0 0 0 0 0 0 0 0 0 0 0
<- bal_bac
bin_bal_bac > 0] <- 1
bin_bal_bac[bin_bal_bac
$Sample <- NULL
bin_bal_bac
<- vegdist(bin_bal_bac, method = "jaccard",
bal_community_dist_mat binary = TRUE)
<- mantel.test(bal_phy_mat, as.matrix(bal_community_dist_mat),
mantel_ape nperm = 10000)
mantel_ape
$z.stat
[1] 49.19362
$p
[1] 0.08109189
$alternative
[1] "two.sided"
<- mantel(bal_phy_mat, as.matrix(bal_community_dist_mat),
mantel_vegan permutations = 10000)
mantel_vegan
Mantel statistic based on Pearson's product-moment correlation
Call:
mantel(xdis = bal_phy_mat, ydis = as.matrix(bal_community_dist_mat), permutations = 10000)
Mantel statistic r: 0.1812
Significance: 0.036096
Upper quantiles of permutations (null model):
90% 95% 97.5% 99%
0.128 0.165 0.198 0.231
Permutation: free
Number of permutations: 10000
<- function(tree, num_parasites, probability_of_occurrence = 0.9) {
phy_sig_communities
<- probability_of_occurrence
po # po <- 0.9
<- matrix(c(-po, po, po, -po), nrow = 2, byrow = TRUE)
Q rownames(Q) <- colnames(Q) <- c("0", "1")
# Step 3: Simulate the binary trait evolution The root state
# can be specified, or it can be randomly assigned
<- sim.history(tree = tree, Q = Q, anc = NULL, nsim = num_parasites) # Start with state '0' at the root
binary_trait
<- do.call(cbind, lapply(binary_trait, function(x) as.numeric(x$node.states[1:Ntip(tree),
mat 1])))
return(mat)
}
<- function(num_hosts, num_parasites, probability_of_occurrence = 0.3) {
random_communities
# Simulate the binary occurrence matrix
<- matrix(rbinom(num_hosts * num_parasites,
occurrence_matrix 1, probability_of_occurrence), nrow = num_hosts, ncol = num_parasites)
# Name the rows and columns
rownames(occurrence_matrix) <- paste("Host", 1:num_hosts, sep = "_")
colnames(occurrence_matrix) <- paste("Parasite", 1:num_parasites,
sep = "_")
return(occurrence_matrix)
}
# Set parameters
<- nrow(as.matrix(community_dist_mat)) # Number of host species
num_hosts <- ncol(as.matrix(community_dist_mat)) # Number of parasite species
num_parasites
random_communities(num_hosts, num_parasites)
Parasite_1 Parasite_2 Parasite_3 Parasite_4 Parasite_5 Parasite_6
Host_1 0 0 0 0 1 1
Host_2 0 0 1 0 0 0
Host_3 0 0 0 0 1 0
Host_4 0 0 0 0 1 1
Host_5 0 1 0 0 0 0
Host_6 0 1 0 0 0 0
Host_7 0 0 1 0 0 0
Host_8 0 0 1 1 0 0
Host_9 1 1 1 1 1 1
Host_10 0 0 0 1 0 0
Host_11 0 0 1 1 0 0
Host_12 0 0 1 0 0 0
Host_13 0 0 0 0 1 0
Host_14 0 0 0 0 0 0
Host_15 0 0 0 0 0 0
Host_16 0 1 1 0 0 0
Host_17 0 1 1 0 0 0
Host_18 0 0 1 0 1 0
Host_19 0 0 0 0 0 0
Parasite_7 Parasite_8 Parasite_9 Parasite_10 Parasite_11 Parasite_12
Host_1 0 0 0 0 0 1
Host_2 0 0 1 0 0 0
Host_3 0 0 1 0 0 1
Host_4 0 0 1 0 1 0
Host_5 0 0 1 0 1 1
Host_6 1 1 1 0 0 1
Host_7 1 0 1 0 0 0
Host_8 0 0 0 0 0 0
Host_9 0 1 1 1 1 0
Host_10 1 0 0 0 0 0
Host_11 1 1 0 0 0 0
Host_12 0 1 0 1 0 0
Host_13 0 1 1 0 1 0
Host_14 1 0 0 0 1 1
Host_15 0 1 0 0 0 0
Host_16 1 0 1 1 0 0
Host_17 0 0 0 0 0 0
Host_18 0 0 0 1 1 0
Host_19 0 1 0 0 0 1
Parasite_13 Parasite_14 Parasite_15 Parasite_16 Parasite_17 Parasite_18
Host_1 0 0 0 0 0 0
Host_2 1 0 0 0 0 0
Host_3 0 0 1 0 0 0
Host_4 1 0 0 0 0 0
Host_5 0 0 1 1 0 0
Host_6 1 0 0 1 0 1
Host_7 1 0 0 1 0 0
Host_8 0 0 0 0 0 1
Host_9 0 0 0 1 1 0
Host_10 0 0 0 0 1 0
Host_11 0 1 0 0 1 1
Host_12 1 0 1 0 0 0
Host_13 0 0 0 0 0 0
Host_14 0 0 1 0 0 0
Host_15 0 0 0 0 1 0
Host_16 1 0 0 1 0 0
Host_17 0 0 1 0 0 0
Host_18 1 0 0 1 0 0
Host_19 0 0 1 0 1 1
Parasite_19
Host_1 1
Host_2 0
Host_3 0
Host_4 1
Host_5 0
Host_6 0
Host_7 1
Host_8 1
Host_9 0
Host_10 0
Host_11 1
Host_12 0
Host_13 0
Host_14 0
Host_15 0
Host_16 0
Host_17 0
Host_18 1
Host_19 0
<- function(num_hosts, num_parasites, probability_of_occurrence = 0.3,
random_comm_dist phy_sig = TRUE, tree = NULL) {
if (phy_sig)
<- phy_sig_communities(tree, num_parasites, probability_of_occurrence = probability_of_occurrence) else comm_mat <- random_communities(num_hosts, num_parasites,
comm_mat probability_of_occurrence = probability_of_occurrence)
is.nan(comm_mat)] <- 1
comm_mat[
<- vegdist(comm_mat, method = "jaccard", binary = TRUE)
comm_dist
is.nan(comm_dist)] <- 1
comm_dist[return(comm_dist)
}
random_comm_dist(num_hosts = 19, num_parasites = 30, probability_of_occurrence = 0.5,
phy_sig = TRUE, tree = tree)
Done simulation(s).
1 2 3 4 5 6 7
2 0.00000000
3 0.12500000 0.12500000
4 0.12500000 0.12500000 0.00000000
5 0.12500000 0.12500000 0.00000000 0.00000000
6 0.12500000 0.12500000 0.00000000 0.00000000 0.00000000
7 0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667
8 0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667 0.00000000
9 0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667 0.00000000
10 0.26315789 0.26315789 0.17647059 0.17647059 0.17647059 0.17647059 0.11764706
11 0.26315789 0.26315789 0.17647059 0.17647059 0.17647059 0.17647059 0.11764706
12 0.17647059 0.17647059 0.06666667 0.06666667 0.06666667 0.06666667 0.00000000
13 0.23529412 0.23529412 0.13333333 0.13333333 0.13333333 0.13333333 0.06666667
14 0.23529412 0.23529412 0.13333333 0.13333333 0.13333333 0.13333333 0.06666667
15 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
16 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
17 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
18 0.27777778 0.27777778 0.18750000 0.18750000 0.18750000 0.18750000 0.12500000
19 0.23529412 0.23529412 0.13333333 0.13333333 0.13333333 0.13333333 0.06666667
8 9 10 11 12 13 14
2
3
4
5
6
7
8
9 0.00000000
10 0.11764706 0.11764706
11 0.11764706 0.11764706 0.00000000
12 0.00000000 0.00000000 0.11764706 0.11764706
13 0.06666667 0.06666667 0.17647059 0.17647059 0.06666667
14 0.06666667 0.06666667 0.17647059 0.17647059 0.06666667 0.00000000
15 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
16 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
17 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
18 0.12500000 0.12500000 0.11764706 0.11764706 0.12500000 0.06666667 0.06666667
19 0.06666667 0.06666667 0.17647059 0.17647059 0.06666667 0.00000000 0.00000000
15 16 17 18
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 0.00000000
17 0.00000000 0.00000000
18 0.00000000 0.00000000 0.00000000
19 0.06666667 0.06666667 0.06666667 0.06666667
random_comm_dist(num_hosts = 19, num_parasites = 30, probability_of_occurrence = 0.5,
phy_sig = FALSE)
Host_1 Host_2 Host_3 Host_4 Host_5 Host_6 Host_7
Host_2 0.8695652
Host_3 0.8846154 0.5789474
Host_4 0.6363636 0.5263158 0.6521739
Host_5 0.5217391 0.6086957 0.7037037 0.5600000
Host_6 0.8333333 0.7000000 0.7391304 0.6956522 0.6400000
Host_7 0.6190476 0.7142857 0.6956522 0.7083333 0.5416667 0.7391304
Host_8 0.6250000 0.5909091 0.5217391 0.6000000 0.4400000 0.7307692 0.5833333
Host_9 0.8333333 0.6315789 0.6818182 0.8000000 0.5833333 0.5263158 0.7391304
Host_10 0.7000000 0.7368421 0.8260870 0.7272727 0.7200000 0.7619048 0.6500000
Host_11 0.6956522 0.7826087 0.5238095 0.6666667 0.6666667 0.7500000 0.7083333
Host_12 0.6521739 0.6818182 0.7200000 0.6800000 0.5200000 0.5909091 0.7200000
Host_13 0.7200000 0.5714286 0.6800000 0.6400000 0.4800000 0.5454545 0.6800000
Host_14 0.7200000 0.6363636 0.5652174 0.5217391 0.6428571 0.7200000 0.5652174
Host_15 0.5000000 0.8800000 0.7083333 0.6666667 0.6153846 0.6956522 0.6521739
Host_16 0.5454545 0.6956522 0.6800000 0.6400000 0.5384615 0.6666667 0.5652174
Host_17 0.7500000 0.6000000 0.5238095 0.6086957 0.5600000 0.6956522 0.6521739
Host_18 0.6666667 0.7619048 0.7916667 0.8000000 0.5217391 0.6000000 0.6818182
Host_19 0.7272727 0.8181818 0.7391304 0.8000000 0.5833333 0.6666667 0.6190476
Host_8 Host_9 Host_10 Host_11 Host_12 Host_13 Host_14
Host_2
Host_3
Host_4
Host_5
Host_6
Host_7
Host_8
Host_9 0.6250000
Host_10 0.6521739 0.6315789
Host_11 0.5416667 0.6956522 0.6666667
Host_12 0.6153846 0.3684211 0.6818182 0.6800000
Host_13 0.5200000 0.5454545 0.6956522 0.7407407 0.4090909
Host_14 0.6296296 0.7200000 0.6363636 0.5833333 0.7037037 0.6153846
Host_15 0.6000000 0.6363636 0.6666667 0.5454545 0.5652174 0.6400000 0.6400000
Host_16 0.5769231 0.6666667 0.5000000 0.6923077 0.5416667 0.5000000 0.6666667
Host_17 0.5416667 0.6363636 0.8333333 0.6666667 0.6800000 0.5217391 0.5217391
Host_18 0.6800000 0.5263158 0.7000000 0.6363636 0.6521739 0.6666667 0.7692308
Host_19 0.5000000 0.6000000 0.7000000 0.6956522 0.5909091 0.6086957 0.6666667
Host_15 Host_16 Host_17 Host_18
Host_2
Host_3
Host_4
Host_5
Host_6
Host_7
Host_8
Host_9
Host_10
Host_11
Host_12
Host_13
Host_14
Host_15
Host_16 0.5833333
Host_17 0.6666667 0.6400000
Host_18 0.6956522 0.7200000 0.6363636
Host_19 0.6956522 0.7200000 0.6956522 0.6000000
<- function(probability_of_occurrence = 0.5, num_parasites,
mantel_sim type = "ape", phy_sig = FALSE, tree = tree) {
num_hosts, <- random_comm_dist(num_parasites = num_parasites, num_hosts = num_hosts,
m1 probability_of_occurrence = probability_of_occurrence, tree = tree,
phy_sig = phy_sig)
if (type == "ape")
<- mantel.test(phy_mat, as.matrix(m1), nperm = 1000)$p
res
if (type == "vegan")
<- mantel(phy_mat, as.matrix(m1), permutations = 1000)$signif
res
return(res)
}
# mantel_sim(num_hosts = 19, num_parasites = 1434, type = 'ape')
<- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
out_vegan_unsig num_parasites = 1434, type = "vegan", tree = tree), cl = 20)
hist(out_vegan_unsig)
sum(out_vegan_unsig > 0.05)/length(out_vegan_unsig)
[1] 0.9512
<- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
out_ape_unsig num_parasites = 1434, type = "ape", tree = tree), cl = 20)
hist(out_ape_unsig)
sum(out_ape_unsig > 0.05)/length(out_ape_unsig)
[1] 0.9527
<- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
out_vegan_sig num_parasites = 1434, type = "vegan", phy_sig = TRUE, probability_of_occurrence = 0.2,
tree = tree), cl = 24))
hist(out_vegan_sig)
sum(out_vegan_sig < 0.05)/length(out_vegan_sig)
[1] 0
<- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
out_ape_sig num_parasites = 1434, type = "ape", phy_sig = TRUE, probability_of_occurrence = 0.2,
tree = tree), cl = 24))
sum(out_ape_sig < 0.05)/length(out_ape_sig)
[1] 0
hist(out_ape_sig)
# mantel_sim(num_hosts = 19, num_parasites = 1434, type = 'ape')
# read tree
<- drop.tip(tree, tree$tip.label[!tree$tip.label %in% bal_bac$Sample])
bal_tree
<- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
out_vegan_unsig num_parasites = 1434, type = "vegan", tree = bal_tree), cl = 20)
hist(out_vegan_unsig)
sum(out_vegan_unsig > 0.05)/length(out_vegan_unsig)
[1] 0.9491
<- pbapply::pbreplicate(reps, mantel_sim(num_hosts = 19,
out_ape_unsig num_parasites = 1434, type = "ape", tree = bal_tree), cl = 20)
hist(out_ape_unsig)
sum(out_ape_unsig > 0.05)/length(out_ape_unsig)
[1] 0.9508
<- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 12,
out_vegan_sig num_parasites = 1000, type = "vegan", phy_sig = TRUE, probability_of_occurrence = 0.2,
tree = tree), cl = 24))
hist(out_vegan_sig)
sum(out_vegan_sig < 0.05)/length(out_vegan_sig)
[1] 0
<- unlist(pbapply::pbreplicate(reps, mantel_sim(num_hosts = 12,
out_ape_sig num_parasites = 1000, type = "ape", phy_sig = TRUE, probability_of_occurrence = 0.2,
tree = tree), cl = 24))
sum(out_ape_sig < 0.05)/length(out_ape_sig)
[1] 0
hist(out_ape_sig)
R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=es_CR.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
time zone: America/Costa_Rica
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] phytools_2.3-0 maps_3.4.2 vegan_2.6-8 lattice_0.22-6 permute_0.9-7
[6] MASS_7.3-61 ape_5.8
loaded via a namespace (and not attached):
[1] Matrix_1.7-0 expm_0.999-9 jsonlite_1.8.8
[4] compiler_4.4.1 Rcpp_1.0.13 parallel_4.4.1
[7] cluster_2.1.6 splines_4.4.1 optimParallel_1.0-2
[10] yaml_2.3.10 fastmap_1.2.0 coda_0.19-4.1
[13] generics_0.1.3 igraph_2.0.3 DEoptim_2.2-8
[16] knitr_1.48 iterators_1.0.14 htmlwidgets_1.6.4
[19] rlang_1.1.4 fastmatch_1.1-4 xfun_0.47
[22] quadprog_1.5-8 doParallel_1.0.17 cli_3.6.3
[25] magrittr_2.0.3 phangorn_2.11.1 mgcv_1.9-1
[28] formatR_1.14 combinat_0.0-8 digest_0.6.37
[31] foreach_1.5.2 grid_4.4.1 pbapply_1.7-2
[34] rstudioapi_0.16.0 lifecycle_1.0.4 nlme_3.1-165
[37] clusterGeneration_1.3.8 scatterplot3d_0.3-44 mnormt_2.1.1
[40] evaluate_0.24.0 numDeriv_2016.8-1.1 codetools_0.2-20
[43] rmarkdown_2.28 pkgconfig_2.0.3 tools_4.4.1
[46] htmltools_0.5.8.1