Taxa bar plot
## tbp
# metadata <- "neurogenic_table_with_reads_24feb22.csv" %>% read_csv()
metadata <- metadata %>% filter(number != "37")
tbp_table <- read_csv("tbp.tsv/level-2.csv")
## Rows: 105 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (23): index, number, name, m.f, birth, accident.date, spinal, ASIA, weig...
## dbl (27): k__Archaea;p__Euryarchaeota, k__Bacteria;__, k__Bacteria;p__Actino...
## lgl (1): LYMPHO
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tbp_table$group[tbp_table$group == "NG"] <- "Hospitalized"
tbp_table$group[tbp_table$group == "NG_clinic"] <- "Ambulatoric"
tbp_table$group <- tbp_table$group %>% factor(levels = c("Ambulatoric","Hospitalized", "healthy"))
tbp_table <- tbp_table %>%
left_join(metadata) %>%
pivot_longer(cols = contains("k__"), names_to = "Taxon") %>%
mutate(Taxon = str_remove(Taxon, "k__Bacteria;")) %>%
group_by(number, Taxon) %>% summarise(reads = last(value), group = first(group)) %>%
filter(reads > 10)
## Joining, by = c("number", "name", "id_suf", "age", "birth", "ASIA", "weight",
## "BMI", "supplements", "diet", "smoker", "HGB", "NEUTRO", "MONO", "glucose",
## "albumin", "CRP", "sample_code", "reads_number", "group")
## `summarise()` has grouped output by 'number'. You can override using the
## `.groups` argument.
tbp_table <- tbp_table %>% group_by(number) %>% mutate(RA = reads/sum(reads)) %>% ungroup()
low_species <- tbp_table %>% group_by(Taxon) %>% summarise(reads = sum(reads)) %>% arrange(reads) %>% slice_head(n = 8) %>% pull(Taxon)
high_species <- tbp_table %>% group_by(Taxon) %>% summarise(reads = sum(reads)) %>% arrange(desc(reads)) %>% slice_head(n = 4) %>% pull(Taxon)
for(species in low_species) tbp_table <- tbp_table %>% mutate(Taxon = na_if(Taxon, species))
tbp_table %>%
# mutate(Taxon = replace_na(Taxon, "Others")) %>%
# ggplot(aes(x = number, y = reads, fill = Taxon)) + geom_col(position = "fill") +
ggplot(aes(x = number, y = RA, fill = Taxon)) + geom_col(position = "fill") +
facet_grid(~group, scales = "free_x", space = "free_x") +
ylab("") + theme_void() + theme(axis.text.x = element_blank(), strip.text.x = element_text(size = 12))

tbp_table %>% filter(Taxon == "p__Bacteroidetes" | Taxon == "p__Firmicutes") %>%
pivot_wider(id_cols = c(number, group), names_from = "Taxon", values_from = "RA") %>%
mutate(FB_ratio = p__Firmicutes/p__Bacteroidetes) %>%
ggplot() + aes(x = group, y = FB_ratio, color = group) +
geom_boxplot() +
ggsignif::geom_signif(comparisons = list(
c("Hospitalized", "Ambulatoric")
, c("healthy", "Hospitalized")
, c("Ambulatoric", "healthy")), step_increase = .1) +
theme_classic()
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_signif).

tbp_table %>% filter(Taxon %in% high_species) %>%
ggplot() + aes(x = group, y = RA, color = group) +
geom_boxplot() + geom_jitter(alpha = .5) +
facet_wrap(~Taxon, scales = "free_y") +
ggsignif::geom_signif(comparisons =
list(c("Hospitalized", "Ambulatoric")
, c("healthy", "Hospitalized")
, c("Ambulatoric", "healthy")), step_increase = .1, map_signif_level = F) +
theme_classic() + theme(strip.background = element_blank())

Alpha first and last
# tbp_table %>% group_by(Taxon) %>% summarise(cor.test())
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:qiime2R':
##
## mean_sd
tbl <-
metadata %>% inner_join(
data.frame(qiime2R::read_qza("cmr2/faith_pd_vector.qza")$data) %>%
rownames_to_column("sampleid")) %>%
# group_by(number) %>% filter(sampleid == last(sampleid)) %>%
# group_by(number) %>% filter(sampleid == first(sampleid) | sampleid == last(sampleid)) %>%
group_by(number) %>% mutate(fl = case_when(sampleid == first(sampleid) ~ "first", sampleid== last(sampleid) ~ "last" )) %>%
mutate(group = paste(group, fl)) %>%
filter(group != "Hospitalized NA")
## Joining, by = "sampleid"
pl <- tbl %>% ggplot(aes(x = group, y = faith_pd, fill = group)) +
geom_boxplot(show.legend = F, outlier.alpha = 0) +
geom_dotplot(binaxis = "y", stackdir = "center", alpha = .3)
mycompr <- list( c("healthy first", "Ambulatoric first")
, c("healthy first", "Hospitalized last")
, c("healthy first", "Hospitalized first")
, c("Hospitalized first", "Hospitalized last")
, c("Hospitalized first", "Ambulatoric first")
, c("Hospitalized last" , "Ambulatoric first")
)
pl + stat_compare_means(paired = T)
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.

pl +
# ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("Ambulatoric", "healthy")), step_increase = .1) +
ggsignif::geom_signif(comparisons = mycompr, step_increase = .1) +
theme_classic() +
theme(legend.position = "none")
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.

df <- data.frame(num_ASV = c(37, 71, 30, 20), healthy = c(T,T,F,F), hospitalized = c(T,F,T,F))
df %>% ggplot(aes(x = healthy, y = num_ASV, fill = hospitalized)) + geom_col() +
labs(x = "Higher in healthy", y = "number of ASV", fill = "Higher in hospitalized") +
theme_classic()

df %>% gt::gt_preview()
|
num_ASV |
healthy |
hospitalized |
| 1 |
37 |
TRUE |
TRUE |
| 2 |
71 |
TRUE |
FALSE |
| 3 |
30 |
FALSE |
TRUE |
| 4 |
20 |
FALSE |
FALSE |
tbp_table_RA_fill0 <- tbp_table %>%
filter(!is.na(Taxon)) %>%
# group_by(number) %>% mutate(RA = reads/sum(reads)) %>% ungroup() %>%
pivot_wider(id_cols = Taxon, names_from = number, values_from = RA, values_fill = 0) %>%
pivot_longer(cols = -1, names_to = "number", values_to = "RA") %>%
left_join(tbp_table %>% select(number, group)) %>% distinct(number, Taxon, group, RA)
library(rstatix)
tbp_table_RA_fill0 %>% group_by(Taxon) %>%
t_test(RA ~ group) %>% adjust_pvalue(method = "BH") %>% add_significance() %>%
gt::gt_preview()
species_signif <- tbp_table_RA_fill0 %>% group_by(Taxon) %>% group_by(Taxon) %>%
t_test(RA ~ group) %>% adjust_pvalue(method = "BH") %>% filter(p.adj <.05) %>% pull(Taxon) %>% unique()
for(taxon in high_species){
tbp_table_RA_fill0 %>% filter(Taxon == taxon) %>%
ggplot(aes(x = group, y = RA, fill = group)) + geom_boxplot() +
geom_jitter()+
ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("healthy", "Ambulatoric")),
step_increase = .1, map_signif_level = F)+ theme_classic() + ggtitle(taxon)
ggsave(paste0(taxon, ".png"), height = 4)
}
tbp_table_RA_fill0 %>%
# filter(Taxon %in% species_signif) %>%
ggplot(aes(x = group, y = RA, fill = group)) +
geom_jitter(aes(color = group),alpha = .4) +
geom_boxplot( outlier.size = 0) +
ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("healthy", "Ambulatoric")),
step_increase = .1, map_signif_level = F)+ theme_classic() +
facet_wrap(~Taxon, scales = "free_y")
ggsave("Taxons_level.png", height = 8, width = 8)
metadata %>% distinct(number, .keep_all = T) %>%
group_by(group) %>%
summarise(Age = mean(age), Size = n(), Males = sum(str_count(`m/f`, "m"))) %>%
mutate(`%` = Males/Size*100)
## # A tibble: 3 × 5
## group Age Size Males `%`
## <fct> <dbl> <int> <int> <dbl>
## 1 Ambulatoric 41.1 19 16 84.2
## 2 Hospitalized 45.7 12 12 100
## 3 healthy 39.6 32 30 93.8
pl <- tbp_table %>% filter(Taxon %in% high_species) %>%
ggplot(aes(x = group, y = RA, fill = group)) +
geom_boxplot() + geom_jitter(alpha = .7)+
ggsignif::geom_signif(comparisons =
list( c("Hospitalized", "Ambulatoric")
, c("healthy", "Hospitalized")
, c("healthy", "Ambulatoric")),
step_increase = .1, map_signif_level = F)+ theme_classic() +
facet_wrap(~Taxon)
pl

ggsave("4_highs.png", pl, height = 5)
## Saving 7 x 5 in image
norm_data <-
data %>% data.frame() %>% rownames_to_column("Feature.ID") %>% pivot_longer(-1) %>%
group_by(name) %>% mutate(RA = value/sum(value)) %>% select(-value) %>%
pivot_wider(id_cols = Feature.ID, names_from = name, values_from = RA)
NG_ids <- metadata %>% filter(group == "Hospitalized") %>% pull(sampleid)
hel_ids <- metadata %>% filter(group == "healthy") %>% pull(sampleid)
at <-
read_csv("Corelation_with_time_from_accident.csv") %>% rename(Feature.ID = 1) %>%
filter(`_calour_direction` == 'Anti-days from accident') %>% left_join(norm_data)
## New names:
## Rows: 47 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): ...1, _feature_id, _calour_direction dbl (3): _calour_stat, _calour_pval,
## _calour_qval
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining, by = "Feature.ID"
## • `` -> `...1`
dd <-
read_csv("Corelation_with_time_from_accident.csv") %>% rename(Feature.ID = 1) %>%
filter(`_calour_direction` != 'Anti-days from accident') %>% left_join(norm_data)
## New names:
## Rows: 47 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): ...1, _feature_id, _calour_direction dbl (3): _calour_stat, _calour_pval,
## _calour_qval
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining, by = "Feature.ID"
## • `` -> `...1`
at <- at %>% select(Feature.ID, contains("calour")) %>%
cbind(data.frame(Ambulatoric = at %>% select(NG_ids) %>% rowMeans())) %>%
cbind(data.frame(Healthy = at %>% select(hel_ids) %>% rowMeans()))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(NG_ids)` instead of `NG_ids` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(hel_ids)` instead of `hel_ids` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
dd <- dd %>% select(Feature.ID, contains("calour")) %>%
cbind(data.frame(Ambulatoric = dd %>% select(NG_ids) %>% rowMeans())) %>%
cbind(data.frame(Healthy = dd %>% select(hel_ids) %>% rowMeans()))
# for(species in low_species) tbp_table <- tbp_table %>% mutate(Taxon = na_if(Taxon, species))
amnon <- at %>% rbind(dd) %>%
left_join(taxonomy) %>%
separate(Taxon, sep = "; ", into = c("K", "Phylum", "C", "O","F","G", "S"))
## Joining, by = "Feature.ID"
## Warning: Expected 7 pieces. Missing pieces filled with `NA` in 7 rows [15, 23,
## 27, 31, 38, 39, 47].
amnon <-
amnon %>%
mutate(`_calour_direction` =
recode(`_calour_direction`, "Anti-days from accident"= "Decrease over time")) %>%
mutate(direction =
recode(`_calour_direction`, "days from accident"= "Increase over time"))
bb <- data.frame(line = c(0, .05), direction = NA, Phylum = NA)
amnon %>%
ggplot(aes(x = Healthy, y = Ambulatoric
, shape = direction)) +
stat_ellipse(aes(fill = direction), geom = "polygon", alpha = .3) +
geom_point(aes(color = Phylum), size = 4, stroke = 3) +
scale_shape_manual(values = c(25, 24)) +
scale_color_manual(values = c("#F8766D", "#A3A500", "#39B600", "red","#00B0F6","blue")) +
scale_x_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) +
scale_y_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) +
# scale_fill_manual(values = c("#F8766D", "#A3A500", "#39B600", "red","#00B0F6","blue")) +
theme_classic() +
labs(x = ~"Average abundance in "*underline(healthy)*" group (log scale)"
, y = ~"Average abundance in "*underline(ambulatoric)*" group (log scale)"
, shape = ~"Correlation with time\nfrom accident in\nHospitalized group"
, fill = ~"Correlation with time\nfrom accident in\nHospitalized group") +
geom_line(data = bb, aes(x = line, y = line), linetype = 2, color = "grey") +
ggtitle("Bacetria species that corelated with time from accident \nare higher in the ambulatoric group and vice versa") +
guides(fill = guide_legend(order = 1), shape = guide_legend(order = 1))
amnon %>%
ggplot(aes(x = Healthy, y = Ambulatoric, shape = direction)) +
geom_smooth(aes(fill = direction), linetype = "blank", span = 100) +
geom_point(aes(color = Phylum, fill = direction)
, size = 4, stroke = 1) +
scale_shape_manual(values = c(25, 24)) +
scale_x_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) +
scale_y_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) +
theme_classic() +
labs(x = ~"Average abundance in "*underline(healthy)*" group (log scale)"
, y = ~"Average abundance in "*underline(ambulatoric)*" group (log scale)"
, shape = ~"Correlation with time\nfrom accident in\nHospitalized group"
, fill = ~"Correlation with time\nfrom accident in\nHospitalized group"
, color = ~"Phylum") +
geom_line(data = bb, aes(x = line, y = line), linetype = 3, color = "black") +
ggtitle("Bacetria species that corelated with time from accident \nare higher in the ambulatoric group and vice versa") +
scale_color_manual(values = c("#F8766D", "#A3A500", "#39B600", "red","#00B0F6","blue")) +
guides(fill = guide_legend(order = 1), shape = guide_legend(order = 1)) +
theme(legend.key.size = unit(.1,"native"))

amnon %>% pivot_longer(cols = c(Ambulatoric, Healthy)) %>%
ggplot(aes(x = name, y = value %>% log())) +
geom_boxplot(outlier.alpha =0) +
facet_wrap(~direction, scales = "free") +
geom_jitter(aes(color = `_calour_qval`)) +
geom_signif(comparisons = list(c("Ambulatoric", "Healthy")))
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning: Removed 8 rows containing non-finite values (stat_signif).
## Warning in wilcox.test.default(c(-8.98776548840483, -5.03565510067515,
## -7.03484848840462, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(-9.28901687668794, -7.21594866474404,
## -10.124139145837, : cannot compute exact p-value with ties
## Warning: Removed 8 rows containing missing values (geom_point).

taxonomy$Taxon[1]
## [1] "k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Streptophyta; f__; g__; s__"
norm_data %>% left_join(taxonomy) %>%
separate(Taxon, sep = "; ", into = c("k", "p", "c", "o","f","g","s")) %>%
pivot_longer(contains("DB"), names_to = "sampleid", values_to = "RA") %>%
left_join(metadata) %>%
# filter(g == "g__Alistipes" | g == "g__Anaerotruncus") %>%
# filter(g == "g__Alistipes") %>%
filter( g == "g__Roseburia") %>%
filter(!is.na(group)) %>%
ggplot(aes(color = group, x = group, y = RA)) + geom_boxplot() +
geom_jitter() +
scale_y_continuous(trans = "log10") +
ggsignif::geom_signif(comparisons =
list( c("Hospitalized", "Ambulatoric")
, c("healthy", "Hospitalized")
, c("healthy", "Ambulatoric")),
step_increase = .1, map_signif_level = F) +
ggtitle("g__Megamonas") +
theme_classic()
## Joining, by = "Feature.ID"
## Warning: Expected 7 pieces. Missing pieces filled with `NA` in 226 rows [9,
## 10, 11, 12, 13, 16, 17, 19, 20, 51, 79, 81, 122, 142, 143, 221, 273, 290, 300,
## 309, ...].
## Joining, by = "sampleid"
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1796 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1796 rows containing non-finite values (stat_signif).
## Warning: Removed 1796 rows containing missing values (geom_point).

# Maaslin2::Maaslin2(norm_data, metadata, output = "mmaslin", reference = )
"g__Alistipes"
## [1] "g__Alistipes"
"g__Anaerotruncus"
## [1] "g__Anaerotruncus"
"g__Roseburia"
## [1] "g__Roseburia"
"g__Megamonas"
## [1] "g__Megamonas"
taxonomy$Taxon[taxonomy$Taxon %>% str_detect("Megamonas")]
## [1] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [2] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [3] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [4] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [5] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [6] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [7] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [8] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [9] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [10] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [11] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [12] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [13] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__hypermegale"
## [14] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [15] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__hypermegale"
## [16] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__hypermegale"
## [17] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
## [18] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"
ggsignif::geom_signif()
## geom_signif: na.rm = FALSE, extend_line = 0, parse = FALSE, orientation = NA
## stat_signif: na.rm = FALSE, comparisons = NULL, test = wilcox.test, test.args = NULL, annotations = NULL, map_signif_level = FALSE, y_position = NULL, xmin = NULL, xmax = NULL, margin_top = 0.05, step_increase = 0, tip_length = 0.03, manual = FALSE, orientation = NA
## position_identity