Neurogenic

Code
library(tidyverse)
library(qiime2R)
library(ggsignif)

# setwd("~/oldserver/neurogenic/")
setwd("/pita/users/rotem/neurogenic/")
metadata <- "neurogenic_table_with_reads_10Feb22.txt" %>% read_tsv()
metadata <- "neurogenic_table_with_reads_19Sep22.csv" %>% read_csv()
metadata <- metadata %>% filter(number != "37")

metadata$group <- metadata$group %>% factor(levels = 
                                              c("Ambulatoric","Hospitalized", "healthy"))

Cohort

Code
metadata %>% group_by(number) %>%
  # mutate(times = row_number()) %>%
  filter(group == "Hospitalized") %>% inner_join(
    data.frame(qiime2R::read_qza("cmr/faith_pd_vector.qza")$data) %>% 
    rename(Faith = faith_pd) %>% rownames_to_column("sampleid")) %>% 
  group_by(number) %>% mutate(delta_Faith = Faith - lag(Faith)) %>% 
  # filter(number != 8) %>% 
  ggplot(aes(x = `days from accident`, y = fct_reorder(number, Faith))) + 
  geom_line(color = "black") +
  geom_point(size = 4) +
  scale_color_gradient2(low="red", high="blue", mid = "grey") +
  # facet_wrap(~number, ncol = 3)
  theme_classic()  + 
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  labs(y = "Patient", x = "Days since accident", col = "Faith")

Alpha Diversity

Code
# metadata <- "neurogenic_table_with_reads_21Dec21.tsv" %>% read_tsv()
# metadata <- "neurogenic_table_with_reads_24feb22.xlsx" %>% readxl::read_xlsx(1)
# metadata <- "neurogenic_table_with_reads_24feb22.csv" %>% read_csv()
metadata <- metadata %>% filter(number != "37")
data <- qiime2R::read_qza("neurogenic_table.qza")$data
taxonomy <- read_qza("../../../pub/data/16S_DBs/processed/16S_DB1-19_merged/taxonomy.qza")$data %>% data.frame()
shannon <- vegan::diversity(data, MARGIN = 2, index = "shannon") %>% data.frame()

pl <- 
metadata %>% inner_join(
  data.frame(qiime2R::read_qza("cmr2/faith_pd_vector.qza")$data) %>% rownames_to_column("sampleid")) %>% 
  group_by(number) %>% filter(sampleid == last(sampleid)) %>%
  # group_by(number) %>% filter(sampleid == first(sampleid) | sampleid == last(sampleid)) %>%
  # group_by(number) %>% mutate(fl = case_when(sampleid == first(sampleid) ~ "first", sampleid== last(sampleid) ~ "last" )) %>%
  # mutate(group = paste(group, fl)) %>% 
  ggplot(aes(x = group, y = faith_pd, fill = group)) + 
  geom_boxplot(show.legend = F, outlier.alpha = 0) +
  geom_dotplot(binaxis = "y", stackdir = "center", alpha = .3) +
  ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("Ambulatoric", "healthy")), step_increase = .1, map_signif_level = F) +
  # ggsignif::geom_signif(comparisons = list(c("Hospitalized first", "Hospitalized last"), c("healthy first", "Hospitalized last"), c("Ambulatoric first", "Hospitalized last")), step_increase = .1) +
  theme_classic() + 
  theme(legend.position = "none") +ggtitle("All Hospitalized")
pl

Code
# ggsave("faith.png", pl, height = 14, width = 8, units = "cm")

Disease index

Code
# pl <- 
metadata %>% rename(SampleID = sampleid) %>% 
  filter(!(group %>% is.na())) %>% 
  inner_join(read_csv("NG_unidi.csv")) %>% 
  group_by(number) %>% filter(SampleID == last(SampleID)) %>%
  # group_by(number) %>% filter(SampleID == first(SampleID) | SampleID == last(SampleID)) %>%
  # group_by(number) %>% mutate(fl = case_when(SampleID == first(SampleID) ~ "first", SampleID == last(SampleID) ~ "last" )) %>%
  # mutate(group = paste(group, fl)) %>%
  # filter(group != "NG NA") %>% 
  ggplot(aes(x = group, y = UniDI, fill = group)) +
  geom_boxplot(width = .3, show.legend = F, outlier.alpha = 0) +
  geom_dotplot(binaxis = "y", stackdir = "center", alpha = .3) +
  ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("Ambulatoric", "healthy")), step_increase = .1, map_signif_level = F) +
  # ggsignif::geom_signif(comparisons = mycompr, step_increase = .1, map_signif_level = T) +
  theme_classic() + 
  theme(legend.position = "none") + ggtitle("All Hospitalized")

Code
pl

Code
ggsave("UniDi.png", pl, height = 14, width = 8, units = "cm")

Taxa bar plot

Code
## tbp  
# metadata <- "neurogenic_table_with_reads_24feb22.csv" %>% read_csv()
metadata <- metadata %>% filter(number != "37")

tbp_table <- read_csv("tbp.tsv/level-2.csv")
tbp_table$group[tbp_table$group  == "NG"] <- "Hospitalized"
tbp_table$group[tbp_table$group  == "NG_clinic"] <- "Ambulatoric"
tbp_table$group <- tbp_table$group %>% factor(levels = c("Ambulatoric","Hospitalized", "healthy"))
tbp_table <- tbp_table %>% 
  left_join(metadata) %>% 
  pivot_longer(cols = contains("k__"), names_to = "Taxon") %>% 
  mutate(Taxon = str_remove(Taxon, "k__Bacteria;")) %>% 
  group_by(number, Taxon) %>% summarise(reads = last(value), group = first(group)) %>% 
  filter(reads > 10) 
tbp_table <- tbp_table %>% group_by(number) %>% mutate(RA = reads/sum(reads)) %>% ungroup()


low_species <- tbp_table %>% group_by(Taxon) %>% summarise(reads = sum(reads)) %>% arrange(reads) %>% slice_head(n = 8) %>% pull(Taxon)
high_species <- tbp_table %>% group_by(Taxon) %>% summarise(reads = sum(reads)) %>% arrange(desc(reads)) %>% slice_head(n = 4) %>% pull(Taxon)
for(species in low_species) tbp_table <- tbp_table %>% mutate(Taxon = na_if(Taxon, species))

tbp_table %>% 
  # mutate(Taxon = replace_na(Taxon, "Others")) %>%
  # ggplot(aes(x = number, y = reads, fill = Taxon)) + geom_col(position = "fill") + 
  ggplot(aes(x = number, y = RA, fill = Taxon)) + geom_col(position = "fill") + 
  facet_grid(~group, scales = "free_x", space = "free_x") + 
  ylab("") + theme_void() + theme(axis.text.x = element_blank(), strip.text.x = element_text(size = 12))

Code
tbp_table %>% filter(Taxon == "p__Bacteroidetes" | Taxon == "p__Firmicutes") %>% 
  pivot_wider(id_cols = c(number, group), names_from = "Taxon", values_from = "RA") %>% 
  mutate(BF_ratio = p__Bacteroidetes/p__Firmicutes) %>% 
  ggplot() + aes(x = group, y = BF_ratio, color = group) + 
  geom_boxplot() + 
  ggsignif::geom_signif(comparisons = list(
    c("Hospitalized", "Ambulatoric")
  , c("healthy", "Hospitalized")
  , c("Ambulatoric", "healthy")), step_increase = .1) + 
  theme_classic()

Code
tbp_table %>% filter(Taxon %in% high_species) %>% 
  ggplot() + aes(x = group, y = RA, color = group) + 
  geom_boxplot() + geom_jitter(alpha = .5) + 
  facet_wrap(~Taxon, scales = "free_y") +
  ggsignif::geom_signif(comparisons = 
                list(c("Hospitalized", "Ambulatoric")
                   , c("healthy", "Hospitalized")
                   , c("Ambulatoric", "healthy")), step_increase = .1, map_signif_level = F) +
  theme_classic() + theme(strip.background = element_blank())

Alpha first and last

Code
# tbp_table %>% group_by(Taxon) %>% summarise(cor.test())

library(ggpubr)
tbl <- 
metadata %>% inner_join(
  data.frame(qiime2R::read_qza("cmr2/faith_pd_vector.qza")$data) %>% 
    rownames_to_column("sampleid")) %>% 
  # group_by(number) %>% filter(sampleid == last(sampleid)) %>%
  # group_by(number) %>% filter(sampleid == first(sampleid) | sampleid == last(sampleid)) %>%
  group_by(number) %>% mutate(fl = case_when(sampleid == first(sampleid) ~ "first", sampleid== last(sampleid) ~ "last" )) %>%
  mutate(group = paste(group, fl)) %>% 
  filter(group != "Hospitalized NA")
pl <- tbl %>% ggplot(aes(x = group, y = faith_pd, fill = group)) + 
  geom_boxplot(show.legend = F, outlier.alpha = 0) +
  geom_dotplot(binaxis = "y", stackdir = "center", alpha = .3)

mycompr <- list(  c("healthy first", "Ambulatoric first")
                , c("healthy first", "Hospitalized last")
                , c("healthy first", "Hospitalized first")
                , c("Hospitalized first", "Hospitalized last")
                , c("Hospitalized first", "Ambulatoric first")
                , c("Hospitalized last" , "Ambulatoric first")
                )
pl + stat_compare_means(paired = T)

Code
pl +
  # ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("Ambulatoric", "healthy")), step_increase = .1) +
  ggsignif::geom_signif(comparisons = mycompr, step_increase = .1) +
  theme_classic() +
  theme(legend.position = "none")

Code
df <- data.frame(num_ASV = c(37, 71, 30, 20), healthy = c(T,T,F,F), hospitalized = c(T,F,T,F))
df %>% ggplot(aes(x = healthy, y = num_ASV, fill = hospitalized)) + geom_col() + 
  labs(x = "Higher in healthy", y = "number of ASV", fill = "Higher in hospitalized") + 
  theme_classic() 

Code
df %>% gt::gt_preview()
num_ASV healthy hospitalized
1 37 TRUE TRUE
2 71 TRUE FALSE
3 30 FALSE TRUE
4 20 FALSE FALSE
Code
tbp_table_RA_fill0 <- tbp_table %>% 
  filter(!is.na(Taxon)) %>% 
  # group_by(number) %>% mutate(RA = reads/sum(reads)) %>% ungroup() %>% 
  pivot_wider(id_cols = Taxon, names_from = number, values_from = RA, values_fill = 0) %>% 
  pivot_longer(cols = -1, names_to = "number", values_to = "RA") %>%
    left_join(tbp_table %>% select(number, group)) %>%  distinct(number, Taxon, group, RA)


library(rstatix)
tbp_table_RA_fill0 %>% group_by(Taxon) %>% 
  t_test(RA ~ group) %>% adjust_pvalue(method = "BH") %>% add_significance() %>% 
  gt::gt_preview()

species_signif <- tbp_table_RA_fill0 %>% group_by(Taxon)  %>% group_by(Taxon) %>% 
  t_test(RA ~ group) %>% adjust_pvalue(method = "BH") %>%  filter(p.adj <.05) %>% pull(Taxon) %>% unique()
for(taxon in high_species){
  tbp_table_RA_fill0 %>% filter(Taxon == taxon) %>% 
    ggplot(aes(x = group, y = RA, fill = group)) + geom_boxplot() + 
    geom_jitter()+
    ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("healthy", "Ambulatoric")),
                step_increase = .1, map_signif_level = F)+ theme_classic() + ggtitle(taxon)
  ggsave(paste0(taxon, ".png"), height = 4)
}

tbp_table_RA_fill0 %>% 
  # filter(Taxon %in% species_signif) %>% 
  ggplot(aes(x = group, y = RA, fill = group)) + 
  geom_jitter(aes(color = group),alpha = .4) +
  geom_boxplot(  outlier.size = 0) + 
  ggsignif::geom_signif(comparisons = list(c("Hospitalized", "Ambulatoric"), c("healthy", "Hospitalized"), c("healthy", "Ambulatoric")),
                        step_increase = .1, map_signif_level = F)+ theme_classic() +
  facet_wrap(~Taxon, scales = "free_y")
ggsave("Taxons_level.png", height = 8, width = 8)
Code
metadata %>% distinct(number, .keep_all = T) %>% 
    group_by(group) %>% 
    summarise(Age = mean(age), Size = n(), Males = sum(str_count(`m/f`, "m"))) %>%
    mutate(`%` = Males/Size*100)
# A tibble: 3 × 5
  group          Age  Size Males   `%`
  <fct>        <dbl> <int> <int> <dbl>
1 Ambulatoric   41.1    19    16  84.2
2 Hospitalized  45.7    12    12 100  
3 healthy       39.6    32    30  93.8
Code
  pl <- tbp_table %>% filter(Taxon %in%  high_species) %>% 
    ggplot(aes(x = group, y = RA, fill = group)) + 
    geom_boxplot() + geom_jitter(alpha = .7)+
    ggsignif::geom_signif(comparisons = 
                            list(  c("Hospitalized", "Ambulatoric")
                                 , c("healthy", "Hospitalized")
                                 , c("healthy", "Ambulatoric")),
                step_increase = .1, map_signif_level = F)+ theme_classic() + 
    facet_wrap(~Taxon) 
  pl

Code
  ggsave("4_highs.png", pl, height = 5)

norm_data <- 
data %>% data.frame() %>% rownames_to_column("Feature.ID") %>% pivot_longer(-1) %>% 
  group_by(name) %>% mutate(RA = value/sum(value)) %>% select(-value) %>% 
  pivot_wider(id_cols = Feature.ID, names_from = name, values_from = RA)
Code
NG_ids <- metadata %>% filter(group == "Hospitalized") %>% pull(sampleid)
hel_ids <- metadata %>% filter(group == "healthy") %>% pull(sampleid)



at <- 
read_csv("Corelation_with_time_from_accident.csv") %>% rename(Feature.ID = 1) %>% 
  filter(`_calour_direction` == 'Anti-days from accident') %>% left_join(norm_data)

dd <- 
  read_csv("Corelation_with_time_from_accident.csv") %>% rename(Feature.ID = 1) %>% 
  filter(`_calour_direction` != 'Anti-days from accident') %>% left_join(norm_data)

at <- at %>% select(Feature.ID, contains("calour")) %>% 
  cbind(data.frame(Ambulatoric = at %>% select(NG_ids) %>% rowMeans())) %>% 
  cbind(data.frame(Healthy = at %>% select(hel_ids) %>% rowMeans()))

dd <- dd %>% select(Feature.ID, contains("calour")) %>% 
  cbind(data.frame(Ambulatoric = dd %>% select(NG_ids) %>% rowMeans())) %>% 
  cbind(data.frame(Healthy = dd %>% select(hel_ids) %>% rowMeans()))


# for(species in low_species) tbp_table <- tbp_table %>% mutate(Taxon = na_if(Taxon, species))


amnon <- at %>% rbind(dd) %>% 
  left_join(taxonomy) %>% 
  separate(Taxon, sep = "; ", into = c("K", "Phylum", "C", "O","F","G", "S"))

amnon <- 
  amnon %>% 
  mutate(`_calour_direction` = 
         recode(`_calour_direction`, "Anti-days from accident"= "Decrease over time")) %>% 
  mutate(direction = 
         recode(`_calour_direction`, "days from accident"= "Increase over time"))

bb <- data.frame(line = c(0, .05), direction = NA, Phylum = NA)
Code
amnon %>% 
  ggplot(aes(x = Healthy, y = Ambulatoric
             , shape = direction)) + 
  stat_ellipse(aes(fill = direction), geom = "polygon", alpha = .3) + 
  geom_point(aes(color = Phylum), size = 4, stroke = 3) + 
  scale_shape_manual(values = c(25, 24)) + 
  scale_color_manual(values = c("#F8766D", "#A3A500", "#39B600", "red","#00B0F6","blue")) +
  scale_x_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) + 
  scale_y_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) +
  # scale_fill_manual(values  = c("#F8766D", "#A3A500", "#39B600", "red","#00B0F6","blue")) +
  theme_classic() + 
  labs(x = ~"Average abundance in "*underline(healthy)*" group (log scale)"
     , y = ~"Average abundance in "*underline(ambulatoric)*" group (log scale)"
     , shape = ~"Correlation with time\nfrom accident in\nHospitalized group"
     , fill  = ~"Correlation with time\nfrom accident in\nHospitalized group") + 
  geom_line(data = bb, aes(x = line, y = line), linetype = 2, color = "grey") + 
   ggtitle("Bacetria species that corelated with time from accident \nare higher in the ambulatoric group and vice versa") + 
  guides(fill = guide_legend(order = 1), shape = guide_legend(order = 1))
Code
amnon %>% 
  ggplot(aes(x = Healthy, y = Ambulatoric, shape = direction)) + 
  geom_smooth(aes(fill = direction), linetype = "blank", span = 100) +
  geom_point(aes(color = Phylum, fill = direction)
             , size = 4, stroke = 1) + 
  scale_shape_manual(values = c(25, 24)) +
  scale_x_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) + 
  scale_y_continuous(trans = "log10", breaks = c(10^-5,10^-3,10^-1)) +
  theme_classic() + 
  labs(x = ~"Average abundance in "*underline(healthy)*" group (log scale)"
     , y = ~"Average abundance in "*underline(ambulatoric)*" group (log scale)"
     , shape = ~"Correlation with time\nfrom accident in\nHospitalized group"
     , fill  = ~"Correlation with time\nfrom accident in\nHospitalized group"
     , color  = ~"Phylum") + 
  geom_line(data = bb, aes(x = line, y = line), linetype = 3, color = "black") + 
   ggtitle("Bacetria species that corelated with time from accident \nare higher in the ambulatoric group and vice versa") +
  scale_color_manual(values  = c("#F8766D", "#A3A500", "#39B600", "red","#00B0F6","blue")) + 
  guides(fill = guide_legend(order = 1), shape = guide_legend(order = 1)) + 
  theme(legend.key.size = unit(.1,"native"))

Code
amnon %>% pivot_longer(cols = c(Ambulatoric, Healthy)) %>% 
  ggplot(aes(x = name, y = value %>% log())) + 
  geom_boxplot(outlier.alpha =0) + 
  facet_wrap(~direction, scales = "free") + 
  geom_jitter(aes(color = `_calour_qval`)) + 
  geom_signif(comparisons = list(c("Ambulatoric", "Healthy")))

Code
taxonomy$Taxon[1]
[1] "k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Streptophyta; f__; g__; s__"
Code
norm_data %>% left_join(taxonomy) %>% 
  separate(Taxon, sep = "; ", into = c("k", "p", "c", "o","f","g","s")) %>% 
  pivot_longer(contains("DB"), names_to = "sampleid", values_to = "RA") %>% 
  left_join(metadata) %>% 
  # filter(g == "g__Alistipes" | g == "g__Anaerotruncus") %>%
  # filter(g == "g__Alistipes") %>% 
  filter( g == "g__Roseburia") %>% 
  filter(!is.na(group)) %>% 
  ggplot(aes(color = group, x = group, y = RA)) + geom_boxplot() + 
  geom_jitter() + 
  scale_y_continuous(trans = "log10") +
      ggsignif::geom_signif(comparisons = 
                            list(  c("Hospitalized", "Ambulatoric")
                                 , c("healthy", "Hospitalized")
                                 , c("healthy", "Ambulatoric")),
                step_increase = .1, map_signif_level = F) + 
  ggtitle("g__Megamonas") + 
  theme_classic()

Code
# Maaslin2::Maaslin2(norm_data, metadata, output = "mmaslin", reference = )
Code
"g__Alistipes"
[1] "g__Alistipes"
Code
"g__Anaerotruncus"
[1] "g__Anaerotruncus"
Code
"g__Roseburia"
[1] "g__Roseburia"
Code
"g__Megamonas"
[1] "g__Megamonas"
Code
taxonomy$Taxon[taxonomy$Taxon %>% str_detect("Megamonas")]
 [1] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [2] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [3] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [4] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [5] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [6] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [7] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [8] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
 [9] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
[10] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
[11] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
[12] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
[13] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__hypermegale"
[14] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
[15] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__hypermegale"
[16] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__hypermegale"
[17] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
[18] "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Megamonas; s__"           
Code
ggsignif::geom_signif()
geom_signif: na.rm = FALSE, extend_line = 0, parse = FALSE, orientation = NA
stat_signif: na.rm = FALSE, comparisons = NULL, test = wilcox.test, test.args = NULL, annotations = NULL, map_signif_level = FALSE, y_position = NULL, xmin = NULL, xmax = NULL, margin_top = 0.05, step_increase = 0, tip_length = 0.03, manual = FALSE, orientation = NA
position_identity