Samples passing QC (more than 2000 reads)

# Load the 16S sample map (tab-separated file) into `data`.
data <- read.csv(
  file = "/pita/users/rotem/eyes/human/Feaces/16s_map.tsv",
  sep = "\t"
)

Total number of families that passed QC

# Count the distinct families that have at least one sample with more than
# 2000 reads.
data %>%
  filter(reads_number > 2000) %>%
  distinct(Family) %>%
  nrow()
## [1] 26

PCoA (not including the families from the old DB3)

# Read the unweighted-UniFrac PCoA results and rename the columns to
# PCoA_1, PCoA_2, ... in their existing order.
pca <- read_core_pcoa("/pita/users/rotem/eyes/human/Feaces/res/core-metrics-results/unweighted_unifrac_pcoa_results.qza")
# seq_along() rather than 1:length(): the latter counts down (1, 0) when the
# input is empty.
names(pca) <- paste0("PCoA_", seq_along(names(pca)))

# Read the faith-pd table. Its first column is renamed to "sampleid" and
# converted to character; it is used as the join key further down.
# NOTE(review): this path is relative, while the other inputs in this script
# use absolute paths, so it depends on the working directory -- confirm.
faith <- read_faith_qzv("res/core-metrics-results/faith-pd-correlation.qzv")
names(faith)[1] <- "sampleid"
faith$sampleid <- as.character(faith$sampleid)

# Number of samples with more than 2000 reads.
sum(data$reads_number > 2000)
## [1] 135
# Attach the PCoA coordinates to the sample table. The row names of `pca`
# become a `sampleid` column; no `by` is given, so the join uses whatever
# columns the two tables have in common.
data <- left_join(
  data,
  rownames_to_column(pca, "sampleid")
)
## Joining, by = "sampleid"
## Warning: Column `sampleid` joining factor and character vector, coercing into
## character vector
# Add the faith_pd column to the sample table, matching rows on the name of
# faith's first column.
data <- left_join(
  data,
  select(faith, sampleid, faith_pd),
  by = names(faith)[1]
)
# create_dt() is defined outside this section; presumably it renders the
# merged table -- confirm.
create_dt(data)
# Scatter of the first two PCoA axes, coloured by Age. The extra `name`
# aesthetic carries sample_ID (presumably for the interactive tooltip).
pl <- data %>%
  ggplot(aes(x = PCoA_1, y = PCoA_2, color = Age, name = sample_ID)) +
  geom_point() +
  theme_rh
plotly::ggplotly(pl)
# faith_pd against Age, one point per row, coloured by Family (legend
# hidden); x-axis labels are rotated by -45 degrees.
pl <- ggplot(
  data,
  aes(x = Age, y = faith_pd, fill = Family, color = Family)
) +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_point(show.legend = FALSE) +
  theme_rh +
  theme(axis.text.x = element_text(angle = -45, hjust = 0))
plotly::ggplotly(pl)
# data %>% 
#   ggplot(aes(fill = Retinal_degeneration_type, x = Age)) + 
#   geom_histogram(stat = 'count') + theme_rh
# data$Date_of_diagnosis

# data %>% 
#   group_by(Family) %>% 
#   slice(which.min(Date_of_diagnosis))
# Per-family panels (rows whose Family matches "FR.*") with three point
# layers drawn in this order:
#   1. Age_on_diagnosis per sample, in red;
#   2. for each family, the row with the smallest Date_of_diagnosis, in red
#      and sized by Date_of_diagnosis;
#   3. Age per sample, default colour, drawn on top.
# NOTE(review): layer 2 is built from the unfiltered `data`, so it is not
# restricted to the "FR.*" families like the base layer -- confirm intended.
pl <- ggplot(
  data = filter(data, grepl("FR.*", Family)),
  mapping = aes(x = sample_ID, y = Age)
) +
  geom_point(mapping = aes(y = Age_on_diagnosis), color = 'red') +
  geom_point(
    data = slice(group_by(data, Family), which.min(Date_of_diagnosis)),
    mapping = aes(size = Date_of_diagnosis),
    color = "red"
  ) +
  geom_point() +
  facet_wrap(~Family, nrow = 4, scales = 'free_x') +
  theme_rh
pl
## Warning: Removed 76 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# plotly::ggplotly(pl)
# Scatter of the first two PCoA axes, coloured by Family, shown as an
# interactive plotly figure.
pl <- data %>%
  ggplot(aes(x = PCoA_1, y = PCoA_2, color = Family, name = sample_ID)) +
  geom_point() +
  theme_rh

plotly::ggplotly(pl)
# Scatter of the first two PCoA axes, coloured by Gender, shown as an
# interactive plotly figure.
pl <- data %>%
  ggplot(aes(x = PCoA_1, y = PCoA_2, color = Gender, name = sample_ID)) +
  geom_point() +
  theme_rh
plotly::ggplotly(pl)
# Reload the faith-pd table and join every column whose name contains
# "faith_pd" onto `data`, keyed on faith's first column.
# NOTE(review): `data` already received a faith_pd column from the earlier
# join, so this second join produces suffixed duplicates (the boxplot below
# reads faith_pd.x). It looks redundant -- confirm before removing, since
# later code may rely on the suffixed names.
faith <- read_faith_qzv("res/core-metrics-results/faith-pd-correlation.qzv")
names(faith)[1] <- "sampleid"
faith$sampleid <- as.character(faith$sampleid)

data <- left_join(
  data,
  select(faith, sampleid, contains("faith_pd")),
  by = names(faith)[1]
)

# Boxplot of faith_pd.x per Family (legend hidden); x-axis labels are
# rotated by -45 degrees.
ggplot(data, aes(x = Family, y = faith_pd.x, fill = Family)) +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_boxplot(show.legend = FALSE) +
  theme_rh +
  theme(axis.text.x = element_text(angle = -45, hjust = 0))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).