Human eyes analysis

Rotem Hadar
2021-01-13
library(tidyverse)
library(xlsx)
# library(rotemfuns)
library(vegan)
library(funrar)
library(subSeq)
library(qiime2R)
library(ggpubr)
library(magrittr)
library(forcats)
library(rmarkdown)
library(kableExtra)
library(gghighlight)
library(microbiomeutilities)
source("/pita/users/rotem/scripts/qiime2_funcs.R")
if(getwd() == "/pita/users/rotem/eyes/") { setwd("human") }

Edit metadata

hadasmap = read.xlsx("Table for Rotem2- Families 12.11.20.xlsx", 1)
hadasmap %<>% pivot_longer(cols = starts_with("DNA_Extracted_from")
                           , names_to = "Sample.type", values_to = "ID", values_drop_na = T) 

hadasmap$Sample.ID = hadasmap$ID
hadasmap$Sample.ID = str_replace_all(hadasmap$Sample.ID, "E_", replacement = "")
hadasmap$Sample.ID = str_replace_all(hadasmap$Sample.ID, "-", replacement = "_")
 hadasmap$Sample.ID = str_replace_all(hadasmap$Sample.ID, "_(?=[FSM])", "")
hadasmap$Age %<>% as.character() %>% as.numeric()
# map$Sample.ID = paste0("E_", map$Sample.ID)
hadasmap$pnid = paste0(hadasmap$Family,"-", hadasmap$Subject_.)
# Verify that all samples that already run found at Hadas table
# setdiff(done$sample_ID , map$sample_ID)
# intersect(done$sample_ID , hadasmap$sample_ID) %>% length() == length(done$sample_ID)

names(hadasmap)[96] = "sample_ID"
hadasmap %<>% na_if("-")
col_to_dbl = c(4, 8:37, 39:43, 48,49, 51, 52, 54, 56:59)
hadasmap %<>% mutate_at(col_to_dbl, funs(as.numeric))
hadasmap$Subject_. <- as.factor(hadasmap$Subject_.)
dbmap = read.csv("/pita/pub/data/16S_DBs/maps/DB1-14_premap_v1.csv")
dbmap$reads_number = dbmap$reads_number %>% as.character() %>% as.numeric()
done = 
  dbmap %>% 
  filter(Cohort == "eyes") %>% 
  # select(-Family) %>% 
  filter(Organism == "Human") %>% 
  arrange(desc(reads_number)) %>% 
  distinct(sample_ID, .keep_all = T)
done$Family = str_extract(gsub("E_", "", done$sample_ID), "F[RH][0-9]{1,2}")

prev_families = read.csv("DB3_control_families.csv")

done =
  done %>%
  full_join(dbmap %>% filter(X.SampleID %in% prev_families$X.SampleID))

# done = filter(done, reads_number > 1000)
done$sample_ID = gsub("E_", "", done$sample_ID %>% as.character())
done$sample_ID = gsub("-", "_", done$sample_ID %>% as.character())
done = distinct(done, sample_ID, .keep_all = T)
done$Age %<>% as.character() %>% as.numeric()
names(done)[1] = "sampleid"
# write.table(done, "16s_map.tsv", row.names = F, sep = "\t", quote = F)

names(prev_families)[1] = "sampleid"

cutoff = 2000
number_pass_reads_co = intersect(hadasmap$sample_ID, 
                                 done %>% filter(reads_number > cutoff) %>% pull(sample_ID)) %>% 
  length()

Samples numbers

At Hadas table (for humans) there are 398 samples. From all that samples 241 samples sequenced. 223 samples have more than 2000 reads per sample.

allmap = done %>% select(sample_ID, sampleid, reads_number, Database, Cohort, Family) %>% 
  left_join(hadasmap %>% select(-ID, -Family), by = "sample_ID") %>% 
  filter(reads_number > cutoff)

allmap %<>%  left_join(  prev_families %>% select(sampleid, Family, Age, Gender)
                         , by = "sampleid") 
# allmap = full_join(tt
#                    , prev_families %>% select(X.SampleID, sample_ID, ID, Age, Gender, Family)
#           , by = "sample_ID") 

allmap = unite(allmap, "Family", contains("Family."), na.rm = T)
allmap = unite(allmap, "Age", contains("Age."), na.rm = T)
allmap = unite(allmap, "Gender", contains("Gender."), na.rm = T)
allmap$Gender = gsub("Male", "M", allmap$Gender)
allmap$Gender = gsub("Female", "F", allmap$Gender)
allmap$Age = allmap$Age %>% as.numeric()
allmap$Disease.group %<>% replace_na(0)
map = sample_data(allmap)
sample_names(map) = map$sampleid

# write.xlsx(hadasmap, "hadasmap_for_rotem_12Nov20.xlsx")
# hadasmap %>% filter(Family == "FR5") %>% select(ID)
# map %>% data.frame() %>% filter(is.na(Sample.type))

Sompe plots

allmap$Sample.type[is.na(allmap$Sample.type)] = "DNA_Extracted_from_feaces...F."
allmap %>% 
  group_by(Family, Sample.type) %>% summarise(number_of_samples = n()) %>% 
  group_by(Sample.type) %>% 
  summarise(number_of_samples = sum(number_of_samples), number_of_families = n()) %>% 
  ggplot(aes(x = Sample.type, y = number_of_samples)) + 
  scale_x_discrete(labels = c("Feces", "Mouth", "Saliva")) +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  geom_segment(color = "red", aes(xend = Sample.type, y=0, yend = number_of_samples)) +
  geom_point(aes(size = number_of_families), 
             color="red", fill="orange", alpha=0.7, shape=21, stroke=2) +
  geom_text(aes(label = number_of_families)) +
  theme_q2r()+
  theme(text = element_text(size = 14))
allmap %>% group_by(Family) %>% 
  summarise(patients = sum(Disease.group == 1 | Disease.group == 2)) %>%  
  filter(patients > 0) %>% 
  ggplot(aes(y = patients, x = Family, color = Family)) +
  geom_point() + theme_q2r() + ggtitle("Number of patients in each family")

table = 
  allmap %>% filter(grepl("FR.*",Family)) %>% #filter(!is.na(Age)) %>%
  group_by(pnid) %>% 
  summarise(  Family = first(Family), Age = first(Age)
              , Age_on_diagnosis = first(Age_on_diagnosis)
              , Retinal_degeneration_type = first(Retinal_degeneration_type)) %>% 
  mutate(pnid = fct_reorder(pnid, Age)) %>%
  mutate(pnid = fct_reorder(pnid, Family)) %>%
  mutate(pnid = fct_reorder(pnid, Retinal_degeneration_type)) %>%
  mutate(Family = gsub("FR", "", Family))

table %<>% 
  left_join(table %>% group_by(Family) %>% 
              summarise(Family_Disease = first(Retinal_degeneration_type))) 
# mutate(Family = fct_reorder(pnid, Family_Disease))
# table$Family = with(table, fct_reorder(Family, Family_Disease))


pl = table %>% 
  ggplot(aes(x = pnid, color = Retinal_degeneration_type)) + 
  geom_point(aes(y = Age)) + 
  geom_linerange(aes( ymin = Age_on_diagnosis, ymax = Age)) +
  facet_grid(.~Family, scales = "free_x", space = "free_x") +
  labs(subtitle = "Family number") +
  theme(  axis.text.x  = element_blank()
          , axis.ticks.x = element_blank()
          , axis.title.x = element_blank()
          , strip.background = element_blank()
          , panel.grid = element_blank()
          , panel.background = element_rect(color="grey25", linetype = "blank")
          , legend.position = "bottom"
  ) 

pl 
allmap %>% filter(grepl("FR.*",Family)) %>% select(pnid, Family)
       pnid Family
1     FR7-4    FR7
2    FR10-6   FR10
3      <NA>   FR15
4    FR12-6   FR12
5     FR9-2    FR9
6     FR6-2    FR6
7      <NA>   FR15
8      <NA>   FR15
9     FR7-3    FR7
10     <NA>   FR15
11    FR3-5    FR3
12     <NA>   FR15
13    FR5-1    FR5
14    FR4-4    FR4
15     <NA>   FR15
16    FR5-4    FR5
17   FR28-5   FR28
18    FR5-2    FR5
19   FR28-4   FR28
20   FR12-3   FR12
21     <NA>   FR15
22    FR8-1    FR8
23   FR11-1   FR11
24   FR10-5   FR10
25    FR9-3    FR9
26   FR13-1   FR13
27     <NA>   FR15
28    FR3-3    FR3
29     <NA>   FR15
30   FR12-8   FR12
31     <NA>   FR15
32    FR7-1    FR7
33   FR8-10    FR8
34     <NA>   FR15
35   FR26-2   FR26
36   FR10-7   FR10
37     <NA>   FR15
38   FR10-2   FR10
39    FR7-1    FR7
40    FR2-7    FR2
41   FR12-4   FR12
42    FR8-9    FR8
43    FR2-1    FR2
44    FR2-2    FR2
45   FR10-8   FR10
46     <NA>   FR15
47   FR12-2   FR12
48   FR28-6   FR28
49   FR10-4   FR10
50   FR21-1   FR21
51   FR13-5   FR13
52    FR5-3    FR5
53   FR21-2   FR21
54    FR8-8    FR8
55   FR8-11    FR8
56   FR21-3   FR21
57     <NA>   FR15
58    FR8-3    FR8
59    FR3-1    FR3
60    FR7-4    FR7
61   FR28-1   FR28
62   FR8-12    FR8
63    FR2-6    FR2
64   FR28-2   FR28
65     <NA>   FR25
66    FR5-8    FR5
67   FR26-1   FR26
68   FR12-5   FR12
69    FR7-4    FR7
70     <NA>   FR15
71   FR14-2   FR14
72     <NA>   FR18
73   FR21-4   FR21
74   FR13-4   FR13
75   FR21-8   FR21
76    FR5-7    FR5
77    FR3-1    FR3
78     <NA>   FR15
79     <NA>   FR15
80     <NA>   FR15
81   FR28-9   FR28
82    FR7-2    FR7
83  FR28-10   FR28
84    FR7-3    FR7
85   FR13-3   FR13
86   FR28-4   FR28
87     <NA>   FR15
88   FR12-7   FR12
89   FR24-1   FR24
90    FR8-7    FR8
91   FR26-5   FR26
92   FR10-3   FR10
93    FR8-5    FR8
94     <NA>   FR15
95     <NA>   FR17
96   FR25-1   FR25
97     <NA>   FR15
98   FR8-14    FR8
99    FR9-1    FR9
100   FR4-4    FR4
101  FR21-7   FR21
102   FR5-1    FR5
103  FR27-2   FR27
104   FR4-2    FR4
105   FR5-4    FR5
106    <NA>   FR15
107  FR28-2   FR28
108  FR23-1   FR23
109    <NA>   FR15
110    <NA>   FR15
111    <NA>   FR15
112    <NA>   FR18
113 FR28-11   FR28
114    <NA>   FR15
115    <NA>   FR15
116   FR5-1    FR5
117   FR7-3    FR7
118  FR21-7   FR21
119    <NA>   FR15
120    <NA>   FR15
121    <NA>   FR15
122  FR23-1   FR23
123    <NA>   FR15
124 FR28-12   FR28
125    <NA>   FR16
126  FR29-3   FR29
127  FR26-3   FR26
128  FR13-2   FR13
129   FR5-7    FR5
130  FR14-1   FR14
131   FR5-2    FR5
132    <NA>   FR18
133  FR24-1   FR24
134   FR5-6    FR5
135   FR5-3    FR5
136   FR5-3    FR5
137    <NA>   FR15
138    <NA>   FR15
139    <NA>   FR15
140    <NA>   FR19
141    <NA>   FR16
142  FR26-3   FR26
143    <NA>   FR18
144   FR8-4    FR8
145    <NA>   FR16
146  FR28-8   FR28
147    <NA>   FR15
148  FR21-6   FR21
149    <NA>   FR16
150    <NA>   FR15
151    <NA>   FR15
152   FR5-4    FR5
153    <NA>   FR17
154   FR5-3    FR5
155    <NA>   FR20
156    <NA>   FR17
157   FR8-7    FR8
158  FR28-7   FR28
159    <NA>   FR15
160   FR8-2    FR8
161  FR23-6   FR23
162    <NA>    FR3
163    <NA>   FR15
164    <NA>   FR19
165    <NA>   FR20
166  FR29-1   FR29
167  FR23-3   FR23
168    <NA>   FR16
169  FR8-14    FR8
170    <NA>   FR18
171    <NA>   FR16
172  FR14-4   FR14
173  FR29-5   FR29
174    <NA>   FR15
175  FR8-13    FR8
176    <NA>   FR20
177  FR23-4   FR23
178  FR23-5   FR23
179    <NA>   FR16
180  FR8-10    FR8
181  FR8-13    FR8
182    <NA>   FR16
183    <NA>   FR20
184    <NA>   FR16
185    <NA>   FR22
186  FR23-5   FR23
187   FR8-9    FR8
188    <NA>   FR20
189   FR2-8    FR2
190    <NA>   FR20
191    <NA>   FR22
192    <NA>   FR22
193  FR28-9   FR28
194    <NA>   FR19
195  FR29-2   FR29
196    <NA>   FR20
197    <NA>   FR15
198    <NA>   FR22
199    <NA>   FR18
200   FR8-1    FR8
201  FR29-4   FR29
202  FR23-3   FR23
203  FR12-1   FR12
204   FR3-2    FR3
205    <NA>    FR3
206   FR5-4    FR5
207  FR21-5   FR21
208    <NA>   FR15
209   FR8-5    FR8
210   FR9-2    FR9
211   FR8-6    FR8
212    <NA>   FR17
213   FR2-8    FR2
214  FR25-1   FR25
215    <NA>   FR19
216    <NA>   FR16
217    <NA>   FR22
218    <NA>   FR16
219 FR28-11   FR28
220    <NA>   FR19
221   FR2-4    FR2
222    <NA>   FR19
223    <NA>   FR19
224    <NA>   FR18
225    <NA>   FR16
226   FR5-4    FR5
227 FR28-10   FR28
228    <NA>   FR22
229   FR8-8    FR8
230    <NA>    FR3
231    <NA>   FR20
232   FR8-2    FR8
233    <NA>   FR19
234   FR5-8    FR5
235    <NA>   FR20
236    <NA>   FR22
237    <NA>   FR20
238    <NA>   FR19
239  FR28-3   FR28
240    <NA>   FR20
241    <NA>   FR20
242    <NA>   FR20
243  FR23-6   FR23
244  FR29-2   FR29
245    <NA>   FR20
246    <NA>   FR18
247   FR7-1    FR7
248    <NA>   FR15
249    <NA>   FR22
250  FR21-4   FR21
251    <NA>   FR19
252    <NA>   FR20
253  FR26-1   FR26
254   FR9-1    FR9
255    <NA>    FR3
256  FR8-11    FR8
257   FR3-4    FR3
258    <NA>   FR19
259    <NA>   FR20
260    <NA>   FR22
261   FR2-5    FR2
262    <NA>   FR22
263    <NA>   FR15
264  FR28-8   FR28
265    <NA>   FR15
266    <NA>   FR19
267  FR21-1   FR21
268  FR27-2   FR27
269  FR21-2   FR21
270  FR25-2   FR25
271    <NA>    FR3
272   FR4-3    FR4
273    <NA>   FR20
274    <NA>   FR22
275  FR23-2   FR23
276    <NA>   FR16
277  FR21-3   FR21
278    <NA>   FR19
279   FR2-1    FR2
280  FR27-5   FR27
281    <NA>   FR20
282   FR3-3    FR3
283    <NA>   FR18
284  FR26-4   FR26
285   FR8-4    FR8
286  FR27-1   FR27
287   FR4-2    FR4
288   FR4-3    FR4
289    <NA>   FR22
290    <NA>   FR15
291    <NA>    FR3
292   FR5-2    FR5
293    <NA>   FR16
294   FR7-2    FR7
295   FR3-4    FR3
296   FR5-1    FR5
297  FR29-5   FR29
298   FR5-1    FR5
299    <NA>   FR18
300    <NA>    FR3
301   FR3-5    FR3
302  FR28-6   FR28
303   FR2-5    FR2
304   FR4-1    FR4
305  FR23-2   FR23
306   FR5-3    FR5
307  FR28-1   FR28
308    <NA>    FR6
309   FR5-6    FR5
310    <NA>   FR19
311  FR27-6   FR27
312    <NA>   FR22
313   FR7-2    FR7
314  FR8-12    FR8
315   FR2-2    FR2
316    <NA>   FR20
317  FR27-4   FR27
318    <NA>   FR20
319    <NA>    FR3
320    <NA>   FR20
321  FR27-3   FR27
322   FR8-3    FR8
323   FR2-7    FR2
324   FR2-6    FR2
325  FR29-3   FR29
326  FR28-7   FR28
327    <NA>    FR3
328    <NA>   FR22
329   FR4-1    FR4
330    <NA>    FR3
331    <NA>   FR15

More than one patient in family

# more_than_one = paste0("FR", c(3,4,5,8,9,13,15,16,18,19,20,21,23))
# allmap %>% filter(Family %in% more_than_one) %>% select(Family)

allmap %>% group_by(pnid) %>% 
  summarise(Family = first(Family), Disease = first(Disease.code)) %>% ungroup() %>% 
  group_by(Family) %>% summarise(pn_in_family = sum(!is.na(Disease))) %>% 
  filter(pn_in_family > 1) %>% kable()
Family pn_in_family
FR13 2
FR2 4
FR21 4
FR23 3
FR26 2
FR3 2
FR4 2
FR5 2
FR8 2
FR9 2

samples sequenced more than once

allmap %>% group_by(pnid, Sample.type) %>% summarise(n = n()) %>% filter(n >1) %>% kable()
pnid Sample.type n
FR5-1 DNA_Extracted_from_feaces…F. 2
FR5-1 DNA_Extracted_from_saliva…S. 2
FR5-2 DNA_Extracted_from_feaces…F. 2
FR5-3 DNA_Extracted_from_feaces…F. 2
FR5-3 DNA_Extracted_from_saliva…S. 2
FR5-4 DNA_Extracted_from_feaces…F. 2
FR5-4 DNA_Extracted_from_saliva…S. 2
FR7-1 DNA_Extracted_from_feaces…F. 2
FR7-2 DNA_Extracted_from_feaces…F. 2
FR7-3 DNA_Extracted_from_feaces…F. 2
FR7-4 DNA_Extracted_from_feaces…F. 2
NA DNA_Extracted_from_feaces…F. 162

Below 2000 reads

done %>% filter(reads_number < 2000) %>% select(sampleid, Database, sample_ID) %>% kable()
sampleid Database sample_ID
DB9.094 DB9 FR8_6S
DB12.192 DB12 FR15_27_F
DB14.339 DB14 FR28_3S
DB14.216 DB14 FR5_2S V2
DB13.031 DB13 FR22_8_F
DB14.317 DB14 FR21_6F
DB14.277 DB14 FR25_2F
DB14.349 DB14 FR29_1S
DB13.116 DB13 FR16_6_S
DB14.226 DB14 FR21_5S
DB13.006 DB13 FR18_6_F
DB10.010 DB10 FR3_2M
DB14.239 DB14 FR26_2S
DB13.106 DB13 FR15_27_S
DB13.108 DB13 FR15_29_S
DB13.115 DB13 FR16_5_S
DB9.262 DB9 FR9_4F
DB14.281 DB14 FR26_4F
DB13.103 DB13 FR15_24_S
DB13.099 DB13 FR15_20_S
DB13.082 DB13 FR15_3_S
DB14.341 DB14 FR28_5S
DB12.213 DB12 FR6_3
DB13.090 DB13 FR15_11_S
DB9.066 DB9 FR2_3M
DB9.254 DB9 FR6_1
DB13.104 DB13 FR15_25_S
DB13.007 DB13 FR19_1_F
DB13.111 DB13 FR15_32_S
DB13.119 DB13 FR16_9_S
DB9.148 DB9 FR10_1F
DB14.320 DB14 FR28_12F
# allmap$Family %>% unique() %>% length()
# grep("FR", allmap$Family %>% unique()) %>% length()

Import experiment data

biompath = "/pita/pub/data/16S_DBs/processed/16S_DB1-14_merged/table.qza"
taxpath = "/pita/pub/data/16S_DBs/processed/16S_DB1-14_merged/taxonomy.qza"
biom = read_qza(biompath)$data
biom = biom %>% data.frame() 
biom = biom %>% select(sample_names(map))
min_reads_cutoff = cutoff
biom = biom[apply(biom, 2, sum) > min_reads_cutoff]
rare_biom = vegan::rrarefy(biom %>% t(), 10000) %>% t()
otu = phyloseq::otu_table(rare_biom, taxa_are_rows = T)
otu = phyloseq::prune_taxa(taxa_sums(otu) > 10, otu)
relative_otu = otu_table(funrar::make_relative(otu@.Data), taxa_are_rows = T)
relative_otu = filter_taxa(relative_otu,  function(x) mean(x) > 1e-5, TRUE)

pretax = read_qza(taxpath)$data
tax = pretax %>% phyloseq::tax_table()
phyloseq::taxa_names(tax) = pretax$Feature.ID

tree = ape::rtree(ntaxa(tax), rooted = T, tip.label = phyloseq::taxa_names(tax))

abs_exp = phyloseq(otu, map, tax, tree)
exp = phyloseq(relative_otu, map, tax, tree)
# write_phyloseq(abs_exp, path = "/pita/users/rotem/eyes/human/eyes123.csv",type = "all")
samples_to_filter = "/pita/users/rotem/eyes/human/samples_to_filter.csv"
abs_exp %>% sample_data() %>% data.frame() %>% rownames() %>% 
  write.table(samples_to_filter, quote = F, sep = "\t", row.names = F, col.names = "sample-id")
cmd = paste0(qiime_path, "feature-table filter-samples",
             " --i-table ", biompath,
             " --m-metadata-file ", samples_to_filter, 
             " --o-filtered-table ", "/pita/users/rotem/eyes/human/eyes-filtered.qza")
system(cmd)
abs_exp %>% sample_data() %>% data.frame() %>% 
  write.table("/pita/users/rotem/eyes/human/eyes-filtered.tsv"
              , quote = F, sep = "\t", row.names = F)
# read_qza("/pita/users/rotem/eyes/human/eyes-filtered.qza")$data %>% dim()
file.remove(samples_to_filter)