ALL IMPORTED FILES MUST BE IN THE SAME DIRECTORY (‘MGRB’) AS THIS SCRIPT

Load required packages

library("readbulk")
library("tidyverse")

Import files with sample cohort and relevant MGRB data for individual candidate genes (located in sub-directory ‘Genes’, downloaded from https://sgc.garvan.org.au/), removing other genes

ViP_list <- read.delim("ViP Samples & Candidate Genes.txt", header=TRUE, stringsAsFactors=FALSE, sep="\t", comment.char="") %>% 
  arrange(Sample)
genes <- ViP_list$SYMBOL

MGRB <- read_bulk(directory="Genes")
MGRB_genes <- filter(MGRB, gene%in%genes) %>% 
  filter(!gene%in%"ENTPD4") %>% 
  mutate_at(vars(gnomadAF),funs(replace(., is.na(.), 0)))

Remove non-LOF and common (GnomAD AF > 0.005) variants

MGRB_genes_LOF <- filter(MGRB_genes,consequences%in%c(
  "frameshift_variant",
  "stop_gained",
  "splice_donor_variant",
  "splice_acceptor_variant",
  "start_lost",
  "frameshift_variant,splice_region_variant"))
MGRB_genes_LOF_rare <- filter(MGRB_genes_LOF, gnomadAF<=0.005)

Calculate summary stats for MGRB LoF candidate gene variants, and output results

MGRB_LOF_stats <-group_by(MGRB_genes_LOF,gene) %>%
  select(Allele.Count,Allele.Frequency) %>% 
  mutate(Total_MGRB_Allele_Count=sum(Allele.Count)) %>% 
  mutate(Total_MGRB_Gene_Freq=sum(Allele.Frequency)) %>% 
  distinct(gene,Total_MGRB_Allele_Count,Total_MGRB_Gene_Freq) %>% 
  rename(SYMBOL=gene)
MGRB_LOF_rare_stats <-group_by(MGRB_genes_LOF_rare,gene) %>%
  select(Allele.Count,Allele.Frequency) %>% 
  mutate(Total_MGRB_Allele_Count_0.005=sum(Allele.Count)) %>% 
  mutate(Total_MGRB_Gene_Freq_0.005=sum(Allele.Frequency)) %>% 
  distinct(gene,Total_MGRB_Allele_Count_0.005,Total_MGRB_Gene_Freq_0.005) %>% 
  rename(SYMBOL=gene)

write_excel_csv(MGRB_LOF_stats,path="MGRB_LOF_stats.csv", na=".",append=FALSE,col_names=TRUE)
write_excel_csv(MGRB_LOF_rare_stats,path="MGRB_LOF_rare_stats.csv", na=".",append=FALSE,col_names=TRUE)

Import candidate gene summary stats for case cohort (see Nature Comms paper- https://www.nature.com/articles/s41467-020-15461-z, and Supp Data 2), and merge with MGRB summary data

ViP_Candidate_Genes_Stats <- read.delim("ViP Candidate Genes Stats.txt", header=TRUE, stringsAsFactors=FALSE, sep="\t", comment.char="")
ViP_MGRB_Stats_Combined <-  left_join(ViP_Candidate_Genes_Stats, MGRB_LOF_stats,by="SYMBOL",copy=FALSE) %>% 
  mutate_at(vars(Total_MGRB_Allele_Count),funs(replace(., is.na(.), 0))) %>% 
  mutate_at(vars(Total_MGRB_Gene_Freq),funs(replace(., is.na(.), 0)))
ViP_MGRB_Stats_Combined <- left_join(ViP_MGRB_Stats_Combined, MGRB_LOF_rare_stats,by="SYMBOL",copy=FALSE) %>% 
  mutate_at(vars(Total_MGRB_Allele_Count_0.005),funs(replace(., is.na(.), 0))) %>% 
  mutate_at(vars(Total_MGRB_Gene_Freq_0.005),funs(replace(., is.na(.), 0))) %>% 
  mutate(Total_MGRB_Alleles=5690)

Calculate Fisher’s test values, and output results

ViP_MGRB_Stats_Combined_ratios_fisherresults <- 
  mutate(ViP_MGRB_Stats_Combined, 
         "Sample_Gene_LOF_Freq_Ratio_MGRB"=Sample_Gene_Freq/Total_MGRB_Gene_Freq,
         "Sample_Gene_LOF_Freq_Ratio_MGRB_0.005"=Sample_Gene_Freq/Total_MGRB_Gene_Freq_0.005,
         "Ratio_Difference"=Sample_Gene_LOF_Freq_Ratio_MGRB_0.005-Sample_Gene_LOF_Freq_Ratio_MGRB)

fisherresults_MGRB_0.005 <- apply(ViP_MGRB_Stats_Combined_ratios_fisherresults[,c(16,29,18,31)],1,function(x) 
  {fisher.test(rbind(x[1:2],c(x[3]-x[1],x[4]-x[2])))})
fisherresults_MGRBvsGnomADNFE_0.005 <- apply(ViP_MGRB_Stats_Combined_ratios_fisherresults[,c(29,17,31,19)],1,function(x) 
{fisher.test(rbind(x[1:2],c(x[3]-x[1],x[4]-x[2])))})

ViP_MGRB_Stats_Combined_ratios_fisherresults$P_value_Fisher_MGRB_0.005 = sapply(fisherresults_MGRB_0.005,function(x) round(x$p.value,12))
ViP_MGRB_Stats_Combined_ratios_fisherresults$OR_MGRB_0.005 = sapply(fisherresults_MGRB_0.005,function(x) round(x$estimate,4))
ViP_MGRB_Stats_Combined_ratios_fisherresults$`95%CI_MGRB_0.005` = sapply(fisherresults_MGRB_0.005,function(x) paste(round(x$conf.int[1:2],2),collapse="~"))
ViP_MGRB_Stats_Combined_ratios_fisherresults$P_value_Fisher_MGRBvsGnomADNFE_0.005 = sapply(fisherresults_MGRBvsGnomADNFE_0.005,function(x) round(x$p.value,12))
ViP_MGRB_Stats_Combined_ratios_fisherresults$OR_MGRBvsGnomADNFE_0.005 = sapply(fisherresults_MGRBvsGnomADNFE_0.005,function(x) round(x$estimate,4))
ViP_MGRB_Stats_Combined_ratios_fisherresults$`95%CI_MGRBvsGnomADNFE_0.005` = sapply(fisherresults_MGRBvsGnomADNFE_0.005,function(x) paste(round(x$conf.int[1:2],2),collapse="~"))

write_excel_csv(ViP_MGRB_Stats_Combined_ratios_fisherresults,path="ViP_MGRB_Stats_Combined_ratios_fisherresults.csv", na=".",append=FALSE,col_names=TRUE)
LS0tCnRpdGxlOiAiVGhlc2lzIE1HUkIgRGF0YSBDb21wYXJpc29uIFNjcmlwdCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKQUxMIElNUE9SVEVEIEZJTEVTIE1VU1QgQkUgSU4gVEhFIFNBTUUgRElSRUNUT1JZICgnTUdSQicpIEFTIFRISVMgU0NSSVBUCgpMb2FkIHJlcXVpcmVkIHBhY2thZ2VzCmBgYHtyfQpsaWJyYXJ5KCJyZWFkYnVsayIpCmxpYnJhcnkoInRpZHl2ZXJzZSIpCmBgYAoKSW1wb3J0IGZpbGVzIHdpdGggc2FtcGxlIGNvaG9ydCBhbmQgcmVsZXZhbnQgTUdSQiBkYXRhIGZvciBpbmRpdmlkdWFsIGNhbmRpZGF0ZSBnZW5lcyAobG9jYXRlZCBpbiBzdWItZGlyZWN0b3J5ICdHZW5lcycsIGRvd25sb2FkZWQgZnJvbSBodHRwczovL3NnYy5nYXJ2YW4ub3JnLmF1LyksIHJlbW92aW5nIG90aGVyIGdlbmVzCmBgYHtyfQpWaVBfbGlzdCA8LSByZWFkLmRlbGltKCJWaVAgU2FtcGxlcyAmIENhbmRpZGF0ZSBHZW5lcy50eHQiLCBoZWFkZXI9VFJVRSwgc3RyaW5nc0FzRmFjdG9ycz1GQUxTRSwgc2VwPSJcdCIsIGNvbW1lbnQuY2hhcj0iIikgJT4lIAogIGFycmFuZ2UoU2FtcGxlKQpnZW5lcyA8LSBWaVBfbGlzdCRTWU1CT0wKCk1HUkIgPC0gcmVhZF9idWxrKGRpcmVjdG9yeT0iR2VuZXMiKQpNR1JCX2dlbmVzIDwtIGZpbHRlcihNR1JCLCBnZW5lJWluJWdlbmVzKSAlPiUgCiAgZmlsdGVyKCFnZW5lJWluJSJFTlRQRDQiKSAlPiUgCiAgbXV0YXRlX2F0KHZhcnMoZ25vbWFkQUYpLGZ1bnMocmVwbGFjZSguLCBpcy5uYSguKSwgMCkpKQpgYGAKClJlbW92ZSBub24tTE9GIGFuZCBjb21tb24gKEdub21BRCBBRiA+IDAuMDA1KSB2YXJpYW50cwpgYGB7cn0KTUdSQl9nZW5lc19MT0YgPC0gZmlsdGVyKE1HUkJfZ2VuZXMsY29uc2VxdWVuY2VzJWluJWMoCiAgImZyYW1lc2hpZnRfdmFyaWFudCIsCiAgInN0b3BfZ2FpbmVkIiwKICAic3BsaWNlX2Rvbm9yX3ZhcmlhbnQiLAogICJzcGxpY2VfYWNjZXB0b3JfdmFyaWFudCIsCiAgInN0YXJ0X2xvc3QiLAogICJmcmFtZXNoaWZ0X3ZhcmlhbnQsc3BsaWNlX3JlZ2lvbl92YXJpYW50IikpCk1HUkJfZ2VuZXNfTE9GX3JhcmUgPC0gZmlsdGVyKE1HUkJfZ2VuZXNfTE9GLCBnbm9tYWRBRjw9MC4wMDUpCmBgYAoKQ2FsY3VsYXRlIHN1bW1hcnkgc3RhdHMgZm9yIE1HUkIgTG9GIGNhbmRpZGF0ZSBnZW5lIHZhcmlhbnRzLCBhbmQgb3V0cHV0IHJlc3VsdHMKYGBge3J9Ck1HUkJfTE9GX3N0YXRzIDwtZ3JvdXBfYnkoTUdSQl9nZW5lc19MT0YsZ2VuZSkgJT4lCiAgc2VsZWN0KEFsbGVsZS5Db3VudCxBbGxlbGUuRnJlcXVlbmN5KSAlPiUgCiAgbXV0YXRlKFRvdGFsX01HUkJfQWxsZWxlX0NvdW50PXN1bShBbGxlbGUuQ291bnQpKSAlPiUgCiAgbXV0YXRlKFRvdGFsX01HUkJfR2VuZV9GcmVxPXN1bShBbGxlbGUuRnJlcXVlbmN5KSkgJT4lIAogIGRpc3RpbmN0KGdlbmUsVG90YWxfTUdSQl9BbGxlbGVfQ291bnQsVG90YWxfTUdSQl9HZW5lX0ZyZXEpICU+JSAKICByZW5hbWUoU1lNQk9MPWdlbmUpCk1HUkJfTE9GX3JhcmVfc3RhdHMgPC1ncm91cF9ieShNR1JCX2dlbmVzX0xPRl9yYXJlLGdlbmUpICU+JQogIHNlbGVjdChBbGxlbGUuQ291bnQsQWxsZWxlLkZyZXF1ZW5jeSkgJT4lIAogIG11dGF0ZShUb3RhbF9NR1JCX0FsbGVsZV9Db3VudF8wLjAwNT1zdW0oQWxsZWxlLkNvdW50KSkgJT4lIAogIG11dGF0ZShUb3RhbF9NR1JCX0dlbmVfRnJlcV8wLjAwNT1zdW0oQWxsZWxlLkZyZXF1ZW5jeSkpICU+JSAKICBkaXN0aW5jdChnZW5lLFRvdGFsX01HUkJfQWxsZWxlX0NvdW50XzAuMDA1LFRvdGFsX01HUkJfR2VuZV9GcmVxXzAuMDA1KSAlPiUgCiAgcmVuYW1lKFNZTUJPTD1nZW5lKQoKd3JpdGVfZXhjZWxfY3N2KE1HUkJfTE9GX3N0YXRzLHBhdGg9Ik1HUkJfTE9GX3N0YXRzLmNzdiIsIG5hPSIuIixhcHBlbmQ9RkFMU0UsY29sX25hbWVzPVRSVUUpCndyaXRlX2V4Y2VsX2NzdihNR1JCX0xPRl9yYXJlX3N0YXRzLHBhdGg9Ik1HUkJfTE9GX3JhcmVfc3RhdHMuY3N2IiwgbmE9Ii4iLGFwcGVuZD1GQUxTRSxjb2xfbmFtZXM9VFJVRSkKYGBgCgpJbXBvcnQgY2FuZGlkYXRlIGdlbmUgc3VtbWFyeSBzdGF0cyBmb3IgY2FzZSBjb2hvcnQgKHNlZSBOYXR1cmUgQ29tbXMgcGFwZXItIGh0dHBzOi8vd3d3Lm5hdHVyZS5jb20vYXJ0aWNsZXMvczQxNDY3LTAyMC0xNTQ2MS16LCBhbmQgU3VwcCBEYXRhIDIpLCBhbmQgbWVyZ2Ugd2l0aCBNR1JCIHN1bW1hcnkgZGF0YQpgYGB7cn0KVmlQX0NhbmRpZGF0ZV9HZW5lc19TdGF0cyA8LSByZWFkLmRlbGltKCJWaVAgQ2FuZGlkYXRlIEdlbmVzIFN0YXRzLnR4dCIsIGhlYWRlcj1UUlVFLCBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLCBzZXA9Ilx0IiwgY29tbWVudC5jaGFyPSIiKQpWaVBfTUdSQl9TdGF0c19Db21iaW5lZCA8LSAgbGVmdF9qb2luKFZpUF9DYW5kaWRhdGVfR2VuZXNfU3RhdHMsIE1HUkJfTE9GX3N0YXRzLGJ5PSJTWU1CT0wiLGNvcHk9RkFMU0UpICU+JSAKICBtdXRhdGVfYXQodmFycyhUb3RhbF9NR1JCX0FsbGVsZV9Db3VudCksZnVucyhyZXBsYWNlKC4sIGlzLm5hKC4pLCAwKSkpICU+JSAKICBtdXRhdGVfYXQodmFycyhUb3RhbF9NR1JCX0dlbmVfRnJlcSksZnVucyhyZXBsYWNlKC4sIGlzLm5hKC4pLCAwKSkpClZpUF9NR1JCX1N0YXRzX0NvbWJpbmVkIDwtIGxlZnRfam9pbihWaVBfTUdSQl9TdGF0c19Db21iaW5lZCwgTUdSQl9MT0ZfcmFyZV9zdGF0cyxieT0iU1lNQk9MIixjb3B5PUZBTFNFKSAlPiUgCiAgbXV0YXRlX2F0KHZhcnMoVG90YWxfTUdSQl9BbGxlbGVfQ291bnRfMC4wMDUpLGZ1bnMocmVwbGFjZSguLCBpcy5uYSguKSwgMCkpKSAlPiUgCiAgbXV0YXRlX2F0KHZhcnMoVG90YWxfTUdSQl9HZW5lX0ZyZXFfMC4wMDUpLGZ1bnMocmVwbGFjZSguLCBpcy5uYSguKSwgMCkpKSAlPiUgCiAgbXV0YXRlKFRvdGFsX01HUkJfQWxsZWxlcz01NjkwKQpgYGAKCkNhbGN1bGF0ZSBGaXNoZXIncyB0ZXN0IHZhbHVlcywgYW5kIG91dHB1dCByZXN1bHRzCmBgYHtyfQpWaVBfTUdSQl9TdGF0c19Db21iaW5lZF9yYXRpb3NfZmlzaGVycmVzdWx0cyA8LSAKICBtdXRhdGUoVmlQX01HUkJfU3RhdHNfQ29tYmluZWQsIAogICAgICAgICAiU2FtcGxlX0dlbmVfTE9GX0ZyZXFfUmF0aW9fTUdSQiI9U2FtcGxlX0dlbmVfRnJlcS9Ub3RhbF9NR1JCX0dlbmVfRnJlcSwKICAgICAgICAgIlNhbXBsZV9HZW5lX0xPRl9GcmVxX1JhdGlvX01HUkJfMC4wMDUiPVNhbXBsZV9HZW5lX0ZyZXEvVG90YWxfTUdSQl9HZW5lX0ZyZXFfMC4wMDUsCiAgICAgICAgICJSYXRpb19EaWZmZXJlbmNlIj1TYW1wbGVfR2VuZV9MT0ZfRnJlcV9SYXRpb19NR1JCXzAuMDA1LVNhbXBsZV9HZW5lX0xPRl9GcmVxX1JhdGlvX01HUkIpCgpmaXNoZXJyZXN1bHRzX01HUkJfMC4wMDUgPC0gYXBwbHkoVmlQX01HUkJfU3RhdHNfQ29tYmluZWRfcmF0aW9zX2Zpc2hlcnJlc3VsdHNbLGMoMTYsMjksMTgsMzEpXSwxLGZ1bmN0aW9uKHgpIAogIHtmaXNoZXIudGVzdChyYmluZCh4WzE6Ml0sYyh4WzNdLXhbMV0seFs0XS14WzJdKSkpfSkKZmlzaGVycmVzdWx0c19NR1JCdnNHbm9tQURORkVfMC4wMDUgPC0gYXBwbHkoVmlQX01HUkJfU3RhdHNfQ29tYmluZWRfcmF0aW9zX2Zpc2hlcnJlc3VsdHNbLGMoMjksMTcsMzEsMTkpXSwxLGZ1bmN0aW9uKHgpIAp7ZmlzaGVyLnRlc3QocmJpbmQoeFsxOjJdLGMoeFszXS14WzFdLHhbNF0teFsyXSkpKX0pCgpWaVBfTUdSQl9TdGF0c19Db21iaW5lZF9yYXRpb3NfZmlzaGVycmVzdWx0cyRQX3ZhbHVlX0Zpc2hlcl9NR1JCXzAuMDA1ID0gc2FwcGx5KGZpc2hlcnJlc3VsdHNfTUdSQl8wLjAwNSxmdW5jdGlvbih4KSByb3VuZCh4JHAudmFsdWUsMTIpKQpWaVBfTUdSQl9TdGF0c19Db21iaW5lZF9yYXRpb3NfZmlzaGVycmVzdWx0cyRPUl9NR1JCXzAuMDA1ID0gc2FwcGx5KGZpc2hlcnJlc3VsdHNfTUdSQl8wLjAwNSxmdW5jdGlvbih4KSByb3VuZCh4JGVzdGltYXRlLDQpKQpWaVBfTUdSQl9TdGF0c19Db21iaW5lZF9yYXRpb3NfZmlzaGVycmVzdWx0cyRgOTUlQ0lfTUdSQl8wLjAwNWAgPSBzYXBwbHkoZmlzaGVycmVzdWx0c19NR1JCXzAuMDA1LGZ1bmN0aW9uKHgpIHBhc3RlKHJvdW5kKHgkY29uZi5pbnRbMToyXSwyKSxjb2xsYXBzZT0ifiIpKQpWaVBfTUdSQl9TdGF0c19Db21iaW5lZF9yYXRpb3NfZmlzaGVycmVzdWx0cyRQX3ZhbHVlX0Zpc2hlcl9NR1JCdnNHbm9tQURORkVfMC4wMDUgPSBzYXBwbHkoZmlzaGVycmVzdWx0c19NR1JCdnNHbm9tQURORkVfMC4wMDUsZnVuY3Rpb24oeCkgcm91bmQoeCRwLnZhbHVlLDEyKSkKVmlQX01HUkJfU3RhdHNfQ29tYmluZWRfcmF0aW9zX2Zpc2hlcnJlc3VsdHMkT1JfTUdSQnZzR25vbUFETkZFXzAuMDA1ID0gc2FwcGx5KGZpc2hlcnJlc3VsdHNfTUdSQnZzR25vbUFETkZFXzAuMDA1LGZ1bmN0aW9uKHgpIHJvdW5kKHgkZXN0aW1hdGUsNCkpClZpUF9NR1JCX1N0YXRzX0NvbWJpbmVkX3JhdGlvc19maXNoZXJyZXN1bHRzJGA5NSVDSV9NR1JCdnNHbm9tQURORkVfMC4wMDVgID0gc2FwcGx5KGZpc2hlcnJlc3VsdHNfTUdSQnZzR25vbUFETkZFXzAuMDA1LGZ1bmN0aW9uKHgpIHBhc3RlKHJvdW5kKHgkY29uZi5pbnRbMToyXSwyKSxjb2xsYXBzZT0ifiIpKQoKd3JpdGVfZXhjZWxfY3N2KFZpUF9NR1JCX1N0YXRzX0NvbWJpbmVkX3JhdGlvc19maXNoZXJyZXN1bHRzLHBhdGg9IlZpUF9NR1JCX1N0YXRzX0NvbWJpbmVkX3JhdGlvc19maXNoZXJyZXN1bHRzLmNzdiIsIG5hPSIuIixhcHBlbmQ9RkFMU0UsY29sX25hbWVzPVRSVUUpCmBgYAo=