title: “DESeq2 SCRIPT” output: html_notebook

rm(list=ls())
suppressMessages(library(DESeq2))
suppressMessages(library(optparse))
suppressMessages(library(tximport))
suppressMessages(library(dplyr))
suppressMessages(library(sva))
suppressMessages(library(limma))

option_list = list(
  make_option(c("-i", "--input"), type="character", default=NULL,
              help="list input files", metavar="character"),
  make_option(c("-b", "--batch"), type="character", default=NULL,
              help="control batch effect or not", metavar="character"),
  make_option(c("-t", "--type"), type="character", default=NULL,
              help="salmon", metavar="character"),
  make_option(c("-m", "--meta"), type="character", default=NULL,
              help="metasheet info", metavar="character"),
  make_option(c("-x", "--tx2gene"), type="character", default=NULL,
              help="transcript annotation", metavar="character"),
  make_option(c("-o", "--outpath"), type="character", default="./",
              help="output file path and prefix ", metavar="character"),
  make_option(c("-g", "--condition"), type="character", default="./",
              help="Condition to do comparison", metavar="character"),
  make_option(c("-r", "--treatment"), type="character", default="./",
              help="Treatment", metavar="character"),
  make_option(c("-c", "--control"), type="character", default="./",
              help="Control", metavar="character"),
  make_option(c("-p", "--pcoding"), type="character", default="./",
              help="proding coding gene list", metavar="character")
)
          

opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser)

batch <- opt$batch 
Condition <- opt$condition
metadata <- opt$meta
gene <- opt$tx2gene
Treatment <- opt$treatment
print(Treatment)
Control <- opt$control
print(Control)
Type <- opt$type
input <- opt$input

print("Reading meta file ...")
meta <- read.table(file = metadata, sep=',', header = TRUE, stringsAsFactors = FALSE, row.names = 1)
samples <- subset(meta, meta[,Condition] != 'NA')


print ("Reading tx2gene file ...")
tx2gene <- read.table(file = gene,sep = ",",header = TRUE)


if (is.null(opt$input) || is.null(opt$tx2gene) || is.null(opt$type)){
  print_help(opt_parser)
  stop("At least 3 arguments must be supplied ", call.=FALSE)
}

######################################--------------programme-------------##################################
###----If you have transcript quantification files, as produced by Salmon, Sailfish, or kallisto, you would use DESeqDataSetFromTximport.

Transcript <- function(files,samples,tx2gene,Type,batch){

  filelist <- strsplit(files, "\\,")[[1]]
  print(filelist)
  #print(rownames(meta))
  filelist.samples <- sapply(rownames(samples), function(x) grep(paste0("\\b",x,"\\b"), filelist, value = TRUE))
  
  #exactly match may output a list, need to convert list to character
  filelist.samples <- filelist.samples[lapply(filelist.samples,length)>0]
  print(paste("There are ",length(filelist.samples), "samples to be compared ...", sep = ""))

  tmp_chr <- as.character(filelist.samples)
  names(tmp_chr) <- names(filelist.samples)

  filelist.samples <- tmp_chr
  print(filelist.samples)

  txi <- tximport(filelist.samples, type=Type, tx2gene=tx2gene)
  txi$length[txi$length == 0] <- 1
  
  print(head(txi$counts))
  exprsn <- txi$counts
  
  tmp_matrix <- txi$abundance

  
  if(batch != "False"){
    print (paste("Running DESeq2 with batch effects ",opt$batch," on ",opt$condition,sep=""))  
     
    colData <- samples[,c(batch ,Condition)]
    colnames(colData) <- c("Batch","Condition")
       
    ddsTxi <- DESeqDataSetFromTximport(txi,
                                       colData = colData,
                                       design = ~ Batch + Condition)
                                       
    print ("Generating TPM matrix ...")
    write.table(tmp_matrix,paste(opt$outpath,opt$condition,'_',opt$treatment,'_vs_',opt$control,'_TPMs.txt',sep = ""),quote = FALSE,sep = "\t")
    write.table(exprsn,paste(opt$outpath,opt$condition,'_',opt$treatment,'_vs_',opt$control,'_genecount.txt',sep = ""),quote = FALSE,sep = "\t")

  }else{
    print(paste("Running DESeq2 on ",opt$condition,sep=""))
    
    colData <- cbind(rownames(samples),samples[,Condition])
    colnames(colData) <- c('sample','Condition')
    print(colData)
    
    ddsTxi <- DESeqDataSetFromTximport(txi,
                                       colData = colData,
                                       design = ~ Condition)
                                       
    print ("Generating TPM matrix ...")
    write.table(exprsn,paste(opt$outpath,opt$condition,'_',opt$treatment,'_vs_',opt$control,'_TPMs.txt',sep = ""),quote = FALSE,sep = "\t")
  }
  dds <- DESeq(ddsTxi)
  
  return (dds)
}


print ("Star running DESeq2 ...")
dds <- Transcript(files = input,samples, tx2gene = tx2gene, Type = Type, batch = batch)
print(class(dds))

save.image("git_version.RData")

print (paste("Comparing ",opt$treatment , " VS ", opt$control, sep = ""))
res <- results(dds, contrast = c("Condition",c(opt$treatment,opt$control)))
                 
source("src/differentialexpr/clusteringheatmap.R")
clustering_heatmap(dds, res, opt$pcoding, opt$outpath, opt$treatment, opt$control)

res_final <- as.data.frame(res)
res_final$Gene_name <- rownames(res_final)
res_final <- res_final[c(7, 1:6)]
res_final$`-log10(padj)` <- -log10(res_final$padj)
write.table(res_final,file = paste(opt$outpath,opt$condition,'_',opt$treatment,'_vs_',opt$control,'_DESeq2.txt',sep = ""), quote = FALSE,sep = "\t")
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAotLS0KLS0tCnRpdGxlOiAiREVTZXEyIFNDUklQVCIKb3V0cHV0OiBodG1sX25vdGVib29rCgpgYGB7cn0Kcm0obGlzdD1scygpKQpzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkoREVTZXEyKSkKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KG9wdHBhcnNlKSkKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KHR4aW1wb3J0KSkKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KGRwbHlyKSkKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KHN2YSkpCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShsaW1tYSkpCgpvcHRpb25fbGlzdCA9IGxpc3QoCiAgbWFrZV9vcHRpb24oYygiLWkiLCAiLS1pbnB1dCIpLCB0eXBlPSJjaGFyYWN0ZXIiLCBkZWZhdWx0PU5VTEwsCiAgICAgICAgICAgICAgaGVscD0ibGlzdCBpbnB1dCBmaWxlcyIsIG1ldGF2YXI9ImNoYXJhY3RlciIpLAogIG1ha2Vfb3B0aW9uKGMoIi1iIiwgIi0tYmF0Y2giKSwgdHlwZT0iY2hhcmFjdGVyIiwgZGVmYXVsdD1OVUxMLAogICAgICAgICAgICAgIGhlbHA9ImNvbnRyb2wgYmF0Y2ggZWZmZWN0IG9yIG5vdCIsIG1ldGF2YXI9ImNoYXJhY3RlciIpLAogIG1ha2Vfb3B0aW9uKGMoIi10IiwgIi0tdHlwZSIpLCB0eXBlPSJjaGFyYWN0ZXIiLCBkZWZhdWx0PU5VTEwsCiAgICAgICAgICAgICAgaGVscD0ic2FsbW9uIiwgbWV0YXZhcj0iY2hhcmFjdGVyIiksCiAgbWFrZV9vcHRpb24oYygiLW0iLCAiLS1tZXRhIiksIHR5cGU9ImNoYXJhY3RlciIsIGRlZmF1bHQ9TlVMTCwKICAgICAgICAgICAgICBoZWxwPSJtZXRhc2hlZXQgaW5mbyIsIG1ldGF2YXI9ImNoYXJhY3RlciIpLAogIG1ha2Vfb3B0aW9uKGMoIi14IiwgIi0tdHgyZ2VuZSIpLCB0eXBlPSJjaGFyYWN0ZXIiLCBkZWZhdWx0PU5VTEwsCiAgICAgICAgICAgICAgaGVscD0idHJhbnNjcmlwdCBhbm5vdGF0aW9uIiwgbWV0YXZhcj0iY2hhcmFjdGVyIiksCiAgbWFrZV9vcHRpb24oYygiLW8iLCAiLS1vdXRwYXRoIiksIHR5cGU9ImNoYXJhY3RlciIsIGRlZmF1bHQ9Ii4vIiwKICAgICAgICAgICAgICBoZWxwPSJvdXRwdXQgZmlsZSBwYXRoIGFuZCBwcmVmaXggIiwgbWV0YXZhcj0iY2hhcmFjdGVyIiksCiAgbWFrZV9vcHRpb24oYygiLWciLCAiLS1jb25kaXRpb24iKSwgdHlwZT0iY2hhcmFjdGVyIiwgZGVmYXVsdD0iLi8iLAogICAgICAgICAgICAgIGhlbHA9IkNvbmRpdGlvbiB0byBkbyBjb21wYXJpc29uIiwgbWV0YXZhcj0iY2hhcmFjdGVyIiksCiAgbWFrZV9vcHRpb24oYygiLXIiLCAiLS10cmVhdG1lbnQiKSwgdHlwZT0iY2hhcmFjdGVyIiwgZGVmYXVsdD0iLi8iLAogICAgICAgICAgICAgIGhlbHA9IlRyZWF0bWVudCIsIG1ldGF2YXI9ImNoYXJhY3RlciIpLAogIG1ha2Vfb3B0aW9uKGMoIi1jIiwgIi0tY29udHJvbCIpLCB0eXBlPSJjaGFyYWN0ZXIiLCBkZWZhdWx0PSIuLyIsCiAgICAgICAgICAgICAgaGVscD0iQ29udHJvbCIsIG1ldGF2YXI9ImNoYXJhY3RlciIpLAogIG1ha2Vfb3B0aW9uKGMoIi1wIiwgIi0tcGNvZGluZyIpLCB0eXBlPSJjaGFyYWN0ZXIiLCBkZWZhdWx0PSIuLyIsCiAgICAgICAgICAgICAgaGVscD0icHJvZGluZyBjb2RpbmcgZ2VuZSBsaXN0IiwgbWV0YXZhcj0iY2hhcmFjdGVyIikKKQoJICAgICAgCgpvcHRfcGFyc2VyID0gT3B0aW9uUGFyc2VyKG9wdGlvbl9saXN0PW9wdGlvbl9saXN0KQpvcHQgPSBwYXJzZV9hcmdzKG9wdF9wYXJzZXIpCgpiYXRjaCA8LSBvcHQkYmF0Y2ggCkNvbmRpdGlvbiA8LSBvcHQkY29uZGl0aW9uCm1ldGFkYXRhIDwtIG9wdCRtZXRhCmdlbmUgPC0gb3B0JHR4MmdlbmUKVHJlYXRtZW50IDwtIG9wdCR0cmVhdG1lbnQKcHJpbnQoVHJlYXRtZW50KQpDb250cm9sIDwtIG9wdCRjb250cm9sCnByaW50KENvbnRyb2wpClR5cGUgPC0gb3B0JHR5cGUKaW5wdXQgPC0gb3B0JGlucHV0CgpwcmludCgiUmVhZGluZyBtZXRhIGZpbGUgLi4uIikKbWV0YSA8LSByZWFkLnRhYmxlKGZpbGUgPSBtZXRhZGF0YSwgc2VwPScsJywgaGVhZGVyID0gVFJVRSwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFLCByb3cubmFtZXMgPSAxKQpzYW1wbGVzIDwtIHN1YnNldChtZXRhLCBtZXRhWyxDb25kaXRpb25dICE9ICdOQScpCgoKcHJpbnQgKCJSZWFkaW5nIHR4MmdlbmUgZmlsZSAuLi4iKQp0eDJnZW5lIDwtIHJlYWQudGFibGUoZmlsZSA9IGdlbmUsc2VwID0gIiwiLGhlYWRlciA9IFRSVUUpCgoKaWYgKGlzLm51bGwob3B0JGlucHV0KSB8fCBpcy5udWxsKG9wdCR0eDJnZW5lKSB8fCBpcy5udWxsKG9wdCR0eXBlKSl7CiAgcHJpbnRfaGVscChvcHRfcGFyc2VyKQogIHN0b3AoIkF0IGxlYXN0IDMgYXJndW1lbnRzIG11c3QgYmUgc3VwcGxpZWQgIiwgY2FsbC49RkFMU0UpCn0KCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjLS0tLS0tLS0tLS0tLS1wcm9ncmFtbWUtLS0tLS0tLS0tLS0tIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwojIyMtLS0tSWYgeW91IGhhdmUgdHJhbnNjcmlwdCBxdWFudGlmaWNhdGlvbiBmaWxlcywgYXMgcHJvZHVjZWQgYnkgU2FsbW9uLCBTYWlsZmlzaCwgb3Iga2FsbGlzdG8sIHlvdSB3b3VsZCB1c2UgREVTZXFEYXRhU2V0RnJvbVR4aW1wb3J0LgoKVHJhbnNjcmlwdCA8LSBmdW5jdGlvbihmaWxlcyxzYW1wbGVzLHR4MmdlbmUsVHlwZSxiYXRjaCl7CgogIGZpbGVsaXN0IDwtIHN0cnNwbGl0KGZpbGVzLCAiXFwsIilbWzFdXQogIHByaW50KGZpbGVsaXN0KQogICNwcmludChyb3duYW1lcyhtZXRhKSkKICBmaWxlbGlzdC5zYW1wbGVzIDwtIHNhcHBseShyb3duYW1lcyhzYW1wbGVzKSwgZnVuY3Rpb24oeCkgZ3JlcChwYXN0ZTAoIlxcYiIseCwiXFxiIiksIGZpbGVsaXN0LCB2YWx1ZSA9IFRSVUUpKQogIAogICNleGFjdGx5IG1hdGNoIG1heSBvdXRwdXQgYSBsaXN0LCBuZWVkIHRvIGNvbnZlcnQgbGlzdCB0byBjaGFyYWN0ZXIKICBmaWxlbGlzdC5zYW1wbGVzIDwtIGZpbGVsaXN0LnNhbXBsZXNbbGFwcGx5KGZpbGVsaXN0LnNhbXBsZXMsbGVuZ3RoKT4wXQogIHByaW50KHBhc3RlKCJUaGVyZSBhcmUgIixsZW5ndGgoZmlsZWxpc3Quc2FtcGxlcyksICJzYW1wbGVzIHRvIGJlIGNvbXBhcmVkIC4uLiIsIHNlcCA9ICIiKSkKCiAgdG1wX2NociA8LSBhcy5jaGFyYWN0ZXIoZmlsZWxpc3Quc2FtcGxlcykKICBuYW1lcyh0bXBfY2hyKSA8LSBuYW1lcyhmaWxlbGlzdC5zYW1wbGVzKQoKICBmaWxlbGlzdC5zYW1wbGVzIDwtIHRtcF9jaHIKICBwcmludChmaWxlbGlzdC5zYW1wbGVzKQoKICB0eGkgPC0gdHhpbXBvcnQoZmlsZWxpc3Quc2FtcGxlcywgdHlwZT1UeXBlLCB0eDJnZW5lPXR4MmdlbmUpCiAgdHhpJGxlbmd0aFt0eGkkbGVuZ3RoID09IDBdIDwtIDEKICAKICBwcmludChoZWFkKHR4aSRjb3VudHMpKQogIGV4cHJzbiA8LSB0eGkkY291bnRzCiAgCiAgdG1wX21hdHJpeCA8LSB0eGkkYWJ1bmRhbmNlCgogIAogIGlmKGJhdGNoICE9ICJGYWxzZSIpewogICAgcHJpbnQgKHBhc3RlKCJSdW5uaW5nIERFU2VxMiB3aXRoIGJhdGNoIGVmZmVjdHMgIixvcHQkYmF0Y2gsIiBvbiAiLG9wdCRjb25kaXRpb24sc2VwPSIiKSkgIAogICAgIAogICAgY29sRGF0YSA8LSBzYW1wbGVzWyxjKGJhdGNoICxDb25kaXRpb24pXQogICAgY29sbmFtZXMoY29sRGF0YSkgPC0gYygiQmF0Y2giLCJDb25kaXRpb24iKQogICAgICAgCiAgICBkZHNUeGkgPC0gREVTZXFEYXRhU2V0RnJvbVR4aW1wb3J0KHR4aSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sRGF0YSA9IGNvbERhdGEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlc2lnbiA9IH4gQmF0Y2ggKyBDb25kaXRpb24pCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogICAgcHJpbnQgKCJHZW5lcmF0aW5nIFRQTSBtYXRyaXggLi4uIikKICAgIHdyaXRlLnRhYmxlKHRtcF9tYXRyaXgscGFzdGUob3B0JG91dHBhdGgsb3B0JGNvbmRpdGlvbiwnXycsb3B0JHRyZWF0bWVudCwnX3ZzXycsb3B0JGNvbnRyb2wsJ19UUE1zLnR4dCcsc2VwID0gIiIpLHF1b3RlID0gRkFMU0Usc2VwID0gIlx0IikKICAgIHdyaXRlLnRhYmxlKGV4cHJzbixwYXN0ZShvcHQkb3V0cGF0aCxvcHQkY29uZGl0aW9uLCdfJyxvcHQkdHJlYXRtZW50LCdfdnNfJyxvcHQkY29udHJvbCwnX2dlbmVjb3VudC50eHQnLHNlcCA9ICIiKSxxdW90ZSA9IEZBTFNFLHNlcCA9ICJcdCIpCgogIH1lbHNlewogICAgcHJpbnQocGFzdGUoIlJ1bm5pbmcgREVTZXEyIG9uICIsb3B0JGNvbmRpdGlvbixzZXA9IiIpKQogICAgCiAgICBjb2xEYXRhIDwtIGNiaW5kKHJvd25hbWVzKHNhbXBsZXMpLHNhbXBsZXNbLENvbmRpdGlvbl0pCiAgICBjb2xuYW1lcyhjb2xEYXRhKSA8LSBjKCdzYW1wbGUnLCdDb25kaXRpb24nKQogICAgcHJpbnQoY29sRGF0YSkKICAgIAogICAgZGRzVHhpIDwtIERFU2VxRGF0YVNldEZyb21UeGltcG9ydCh0eGksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbERhdGEgPSBjb2xEYXRhLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkZXNpZ24gPSB+IENvbmRpdGlvbikKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICBwcmludCAoIkdlbmVyYXRpbmcgVFBNIG1hdHJpeCAuLi4iKQogICAgd3JpdGUudGFibGUoZXhwcnNuLHBhc3RlKG9wdCRvdXRwYXRoLG9wdCRjb25kaXRpb24sJ18nLG9wdCR0cmVhdG1lbnQsJ192c18nLG9wdCRjb250cm9sLCdfVFBNcy50eHQnLHNlcCA9ICIiKSxxdW90ZSA9IEZBTFNFLHNlcCA9ICJcdCIpCiAgfQogIGRkcyA8LSBERVNlcShkZHNUeGkpCiAgCiAgcmV0dXJuIChkZHMpCn0KCgpwcmludCAoIlN0YXIgcnVubmluZyBERVNlcTIgLi4uIikKZGRzIDwtIFRyYW5zY3JpcHQoZmlsZXMgPSBpbnB1dCxzYW1wbGVzLCB0eDJnZW5lID0gdHgyZ2VuZSwgVHlwZSA9IFR5cGUsIGJhdGNoID0gYmF0Y2gpCnByaW50KGNsYXNzKGRkcykpCgpzYXZlLmltYWdlKCJnaXRfdmVyc2lvbi5SRGF0YSIpCgpwcmludCAocGFzdGUoIkNvbXBhcmluZyAiLG9wdCR0cmVhdG1lbnQgLCAiIFZTICIsIG9wdCRjb250cm9sLCBzZXAgPSAiIikpCnJlcyA8LSByZXN1bHRzKGRkcywgY29udHJhc3QgPSBjKCJDb25kaXRpb24iLGMob3B0JHRyZWF0bWVudCxvcHQkY29udHJvbCkpKQoJCQkgICAgIApzb3VyY2UoInNyYy9kaWZmZXJlbnRpYWxleHByL2NsdXN0ZXJpbmdoZWF0bWFwLlIiKQpjbHVzdGVyaW5nX2hlYXRtYXAoZGRzLCByZXMsIG9wdCRwY29kaW5nLCBvcHQkb3V0cGF0aCwgb3B0JHRyZWF0bWVudCwgb3B0JGNvbnRyb2wpCgpyZXNfZmluYWwgPC0gYXMuZGF0YS5mcmFtZShyZXMpCnJlc19maW5hbCRHZW5lX25hbWUgPC0gcm93bmFtZXMocmVzX2ZpbmFsKQpyZXNfZmluYWwgPC0gcmVzX2ZpbmFsW2MoNywgMTo2KV0KcmVzX2ZpbmFsJGAtbG9nMTAocGFkailgIDwtIC1sb2cxMChyZXNfZmluYWwkcGFkaikKd3JpdGUudGFibGUocmVzX2ZpbmFsLGZpbGUgPSBwYXN0ZShvcHQkb3V0cGF0aCxvcHQkY29uZGl0aW9uLCdfJyxvcHQkdHJlYXRtZW50LCdfdnNfJyxvcHQkY29udHJvbCwnX0RFU2VxMi50eHQnLHNlcCA9ICIiKSwgcXVvdGUgPSBGQUxTRSxzZXAgPSAiXHQiKQpgYGAKCg==