Experiment design| Group | Barcode | Sample_ID | index | Qubit |
|---|---|---|---|---|
| control | c667 | 17-R-100 | 96-25 | 28.0 |
| control | c669 | 17-R-101 | 96-26 | 33.4 |
| control | c670 | 17-R-102 | 96-27 | 40.2 |
| control | c678 | 17-R-103 | 96-28 | 40.9 |
| control | c683 | 17-R-104 | 96-29 | 35.8 |
| H-Hcy | h653 | 17-R-105 | 96-30 | 39.1 |
| H-Hcy | h654 | 17-R-106 | 96-31 | 42.4 |
| H-Hcy | h655 | 17-R-107 | 96-32 | 34.6 |
| H-Hcy | h656 | 17-R-108 | 96-33 | 33.9 |
| H-Hcy | h658 | 17-R-109 | 96-34 | 34.6 |
| Ischemia control | Ic668 | 17-R-110 | 96-35 | 33.5 |
| Ischemia control | Ic673 | 17-R-111 | 96-36 | 41.4 |
| Ischemia control | Ic677 | 17-R-112 | 96-37 | 33.8 |
| Ischemia control | Ic679 | 17-R-113 | 96-38 | 31.9 |
| Ischemia control | Ic684 | 17-R-114 | 96-39 | 29.0 |
| Ischemia-H-Hcy | Ih649 | 17-R-115 | 96-40 | 34.3 |
| Ischemia-H-Hcy | Ih651 | 17-R-116 | 96-41 | 35.7 |
| Ischemia-H-Hcy | Ih652 | 17-R-117 | 96-42 | 40.3 |
| Ischemia-H-Hcy | Ih659 | 17-R-118 | 96-43 | 43.5 |
| Ischemia-H-Hcy | Ih661 | 17-R-119 | 96-44 | 42.7 |
RNASeq Analysis Environment Set UpReference Genomes and annotations from UCSC mm10: http://hgdownload.cse.ucsc.edu/goldenPath/mm10/
#!/bin/bash
#annotation and data
project_dir=/zzh_gpfs02/jixing/Project/leishjie_Hcy
fastq_raw=/zzh_gpfs02/jixing/DataBase/Hcy_before
#adapter file
adapter=/zzh_gpfs02/jixing/Project/leishjie_Hcy/adapter.fasta
#tools
trim_adapter=/zzh_gpfs/home/zzhgroup/jixing/workspace/biosoft/Trimmomatic-0.36/trimmomatic-0.36.jar
Fastqc=/zzh_gpfs/apps/FastQC/fastqc
P=8 #use 16 threads/cores
mkdir ${project_dir}/processed_data/fastq_processed
mkdir ${project_dir}/processed_data/fastqc
cd ${fastq_raw}
find . -name '*_R1_001.fastq.gz'| sed 's/_R1_001.fastq.gz//g'| tr -d './' > ${project_dir}/sample_id.txt
cd ${project_dir}/processed_data/fastq_processed
for SAMPLE_ID in `cat ${project_dir}/sample_id.txt`
do
cat > ${SAMPLE_ID}.sh <<EOF
java -jar ${trim_adapter} PE -phred33 ${fastq_raw}/${SAMPLE_ID}_R1_001.fastq.gz ${fastq_raw}/${SAMPLE_ID}_R2_001.fastq.gz\
${SAMPLE_ID}_R1.fastq.gz unpaired_${SAMPLE_ID}_R1.fastq.gz ${SAMPLE_ID}_R2.fastq.gz unpaired_${SAMPLE_ID}_R2.fastq.gz \
ILLUMINACLIP:${adapter}:2:30:10:1:true \
LEADING:3 TRAILING:3 \
AVGQUAL:20 \
SLIDINGWINDOW:7:15 MINLEN:50
${Fastqc} ${SAMPLE_ID}_R1.fastq.gz ${SAMPLE_ID}_R2.fastq.gz -o ${project_dir}/processed_data/fastqc
EOF
bsub < ${SAMPLE_ID}.sh
rm ${SAMPLE_ID}.sh
doneSTAR Indexingproject_dir=/zzh_gpfs02/jixing/Project/leishjie_Hcy
ref_gtf=/zzh_gpfs02/jixing/Annotation/mm10/idx_hisat2/genes.gtf
ref_fasta=/zzh_gpfs02/jixing/Annotation/mm10/fa/genome.fa
#tools
RUN_STAR=/zzh_gpfs/home/zzhgroup/jixing/miniconda2/bin/STAR
#parameter
P=8 #use 8 threads/cores
sjdbOverhang=149
cd ${project_dir}/processed_data
mkdir star_index
$RUN_STAR --runMode genomeGenerate --runThreadN $P \
--genomeDir ${project_dir}/processed_data/star_index \
--limitGenomeGenerateRAM 136410676608 \
--genomeFastaFiles ${ref_fasta} \
--sjdbGTFfile ${ref_gtf} \
--sjdbOverhang ${sjdbOverhang}STAR Alignment#!/bin/r
project_dir=/zzh_gpfs02/jixing/Project/leishjie_Hcy
fastq_clean=${project_dir}/processed_data/fastq_processed
ref_gtf=/zzh_gpfs02/jixing/Annotation/mm10/idx_hisat2/genes.gtf
ref_fasta=/zzh_gpfs02/jixing/Annotation/mm10/fa/genome.fa
star_index=${project_dir}/processed_data/star_index
#tools=================================================
RUN_STAR=/zzh_gpfs/home/zzhgroup/jixing/miniconda2/bin/STAR
SAMTOOLS=/zzh_gpfs/home/zzhgroup/jixing/workspace/rnaseq/tools/samtools-1.1/samtools
P=8 #use 8 threads/cores
sjdbOverhang=149
mkdir ${project_dir}/processed_data/algn_star
cd ${project_dir}/processed_data/algn_star
for SAMPLE_ID in `cat ${project_dir}/sample_id.txt`
do
cat > ${SAMPLE_ID}.sh <<EOF
#!/bin/bash
mkdir ${project_dir}/processed_data/algn_star/${SAMPLE_ID}
cd ${project_dir}/processed_data/algn_star/${SAMPLE_ID}
${RUN_STAR} --runMode alignReads --runThreadN $P --sjdbOverhang $sjdbOverhang \
--sjdbGTFfile ${ref_gtf} \
--genomeDir ${star_index} \
--outSAMstrandField intronMotif \
--outFileNamePrefix ${SAMPLE_ID}. \
--outSAMtype BAM SortedByCoordinate \
--outFilterScoreMinOverLread 0.33 --outFilterMatchNminOverLread 0.33 \
--readFilesIn ${fastq_clean}/${SAMPLE_ID}_R1.fastq.gz ${fastq_clean}/${SAMPLE_ID}_R2.fastq.gz \
--readFilesCommand zcat
${SAMTOOLS} sort ${SAMPLE_ID}.Aligned.sortedByCoord.out.bam ${SAMPLE_ID}.star.sorted
${SAMTOOLS} index ${SAMPLE_ID}.star.sorted.bam
EOF
bsub < ${SAMPLE_ID}.sh
rm ${SAMPLE_ID}.sh
doneExpression Count wit htseq-count#!/bin/bash
project_dir=/zzh_gpfs02/jixing/Project/leishjie_Hcy
ref_gtf=/zzh_gpfs02/jixing/Annotation/mm10/idx_hisat2/genes.gtf
ref_fasta=/zzh_gpfs02/jixing/Annotation/mm10/fa/genome.fa
fastq_clean=${project_dir}/processed_data/fastq_processed
bam_dir=${project_dir}/processed_data/algn_star
#tools========
#HTSeq version 0.6.0.
run_htseqCount=/zzh_gpfs/home/zzhgroup/jixing/anaconda3/bin/htseq-count
P=8 #use 8 threads/cores
mkdir ${project_dir}/processed_data/count_htseq_star
cd ${project_dir}/processed_data/count_htseq_star
for SAMPLE_ID in `cat ${project_dir}/sample_id.txt`
do
cat > ${SAMPLE_ID}.sh <<EOF
#!/bin/bash
$run_htseqCount \
--format bam --order pos \
--mode intersection-strict \
--stranded reverse \
--minaqual 1 --type exon --idattr gene_id \
${bam_dir}/${SAMPLE_ID}/${SAMPLE_ID}.star.sorted.bam ${ref_gtf} > ${SAMPLE_ID}_gene.tsv
EOF
bsub < ${SAMPLE_ID}.sh
rm ${SAMPLE_ID}.sh
done
# -r pos sort.bam base position
# -r name sorted.bam base read name
# a new advice : use a name sorted bamfile for htseqDifferential Expression Analysis Use DESeq2library(tidyverse)
#第一列是公司测序ID 第二列是实验室ID
library(readxl)
metadata <- read_excel("./NFYKD_dataOutput.20171113.20171117.xlsx")
colnames(metadata)[1:2] <- c("company_ID","Lab_ID")
library(readr)
countmatrix <- read_csv("star_count_merged.csv")
colnames(countmatrix)[2:dim(countmatrix)[2]] <-
colnames(countmatrix)[2:dim(countmatrix)[2]] %>% str_sub(1,nchar(metadata$company_ID[1]))
colnames(countmatrix)[2:dim(countmatrix)[2]] <-
metadata %>% arrange(company_ID) %>% dplyr::select(Lab_ID) %>% .[[1]]
rownames(countmatrix) <- countmatrix$gene_name
countmatrix <- dplyr::select(countmatrix,starts_with("I"), -Ic673, -Ic684)
colnames(countmatrix)ExpDesign <- data.frame(row.names = colnames(countmatrix),
name = colnames(countmatrix),
hcy = c(rep("untrt",3), rep("trt",5)))
dds <- DESeq2::DESeqDataSetFromMatrix(countData = countmatrix, colData = ExpDesign, design = ~hcy)nrow(dds)
dds <- dds[rowSums(DESeq2::counts(dds)) > 1, ]# remove no-expression gene
nrow(dds)library("dplyr")
library("ggplot2")
library(DESeq2)
rld <- rlog(dds, blind = FALSE)
vsd <- vst(dds, blind = FALSE)
dds <- estimateSizeFactors(dds)
df <- bind_rows(
as_data_frame(log2(counts(dds, normalized=TRUE)[, 1:2]+1)) %>%
mutate(transformation = "log2(x + 1)"),
as_data_frame(assay(rld)[, 1:2]) %>% mutate(transformation = "rlog"),
as_data_frame(assay(vsd)[, 1:2]) %>% mutate(transformation = "vst"))
colnames(df)[1:2] <- c("x", "y")
ggplot(df, aes(x = x, y = y)) + geom_hex(bins = 80) +
coord_fixed() + facet_grid( . ~ transformation) sampleDists <- dist(t(assay(rld)))
library("pheatmap")
library("RColorBrewer")
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- colnames(rld)
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists,
col = colors)library(ggrepel)
plotPCA(rld, intgroup = c( "hcy"))
(pcaData <- plotPCA(rld, intgroup = c( "hcy"), returnData = TRUE))
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(x = PC1, y = PC2, color = hcy)) +
geom_point(size =3) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
coord_fixed()+
theme_bw() +
ggrepel::geom_label_repel(aes(label = name), data = pcaData)dds <- DESeq2::DESeq(dds)
library(dplyr)
#res <- results(dds, contrast=c("dex","trt","untrt"))
res.05 <- results(dds, contrast=c("hcy","trt","untrt"), alpha = 0.05)
summary(res.05)library("AnnotationDbi")
library("org.Mm.eg.db")
columns(org.Mm.eg.db)
#tolower() toupper()
res.05$symbol <- mapIds(org.Mm.eg.db,
keys=row.names(res.05),
column="SYMBOL",
keytype="SYMBOL",
multiVals="first")
res.05$ensembl <- mapIds(org.Mm.eg.db,
keys=row.names(res.05),
column="ENSEMBL",
keytype="SYMBOL",
multiVals="first")
res.05$entrezid <- mapIds(org.Mm.eg.db,
keys=row.names(res.05),
column="ENTREZID",
keytype="SYMBOL",
multiVals="first")library(dplyr)
dir.create("./table/DEG_report/Ih_vs_Ic",recursive = TRUE)
DEG <- as.data.frame(res.05)
DEG %<>%
dplyr::mutate(abs_log2FC = abs(log2FoldChange)) %>%
dplyr::mutate(FoldChange = 2^(log2FoldChange)) %>%
dplyr::mutate(trend = if_else(log2FoldChange <= 0,"down","up"))
write.csv(DEG, file="./table/DEG_report/Ih_vs_Ic/results_DEG.csv")library("ReportingTools")
report <- dplyr::select(DEG, symbol,abs_log2FC,trend,FoldChange,log2FoldChange,pvalue, padj, entrezid) %>%
dplyr::arrange(abs_log2FC)
htmlRep <- HTMLReport(shortName="DEG_list", title="ZERO report",
reportDirectory="./table/DEG_report/Ih_vs_Ic")
publish(report, htmlRep)
url <- finish(htmlRep)Enrichment Analysis: GO and KEGGlibrary(enrichR)
listEnrichrDbs()
DEG %>% #combine
dplyr::filter(padj <= 0.05) %>%
dplyr::filter(abs_log2FC >= 1) %>%
dplyr::arrange(abs_log2FC) %>%
dplyr::select(symbol) %>%
.[[1]] %>% as.character() %>%
enrichr(., databases = "GO_Biological_Process_2017")DEG %>% #combine
dplyr::filter(padj <= 0.05) %>%
dplyr::filter(abs_log2FC >= 1) %>%
dplyr::arrange(abs_log2FC) %>%
dplyr::select(symbol) %>%
.[[1]] %>% as.character() %>%
enrichr(., databases = "GO_Cellular_Component_2017")DEG %>% #combine
dplyr::filter(padj <= 0.05) %>%
dplyr::filter(abs_log2FC >= 1) %>%
dplyr::arrange(abs_log2FC) %>%
dplyr::select(symbol) %>%
.[[1]] %>% as.character() %>%
enrichr(., databases = "GO_Molecular_Function_2017")DEG %>% #combine
dplyr::filter(padj <= 0.05) %>%
dplyr::filter(abs_log2FC >= 1) %>%
dplyr::arrange(abs_log2FC) %>%
dplyr::select(symbol) %>%
.[[1]] %>% as.character() %>%
enrichr(., databases = "KEGG_2016")topGene <- rownames(res.05)[which.min(res.05$padj)]
plotCounts(dds, gene = topGene, intgroup=c("hcy"))
plotCounts(dds, gene = "Numb", intgroup=c("hcy"))
plotCounts(dds, gene = "Trp53", intgroup=c("hcy"))res <- lfcShrink(dds, contrast=c("hcy","trt","untrt"), res=res.05)
plotMA(res, ylim = c(-5, 5))hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
col = "grey50", border = "white")library("genefilter")
topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
mat <- assay(rld)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
#anno <- as.data.frame(colData(rld)[, c("hcy")])
pheatmap(mat)devtools::session_info()## ─ Session info ──────────────────────────────────────────────────────────
## setting value
## version R version 3.4.4 Patched (2018-03-19 r74624)
## os macOS High Sierra 10.13.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Asia/Shanghai
## date 2018-11-21
##
## ─ Packages ──────────────────────────────────────────────────────────────
## package * version date lib
## assertthat 0.2.0 2017-04-11 [1]
## backports 1.1.2 2017-12-13 [1]
## base64enc 0.1-3 2015-07-28 [1]
## callr 3.0.0 2018-08-24 [1]
## cli 1.0.1 2018-09-25 [1]
## crayon 1.3.4 2017-09-16 [1]
## debugme 1.1.0 2017-10-22 [1]
## desc 1.2.0 2018-05-01 [1]
## devtools 2.0.1 2018-10-26 [1]
## digest 0.6.18 2018-10-10 [1]
## dplyr 0.7.99.9000 2018-10-13 [1]
## evaluate 0.12 2018-10-09 [1]
## fs 1.2.6 2018-08-23 [1]
## glue 1.3.0 2018-08-23 [1]
## htmldeps 0.1.1 2018-07-29 [1]
## htmltools 0.3.6 2017-04-28 [1]
## knitr 1.20.15 2018-10-08 [1]
## magrittr 1.5 2014-11-22 [1]
## memoise 1.1.0 2018-07-28 [1]
## pillar 1.3.0 2018-07-14 [1]
## pkgbuild 1.0.2 2018-10-16 [1]
## pkgconfig 2.0.2 2018-08-16 [1]
## pkgload 1.0.2 2018-10-29 [1]
## prettyunits 1.0.2 2015-07-13 [1]
## processx 3.2.0 2018-08-16 [1]
## ps 1.2.1 2018-11-06 [1]
## purrr 0.2.5 2018-05-29 [1]
## R6 2.3.0 2018-10-04 [1]
## Rcpp 1.0.0 2018-11-07 [1]
## remotes 2.0.2 2018-10-30 [1]
## rlang 0.3.0.1 2018-10-25 [1]
## rmarkdown 1.10.13 2018-10-08 [1]
## rprojroot 1.3-2 2018-01-03 [1]
## rstudioapi 0.8 2018-10-02 [1]
## sessioninfo 1.1.1 2018-11-05 [1]
## stringi 1.2.4 2018-07-20 [1]
## stringr 1.3.1 2018-05-10 [1]
## testthat 2.0.1 2018-10-13 [1]
## tibble 1.4.2 2018-01-22 [1]
## tidyselect 0.2.5 2018-10-11 [1]
## usethis 1.4.0 2018-08-14 [1]
## withr 2.1.2 2018-07-28 [1]
## xfun 0.4 2018-10-23 [1]
## yaml 2.2.0 2018-07-25 [1]
## source
## CRAN (R 3.4.0)
## CRAN (R 3.4.3)
## CRAN (R 3.4.0)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.1)
## CRAN (R 3.4.2)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## Github (tidyverse/dplyr@1573f60)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## Github (tidyverse/glue@4e74901)
## Github (rstudio/htmldeps@c1023e0)
## CRAN (R 3.4.0)
## Github (yihui/knitr@3d6a6c3)
## CRAN (R 3.4.0)
## Github (hadley/memoise@06d16ec)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.0)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## Github (rstudio/rmarkdown@6931cdb)
## CRAN (R 3.4.3)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## CRAN (R 3.4.3)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
## Github (jimhester/withr@fe56f20)
## CRAN (R 3.4.4)
## CRAN (R 3.4.4)
##
## [1] /Library/Frameworks/R.framework/Versions/3.4/Resources/library