library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
library(dplyr)
library(apeglm)
library(ggplot2)
library(RColorBrewer)
library(vsn)
library(pheatmap)
data <- read.csv("D:/ChIP seq and RNA seq analysis/E15 to E17 Foxg1 RNA seq/Foxg1_countsMatrix.csv")
data.cortex <- data[,1:8]
data.hc <- data[, c(1,9:15)]
data.Ctx_vs_HC <- data[,c(1:4,9:11)]
x1 <- data.Ctx_vs_HC[,-1]
rownames(x1) <- data.Ctx_vs_HC[,1]
head(x1)
## Ctrl_Ctx1 Ctrl_Ctx2 Ctrl_Ctx3 Ctrl_HC1 Ctrl_HC2 Ctrl_HC3
## Zc3h14 2631 2258 2946 4200 3261 2990
## Troap 502 537 522 799 499 540
## Clca2 0 0 1 0 5 2
## 1810013L24Rik 1732 1525 1969 2979 2313 1933
## Srsf7 7693 6986 8574 10672 8991 9577
## Fignl2 78 66 71 74 58 25
Region <- factor(c(rep("Ctx",3),rep("HC", 3)))
coldata <- data.frame(row.names=colnames(x1),Region)
coldata
## Region
## Ctrl_Ctx1 Ctx
## Ctrl_Ctx2 Ctx
## Ctrl_Ctx3 Ctx
## Ctrl_HC1 HC
## Ctrl_HC2 HC
## Ctrl_HC3 HC
dds <- DESeqDataSetFromMatrix(countData = x1, colData = coldata, design= ~ Region)
relevel(dds$Region, ref = 'Ctx')
## [1] Ctx Ctx Ctx HC HC HC
## Levels: Ctx HC
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- results(dds)
res
## log2 fold change (MLE): Region HC vs Ctx
## Wald test p-value: Region HC vs Ctx
## DataFrame with 25229 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## Zc3h14 2987.67348 -0.0843253 0.0809422 -1.041797 0.2975057
## Troap 563.20413 -0.2830905 0.1525825 -1.855328 0.0635494
## Clca2 1.21973 2.2841755 2.3378065 0.977059 0.3285398
## 1810013L24Rik 2023.68286 -0.0394208 0.0879985 -0.447971 0.6541741
## Srsf7 8681.96743 -0.1538219 0.1054412 -1.458840 0.1446092
## ... ... ... ... ... ...
## Gm42517 90.69342 0.8527168 0.277170 3.0765094 0.0020944
## Gm5565 1.52847 -0.0724341 1.717404 -0.0421765 0.9663580
## Paqr9 240.38149 0.2078848 0.215576 0.9643235 0.3348838
## Btbd35f26 0.00000 NA NA NA NA
## Btbd35f6 0.00000 NA NA NA NA
## padj
## <numeric>
## Zc3h14 0.608144
## Troap 0.243988
## Clca2 NA
## 1810013L24Rik 0.866612
## Srsf7 0.407589
## ... ...
## Gm42517 0.0186469
## Gm5565 NA
## Paqr9 0.6472733
## Btbd35f26 NA
## Btbd35f6 NA
write.csv(res, file = "Foxg1_HC_vs_Ctx.csv")
resDF <- as.data.frame(res)
###Create new column “diffexpressed”
resDF$diffexpressed <- "NO"
resDF$diffexpressed[resDF$log2FoldChange > 0.58 & resDF$padj < 0.05] <- "HC_UP"
resDF$diffexpressed[resDF$log2FoldChange < -0.58 & resDF$padj < 0.05] <- "CTX_UP"
res_cutoff <- resDF %>% filter(diffexpressed=="HC_UP" | diffexpressed =="CTX_UP") %>% drop_na()
head(res_cutoff, 20)
## baseMean log2FoldChange lfcSE stat pvalue
## Fignl2 63.537993 -0.9955802 0.3286456 -3.029343 2.450864e-03
## Vsx1 3.874003 4.1049206 1.5134410 2.712310 6.681616e-03
## Hpcal1 390.513372 1.1747488 0.1759250 6.677554 2.429633e-11
## Akap7 635.486176 -0.7064122 0.1450917 -4.868729 1.123185e-06
## Slit3 314.831292 2.4566426 0.3146453 7.807658 5.826049e-15
## Plekhb1 173.347529 -1.0592592 0.1913806 -5.534830 3.115306e-08
## Rara 265.477861 -1.1328709 0.2753401 -4.114443 3.881158e-05
## Calca 11.278642 -2.2879732 0.6991095 -3.272697 1.065267e-03
## Nlgn1 1202.504160 0.7097112 0.1301872 5.451468 4.995563e-08
## Shroom2 691.602194 -0.6084318 0.1427849 -4.261176 2.033539e-05
## Arpp21 946.639485 -0.6168235 0.1571837 -3.924220 8.701132e-05
## Cdk6 4404.239815 -0.7223648 0.1786920 -4.042513 5.288140e-05
## Ackr3 348.609631 -1.9518716 0.6337081 -3.080080 2.069451e-03
## Nkain3 458.338436 0.9197504 0.2159493 4.259104 2.052480e-05
## Trpc4 444.494311 -1.6078437 0.2020987 -7.955734 1.780727e-15
## Bend5 669.414056 -0.7434878 0.1321228 -5.627246 1.831090e-08
## Scg2 112.300712 1.5151910 0.2788652 5.433417 5.528506e-08
## Tectb 7.284991 -3.0346716 0.9913627 -3.061111 2.205170e-03
## Mgam 47.714301 0.9887121 0.3590636 2.753585 5.894644e-03
## D030068K23Rik 135.594596 -1.3225058 0.2661195 -4.969594 6.709315e-07
## padj diffexpressed
## Fignl2 2.124299e-02 CTX_UP
## Vsx1 4.764726e-02 HC_UP
## Hpcal1 1.291571e-09 HC_UP
## Akap7 2.608779e-05 CTX_UP
## Slit3 4.630578e-13 HC_UP
## Plekhb1 1.012042e-06 CTX_UP
## Rara 6.323004e-04 CTX_UP
## Calca 1.081328e-02 CTX_UP
## Nlgn1 1.575960e-06 HC_UP
## Shroom2 3.534516e-04 CTX_UP
## Arpp21 1.273133e-03 CTX_UP
## Cdk6 8.206893e-04 CTX_UP
## Ackr3 1.847498e-02 CTX_UP
## Nkain3 3.563654e-04 HC_UP
## Trpc4 1.487543e-13 CTX_UP
## Bend5 6.181533e-07 CTX_UP
## Scg2 1.733309e-06 HC_UP
## Tectb 1.942874e-02 CTX_UP
## Mgam 4.293283e-02 HC_UP
## D030068K23Rik 1.639576e-05 CTX_UP
write.csv(res_cutoff, file = "Foxg1_HC_vs_CTX_results_cutoff.csv")
p <- ggplot(resDF, aes(x=log2FoldChange, y=-log10(padj), col=diffexpressed)) + geom_point() +
theme_minimal()
mycolors <- c("blue", "red", "black")
names(mycolors) <- c("CTX_UP", "HC_UP", "NO")
p3 <- p + scale_colour_manual(values = mycolors)
p3
## Warning: Removed 8856 rows containing missing values (geom_point).
plotMA(res,ylim=c(-2,2))
resultsNames(dds)
## [1] "Intercept" "Region_HC_vs_Ctx"
resNorm <- lfcShrink(dds, coef=2, type="normal")
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
resAsh <- lfcShrink(dds, coef=2, type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
## Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
## https://doi.org/10.1093/biostatistics/kxw041
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-3,3)
plotMA(res,xlim=xlim,ylim=ylim, main="apeglm")
plotMA(resNorm,xlim=xlim,ylim=ylim, main="normal")
plotMA(resAsh,xlim=xlim,ylim=ylim, main="ashr")
d <- plotCounts(dds, gene=which.min(res$padj), intgroup="Region", returnData=TRUE)
vsd <- vst(dds, blind=FALSE)
rld <- rlog(dds, blind=FALSE)
head(assay(vsd),10)
## Ctrl_Ctx1 Ctrl_Ctx2 Ctrl_Ctx3 Ctrl_HC1 Ctrl_HC2 Ctrl_HC3
## Zc3h14 11.713046 11.650163 11.741272 11.656634 11.572415 11.642745
## Troap 9.702057 9.887516 9.646850 9.654718 9.372604 9.590326
## Clca2 7.033334 7.033334 7.163976 7.033334 7.293650 7.210020
## 1810013L24Rik 11.164577 11.136802 11.211381 11.206145 11.124474 11.073668
## Srsf7 13.186961 13.199971 13.210078 12.931289 12.956665 13.240792
## Fignl2 8.209022 8.175258 8.109399 7.930910 7.907698 7.653615
## Sema6a 10.256994 10.072016 10.206432 10.590084 10.544197 10.378334
## Anapc15 9.674578 9.714428 9.731620 9.632423 9.605059 9.578905
## Misp3 7.170118 7.033334 7.033334 7.033334 7.198106 7.033334
## Nmb 8.706304 8.532080 8.655471 8.410858 8.631606 8.831155
ntd <- normTransform(dds)
head(ntd)
## class: DESeqTransform
## dim: 6 6
## metadata(1): version
## assays(1): ''
## rownames(6): Zc3h14 Troap ... Srsf7 Fignl2
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): Ctrl_Ctx1 Ctrl_Ctx2 ... Ctrl_HC2 Ctrl_HC3
## colData names(2): Region sizeFactor
meanSdPlot(assay(ntd))
meanSdPlot(assay(vsd))
meanSdPlot(assay(rld))
select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20]
colData(dds)
## DataFrame with 6 rows and 2 columns
## Region sizeFactor
## <factor> <numeric>
## Ctrl_Ctx1 Ctx 0.848618
## Ctrl_Ctx2 Ctx 0.763515
## Ctrl_Ctx3 Ctx 0.930346
## Ctrl_HC1 HC 1.413285
## Ctrl_HC2 HC 1.169216
## Ctrl_HC3 HC 1.016689
df <- as.data.frame(colData(dds)[,c("Region","sizeFactor")])
pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$Region, vsd$sizeFactor, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")))(255)
pheatmap(sampleDistMatrix, clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists, col=colors)
plotPCA(vsd, intgroup=c("Region"))
pcaData <- plotPCA(vsd, intgroup=c("Region"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=Region)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()