In this Differential gene expression analysis by DESeq2, I am creating different dataframes for female and male. The analysis finds the differentially expressed genes between Saline treated and PBS treated samples.
The data is from a paper Generation of a microglial developmental index in mice and in humans reveals a sex difference in maturation and immune reactivity Published in Glia in 2017.
library(DESeq2)count_data <- read.csv('/Users/nishapaudel/Documents/CASP/Data/treated_samples/counts_lps_sal.csv', sep =',', header=T, row.names = 1)
sampleinfo <- read.csv("/Users/nishapaudel/Documents/CASP/Data/female_design copy.txt", sep = ',', header = TRUE, row.names = 1, stringsAsFactors = TRUE)
dim(count_data)## [1] 21987 25
#desired_columns <- c("F_E18_1","F_E18_2","F_E18_3","F_P4_1","F_P4_2","F_P4_3","F_P4_4","F_P14_1","F_P14_2","F_P14_3","F_P14_4","F_P14_5","F_P14_6","F_P14_7", "F_P14_8","F_P14_9","F_P14_10","F_P60_S_1","F_P60_S_2","F_P60_S_3","F_P60_S_4","F_P60_S_5","F_P60_S_6","F_P60_S_7")
desired_columns <- c("F_P60_S_1","F_P60_S_2","F_P60_S_3","F_P60_S_4","F_P60_S_5","F_P60_S_6","F_P60_S_7","F_P60_L_1","F_P60_L_2","F_P60_L_3","F_P60_L_4","F_P60_L_5","F_P60_L_6" )
# Rearrange the columns based on the desired order
data_rearranged_female <- count_data[, desired_columns]sampleinfo_female <- read.csv("/Users/nishapaudel/Documents/CASP/Data/female_design copy.txt", sep = ',', header = TRUE, row.names = 1, stringsAsFactors = TRUE)
sampleinfo_female## Sex Age Treatment
## F_P60_S_1 FALSE P60_S S
## F_P60_S_2 FALSE P60_S S
## F_P60_S_3 FALSE P60_S S
## F_P60_S_4 FALSE P60_S S
## F_P60_S_5 FALSE P60_S S
## F_P60_S_6 FALSE P60_S S
## F_P60_S_7 FALSE P60_S S
## F_P60_L_1 FALSE P60_L L
## F_P60_L_2 FALSE P60_L L
## F_P60_L_3 FALSE P60_L L
## F_P60_L_4 FALSE P60_L L
## F_P60_L_5 FALSE P60_L L
## F_P60_L_6 FALSE P60_L L
### Check that sample names match in both files
all(colnames(data_rearranged_female) %in% rownames(sampleinfo_female))## [1] TRUE
all(colnames(data_rearranged_female) == rownames(sampleinfo_female))## [1] TRUE
dds <- DESeqDataSetFromMatrix(countData = data_rearranged_female, colData = sampleinfo_female, design = ~Treatment)dds <- DESeq(dds)res <-results(dds)summary(res)##
## out of 16280 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 171, 1.1%
## LFC < 0 (down) : 578, 3.6%
## outliers [1] : 3, 0.018%
## low counts [2] : 3098, 19%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
using vst to stabilise variance, alpha value cut off < 0.05, and lfc </> 0.4
vsd <- vst(dds, blind = FALSE)
zz <- cbind(as.data.frame(res),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge_deseq2 <-dge
sig <-subset(dge, padj<0.05)
head(sig)## baseMean log2FoldChange lfcSE stat pvalue
## Akap12 739.56256 -5.385832 0.2433006 -22.13653 1.406459e-108
## Ptgs2 525.84255 -3.207046 0.1986917 -16.14081 1.317923e-58
## Gbp5 328.39739 -3.837960 0.2446915 -15.68489 1.918949e-55
## Sele 151.58915 -6.647257 0.4495124 -14.78771 1.758456e-49
## Rnd1 72.55070 -4.864725 0.3381293 -14.38717 6.229004e-47
## Gm16548 52.65918 -4.630113 0.3360372 -13.77857 3.429812e-43
## padj F_P60_S_1 F_P60_S_2 F_P60_S_3 F_P60_S_4 F_P60_S_5
## Akap12 1.853573e-104 5.484713 4.911929 5.709113 5.332548 5.507535
## Ptgs2 8.684454e-55 7.019694 7.075254 6.909208 6.486816 7.058348
## Gbp5 8.429942e-52 5.551643 5.352056 5.811521 5.773566 6.132988
## Sele 5.793672e-46 3.515134 3.312628 4.077495 3.792974 3.600481
## Rnd1 1.641841e-43 4.138362 4.135194 3.340833 3.601291 4.207353
## Gm16548 7.533581e-40 3.877466 4.135194 3.840082 3.951135 2.712197
## F_P60_S_6 F_P60_S_7 F_P60_L_1 F_P60_L_2 F_P60_L_3 F_P60_L_4 F_P60_L_5
## Akap12 5.476942 6.440575 10.512091 10.510380 10.478273 10.816627 10.359082
## Ptgs2 7.120381 6.768592 10.061635 10.258348 9.626823 10.291484 9.246411
## Gbp5 6.242406 6.080103 9.417840 9.668445 8.621473 9.137764 9.158269
## Sele 4.093355 2.712197 7.442011 7.844879 7.924951 9.091157 8.384288
## Rnd1 4.093355 4.046840 6.846937 7.156737 7.292623 7.853985 7.214110
## Gm16548 3.919023 3.937322 6.896077 6.884823 6.626662 7.134643 6.337487
## F_P60_L_6
## Akap12 10.944761
## Ptgs2 10.232249
## Gbp5 9.959113
## Sele 8.964629
## Rnd1 7.553409
## Gm16548 7.433203
dge_deseq2_up_female <- rownames(subset(sig,log2FoldChange>0.4))
length(dge_deseq2_up_female)## [1] 101
dge_deseq2_down_female <- rownames(subset(sig,log2FoldChange<0.4))
length(dge_deseq2_down_female)## [1] 486
library(EnhancedVolcano)EnhancedVolcano(sig,
lab = rownames(sig),
title = 'Female P60 saline vs lps treated',
x = 'log2FoldChange',
y = 'pvalue',
subtitle = bquote('FDR cut off 0.05 and absolute LFC cut off 0.4'))desired_columns <- c("M_P60_S_1","M_P60_S_2","M_P60_S_3","M_P60_S_4","M_P60_S_5","M_P60_L_1","M_P60_L_2","M_P60_L_3","M_P60_L_4","M_P60_L_5","M_P60_L_6","M_P60_L_7")
# Rearrange the columns based on the desired order
data_rearranged_male <- count_data[, desired_columns]sampleinfo_male <- read.csv("/Users/nishapaudel/Documents/CASP/Data/male_design copy.txt", sep = ',', header = TRUE, row.names = 1, stringsAsFactors = TRUE)
dds <- DESeqDataSetFromMatrix(countData = data_rearranged_male, colData = sampleinfo_male, design = ~Age)
dds <- DESeq(dds)
res <-results(dds)
summary(res)##
## out of 16337 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 1874, 11%
## LFC < 0 (down) : 2026, 12%
## outliers [1] : 3, 0.018%
## low counts [2] : 2802, 17%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
vsd <- vst(dds, blind = FALSE)
zz <- cbind(as.data.frame(res),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge_deseq2 <-dge
sig <-subset(dge, padj<0.05)
head(sig)## baseMean log2FoldChange lfcSE stat pvalue padj
## Rsad2 730.6169 -6.526503 0.3174410 -20.55974 6.297206e-94 8.523269e-90
## Ifit2 256.4187 -5.131051 0.2567243 -19.98662 7.201493e-89 4.873610e-85
## Ifit3 254.9147 -4.954917 0.2591263 -19.12162 1.668299e-81 7.526811e-78
## Ifit1 312.2386 -5.665695 0.3345158 -16.93700 2.400442e-64 8.122495e-61
## Cmpk2 139.2565 -4.227801 0.2582877 -16.36857 3.206526e-60 8.680065e-57
## Gbp2 387.9789 -4.104884 0.2564110 -16.00900 1.105728e-57 2.494338e-54
## M_P60_S_1 M_P60_S_2 M_P60_S_3 M_P60_S_4 M_P60_S_5 M_P60_L_1 M_P60_L_2
## Rsad2 5.417958 4.700028 4.821213 5.097841 5.208398 10.104677 10.220827
## Ifit2 4.833271 4.731245 5.298135 5.069423 5.108717 8.962964 9.020183
## Ifit3 5.110379 4.974242 4.863401 5.152580 5.300107 9.049165 9.145322
## Ifit1 4.934041 4.700028 4.863401 5.204784 4.809912 8.837755 8.812189
## Cmpk2 5.188944 4.997973 4.980311 5.010264 4.877010 8.117292 7.999594
## Gbp2 6.058743 5.603074 5.656036 6.143102 5.913548 9.618848 9.508365
## M_P60_L_3 M_P60_L_4 M_P60_L_5 M_P60_L_6 M_P60_L_7
## Rsad2 10.925939 10.289764 10.606705 9.068431 10.327907
## Ifit2 9.175144 8.608187 8.848263 8.050874 8.888432
## Ifit3 9.156846 8.637189 8.878881 7.933281 8.577288
## Ifit1 9.874462 9.497635 9.277706 7.900665 8.776291
## Cmpk2 8.198788 7.996886 8.383610 7.087723 7.824938
## Gbp2 9.784985 9.108098 9.702415 8.374127 9.047852
dge_deseq2_up_male <- rownames(subset(sig,log2FoldChange>0.4))
length(dge_deseq2_up_male)## [1] 1317
dge_deseq2_down_male <- rownames(subset(sig,log2FoldChange<0.4))
length(dge_deseq2_down_male)## [1] 1713
EnhancedVolcano(sig,
lab = rownames(sig),
title = 'Male P60 saline vs lps treated',
x = 'log2FoldChange',
y = 'pvalue')library(VennDiagram)
# Combine the lists into a named list for the Venn diagram
venn_data <- list("DESeq2 up female"=dge_deseq2_up_female,"DESeq2 up male"=dge_deseq2_up_male )
# Create the Venn diagram with custom colors
vennColors <- c("lightpink", "cyan1")
# Set up the Venn diagram configuration
venn_config <- venn.diagram(
x = venn_data,
category.names = names(venn_data),
fill = vennColors,
filename = NULL, # Set filename to NULL to display the plot directly in R session
imagetype = "png", # You can change this to other formats like "pdf", "jpeg", etc.
col = "transparent", # Set background color to transparent for better display in R session
cex = 2.7,
cex.label = 7,
lwd = 2
)
# Display the Venn diagram
grid.newpage()
grid.draw(venn_config)# Combine the lists into a named list for the Venn diagram
venn_data <- list("DESeq2 down female"=dge_deseq2_down_female,"DESeq2 down male"=dge_deseq2_down_male )
# Create the Venn diagram with custom colors
vennColors <- c("lightpink", "cyan")
# Set up the Venn diagram configuration
venn_config <- venn.diagram(
x = venn_data,
category.names = names(venn_data),
fill = vennColors,
filename = NULL, # Set filename to NULL to display the plot directly in R session
imagetype = "png", # You can change this to other formats like "pdf", "jpeg", etc.
col = "transparent", # Set background color to transparent for better display in R session
cex = 2.7,
cex.label = 7,
lwd = 2
)
# Display the Venn diagram
grid.newpage()
grid.draw(venn_config)sessionInfo()## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 14.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Australia/Melbourne
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] VennDiagram_1.7.3 futile.logger_1.4.3
## [3] EnhancedVolcano_1.18.0 ggrepel_0.9.3
## [5] ggplot2_3.4.3 DESeq2_1.40.2
## [7] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [9] MatrixGenerics_1.12.3 matrixStats_1.0.0
## [11] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
## [13] IRanges_2.34.1 S4Vectors_0.38.1
## [15] BiocGenerics_0.46.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.3 xfun_0.40 bslib_0.5.1
## [4] lattice_0.21-8 vctrs_0.6.3 tools_4.3.0
## [7] bitops_1.0-7 generics_0.1.3 parallel_4.3.0
## [10] tibble_3.2.1 fansi_1.0.4 highr_0.10
## [13] pkgconfig_2.0.3 Matrix_1.6-1 lifecycle_1.0.3
## [16] GenomeInfoDbData_1.2.10 farver_2.1.1 compiler_4.3.0
## [19] munsell_0.5.0 codetools_0.2-19 htmltools_0.5.6
## [22] sass_0.4.7 RCurl_1.98-1.12 yaml_2.3.7
## [25] pillar_1.9.0 crayon_1.5.2 jquerylib_0.1.4
## [28] BiocParallel_1.34.2 DelayedArray_0.26.7 cachem_1.0.8
## [31] abind_1.4-5 tidyselect_1.2.0 locfit_1.5-9.8
## [34] digest_0.6.33 dplyr_1.1.2 labeling_0.4.2
## [37] fastmap_1.1.1 colorspace_2.1-0 cli_3.6.1
## [40] magrittr_2.0.3 S4Arrays_1.0.5 utf8_1.2.3
## [43] withr_2.5.0 scales_1.2.1 lambda.r_1.2.4
## [46] rmarkdown_2.24 XVector_0.40.0 evaluate_0.21
## [49] knitr_1.43 futile.options_1.0.1 rlang_1.1.1
## [52] Rcpp_1.0.11 glue_1.6.2 formatR_1.14
## [55] rstudioapi_0.15.0 jsonlite_1.8.7 R6_2.5.1
## [58] zlibbioc_1.46.0
getwd()## [1] "/Users/nishapaudel"