Differential gene expression-DESeq2, sex wise by creating different data frames

In this Differential gene expression analysis by DESeq2, I am creating different dataframes for female and male. The analysis finds the differentially expressed genes between Saline treated and PBS treated samples.

About dataset

The data is from a paper Generation of a microglial developmental index in mice and in humans reveals a sex difference in maturation and immune reactivity Published in Glia in 2017.

Load library

library(DESeq2)

create a Female df

count_data <- read.csv('/Users/nishapaudel/Documents/CASP/Data/treated_samples/counts_lps_sal.csv', sep =',', header=T, row.names = 1)
sampleinfo <- read.csv("/Users/nishapaudel/Documents/CASP/Data/female_design copy.txt", sep = ',', header = TRUE, row.names = 1, stringsAsFactors = TRUE)

dim(count_data)

## [1] 21987    25

#desired_columns <- c("F_E18_1","F_E18_2","F_E18_3","F_P4_1","F_P4_2","F_P4_3","F_P4_4","F_P14_1","F_P14_2","F_P14_3","F_P14_4","F_P14_5","F_P14_6","F_P14_7",  "F_P14_8","F_P14_9","F_P14_10","F_P60_S_1","F_P60_S_2","F_P60_S_3","F_P60_S_4","F_P60_S_5","F_P60_S_6","F_P60_S_7")

desired_columns <- c("F_P60_S_1","F_P60_S_2","F_P60_S_3","F_P60_S_4","F_P60_S_5","F_P60_S_6","F_P60_S_7","F_P60_L_1","F_P60_L_2","F_P60_L_3","F_P60_L_4","F_P60_L_5","F_P60_L_6" )
# Rearrange the columns based on the desired order
data_rearranged_female <- count_data[, desired_columns]

sampleinfo_female <- read.csv("/Users/nishapaudel/Documents/CASP/Data/female_design copy.txt", sep = ',', header = TRUE, row.names = 1, stringsAsFactors = TRUE)
sampleinfo_female

##             Sex   Age Treatment
## F_P60_S_1 FALSE P60_S         S
## F_P60_S_2 FALSE P60_S         S
## F_P60_S_3 FALSE P60_S         S
## F_P60_S_4 FALSE P60_S         S
## F_P60_S_5 FALSE P60_S         S
## F_P60_S_6 FALSE P60_S         S
## F_P60_S_7 FALSE P60_S         S
## F_P60_L_1 FALSE P60_L         L
## F_P60_L_2 FALSE P60_L         L
## F_P60_L_3 FALSE P60_L         L
## F_P60_L_4 FALSE P60_L         L
## F_P60_L_5 FALSE P60_L         L
## F_P60_L_6 FALSE P60_L         L

### Check that sample names match in both files
all(colnames(data_rearranged_female) %in% rownames(sampleinfo_female))

## [1] TRUE

all(colnames(data_rearranged_female) == rownames(sampleinfo_female))

## [1] TRUE

dds <- DESeqDataSetFromMatrix(countData = data_rearranged_female, colData = sampleinfo_female, design = ~Treatment)

dds <- DESeq(dds)

res <-results(dds)

summary(res)

## 
## out of 16280 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 171, 1.1%
## LFC < 0 (down)     : 578, 3.6%
## outliers [1]       : 3, 0.018%
## low counts [2]     : 3098, 19%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

A few better modifications

using vst to stabilise variance, alpha value cut off < 0.05, and lfc </> 0.4

vsd <- vst(dds, blind = FALSE)
zz <- cbind(as.data.frame(res),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge_deseq2 <-dge
sig <-subset(dge, padj<0.05)
head(sig)

##          baseMean log2FoldChange     lfcSE      stat        pvalue
## Akap12  739.56256      -5.385832 0.2433006 -22.13653 1.406459e-108
## Ptgs2   525.84255      -3.207046 0.1986917 -16.14081  1.317923e-58
## Gbp5    328.39739      -3.837960 0.2446915 -15.68489  1.918949e-55
## Sele    151.58915      -6.647257 0.4495124 -14.78771  1.758456e-49
## Rnd1     72.55070      -4.864725 0.3381293 -14.38717  6.229004e-47
## Gm16548  52.65918      -4.630113 0.3360372 -13.77857  3.429812e-43
##                  padj F_P60_S_1 F_P60_S_2 F_P60_S_3 F_P60_S_4 F_P60_S_5
## Akap12  1.853573e-104  5.484713  4.911929  5.709113  5.332548  5.507535
## Ptgs2    8.684454e-55  7.019694  7.075254  6.909208  6.486816  7.058348
## Gbp5     8.429942e-52  5.551643  5.352056  5.811521  5.773566  6.132988
## Sele     5.793672e-46  3.515134  3.312628  4.077495  3.792974  3.600481
## Rnd1     1.641841e-43  4.138362  4.135194  3.340833  3.601291  4.207353
## Gm16548  7.533581e-40  3.877466  4.135194  3.840082  3.951135  2.712197
##         F_P60_S_6 F_P60_S_7 F_P60_L_1 F_P60_L_2 F_P60_L_3 F_P60_L_4 F_P60_L_5
## Akap12   5.476942  6.440575 10.512091 10.510380 10.478273 10.816627 10.359082
## Ptgs2    7.120381  6.768592 10.061635 10.258348  9.626823 10.291484  9.246411
## Gbp5     6.242406  6.080103  9.417840  9.668445  8.621473  9.137764  9.158269
## Sele     4.093355  2.712197  7.442011  7.844879  7.924951  9.091157  8.384288
## Rnd1     4.093355  4.046840  6.846937  7.156737  7.292623  7.853985  7.214110
## Gm16548  3.919023  3.937322  6.896077  6.884823  6.626662  7.134643  6.337487
##         F_P60_L_6
## Akap12  10.944761
## Ptgs2   10.232249
## Gbp5     9.959113
## Sele     8.964629
## Rnd1     7.553409
## Gm16548  7.433203

dge_deseq2_up_female <- rownames(subset(sig,log2FoldChange>0.4))
length(dge_deseq2_up_female)

## [1] 101

dge_deseq2_down_female <- rownames(subset(sig,log2FoldChange<0.4))
length(dge_deseq2_down_female)

## [1] 486

Volcano plot for female

  library(EnhancedVolcano)

EnhancedVolcano(sig,
    lab = rownames(sig),
    title = 'Female P60 saline vs lps treated',
    x = 'log2FoldChange',
    y = 'pvalue',
    subtitle = bquote('FDR cut off 0.05 and absolute LFC cut off 0.4'))

Create a Male df

desired_columns <- c("M_P60_S_1","M_P60_S_2","M_P60_S_3","M_P60_S_4","M_P60_S_5","M_P60_L_1","M_P60_L_2","M_P60_L_3","M_P60_L_4","M_P60_L_5","M_P60_L_6","M_P60_L_7")
# Rearrange the columns based on the desired order
data_rearranged_male <- count_data[, desired_columns]

sampleinfo_male <- read.csv("/Users/nishapaudel/Documents/CASP/Data/male_design copy.txt", sep = ',', header = TRUE, row.names = 1, stringsAsFactors = TRUE)
dds <- DESeqDataSetFromMatrix(countData = data_rearranged_male, colData = sampleinfo_male, design = ~Age)
dds <- DESeq(dds)
res <-results(dds)
summary(res)

## 
## out of 16337 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1874, 11%
## LFC < 0 (down)     : 2026, 12%
## outliers [1]       : 3, 0.018%
## low counts [2]     : 2802, 17%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

A few better modifications

vsd <- vst(dds, blind = FALSE)
zz <- cbind(as.data.frame(res),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge_deseq2 <-dge
sig <-subset(dge, padj<0.05)
head(sig)

##       baseMean log2FoldChange     lfcSE      stat       pvalue         padj
## Rsad2 730.6169      -6.526503 0.3174410 -20.55974 6.297206e-94 8.523269e-90
## Ifit2 256.4187      -5.131051 0.2567243 -19.98662 7.201493e-89 4.873610e-85
## Ifit3 254.9147      -4.954917 0.2591263 -19.12162 1.668299e-81 7.526811e-78
## Ifit1 312.2386      -5.665695 0.3345158 -16.93700 2.400442e-64 8.122495e-61
## Cmpk2 139.2565      -4.227801 0.2582877 -16.36857 3.206526e-60 8.680065e-57
## Gbp2  387.9789      -4.104884 0.2564110 -16.00900 1.105728e-57 2.494338e-54
##       M_P60_S_1 M_P60_S_2 M_P60_S_3 M_P60_S_4 M_P60_S_5 M_P60_L_1 M_P60_L_2
## Rsad2  5.417958  4.700028  4.821213  5.097841  5.208398 10.104677 10.220827
## Ifit2  4.833271  4.731245  5.298135  5.069423  5.108717  8.962964  9.020183
## Ifit3  5.110379  4.974242  4.863401  5.152580  5.300107  9.049165  9.145322
## Ifit1  4.934041  4.700028  4.863401  5.204784  4.809912  8.837755  8.812189
## Cmpk2  5.188944  4.997973  4.980311  5.010264  4.877010  8.117292  7.999594
## Gbp2   6.058743  5.603074  5.656036  6.143102  5.913548  9.618848  9.508365
##       M_P60_L_3 M_P60_L_4 M_P60_L_5 M_P60_L_6 M_P60_L_7
## Rsad2 10.925939 10.289764 10.606705  9.068431 10.327907
## Ifit2  9.175144  8.608187  8.848263  8.050874  8.888432
## Ifit3  9.156846  8.637189  8.878881  7.933281  8.577288
## Ifit1  9.874462  9.497635  9.277706  7.900665  8.776291
## Cmpk2  8.198788  7.996886  8.383610  7.087723  7.824938
## Gbp2   9.784985  9.108098  9.702415  8.374127  9.047852

dge_deseq2_up_male <- rownames(subset(sig,log2FoldChange>0.4))
length(dge_deseq2_up_male)

## [1] 1317

dge_deseq2_down_male <- rownames(subset(sig,log2FoldChange<0.4))
length(dge_deseq2_down_male)

## [1] 1713

Volcano plot for male

EnhancedVolcano(sig,
    lab = rownames(sig),
    title = 'Male P60 saline vs lps treated',
    x = 'log2FoldChange',
    y = 'pvalue')

Venn Diagram

Up regulated genes

library(VennDiagram)
# Combine the lists into a named list for the Venn diagram
venn_data <- list("DESeq2 up female"=dge_deseq2_up_female,"DESeq2 up male"=dge_deseq2_up_male )

# Create the Venn diagram with custom colors
vennColors <- c("lightpink", "cyan1")

# Set up the Venn diagram configuration
venn_config <- venn.diagram(
  x = venn_data,
  category.names = names(venn_data),
  fill = vennColors,
  filename = NULL, # Set filename to NULL to display the plot directly in R session
  imagetype = "png", # You can change this to other formats like "pdf", "jpeg", etc.
  col = "transparent", # Set background color to transparent for better display in R session
  cex = 2.7,
  cex.label = 7,
  lwd = 2 
)

# Display the Venn diagram
grid.newpage()
grid.draw(venn_config)

Down regulated genes

# Combine the lists into a named list for the Venn diagram
venn_data <- list("DESeq2 down female"=dge_deseq2_down_female,"DESeq2 down male"=dge_deseq2_down_male )

# Create the Venn diagram with custom colors
vennColors <- c("lightpink", "cyan")

# Set up the Venn diagram configuration
venn_config <- venn.diagram(
  x = venn_data,
  category.names = names(venn_data),
  fill = vennColors,
  filename = NULL, # Set filename to NULL to display the plot directly in R session
  imagetype = "png", # You can change this to other formats like "pdf", "jpeg", etc.
  col = "transparent", # Set background color to transparent for better display in R session
  cex = 2.7,
  cex.label = 7,
  lwd = 2 
)

# Display the Venn diagram
grid.newpage()
grid.draw(venn_config)

Session information

sessionInfo()

## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 14.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Australia/Melbourne
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] VennDiagram_1.7.3           futile.logger_1.4.3        
##  [3] EnhancedVolcano_1.18.0      ggrepel_0.9.3              
##  [5] ggplot2_3.4.3               DESeq2_1.40.2              
##  [7] SummarizedExperiment_1.30.2 Biobase_2.60.0             
##  [9] MatrixGenerics_1.12.3       matrixStats_1.0.0          
## [11] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
## [13] IRanges_2.34.1              S4Vectors_0.38.1           
## [15] BiocGenerics_0.46.0        
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.3            xfun_0.40               bslib_0.5.1            
##  [4] lattice_0.21-8          vctrs_0.6.3             tools_4.3.0            
##  [7] bitops_1.0-7            generics_0.1.3          parallel_4.3.0         
## [10] tibble_3.2.1            fansi_1.0.4             highr_0.10             
## [13] pkgconfig_2.0.3         Matrix_1.6-1            lifecycle_1.0.3        
## [16] GenomeInfoDbData_1.2.10 farver_2.1.1            compiler_4.3.0         
## [19] munsell_0.5.0           codetools_0.2-19        htmltools_0.5.6        
## [22] sass_0.4.7              RCurl_1.98-1.12         yaml_2.3.7             
## [25] pillar_1.9.0            crayon_1.5.2            jquerylib_0.1.4        
## [28] BiocParallel_1.34.2     DelayedArray_0.26.7     cachem_1.0.8           
## [31] abind_1.4-5             tidyselect_1.2.0        locfit_1.5-9.8         
## [34] digest_0.6.33           dplyr_1.1.2             labeling_0.4.2         
## [37] fastmap_1.1.1           colorspace_2.1-0        cli_3.6.1              
## [40] magrittr_2.0.3          S4Arrays_1.0.5          utf8_1.2.3             
## [43] withr_2.5.0             scales_1.2.1            lambda.r_1.2.4         
## [46] rmarkdown_2.24          XVector_0.40.0          evaluate_0.21          
## [49] knitr_1.43              futile.options_1.0.1    rlang_1.1.1            
## [52] Rcpp_1.0.11             glue_1.6.2              formatR_1.14           
## [55] rstudioapi_0.15.0       jsonlite_1.8.7          R6_2.5.1               
## [58] zlibbioc_1.46.0

getwd()

## [1] "/Users/nishapaudel"

Differential gene expression-DESeq2, sex wise by creating different data frames

Nisha paudel

2023-10-11

About dataset

Load library

create a Female df

A few better modifications

Volcano plot for female

Create a Male df

A few better modifications

Volcano plot for male

Venn Diagram

Up regulated genes

Down regulated genes

Session information