simple_volcano_plot_from_csv

Reference: https://bioconductor.org/packages/devel/bioc/vignettes/EnhancedVolcano/inst/doc/EnhancedVolcano.html#download-the-package-from-bioconductor

# use the next block for plotting
# Load required libraries
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Read the results table from the working directory
data <- read.csv("./full_results_marker_comp_by_clusters_astro.csv")

# Flag genes that pass both thresholds: |log2FC| > 1 and adjusted p < 0.01
data <- data %>%
  mutate(
    significant = ifelse(
      abs(avg_log2FC) > 1 & p_val_adj < 0.01,
      "Significant",
      "Not significant"
    )
  )

# Volcano plot: log2 fold change against -log10(adjusted p-value)
ggplot(data, aes(x = avg_log2FC, y = -log10(p_val_adj), color = significant)) +
  geom_point(alpha = 0.6) +
  # Map colours by name. An unnamed vector is matched to the levels in
  # alphabetical order ("Not significant" first), which painted the
  # non-significant genes red and the significant ones grey.
  scale_color_manual(
    values = c("Significant" = "red", "Not significant" = "grey50")
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5) # centre the title
  ) +
  labs(x = "Log2 Fold Change", y = "-Log10 Adjusted P-value") +
  ggtitle("Volcano plot of differentially expressed genes")

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Load required libraries
library(EnhancedVolcano)

## Loading required package: ggrepel

library(dplyr)

# Read the results table from the working directory
data <- read.csv("./full_results_marker_comp_by_clusters_astro.csv")

# Flag genes passing |log2FC| > 1 and adjusted p < 0.01. The EnhancedVolcano
# call below does not read this column; it is kept for inspecting the table.
data <- data %>%
  mutate(
    significant = ifelse(
      abs(avg_log2FC) > 1 & p_val_adj < 0.01,
      "Significant",
      "Not significant"
    )
  )

# Genes to label: up to 30 genes passing both thresholds, ordered by
# ascending adjusted p-value.
# Alternative without the significance filter:
# top_genes <- data %>%
#   arrange(p_val_adj) %>%
#   slice_head(n = 30) %>%
#   pull(genes)
top_genes <- data %>%
  filter(p_val_adj < 0.01, abs(avg_log2FC) > 1) %>%
  arrange(p_val_adj) %>%
  slice_head(n = 30) %>%
  pull(genes)

# Plot using EnhancedVolcano. The plot object is kept so that ggsave() can be
# given it explicitly instead of depending on last_plot(), which is only set
# once a plot has been printed (not the case under a plain source()).
volcano_plot <- EnhancedVolcano(data,
  lab = data$genes,
  selectLab = top_genes,
  # selectLab = c('TMEM176B','ADH1A'),
  x = "avg_log2FC",
  y = "p_val_adj",
  # xlim = c(-6, 6),
  title = "DEGs for Astrocyte Cluster ctrl_mTBI",
  xlab = bquote(~Log[2]~ "fold change"),
  # NOTE(review): 10e-2 is 0.1 (not 0.01), and FCcutoff is 1.5, whereas the
  # labelled genes above were selected with p < 0.01 and |log2FC| > 1.
  # Confirm that the differing thresholds are intended.
  pCutoff = 10e-2,
  FCcutoff = 1.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  pointSize = 2.0,
  labSize = 4.0,
  # shape = c(1, 4, 23, 25),
  colAlpha = 1,
  legendLabels = c("Not sig.", "Log (base 2) FC", "p-value",
    "p-value & Log (base 2) FC"),
  legendPosition = "right",
  legendLabSize = 12,
  legendIconSize = 5.0,
  # drawConnectors = TRUE,
  # widthConnectors = 0.75,
  # gridlines.major = FALSE,
  gridlines.minor = FALSE)

# Show the plot, then write exactly that plot to disk
print(volcano_plot)
ggsave("astrocyte_DEG_vocalno.pdf", plot = volcano_plot, height = 8, width = 10)

Use the following code blocks for additional adjustments to the plotting parameters.

# Load required libraries
library(EnhancedVolcano)
library(dplyr)

# Cluster to plot. The filter, the plot title and the output file name are all
# derived from this one value so they cannot drift apart.
cluster_name <- "Glut_N"

# Read the full results table and keep only the rows of the chosen cluster
data <- read.csv("./full_results_marker_comp_by_clusters.csv")
data <- data %>% filter(Cluster == cluster_name)

# Flag genes passing |log2FC| > 1 and adjusted p < 0.01. The EnhancedVolcano
# call below does not read this column; it is kept for inspecting the table.
data <- data %>%
  mutate(
    significant = ifelse(
      abs(avg_log2FC) > 1 & p_val_adj < 0.01,
      "Significant",
      "Not significant"
    )
  )

# Genes to label: up to 30 genes passing both thresholds, ordered by
# ascending adjusted p-value.
# Alternative without the significance filter:
# top_genes <- data %>%
#   arrange(p_val_adj) %>%
#   slice_head(n = 30) %>%
#   pull(genes)
top_genes <- data %>%
  filter(p_val_adj < 0.01, abs(avg_log2FC) > 1) %>%
  arrange(p_val_adj) %>%
  slice_head(n = 30) %>%
  pull(genes)

# Plot using EnhancedVolcano. The plot object is kept so that ggsave() can be
# given it explicitly instead of depending on last_plot().
volcano_plot <- EnhancedVolcano(data,
  lab = data$genes,
  selectLab = top_genes,
  # selectLab = c('TMEM176B','ADH1A'),
  x = "avg_log2FC",
  y = "p_val_adj",
  # xlim = c(-6, 6),
  # The title previously read "Astrocyte" although the data are filtered to
  # another cluster; it is now built from cluster_name.
  title = paste0("DEGs for ", cluster_name, " Cluster ctrl_mTBI"),
  xlab = bquote(~Log[2]~ "fold change"),
  # NOTE(review): 10e-2 is 0.1 (not 0.01), and FCcutoff is 1.5, whereas the
  # labelled genes above were selected with p < 0.01 and |log2FC| > 1.
  # Confirm that the differing thresholds are intended.
  pCutoff = 10e-2,
  FCcutoff = 1.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  pointSize = 2.0,
  labSize = 4.0,
  # shape = c(1, 4, 23, 25),
  # colAlpha = 1,
  colAlpha = 0.50,
  legendLabels = c("Not sig.", "Log (base 2) FC", "p-value",
    "p-value & Log (base 2) FC"),
  legendPosition = "right",
  legendLabSize = 12,
  legendIconSize = 5.0,
  # drawConnectors = TRUE,
  # widthConnectors = 0.75,
  # gridlines.major = FALSE,
  gridlines.minor = FALSE)

# Show the plot, then save it under a cluster-specific name. The previous
# name, "astrocyte_DEG_vocalno.pdf", overwrote the astrocyte figure.
print(volcano_plot)
ggsave(
  paste0(cluster_name, "_DEG_volcano.pdf"),
  plot = volcano_plot,
  height = 8,
  width = 10
)

# select a certain cluster
# Load required libraries
library(EnhancedVolcano)
library(dplyr)

# Read the full results table; every row is used as-is.
# NOTE(review): the comment above the library() calls mentions selecting a
# cluster, but no Cluster filter is applied in this chunk -- confirm intent.
data <- read.csv("./full_results_marker_comp_by_clusters.csv")

# Genes to label: the 30 rows with the smallest adjusted p-values
top_genes <- pull(
  slice_head(arrange(data, p_val_adj), n = 30),
  genes
)

# Volcano plot in which only the points passing both cutoffs are red
EnhancedVolcano(
  data,
  # columns to plot
  x = "avg_log2FC",
  y = "p_val_adj",
  # labelling
  lab = data$genes,
  selectLab = top_genes,
  labSize = 6.0,
  # significance cutoffs
  pCutoff = 0.01,
  FCcutoff = 1.0,
  # point appearance: three black categories, then red3 for the fourth
  pointSize = 3.0,
  col = c(rep("black", 3), "red3"),
  colAlpha = 1,
  # title and legend
  title = "Volcano plot with EnhancedVolcano",
  legendPosition = "right",
  legendLabSize = 14
)

# Load required libraries
library(EnhancedVolcano)
library(dplyr)

# Read the results table from the working directory
data <- read.csv("full_results_marker_comp_by_clusters_astro.csv")

# Flag genes passing both |log2FC| > 1 and adjusted p < 0.01
data <- mutate(
  data,
  significant = ifelse(
    abs(avg_log2FC) > 1 & p_val_adj < 0.01,
    "Significant",
    "Not significant"
  )
)

# Genes to label: the 30 rows with the smallest adjusted p-values
# (no significance filter is applied here)
top_genes <- pull(
  slice_head(arrange(data, p_val_adj), n = 30),
  genes
)

# Volcano plot with connector lines between labels and their points
EnhancedVolcano(
  data,
  # columns to plot
  x = "avg_log2FC",
  y = "p_val_adj",
  # significance cutoffs
  pCutoff = 0.01,
  FCcutoff = 1.0,
  # labelling and connectors
  lab = data$genes,
  selectLab = top_genes,
  labSize = 3.0,
  drawConnectors = TRUE,
  widthConnectors = 0.5,
  colConnectors = "grey30",
  # point appearance
  pointSize = 3.0,
  colAlpha = 0.70,
  # optional: gridlines.major = FALSE, gridlines.minor = FALSE
  # title and legend
  title = "Volcano plot with EnhancedVolcano",
  legendPosition = "right",
  legendLabSize = 14
)

 # Minimal call: default labelling, all points drawn with shape 8.
 # Uses the `data` object already present in the workspace.
 EnhancedVolcano(
   data,
   # columns to plot
   x = "avg_log2FC",
   y = "p_val_adj",
   lab = data$genes,
   labSize = 6.0,
   # significance cutoffs (10e-16 is 1e-15)
   pCutoff = 10e-16,
   FCcutoff = 1.5,
   # point appearance
   pointSize = 4.0,
   shape = 8,
   colAlpha = 1,
   title = "title here"
 )

# Variant with four extra horizontal reference lines and no gridlines.
# Uses the `data` and `top_genes` objects already present in the workspace.
EnhancedVolcano(
  data,
  # columns to plot and x-axis range
  x = "avg_log2FC",
  y = "p_val_adj",
  xlim = c(-6, 6),
  # labelling
  lab = data$genes,
  selectLab = top_genes,
  labSize = 6.0,
  # significance cutoffs (10e-16 is 1e-15) and the cutoff-line styling
  pCutoff = 10e-16,
  FCcutoff = 1.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  cutoffLineCol = "black",
  # point appearance
  pointSize = 4.0,
  # shape = c(1, 4, 23, 25),
  colAlpha = 1,
  # extra horizontal lines: one colour, line type and width per p-value
  hline = 10e-20 * c(1, 10e-30, 10e-60, 10e-90),
  hlineCol = c("pink", "hotpink", "purple", "black"),
  hlineType = c("solid", "longdash", "dotdash", "dotted"),
  hlineWidth = c(1.0, 1.5, 2.0, 2.5),
  # background grid switched off
  gridlines.major = FALSE,
  gridlines.minor = FALSE,
  title = "enter your title here"
)

## Warning: Removed 2 rows containing missing values (`geom_hline()`).

# Variant with custom legend labels placed to the right of the panel.
# Uses the `data` and `top_genes` objects already present in the workspace.
EnhancedVolcano(
  data,
  # columns to plot and x-axis range
  x = "avg_log2FC",
  y = "p_val_adj",
  xlim = c(-6, 6),
  # labelling
  lab = data$genes,
  selectLab = top_genes,
  labSize = 6.0,
  # significance cutoffs (10e-16 is 1e-15) and the cutoff-line styling
  pCutoff = 10e-16,
  FCcutoff = 1.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  # point appearance
  pointSize = 4.0,
  # shape = c(1, 4, 23, 25),
  colAlpha = 1,
  # legend
  legendLabels = c(
    "Not sig.",
    "Log (base 2) FC",
    "p-value",
    "p-value & Log (base 2) FC"
  ),
  legendPosition = "right",
  legendLabSize = 16,
  legendIconSize = 5.0,
  # optional: gridlines.major = FALSE, gridlines.minor = FALSE
  title = "enter your title here"
)

# Variant with a plotmath x-axis label and the minor gridlines removed.
# Uses the `data` and `top_genes` objects already present in the workspace.
EnhancedVolcano(
  data,
  # columns to plot, x-axis range and axis label
  x = "avg_log2FC",
  y = "p_val_adj",
  xlim = c(-6, 6),
  xlab = bquote(~Log[2]~ "fold change"),
  # labelling
  lab = data$genes,
  selectLab = top_genes,
  # selectLab = c('TMEM176B','ADH1A'),
  labSize = 6.0,
  # significance cutoffs (10e-16 is 1e-15) and the cutoff-line styling
  pCutoff = 10e-16,
  FCcutoff = 1.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  # point appearance
  pointSize = 4.0,
  # shape = c(1, 4, 23, 25),
  colAlpha = 1,
  # legend
  legendLabels = c(
    "Not sig.",
    "Log (base 2) FC",
    "p-value",
    "p-value & Log (base 2) FC"
  ),
  legendPosition = "right",
  legendLabSize = 16,
  legendIconSize = 5.0,
  # optional: drawConnectors = TRUE, widthConnectors = 0.75,
  #           gridlines.major = FALSE
  gridlines.minor = FALSE,
  title = "enter your title here"
)

# Variant with bold, black, boxed gene labels.
# Uses the `data` and `top_genes` objects already present in the workspace.
EnhancedVolcano(
  data,
  # columns to plot, x-axis range and axis label
  x = "avg_log2FC",
  y = "p_val_adj",
  xlim = c(-6, 6),
  xlab = bquote(~Log[2]~ "fold change"),
  # labelling
  lab = data$genes,
  selectLab = top_genes,
  # selectLab = c('TMEM176B','ADH1A'),
  labSize = 6.0,
  labCol = "black",
  labFace = "bold",
  boxedLabels = TRUE,
  # connector styling; drawConnectors itself is left at its default.
  # NOTE(review): presumably these two settings have no visible effect unless
  # drawConnectors = TRUE is also passed -- confirm.
  # drawConnectors = TRUE,
  widthConnectors = 1.0,
  colConnectors = "black",
  # significance cutoffs (10e-16 is 1e-15) and the cutoff-line styling
  pCutoff = 10e-16,
  FCcutoff = 1.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  # point appearance
  pointSize = 4.0,
  # shape = c(1, 4, 23, 25),
  colAlpha = 1,
  # legend
  legendLabels = c(
    "Not sig.",
    "Log (base 2) FC",
    "p-value",
    "p-value & Log (base 2) FC"
  ),
  legendPosition = "right",
  legendLabSize = 16,
  legendIconSize = 5.0,
  # optional: gridlines.major = FALSE
  gridlines.minor = FALSE,
  title = "enter your title here"
)

simple_volcano_plot_from_csv

Shenfeng Qiu

2023-08-07

Use the following code blocks for additional adjustments to the plotting parameters.