Clustering QC before annotation
Unannotated UMAP
# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
"cyan","chocolate4","pink","orange","darkgreen","green",
"deeppink","blue","gold","navyblue")
# Plot the UMAP
DimPlot(object = pigs.unannotated,
reduction = "umap",
label = TRUE,
#label.box = TRUE,
label.size = 3,
repel = TRUE,
group.by = "individual_clusters",
cols = colors
)

# Split by sample
DimPlot(object = pigs.unannotated,
reduction = "umap",
label = TRUE,
label.size = 3,
split.by = "sample",
repel = TRUE,
group.by = "individual_clusters",
cols = colors)

# Split by treatment
DimPlot(object = pigs.unannotated,
reduction = "umap",
label = TRUE,
label.size = 3,
split.by = "treatment",
repel = TRUE,
group.by = "individual_clusters",
cols = colors)

Color blind friendly UMAP
dittoDimPlot(object = pigs.unannotated,
var = "individual_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = FALSE)

Percent cells per cluter
# treatment
pigs.unannotated@meta.data %>%
group_by(individual_clusters, treatment) %>%
dplyr::count() %>%
group_by(individual_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=individual_clusters,y=percent, fill=treatment)) +
geom_col() +
scale_fill_manual(values = c("green","gray")) +
ggtitle("Percentage of treatment per cluster")

# sample
pigs.unannotated@meta.data %>%
group_by(individual_clusters, sample) %>%
dplyr::count() %>%
group_by(individual_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=individual_clusters,y=percent, fill=sample)) +
geom_col() +
#scale_fill_manual(values = sample_colors) +
ggtitle("Percentage of sample per cluster")

Cells per cluster
n_cells <- FetchData(pigs.unannotated,
vars = c("ident", "treatment")) %>%
dplyr::count(ident,treatment) %>%
tidyr::spread(ident, n)
n_cells
## treatment 0 1 2 3 4 5
## 1 Ecoli 1364 113 58 30 32 9
## 2 Saline 2926 267 97 85 38 8
Cluster tree
pigs.unannotated <- BuildClusterTree(object = pigs.unannotated,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- pigs.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)],
shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Cluster Annotation
Cluster 0 -
# Number of cells per condition
n_cells[,c(1,2)]
## treatment 0
## 1 Ecoli 1364
## 2 Saline 2926
# UMAP with only cluster 0
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
cols = "dodgerblue")

# show treatment colors
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
group.by = "treatment",
shuffle = TRUE,
cols = treatment_colors)

# Top 20 markers by p-value
VlnPlot(pigs.unannotated,
features = cluster0$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

# conserved cluster markers
conserved.cluster0 <- conserved.markers[conserved.markers$cluster == 0,]
VlnPlot(pigs.unannotated,
features = conserved.cluster0$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

Cluster 1
# Number of cells per condition
n_cells[,c(1,3)]
## treatment 1
## 1 Ecoli 113
## 2 Saline 267
# UMAP with only cluster 1
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
group.by = "SCT_snn_res.0.1",
cols = "gold")

# show treatment colors
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
group.by = "treatment",
shuffle = TRUE,
cols = treatment_colors)

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
features = cluster1$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

VlnPlot(pigs.unannotated,
features = cluster1$gene[21:40],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

# conserved cluster markers
conserved.cluster1 <- conserved.markers[conserved.markers$cluster == 1,]
VlnPlot(pigs.unannotated,
features = conserved.cluster1$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

Cluster 2
# Number of cells per condition
n_cells[,c(1,4)]
## treatment 2
## 1 Ecoli 58
## 2 Saline 97
# UMAP with only cluster 2
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
group.by = "SCT_snn_res.0.1",
cols = "firebrick1")

# show treatment colors
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
group.by = "treatment",
shuffle = TRUE,
cols = treatment_colors)

# Top 20 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
features = cluster2$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

# conserved cluster markers
conserved.cluster2 <- conserved.markers[conserved.markers$cluster == 2,]
VlnPlot(pigs.unannotated,
features = conserved.cluster2$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

Cluster 3
# Number of cells per condition
n_cells[,c(1,5)]
## treatment 3
## 1 Ecoli 30
## 2 Saline 85
# UMAP with only cluster 3
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
group.by = "SCT_snn_res.0.1",
cols = "green")

# show treatment colors
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
group.by = "treatment",
shuffle = TRUE,
cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
features = cluster3$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

# conserved cluster markers
conserved.cluster3 <- conserved.markers[conserved.markers$cluster == 3,]
VlnPlot(pigs.unannotated,
features = conserved.cluster3$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

Cluster 4
# Number of cells per condition
n_cells[,c(1,6)]
## treatment 4
## 1 Ecoli 32
## 2 Saline 38
# UMAP with only cluster 4
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
group.by = "SCT_snn_res.0.1",
cols = "lightgray")

# show treatment colors
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
group.by = "treatment",
shuffle = TRUE,
cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
features = cluster4$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

VlnPlot(pigs.unannotated,
features = cluster4$gene[21:40],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

# conserved cluster markers
conserved.cluster4 <- conserved.markers[conserved.markers$cluster == 4,]
VlnPlot(pigs.unannotated,
features = conserved.cluster4$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

Cluster 5
# Number of cells per condition
n_cells[,c(1,7)]
## treatment 5
## 1 Ecoli 9
## 2 Saline 8
# UMAP with only cluster 5
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 3,
repel = TRUE,
group.by = "SCT_snn_res.0.1",
cols = "lightgray")

# show treatment colors
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
group.by = "treatment",
shuffle = TRUE,
cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
features = cluster5$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

VlnPlot(pigs.unannotated,
features = cluster5$gene[21:40],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

# conserved cluster markers
conserved.cluster5 <- conserved.markers[conserved.markers$cluster == 5,]
VlnPlot(pigs.unannotated,
features = conserved.cluster5$gene[1:20],
cols = colors,
split.by = "individual_clusters",
stack = TRUE,
flip = TRUE)

Indiviudal cluster names
# Rename all identities
pigs.annotated <- RenameIdents(object = pigs.unannotated,
"0" = "cluster 0",
"1" = "cluster 1",
"2" = "cluster 2",
"3" = "cluster 3",
"4" = "cluster 4",
"5" = "cluster 5")
pigs.annotated$individual_clusters <- Idents(pigs.annotated)
Merged cluster names
# Rename all identities
pigs.merged <- RenameIdents(object = pigs.unannotated,
"0" = "0",
"1" = "1",
"2" = "2",
"3" = "3",
"4" = "4",
"5" = "5")
pigs.annotated$merged_clusters <- Idents(pigs.merged)
#remove(pigs.merged)
#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")
# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated
## An object of class Seurat
## 28225 features across 5027 samples within 2 assays
## Active assay: RNA (14276 features, 0 variable features)
## 1 other assay present: SCT
## 3 dimensional reductions calculated: pca, harmony, umap
Pseudo bulk merge all clusters cluster names
# Rename all identities
pigs.pseudo.bulk <- RenameIdents(object = pigs.unannotated,
"0" = "Pseudo bulk",
"1" = "Pseudo bulk",
"2" = "Pseudo bulk",
"3" = "Pseudo bulk",
"4" = "Pseudo bulk",
"5" = "Pseudo bulk")
pigs.annotated$pseudo_bulk <- Idents(pigs.pseudo.bulk)
#remove(pigs.merged)
#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")
Clustering QC after annotation
Annotated UMAP
# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
"cyan","chocolate4","pink","orange","darkgreen","green",
"deeppink","blue","gold","navyblue")
# Plot the UMAP
u1 <- DimPlot(object = pigs.annotated,
reduction = "umap",
label = TRUE,
label.size = 3,
label.box = TRUE,
repel = TRUE,
cols = colors,
group.by = "individual_clusters",)
u1

# Plot the UMAP
u2 <- DimPlot(object = pigs.annotated,
reduction = "umap",
label = TRUE,
label.size = 3,
label.box = TRUE,
repel = TRUE,
cols = colors,
group.by = "merged_clusters",)
u2


## png
## 2
## png
## 2
## null device
## 1
## pdf
## 2
## pdf
## 2
## null device
## 1
Color blind friendly UMAP
dittoDimPlot(object = pigs.annotated,
var = "individual_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = FALSE)

dittoDimPlot(object = pigs.annotated,
var = "merged_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = FALSE)

Cluster tree
# individual clusters
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t1 <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
t1

# merged clusters
Idents(pigs.annotated) <- "merged_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t2 <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
t2


## png
## 2
## png
## 2
## null device
## 1
## pdf
## 2
## pdf
## 2
## null device
## 1
Differential expression
Individual clusters
Ecoli vs saline within cluster
cell.types <- unique(pigs.annotated$individual_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
levels = c("Ecoli","Saline"))
for (i in cell.types) {
# print what cluster we're on
print(i)
# subset cluster
cluster <- subset(pigs.annotated, individual_clusters == i)
# skip cluster 11 - Neuron 4 (no Ecoli cells) Fewer than 3 cells
if (i == "Neuron 3" | i == "Neuron 4") { next }
# DE by treatment
Idents(cluster) <- cluster$treatment
markers <- FindMarkers(object = cluster,
ident.1 = "Ecoli",
ident.2 = "Saline",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
# skip if no DEGs
if(nrow(markers) == 0) {
next
}
# reformat
markers$gene <- rownames(markers)
rownames(markers) <- 1:nrow(markers)
markers$cluster <- i
# add to master table
treatment.df <- rbind(treatment.df, markers)
}
# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_Ecoli - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]
# write table
write.table(treatment.df,
"../../results/DEGs/individual/brain_individual_DEGs_Ecoli_vs_saline_within_cluster_pval_1.00.tsv",
sep = "\t",
quote = FALSE, row.names = FALSE)
# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
"../../results/DEGs/individual/brain_individual_DEGs_Ecoli_vs_saline_within_cluster_pval_0.05.tsv",
sep = "\t",
quote = FALSE, row.names = FALSE)
table(treatment.df.strict$cluster)
Single cluster vs. all other clusters
# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "individual_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
test.use = "MAST")
# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)
# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)
# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]
# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
"../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
sep = "\t",
quote = FALSE,
row.names = FALSE)
write.table(all.markers.strict,
"../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
sep = "\t",
quote = FALSE,
row.names = FALSE)
# view number of DEGs
table(all.markers.strict$cluster)
Pseudo bulk Ecoli vs. saline
# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "pseudo_bulk"
pigs.annotated
cell.types <- unique(pigs.annotated$pseudo_bulk)
pb.treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
levels = c("Ecoli","Saline"))
for (i in cell.types) {
# print what cluster we're on
print(i)
# subset cluster
cluster <- subset(pigs.annotated, pseudo_bulk == i)
# skip cluster 11 - Neuron 4 (no Ecoli cells) Fewer than 3 cells
if (i == "Neuron 4") { next }
# DE by treatment
Idents(cluster) <- cluster$treatment
markers <- FindMarkers(object = cluster,
ident.1 = "Ecoli",
ident.2 = "Saline",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
# skip if no DEGs
if(nrow(markers) == 0) {
next
}
# reformat
markers$gene <- rownames(markers)
rownames(markers) <- 1:nrow(markers)
markers$cluster <- i
# add to master table
pb.treatment.df <- rbind(pb.treatment.df, markers)
}
# reformat table
colnames(pb.treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
pb.treatment.df$percent_difference <- abs(pb.treatment.df$percent_Ecoli - pb.treatment.df$percent_saline)
pb.treatment.df <- pb.treatment.df[,c(7,6,1,5,2,3,4,8)]
# write table
write.table(pb.treatment.df,
"../../results/DEGs/pseudobulk/brain_pseudo_bulk_DEGs_Ecoli_vs_saline_pval_1.00.tsv",
sep = "\t",
quote = FALSE, row.names = FALSE)
# strict filtering
pb.treatment.df <- pb.treatment.df[pb.treatment.df$p_val_adj < 0.05,]
write.table(pb.treatment.df,
"../../results/DEGs/pseudobulk/brain_pseudo_bulk_DEGs_Ecoli_vs_saline_pval_0.05.tsv",
sep = "\t",
quote = FALSE, row.names = FALSE)
table(pb.treatment.df$cluster)
Merged clusters
Ecoli vs saline within cluster
cell.types <- unique(pigs.annotated$merged_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
levels = c("Ecoli","Saline"))
for (i in cell.types) {
# print what cluster we're on
print(i)
# subset cluster
cluster <- subset(pigs.annotated, merged_clusters == i)
# DE by treatment
Idents(cluster) <- cluster$treatment
markers <- FindMarkers(object = cluster,
ident.1 = "Ecoli",
ident.2 = "Saline",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
# skip if no DEGs
if(nrow(markers) == 0) {
next
}
# reformat
markers$gene <- rownames(markers)
rownames(markers) <- 1:nrow(markers)
markers$cluster <- i
# add to master table
treatment.df <- rbind(treatment.df, markers)
}
# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_Ecoli - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]
# write table
write.table(treatment.df,
"../../results/DEGs/merged/brain_merged_DEGs_Ecoli_vs_saline_within_cluster_pval_1.00.tsv",
sep = "\t",
quote = FALSE, row.names = FALSE)
# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
"../../results/DEGs/merged/brain_merged_DEGs_Ecoli_vs_saline_within_cluster_pval_0.05.tsv",
sep = "\t",
quote = FALSE, row.names = FALSE)
table(treatment.df.strict$cluster)
Single cluster vs. all other clusters
# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "merged_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
test.use = "MAST")
# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)
# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)
# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]
# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
"../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
sep = "\t",
quote = FALSE,
row.names = FALSE)
write.table(all.markers.strict,
"../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
sep = "\t",
quote = FALSE,
row.names = FALSE)
# view number of DEGs
table(all.markers.strict$cluster)
Volcanoes
df <- treatment.df
cell_types <- as.vector(unique(pigs.annotated$merged_clusters))
for (cell_cluster in cell_types) {
# data
treatment_vs_control <- df[df$cluster == cell_cluster,]
# assign colors
color_values <- vector()
max <- nrow(treatment_vs_control)
num_DEGs <- vector()
# cutoff based on cluster
if(cell_cluster == "microglia macrophage") {
FDRq <- 0.001
LFC <- 2
} else {
FDRq <- 0.05
LFC <- 1
}
# loop through every gene
for(i in 1:max){
# if FDRq is met
if (treatment_vs_control$p_val_adj[i] < FDRq){
if (treatment_vs_control$avg_log2FC[i] > LFC){
color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
num_DEGs <- c(num_DEGs,"up")
}
else if (treatment_vs_control$avg_log2FC[i] < -LFC){
color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
num_DEGs <- c(num_DEGs, "down")
}
else{
color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
num_DEGs <- c(num_DEGs, "neither")
}
}
# if FDRq is not met
else{
color_values <- c(color_values, 3) # 3 when FDRq not met
num_DEGs <- c(num_DEGs, "neither")
}
}
table(num_DEGs)
treatment_vs_control$color <- factor(color_values)
# subset genes to label
up <- treatment_vs_control[treatment_vs_control$color == 1,]
up10 <- up[1:10,]
down <- treatment_vs_control[treatment_vs_control$color == 2,]
down10 <- down[1:10,]
# find hadjpval
hadjpval <- (-log10(max(
treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq],
na.rm=TRUE)))
# plot
p <-
ggplot(data = treatment_vs_control,
aes(x = avg_log2FC, # x-axis is logFC
y = -log10(p_val_adj), # y-axis will be -log10 of P.Value
color = color)) + # color is based on factored color column
geom_point(alpha = 0.8, size = 2) + # create scatterplot, alpha makes points transparent
theme_bw() + # set color theme
theme(legend.position = "none") + # no legend
scale_color_manual(values = c("red", "blue","grey")) + # set factor colors
labs(
title = "", # no main title
x = expression(log[2](FC)), # x-axis title
y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
) +
theme(axis.title.x = element_text(size = 10),
axis.text.x = element_text(size = 10)) +
theme(axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
geom_hline(yintercept = hadjpval, # horizontal line
colour = "black",
linetype = "dashed") +
geom_vline(xintercept = c(LFC,-LFC), # vertical line
colour = "black",
linetype = "dashed") +
ggtitle(paste0(tissue," ",cell_cluster," Ecoli vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
geom_text_repel(data = up10,
aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene),
color = "maroon",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
) +
geom_text_repel(data = down10,
aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene),
color = "navyblue",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
)
pdf(paste0("../../results/volcano/",tolower(tissue),"_",
tolower(gsub(" ","",cell_cluster)),
"_volcano.pdf"),width = 6, height = 4)
print(p)
dev.off()
}
df <- pb.treatment.df
cell_types <- as.vector(unique(pigs.annotated$pseudo_bulk))
for (cell_cluster in cell_types) {
# data
treatment_vs_control <- df[df$cluster == cell_cluster,]
# assign colors
color_values <- vector()
max <- nrow(treatment_vs_control)
num_DEGs <- vector()
# cutoff based on cluster
if(cell_cluster == "microglia macrophage") {
FDRq <- 0.001
LFC <- 2
} else {
FDRq <- 0.05
LFC <- 1
}
# loop through every gene
for(i in 1:max){
# if FDRq is met
if (treatment_vs_control$p_val_adj[i] < FDRq){
if (treatment_vs_control$avg_log2FC[i] > LFC){
color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
num_DEGs <- c(num_DEGs,"up")
}
else if (treatment_vs_control$avg_log2FC[i] < -LFC){
color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
num_DEGs <- c(num_DEGs, "down")
}
else{
color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
num_DEGs <- c(num_DEGs, "neither")
}
}
# if FDRq is not met
else{
color_values <- c(color_values, 3) # 3 when FDRq not met
num_DEGs <- c(num_DEGs, "neither")
}
}
table(num_DEGs)
treatment_vs_control$color <- factor(color_values)
# subset genes to label
up <- treatment_vs_control[treatment_vs_control$color == 1,]
up10 <- up[1:10,]
down <- treatment_vs_control[treatment_vs_control$color == 2,]
down10 <- down[1:10,]
# find hadjpval
hadjpval <- (-log10(max(
treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq],
na.rm=TRUE)))
# plot
p <-
ggplot(data = treatment_vs_control,
aes(x = avg_log2FC, # x-axis is logFC
y = -log10(p_val_adj), # y-axis will be -log10 of P.Value
color = color)) + # color is based on factored color column
geom_point(alpha = 0.8, size = 2) + # create scatterplot, alpha makes points transparent
theme_bw() + # set color theme
theme(legend.position = "none") + # no legend
scale_color_manual(values = c("red", "blue","grey")) + # set factor colors
labs(
title = "", # no main title
x = expression(log[2](FC)), # x-axis title
y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
) +
theme(axis.title.x = element_text(size = 10),
axis.text.x = element_text(size = 10)) +
theme(axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
geom_hline(yintercept = hadjpval, # horizontal line
colour = "black",
linetype = "dashed") +
geom_vline(xintercept = c(LFC,-LFC), # vertical line
colour = "black",
linetype = "dashed") +
ggtitle(paste0(tissue," ",cell_cluster," Ecoli vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
geom_text_repel(data = up10,
aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene),
color = "maroon",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
) +
geom_text_repel(data = down10,
aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene),
color = "navyblue",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
)
pdf(paste0("../../results/volcano/",tolower(tissue),"_",
tolower(gsub(" ","",cell_cluster)),
"_volcano.pdf"),width = 6, height = 4)
print(p)
dev.off()
}
Check DEGs
FeaturePlot(pigs.annotated,
features = c("ATF3", "CXCL10"),
split.by = "treatment",
max.cutoff = 3,
cols = c("grey", "red"))

FeaturePlot(pigs.annotated,
features = c("TSEN2", "CXCL2"),
split.by = "treatment",
max.cutoff = 3,
cols = c("grey", "red"))

FeaturePlot(pigs.annotated,
features = c("ROR1","DPP10"),
split.by = "treatment",
max.cutoff = 3,
cols = c("grey", "red"))
