library(dplyr)
library(tidyr)
library(ggplot2)
library(pheatmap)
library(patchwork)
library(Seurat)
library(stringr)
library(ggthemes)
library(forcats)
library(gridExtra)
library(ggrepel)
vis_tumor_CD45_leukocytes
Visualize expression across different conditions (Leukocytes in tumor and skin)
The experiment consists of 3 main factors of interest:
sample origin: tumor, skin, lymph node
main cell type class: leukocytes, LECs
tumor type: YUMM, YUMMER
As a first check we are interested in the expression of specific, known marker genes and how it varies across combinations of these factors.
Preamble
Data objects
From the FGCZ we have different data objects:
1. Tumor LECs integrated (only tumor LECs, but they have been integrated with skin samples) (`data/scData_LEC_tumor.rds`)
2. Tumor + skin LECs integrated (`data/scData_LEC_tumor_skin.rds`)
3. All skin + tumor cells
4. All lymph node cells
Here we work with object 3 - Integrated tumor and skin leukocytes.
#### ----- change to local path to scData.rds object here ----- #####
# e.g. seurat <- readRDS("/home/project/data_folder/scData.rds")
# Read the serialized object used by every chunk below into `seurat`.
seurat <- readRDS(file.path("/Users/thomarin/Documents/PhD/Tumor project/Sequencing experiment/August 2023 first analysis shallow sequencing/rds file/scData_tumor_CD45.rds"))
#### --------------------------------------------------------- #####
# correct condition assignment!!
# Derive the condition of every cell from its sample name and store it in the
# `cond` metadata column. "YUMM[0-9]" requires a digit after YUMM, so YUMMER
# samples do not fall into the YUMM group; a sample name matching none of the
# patterns yields NA (case_when default).
seurat$cond <- seurat[[]] |>
  mutate(
    cond = case_when(
      str_detect(Sample, "YUMM[0-9]") ~ "YUMM",
      str_detect(Sample, "YUMMER") ~ "YUMMER",
      str_detect(Sample, "Skin") ~ "skin"
    )
  ) |>
  select(cond)

# check assignment: every sample should have cells in exactly one condition
table(seurat$Sample, seurat$cond)
skin YUMM YUMMER
SkinLECs_Leukocytes1 1510 0 0
SkinLECs_Leukocytes2 2351 0 0
TumorYUMM1_1A 0 664 0
TumorYUMM1_1B 0 586 0
TumorYUMM2_1A 0 1886 0
TumorYUMM2_1B 0 1652 0
TumorYUMM5_2A 0 1829 0
TumorYUMM5_2B 0 1603 0
TumorYUMM6_2A 0 2380 0
TumorYUMM6_2B 0 2267 0
TumorYUMMER3_1A 0 0 1031
TumorYUMMER3_1B 0 0 887
TumorYUMMER4_1A 0 0 1442
TumorYUMMER4_1B 0 0 1234
TumorYUMMER7_2A 0 0 2015
TumorYUMMER7_2B 0 0 1898
TumorYUMMER8_2A 0 0 2593
TumorYUMMER8_2B 0 0 2379
# number of cells per condition
table(seurat$cond)
skin YUMM YUMMER
3861 12867 13479
# Collapse the numbered clusters stored in `ident` into broad cell type
# classes and keep the result as the `cell_types` metadata column.
seurat$cell_types <- seurat$ident |>
  as.factor() |>
  forcats::fct_collapse(
    "monocytes/macrophages" = c("0", "1", "2", "5", "6", "18", "22"),
    "DCs" = c("4", "12", "19"),
    "CD4 T cells" = c("10"),
    "CD8 T cells" = c("7", "16"),
    "other T cells" = c("3", "11", "15", "23", "24"),
    "endothelial cells/fibroblasts" = c("8", "9", "13"),
    "mast cells" = c("14"),
    "B cells" = c("21"),
    "other" = c("17", "20")
  )
# make "SCT" the default assay of the object
DefaultAssay(seurat) <- "SCT"
# Marker genes plotted per cluster / broad cell type.
# "Cd62l" was dropped from this list: it is not a feature of the object
# (FetchData reports it as "not found") and CD62L is encoded by Sell, which is
# already included.
cluster_marker <- c(
  "Cd3e", "Cd8a", "Cd4", "Tcf7", "Lef1", "Pdcd1", "Sell", "Tigit", "Cxcr4",
  "Ccr7", "Foxp3", "Ctla4", "Trdc", "Il12b", "Itgae", "Xcr1", "Cd207",
  "Notch2", "Itgax", "Itgam", "Il1a", "Cd80", "Cd19", "Prox1", "Pecam1",
  "Csf2rb", "Csf3r", "Il1r2", "Acta2", "Pdgfrb", "Ctsk", "Cd74", "H2-Ab1",
  "H2-Aa", "Krt15", "Krt5", "Cd79a", "Ccl3", "Ccl4", "Cd63", "Cxcr3",
  "Fcgr1", "Fcgr3", "Ncam1", "Cd24a", "Klrb1c"
)

# Marker genes compared between conditions.
cond_marker <- c(
  "Cd3e", "Cd8a", "Cd4", "Tcf7", "Lef1", "Pdcd1", "Foxp3", "Cxcr4", "Cxcr3"
)
Overview
Mean features/molecules per condition
# mean molecules/group: average nCount_RNA over the cells of each condition
mean_mol <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_mol = mean(nCount_RNA))
mean_mol
# A tibble: 3 × 2
cond mean_mol
<chr> <dbl>
1 YUMM 4942.
2 YUMMER 4687.
3 skin 5856.
# mean unique genes/group: average nFeature_RNA over the cells of each condition
mean_feature <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_gene = mean(nFeature_RNA))
mean_feature
# A tibble: 3 × 2
cond mean_gene
<chr> <dbl>
1 YUMM 1960.
2 YUMMER 1872.
3 skin 2199.
Distribution of conditions per cluster
# Number of cells for every combination of condition and broad cell type.
# `.groups = "drop"` returns an ungrouped table and silences the summarise()
# regrouping message.
cond_dat <- seurat[[]] |>
  group_by(cond, cell_types) |>
  summarise(n_cells = n(), .groups = "drop")
`summarise()` has grouped output by 'cond'. You can override using the
`.groups` argument.
# Absolute cell numbers per cell type, bars stacked by condition
ggplot(cond_dat, aes(x = cell_types, y = n_cells, fill = cond)) +
  geom_col(position = "stack") +
  scale_fill_tableau() +
  theme_bw()

# Same data, each cell type bar scaled to 1 (share of each condition)
ggplot(cond_dat, aes(x = cell_types, y = n_cells, fill = cond)) +
  geom_col(position = "fill") +
  scale_fill_tableau() +
  theme_bw()

# Each condition bar scaled to 1 (share of each cell type)
ggplot(cond_dat, aes(x = cond, y = n_cells, fill = cell_types)) +
  geom_col(position = "fill") +
  scale_fill_tableau() +
  theme_bw()
Distribution of cells within umap
all together
# UMAP of all cells together, coloured by `ident`, with legend, no labels
DimPlot(
  seurat,
  group.by = "ident",
  # split.by = "cond",
  reduction = "umap",
  label = FALSE
) +
  ggtitle("")
# theme(legend.position = "none")

# UMAP split by condition, clusters labelled in the plot, legend hidden
DimPlot(
  seurat,
  group.by = "ident",
  split.by = "cond",
  reduction = "umap",
  label = TRUE
) +
  ggtitle("") +
  theme(legend.position = "none")

# UMAP split by condition, with legend, no labels
DimPlot(
  seurat,
  group.by = "ident",
  split.by = "cond",
  reduction = "umap",
  label = FALSE
) +
  ggtitle("")
# theme(legend.position = "none")
Distribution of broad cell types
# UMAP split by condition, coloured and labelled by broad cell type
DimPlot(
  seurat,
  group.by = "cell_types",
  split.by = "cond",
  reduction = "umap",
  label = TRUE
) +
  ggtitle("")
Cluster marker gene expression
| Cluster | Genes | Cluster | Genes | Cluster | Genes |
|---|---|---|---|---|---|
| 0 - Monocytes/Macrophages | Cxcr4, Notch2, Itgax, Csf2rb, Il1r2 | 9 - LECs | Prox1, Pecam1 | 18 - Macrophages | Cxcr4, Il1a, Cd80, Csf3r, Il1r2 |
| 1 - Monocytes/Macrophages | Cxcr4, Notch2, Itgam | 10 - Tregs | Cd3e, Cd4, Sell, Tigit, Foxp3, Ctla4 | 19 - DCs | Cd74, H2-Ab1, H2-Aa |
| 2 - Granulocytes | Notch2, Itgax, Cd80, Csf2rb, Il1r2 | 11 - TGD | Cd3e, Tcf7, Trdc | 20 - Keratinocytes | Krt15, Krt5 |
| 3 - TGD | Tcf7, Lef1, Sell, Tigit, Trdc | 12 - DC | Ccr7, Il12b, Cd80, Csf2rb | 21 - B cells | Cd19, Cd79a |
| 4 - cDC2 | Notch2, Itgam | 13 - Fibroblasts | Acta2, Pdgfrb | 22 - Monocytes | Itgax |
| 5 - Macrophages | Notch2, Itgax, Itgam, Csf2rb | 14 - Mast cells | Notch2, Csf2rb, Cd63 | 23 - TGD | Tcf7, Trdc |
| 6 - Monocytes | Itgam | 15 - T cells | Cd3e, Tcf7, Cxcr4 | 24 - T cells | Ccl3, Ccl4 |
| 7 - exhausted CD8 T cells | Cd3e, Cd8a, Pdcd1 | 16 - naive CD8 T cells | Cd3e, Cd8a, Tcf7, Ctla4 | | |
| 8 - LECs | Notch2, Prox1, Pecam1 | 17 - Osteoclasts | Ctsk | | |
Dimplots
# Expression of every cluster marker on the embedding, three panels per row
FeaturePlot(
  seurat,
  features = cluster_marker,
  ncol = 3
)
Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features),
: The following requested variables were not found: Cd62l
Violinplots
# Group cells by broad cell type for the plots that follow
Idents(seurat) <- "cell_types"

# Violin plots of all cluster markers (SCT assay), three panels per row
VlnPlot(
  object = seurat,
  features = cluster_marker,
  ncol = 3,
  assay = "SCT",
  add.noise = FALSE
)
Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
following requested variables were not found: Cd62l
Dotplots
# Dot plot of the cluster markers; x-axis labels are rotated by 45 degrees
marker_axis_text <- element_text(angle = 45, vjust = 1, hjust = 1)
DotPlot(seurat, features = cluster_marker) +
  theme(axis.text.x = marker_axis_text)
Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
The following requested variables were not found: Cd62l
Conditional marker expression
Expression by condition as Dimplot
# Expression of the condition markers on the embedding, one panel per condition
# NOTE(review): the Seurat documentation describes `ncol` as ignored when
# `split.by` is set -- confirm and drop the argument if so.
FeaturePlot(
  seurat,
  features = cond_marker,
  ncol = 3,
  split.by = "cond"
)
Expression as violin plot grouped by cluster
# Violin plots of the condition markers per identity class, split by condition.
# `+` binds tighter than `&`, so plot_layout() is added to the patchwork first
# and the fill scale and theme are then applied to every panel.
VlnPlot(
  object = seurat,
  features = cond_marker,
  assay = "SCT",
  add.noise = FALSE,
  split.by = "cond",
  ncol = 1
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
Expression as violin plot clusters combined
# Violin plots of the condition markers with all cells grouped by condition
# only; the fill scale and theme are applied to every panel via `&`.
VlnPlot(
  object = seurat,
  features = cond_marker,
  group.by = "cond",
  assay = "SCT",
  add.noise = FALSE,
  ncol = 2
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()
Expression as violin plot cluster 7
# Switch the identities back to the numbered clusters stored in `ident`
Idents(seurat) <- "ident"

# Subset your Seurat object to only include cells from cluster 7
cluster7_seurat <- subset(seurat, idents = 7)

# Create the violin plot for cluster 7
VlnPlot(
  object = cluster7_seurat,
  features = cond_marker,
  group.by = "cond",
  assay = "SCT",
  add.noise = FALSE,
  ncol = 2
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()
Expression as violin plot cluster 16
# Subset your Seurat object to only include cells from cluster 16
# (uses the identities that were set to `ident` before the cluster 7 subset)
cluster16_seurat <- subset(seurat, idents = 16)

# Create the violin plot for cluster 16
VlnPlot(
  object = cluster16_seurat,
  features = cond_marker,
  group.by = "cond",
  assay = "SCT",
  add.noise = FALSE,
  ncol = 2
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()