vis_tumor_CD45_leukocytes

Visualize expression across different conditions (Leukocytes in tumor and skin)

The experiment consists of 3 main factors of interest:

  • sample origin: tumor, skin, lymph node

  • main cell type class: leucocytes, LECs

  • tumor type: YUMM, YUMMER

As a first check, we are interested in the expression of specific, known marker genes and in how it varies across combinations of these factors.

Preamble

library(dplyr)
library(tidyr)
library(ggplot2)
library(pheatmap)
library(patchwork)
library(Seurat)
library(stringr)
library(ggthemes)
library(forcats)
library(gridExtra)

library(ggrepel)

Data objects

From the FGCZ we have different data objects:

  1. Tumor LECs integrated (only tumor LECs, but have been integrated with skin samples) (data/scData_LEC_tumor.rds)

  2. Tumor + skin LECs integrated (data/scData_LEC_tumor_skin.rds)

  3. All skin + tumor cells

  4. All lymph node cells

Here we work with object 3 - Integrated tumor and skin leukocytes.

#### ----- change to local path to scData.rds object here ----- #####
# e.g. seurat <- readRDS("/home/project/data_folder/scData.rds")
# Load the object with the integrated tumor and skin leukocytes.
seurat <- readRDS(
  "/Users/thomarin/Documents/PhD/Tumor project/Sequencing experiment/August 2023 first analysis shallow sequencing/rds file/scData_tumor_CD45.rds"
)
#### --------------------------------------------------------- #####


# correct condition assignment!!
# Derive the condition (skin / YUMM / YUMMER) of every cell from its sample
# name. "YUMM[0-9]" requires a digit right after "YUMM", so it does not match
# the "YUMMER" samples. Sample names matching none of the patterns become NA.
seurat$cond <- seurat[[]] |>
  mutate(
    cond = case_when(
      str_detect(Sample, "YUMM[0-9]") ~ "YUMM",
      str_detect(Sample, "YUMMER") ~ "YUMMER",
      str_detect(Sample, "Skin") ~ "skin"
    )
  ) |>
  # store a plain character vector (one value per cell, in cell order)
  pull(cond)

# check assignment
# table() drops NA by default; useNA = "ifany" makes cells whose sample name
# matched no pattern show up as an extra NA column instead of disappearing.
table(seurat$Sample, seurat$cond, useNA = "ifany")
                      
                       skin YUMM YUMMER
  SkinLECs_Leukocytes1 1510    0      0
  SkinLECs_Leukocytes2 2351    0      0
  TumorYUMM1_1A           0  664      0
  TumorYUMM1_1B           0  586      0
  TumorYUMM2_1A           0 1886      0
  TumorYUMM2_1B           0 1652      0
  TumorYUMM5_2A           0 1829      0
  TumorYUMM5_2B           0 1603      0
  TumorYUMM6_2A           0 2380      0
  TumorYUMM6_2B           0 2267      0
  TumorYUMMER3_1A         0    0   1031
  TumorYUMMER3_1B         0    0    887
  TumorYUMMER4_1A         0    0   1442
  TumorYUMMER4_1B         0    0   1234
  TumorYUMMER7_2A         0    0   2015
  TumorYUMMER7_2B         0    0   1898
  TumorYUMMER8_2A         0    0   2593
  TumorYUMMER8_2B         0    0   2379
# cells per condition; useNA = "ifany" also reports cells without a condition
table(seurat$cond, useNA = "ifany")

  skin   YUMM YUMMER 
  3861  12867  13479 
# Collapse the numbered clusters (0-24) stored in `ident` into broad cell type
# classes and keep the result as the `cell_types` metadata column.
seurat$cell_types <- forcats::fct_collapse(
  as.factor(seurat$ident),
  "monocytes/macrophages" = c("0", "1", "2", "5", "6", "18", "22"),
  "DCs" = c("4", "12", "19"),
  "CD4 T cells" = c("10"),
  "CD8 T cells" = c("7", "16"),
  "other T cells" = c("3", "11", "15", "23", "24"),
  "endothelial cells/fibroblasts" = c("8", "9", "13"),
  "mast cells" = c("14"),
  "B cells" = c("21"),
  "other" = c("17", "20")
)


# Set the default assay of the object to "SCT"; calls below that do not pass
# `assay` explicitly are expected to use it.
DefaultAssay(seurat) <- "SCT"

# Marker genes used to characterise the clusters.
# "Cd62l" was removed: it is not a feature name in the object (every plot
# warned "requested variables were not found: Cd62l"). CD62L is the protein
# encoded by the gene Sell, which is already part of this list.
cluster_marker <- c(
  "Cd3e", "Cd8a", "Cd4", "Tcf7", "Lef1", "Pdcd1", "Sell", "Tigit",
  "Cxcr4", "Ccr7", "Foxp3", "Ctla4", "Trdc", "Il12b", "Itgae", "Xcr1",
  "Cd207", "Notch2", "Itgax", "Itgam", "Il1a", "Cd80", "Cd19", "Prox1",
  "Pecam1", "Csf2rb", "Csf3r", "Il1r2", "Acta2", "Pdgfrb", "Ctsk", "Cd74",
  "H2-Ab1", "H2-Aa", "Krt15", "Krt5", "Cd79a", "Ccl3", "Ccl4", "Cd63",
  "Cxcr3", "Fcgr1", "Fcgr3", "Ncam1", "Cd24a", "Klrb1c"
)

# Markers whose expression is compared between conditions.
cond_marker <- c(
  "Cd3e", "Cd8a", "Cd4", "Tcf7", "Lef1", "Pdcd1", "Foxp3", "Cxcr4", "Cxcr3"
)

Overview

Mean features/molecules per condition

# average number of molecules (nCount_RNA) per cell, by condition
mean_mol <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_mol = mean(nCount_RNA))
mean_mol
# A tibble: 3 × 2
  cond   mean_mol
  <chr>     <dbl>
1 YUMM      4942.
2 YUMMER    4687.
3 skin      5856.
# average number of unique genes (nFeature_RNA) per cell, by condition
mean_feature <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_gene = mean(nFeature_RNA))
mean_feature
# A tibble: 3 × 2
  cond   mean_gene
  <chr>      <dbl>
1 YUMM       1960.
2 YUMMER     1872.
3 skin       2199.

Distribution of conditions per cluster

# Number of cells per condition and broad cell type.
# .groups = "drop" returns an ungrouped table, so no grouping by `cond` leaks
# into later steps and the summarise() grouping message is not emitted.
cond_dat <- seurat[[]] |>
  group_by(cond, cell_types) |>
  summarise(n_cells = n(), .groups = "drop")
`summarise()` has grouped output by 'cond'. You can override using the
`.groups` argument.
# absolute number of cells per cell type, stacked by condition
ggplot(cond_dat, aes(x = cell_types, y = n_cells, fill = cond)) +
  geom_col(position = "stack") +
  scale_fill_tableau() +
  theme_bw()

# relative contribution of each condition within a cell type
ggplot(cond_dat, aes(x = cell_types, y = n_cells, fill = cond)) +
  geom_col(position = "fill") +
  scale_fill_tableau() +
  theme_bw()

# relative cell type composition within each condition
ggplot(cond_dat, aes(x = cond, y = n_cells, fill = cell_types)) +
  geom_col(position = "fill") +
  scale_fill_tableau() +
  theme_bw()

Distribution of cells within umap

all together

# UMAP of all cells coloured by cluster (`ident`), conditions combined.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, TRUE and FALSE cannot.
DimPlot(seurat, group.by = "ident",
        #split.by = "cond",
        reduction = "umap",
        label = FALSE) +
  ggtitle("")

  #theme(legend.position = "none")
# one panel per condition, clusters labelled in the plot, legend hidden
DimPlot(seurat, group.by = "ident",
        split.by = "cond",
        reduction = "umap",
        label = TRUE) +
  ggtitle("") +
  theme(legend.position = "none")

# one panel per condition, no cluster labels, legend kept
DimPlot(seurat, group.by = "ident",
        split.by = "cond",
        reduction = "umap",
        label = FALSE) +
  ggtitle("")

  #theme(legend.position = "none")

Distribution of broad cell types

# UMAP coloured by broad cell type (`cell_types`), one panel per condition.
# TRUE is spelled out because T is a reassignable variable.
DimPlot(seurat, group.by = "cell_types",
        split.by = "cond",
        reduction = "umap",
        label = TRUE) +
  ggtitle("")

Cluster marker gene expression

| Cluster | Genes | Cluster | Genes | Cluster | Genes |
|---|---|---|---|---|---|
| 0 - Monocytes/Macrophages | Cxcr4, Notch2, Itgax, Csf2rb, Il1r2 | 9 - LECs | Prox1, Pecam1 | 18 - Macrophages | Cxcr4, Il1a, Cd80, Csf3r, Il1r2 |
| 1 - Monocytes/Macrophages | Cxcr4, Notch2, Itgam | 10 - Tregs | Cd3e, Cd4, Sell, Tigit, Foxp3, Ctla4 | 19 - DCs | Cd74, H2-Ab1, H2-Aa |
| 2 - Granulocytes | Notch2, Itgax, Cd80, Csf2rb, Il1r2 | 11 - TGD | Cd3e, Tcf7, Trdc | 20 - Keratinocytes | Krt15, Krt5 |
| 3 - TGD | Tcf7, Lef1, Sell, Tigit, Trdc | 12 - DC | Ccr7, Il12b, Cd80, Csf2rb | 21 - B cells | Cd19, Cd79a |
| 4 - cDC2 | Notch2, Itgam | 13 - Fibroblasts | Acta2, Pdgfrb | 22 - Monocytes | Itgax |
| 5 - Macrophages | Notch2, Itgax, Itgam, Csf2rb | 14 - Mast cells | Notch2, Csf2rb, Cd63 | 23 - TGD | Tcf7, Trdc |
| 6 - Monocytes | Itgam | 15 - T cells | Cd3e, Tcf7, Cxcr4 | 24 - T cells | Ccl3, Ccl4 |
| 7 - exhausted CD8 T cells | Cd3e, Cd8a, Pdcd1 | 16 - naive CD8 T cells | Cd3e, Cd8a, Tcf7, Ctla4 | | |
| 8 - LECs | Notch2, Prox1, Pecam1 | 17 - Osteoclasts | Ctsk | | |

Dimplots

# expression of every cluster marker on the embedding, three panels per row
FeaturePlot(
  seurat,
  features = cluster_marker,
  ncol = 3
)
Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features),
: The following requested variables were not found: Cd62l

Violinplots

# Switch the active identities to the broad cell types; this stays in effect
# for the following plots until Idents() is set again.
Idents(seurat) <- "cell_types"
# FALSE is spelled out because F is a reassignable variable.
VlnPlot(
  object = seurat,
  features = cluster_marker,
  ncol = 3,
  assay = "SCT",
  add.noise = FALSE
)
Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
following requested variables were not found: Cd62l

Dotplots

# dot plot of the cluster markers; gene labels on the x axis are rotated by
# 45 degrees so they do not overlap
DotPlot(seurat, features = cluster_marker) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )
Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
The following requested variables were not found: Cd62l

Conditional marker expression

Expression by condition as Dimplot

# expression of the condition markers on the embedding, split by condition
FeaturePlot(
  seurat,
  features = cond_marker,
  ncol = 3,
  split.by = "cond"
)

Expression as violin plot grouped by cluster

# One violin panel per condition marker, grouped by the active identities and
# split by condition. FALSE is spelled out because F is a reassignable variable.
# `+` binds tighter than `&`, so the layout is added first and the fill scale
# and theme are then applied with `&`.
VlnPlot(object = seurat,
        features = cond_marker,
        assay = "SCT",
        add.noise = FALSE,
        split.by = "cond",
        ncol = 1) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

Expression as violin plot clusters combined

# Condition markers across all cells, one violin per condition.
# FALSE is spelled out because F is a reassignable variable.
VlnPlot(object = seurat,
        features = cond_marker,
        group.by = "cond",
        assay = "SCT",
        add.noise = FALSE,
        ncol = 2) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()

Expression as violin plot cluster 7

# Make the numbered clusters the active identities again, so that cells can be
# selected by cluster id below.
Idents(seurat) <- "ident"

# Subset the Seurat object to only include cells from cluster 7
cluster7_seurat <- subset(seurat, idents = 7)

# Create the violin plot for cluster 7, one violin per condition.
# FALSE is spelled out because F is a reassignable variable.
VlnPlot(object = cluster7_seurat,
        features = cond_marker,
        group.by = "cond",
        assay = "SCT",
        add.noise = FALSE,
        ncol = 2) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()

Expression as violin plot cluster 16

# Keep only the cells of cluster 16; this selects by the active identities,
# which are the numbered clusters since Idents(seurat) <- "ident".
cluster16_seurat <- subset(x = seurat, idents = 16)

# Violin plot of the condition markers for cluster 16, one violin per condition
VlnPlot(
  object = cluster16_seurat,
  features = cond_marker,
  group.by = "cond",
  assay = "SCT",
  add.noise = FALSE,
  ncol = 2
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()