vis_LEC_tumor_skin

Visualize expression across different conditions (LECs in tumor and skin)

The experiment consists of three main factors of interest:

sample origin: tumor, skin, lymph node
main cell type class: leucocytes, LECs
tumor type: YUMM, YUMMER

As a first check, we are interested in the expression of specific known marker genes and how it varies across combinations of these factors.

Preamble

library(dplyr)
library(tidyr)
library(ggplot2)
library(pheatmap)
library(patchwork)
library(Seurat)
library(stringr)
library(ggthemes)
library(SingleCellExperiment)
library(gridExtra)

Data objects

From the FGCZ we have different data objects:

1. Tumor LECs integrated (only tumor LECs, but integrated together with the skin samples) (data/scData_LEC_tumor.rds)
2. Tumor + skin LECs integrated (data/scData_LEC_tumor_skin.rds)
3. All skin + tumor cells
4. All lymph node cells

Here we work with object 2 - Integrated tumor and skin LECs.

#### ----- change to local path to scData.rds object here ----- #####
# e.g. seurat <- readRDS("/home/project/data_folder/scData.rds")
seurat <- readRDS(
  file = "/Users/thomarin/Documents/PhD/Tumor project/Sequencing experiment/August 2023 first analysis shallow sequencing/rds file/scData_LEC.rds"
)
#### --------------------------------------------------------- #####

# Correct the condition assignment: derive one condition label per cell from
# its sample name and store it in the `cond` metadata column.
# The rules are tried top to bottom and the first match wins; a sample name
# matching none of them gets NA.
seurat$cond <- seurat[[]] |>
  mutate(
    cond = case_when(
      str_detect(Sample, pattern = "YUMM[0-9]") ~ "YUMM",
      str_detect(Sample, pattern = "YUMMER") ~ "YUMMER",
      str_detect(Sample, pattern = "Skin") ~ "skin"
    )
  ) |>
  select(cond)

# Remove every cell whose active identity is '5' (invert = TRUE keeps all
# other identities).
seurat <- subset(seurat, idents = '5', invert = TRUE)
# Drop levels of the `ident` metadata factor that no longer occur
# (presumably the level emptied by the subset above).
seurat$ident <- droplevels(seurat$ident)
# Relabel the remaining `ident` levels by position so the labels run 0-6
# without a gap.
# NOTE(review): assumes exactly seven levels remain at this point - confirm.
levels(seurat$ident) <- c("0", "1", "2", "3", "4", "5", "6")
# Shift the active identities `6` and `7` down by one as well. The two renames
# are deliberately separate calls in this order: `6` must already have become
# "5" before `7` is renamed to "6", otherwise the two groups would be merged.
seurat <- RenameIdents(object = seurat, `6` = "5")
seurat <- RenameIdents(object = seurat, `7` = "6")

# Check assignment: cross-tabulate sample names against the condition labels.
# table() silently drops NA by default, which would hide cells that received
# no condition label; useNA = "ifany" adds an NA row/column only when such
# cells exist, so the output is unchanged when every cell is labelled.
table(seurat$Sample, seurat$cond, useNA = "ifany")

                      
                       skin YUMM YUMMER
  SkinLECs_Leukocytes1  467    0      0
  SkinLECs_Leukocytes2  667    0      0
  TumorYUMM1_1A           0   11      0
  TumorYUMM1_1B           0   10      0
  TumorYUMM2_1A           0   41      0
  TumorYUMM2_1B           0   31      0
  TumorYUMM5_2A           0   40      0
  TumorYUMM5_2B           0   41      0
  TumorYUMM6_2A           0   49      0
  TumorYUMM6_2B           0   59      0
  TumorYUMMER3_1A         0    0     59
  TumorYUMMER3_1B         0    0     37
  TumorYUMMER4_1A         0    0     74
  TumorYUMMER4_1B         0    0     64
  TumorYUMMER7_2A         0    0     78
  TumorYUMMER7_2B         0    0     83
  TumorYUMMER8_2A         0    0    147
  TumorYUMMER8_2B         0    0    129

# Number of cells per condition. useNA = "ifany" makes cells without a
# condition label visible instead of silently dropping them from the count.
table(seurat$cond, useNA = "ifany")


  skin   YUMM YUMMER 
  1134    282    671

# Make "SCT" the default assay of the object for the calls that follow.
DefaultAssay(seurat) <- "SCT"

# Genes plotted in the "Cluster marker gene expression" section.
cluster_marker <- c(
  "Prox1", "Pdpn", "Ackr4", "Foxp2", "Cldn11", "Alcam", "Esam", "Cd24a",
  "Lyve1", "Ptx3", "Mrc1", "Ackr2", "Ccl21a", "Reln", "Mmrn1", "Mki67",
  "Aurkb", "Jam2", "Flt1", "Plvap", "Ccl2", "Cxcl2", "Mmrn2", "Ccdc3",
  "Ccnb2", "Cdk1"
)

# Genes plotted in the "Conditional marker expression" section.
cond_marker <- c(
  "Lyve1", "Ptx3", "Cldn11", "Mrc1", "Ackr2", "Icam1", "Vcam1", "Sema3a",
  "Nectin2", "Cd274", "Cxcl12", "Ccl21a", "Stat1"
)

Overview

Mean feature gene counts

# Mean molecule count (nCount_RNA) per cell, computed within each condition
mean_mol <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_mol = mean(nCount_RNA))
mean_mol

# A tibble: 3 × 2
  cond   mean_mol
  <chr>     <dbl>
1 YUMM      4952.
2 YUMMER    5113.
3 skin      5432.

# Mean number of unique genes (nFeature_RNA) per cell, computed within each
# condition
mean_feature <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_gene = mean(nFeature_RNA))
mean_feature

# A tibble: 3 × 2
  cond   mean_gene
  <chr>      <dbl>
1 YUMM       2096.
2 YUMMER     2177.
3 skin       2211.

Distribution of conditions per cluster

# Number of cells for every condition x cluster (`ident`) combination;
# this table feeds the bar plots below.
cond_dat <- seurat[[]] |>
  group_by(cond, ident) |>
  summarise(n_cells = n())

`summarise()` has grouped output by 'cond'. You can override using the
`.groups` argument.

# Absolute cell counts per cluster, stacked and coloured by condition
ggplot(cond_dat, aes(x = ident, y = n_cells, fill = cond)) +
  geom_col(position = "stack") +
  scale_fill_tableau() +
  theme_bw()

# Same data with every bar rescaled to 1: share of each condition per cluster
ggplot(cond_dat, aes(x = ident, y = n_cells, fill = cond)) +
  geom_col(position = "fill") +
  scale_fill_tableau() +
  theme_bw()

# Axes swapped: share of each cluster within every condition
ggplot(cond_dat, aes(x = cond, y = n_cells, fill = ident)) +
  geom_col(position = "fill") +
  scale_fill_tableau() +
  theme_bw()

Distribution of cluster within each condition

Distribution of cells within umap

#DimPlot(seurat, group.by = "ident", split.by = "cond", reduction = "umap") + ggtitle("")

# UMAP with one panel per condition, cells grouped and labelled by `ident`.
# The legend is switched off because the groups are labelled inside the plot.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
DimPlot(
  seurat,
  group.by = "ident",
  split.by = "cond",
  reduction = "umap",
  label = TRUE,
  label.size = 7
) +
  ggtitle("") +
  theme(legend.position = "none")

Cluster marker gene expression

Dimplots

# One expression panel per cluster marker gene, arranged in three columns
FeaturePlot(
  object = seurat,
  features = cluster_marker,
  ncol = 3
)

Violinplots

# Violin plot of every cluster marker gene from the SCT assay, three panels
# per row. `FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned.
VlnPlot(
  object = seurat,
  features = cluster_marker,
  ncol = 3,
  assay = "SCT",
  add.noise = FALSE
)

Dotplots

# Dot plot of the cluster marker genes; the gene labels on the x-axis are
# rotated by 45 degrees so they do not overlap.
DotPlot(object = seurat, features = cluster_marker) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

Conditional marker expression

Expression by condition as Dimplot

# Expression of the condition marker genes, one column of panels per
# condition. No `ncol` is passed: FeaturePlot ignores it whenever `split.by`
# is set, so the layout is determined by the number of conditions.
FeaturePlot(
  object = seurat,
  features = cond_marker,
  split.by = "cond"
)

Expression as violin plot grouped by cluster

# Violin plots of the condition marker genes from the SCT assay, one panel
# per gene stacked in a single column, with each violin split by condition.
# `+` binds tighter than `&`: the layout (shared legend) is added first, then
# the fill scale and theme are applied to every panel via `&`.
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned.
VlnPlot(
  object = seurat,
  features = cond_marker,
  assay = "SCT",
  add.noise = FALSE,
  split.by = "cond",
  ncol = 1
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()

Expression as violin plot grouped by condition

# Violin plots of the condition marker genes from the SCT assay with one
# violin per condition (group.by = "cond"), two panels per row.
# `+` binds tighter than `&`: the layout (shared legend) is added first, then
# the fill scale and theme are applied to every panel via `&`.
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned.
VlnPlot(
  object = seurat,
  features = cond_marker,
  group.by = "cond",
  assay = "SCT",
  add.noise = FALSE,
  ncol = 2
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()