vis_LEC_tumor_skin

Visualize expression across different conditions (LECs in tumor and skin)

The experiment consists of 3 main factors of interest:

  • sample origin: tumor, skin, lymph node

  • main cell type class: leucocytes, LECs

  • tumor type: YUMM, YUMMER

As a first check, we are interested in the expression of specific, known marker genes and how it varies across combinations of these factors.

Preamble

library(dplyr)
library(tidyr)
library(ggplot2)
library(pheatmap)
library(patchwork)
library(Seurat)
library(stringr)
library(ggthemes)
library(SingleCellExperiment)
library(gridExtra)

Data objects

From the FGCZ we have different data objects:

  1. Tumor LECs integrated (only tumor LECs, but have been integrated with skin samples) (data/scData_LEC_tumor.rds)

  2. Tumor + skin LECs integrated (data/scData_LEC_tumor_skin.rds)

  3. All skin + tumor cells

  4. All lymph node cells

Here we work with object 2 - Integrated tumor and skin LECs.

#### ----- change to local path to scData.rds object here ----- #####
# e.g. seurat <- readRDS("/home/project/data_folder/scData.rds")
# Read the serialized object that every chunk below operates on.
seurat <- readRDS(
  "/Users/thomarin/Documents/PhD/Tumor project/Sequencing experiment/August 2023 first analysis shallow sequencing/rds file/scData_LEC.rds"
)
#### --------------------------------------------------------- #####

# correct condition assignment!!
# Derive the condition label of every cell from its sample name. The patterns
# are tried in order: "YUMM" followed by a digit -> "YUMM", "YUMMER" ->
# "YUMMER", "Skin" -> "skin".
sample_id <- as.character(seurat$Sample)
cond_label <- case_when(
  str_detect(sample_id, "YUMM[0-9]") ~ "YUMM",
  str_detect(sample_id, "YUMMER") ~ "YUMMER",
  str_detect(sample_id, "Skin") ~ "skin"
)

# case_when() returns NA when no pattern matches; fail loudly instead of
# carrying unlabelled cells into the per-condition summaries and plots.
unmatched_samples <- unique(sample_id[is.na(cond_label)])
if (length(unmatched_samples) > 0) {
  stop(
    "No condition assigned for sample(s): ",
    paste(unmatched_samples, collapse = ", "),
    call. = FALSE
  )
}

# Store the labels as a plain per-cell vector (one entry per cell, in cell
# order) rather than as a one-column data frame.
seurat$cond <- cond_label

#seurat <- subset(seurat, idents = '5', invert = TRUE)
#seurat$ident <- droplevels(seurat$ident)
#levels(seurat$ident) <- c("0", "1", "2", "3", "4", "5", "6")
#seurat <- RenameIdents(object = seurat, `6` = "5")
#seurat <- RenameIdents(object = seurat, `7` = "6")

# check assignment
# useNA = "ifany" adds an NA row/column when present, so cells lacking a sample
# or condition label show up instead of being dropped from the cross-tabulation
table(seurat$Sample, seurat$cond, useNA = "ifany")
                      
                       skin YUMM YUMMER
  SkinLECs_Leukocytes1  469    0      0
  SkinLECs_Leukocytes2  669    0      0
  TumorYUMM1_1A           0   17      0
  TumorYUMM1_1B           0   18      0
  TumorYUMM2_1A           0   57      0
  TumorYUMM2_1B           0   49      0
  TumorYUMM5_2A           0   44      0
  TumorYUMM5_2B           0   44      0
  TumorYUMM6_2A           0   58      0
  TumorYUMM6_2B           0   72      0
  TumorYUMMER3_1A         0    0     66
  TumorYUMMER3_1B         0    0     44
  TumorYUMMER4_1A         0    0     81
  TumorYUMMER4_1B         0    0     68
  TumorYUMMER7_2A         0    0     86
  TumorYUMMER7_2B         0    0    101
  TumorYUMMER8_2A         0    0    160
  TumorYUMMER8_2B         0    0    135
# number of cells per condition; an NA column appears if any cell is unlabelled
table(seurat$cond, useNA = "ifany")

  skin   YUMM YUMMER 
  1138    359    741 
# Use the "SCT" assay as the default for all following plots.
DefaultAssay(seurat) <- "SCT"

# Genes shown in the cluster-level plots (order is the plotting order).
cluster_marker <- c(
  "Prox1", "Pdpn", "Ackr4", "Foxp2", "Cldn11", "Alcam", "Esam", "Cd24a",
  "Lyve1", "Ptx3", "Mrc1", "Ackr2", "Ccl21a", "Reln", "Mmrn1", "Mki67",
  "Aurkb", "Jam2", "Flt1", "Plvap", "Ccl2", "Cxcl2", "Mmrn2", "Ccdc3",
  "Ccnb2", "Cdk1"
)

# Genes shown in the per-condition plots (order is the plotting order).
cond_marker <- c(
  "Lyve1", "Ptx3", "Cldn11", "Mrc1", "Ackr2", "Icam1", "Vcam1", "Sema3a",
  "Nectin2", "Cd274", "Cxcl12", "Ccl21a", "Stat1"
)

Overview

Mean molecule and gene counts

# mean molecules/group: average nCount_RNA over the cells of each condition
mean_mol <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_mol = mean(nCount_RNA))
mean_mol
# A tibble: 3 × 2
  cond   mean_mol
  <chr>     <dbl>
1 YUMM      5112.
2 YUMMER    5313.
3 skin      5433.
# mean unique genes/group: average nFeature_RNA over the cells of each condition
mean_feature <- seurat[[]] |>
  group_by(cond) |>
  summarise(mean_gene = mean(nFeature_RNA))
mean_feature
# A tibble: 3 × 2
  cond   mean_gene
  <chr>      <dbl>
1 YUMM       2131.
2 YUMMER     2216.
3 skin       2211.

Distribution of conditions per cluster

# Number of cells for every condition x cluster combination.
# .groups = "drop" returns an ungrouped tibble, so no grouping leaks into later
# steps and dplyr no longer prints its "grouped output" message.
cond_dat <- seurat[[]] |>
  group_by(cond, ident) |>
  summarise(n_cells = n(), .groups = "drop")
# Absolute number of cells per cluster, stacked by condition.
ggplot(cond_dat, aes(x = ident, y = n_cells, fill = cond)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_tableau() +
  theme_bw()

# Same data with every cluster bar scaled to 1: condition share per cluster.
ggplot(cond_dat, aes(x = ident, y = n_cells, fill = cond)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_tableau() +
  theme_bw()

# Axes swapped: cluster share within each condition.
ggplot(cond_dat, aes(x = cond, y = n_cells, fill = ident)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_tableau() +
  theme_bw()

Distribution of cluster within each condition

Distribution of cells within umap

#DimPlot(seurat, group.by = "ident", split.by = "cond", reduction = "umap") + ggtitle("")

# UMAP coloured by cluster, one panel per condition. Clusters are labelled
# directly on the plot, so the legend is hidden.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
DimPlot(
  seurat,
  group.by = "ident",
  split.by = "cond",
  reduction = "umap",
  label = TRUE,
  label.size = 7
) +
  ggtitle("") +
  theme(legend.position = "none")

Cluster marker gene expression

Dimplots

# One expression panel per cluster marker gene, arranged in three columns.
FeaturePlot(
  seurat,
  features = cluster_marker,
  ncol = 3
)

Violinplots

# Violin plot per cluster marker gene (SCT assay), three columns, with
# add.noise switched off.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
VlnPlot(
  object = seurat,
  features = cluster_marker,
  ncol = 3,
  assay = "SCT",
  add.noise = FALSE
)

Dotplots

# Dot plot of the cluster marker genes; x-axis labels are rotated by 45 degrees.
DotPlot(seurat, features = cluster_marker) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

Conditional marker expression

Expression by condition as Dimplot

# Expression of each conditional marker, split into one panel per condition.
# `ncol` is not passed: FeaturePlot ignores it whenever `split.by` is set, so
# the former `ncol = 3` had no effect on the layout.
FeaturePlot(
  object = seurat,
  features = cond_marker,
  split.by = "cond"
)

Expression as violin plot grouped by cluster

# Violin plots of the conditional markers (SCT assay), stacked in one column
# and split by condition.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
VlnPlot(
  object = seurat,
  features = cond_marker,
  assay = "SCT",
  add.noise = FALSE,
  split.by = "cond",
  ncol = 1
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()

Expression as violin plot grouped by condition

# Violin plots of the conditional markers (SCT assay) with cells grouped by
# condition, arranged in two columns.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
VlnPlot(
  object = seurat,
  features = cond_marker,
  group.by = "cond",
  assay = "SCT",
  add.noise = FALSE,
  ncol = 2
) +
  plot_layout(guides = "collect") &
  scale_fill_tableau() &
  theme_bw()