remotes::install_github(repo = "samuel-marsh/scCustomize")
SCC4.data = Read10X(data.dir = "./Data/cervical_cancer_data/Qiu et al/SCC4//")
SCC4 <- CreateSeuratObject(counts = SCC4.data, project = "scc", min.cells = 3, min.features = 200)
SCC4[["percent.mt"]] <- PercentageFeatureSet(SCC4, pattern = "^MT-")
SCC4 <- subset(SCC4, subset = percent.mt < 10)
SCC4 <- NormalizeData(SCC4)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
SCC5.data = Read10X(data.dir = "./Data/cervical_cancer_data/Qiu et al/SCC5/")
SCC5 <- CreateSeuratObject(counts = SCC5.data, project = "scc", min.cells = 3, min.features = 200)
SCC5[["percent.mt"]] <- PercentageFeatureSet(SCC5, pattern = "^MT-")
SCC5 <- subset(SCC5, subset = percent.mt < 10)
SCC5 <- NormalizeData(SCC5)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
mtx <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917937_SCC_1_matrix.mtx.gz"
cells <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917937_SCC_1_barcodes.tsv.gz"
features <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917937_SCC_1_features.tsv.gz"
SCC1.data <- ReadMtx(mtx = mtx, cells = cells, features = features)
SCC1 <- CreateSeuratObject(counts = SCC1.data, project = "scc", min.cells = 3, min.features = 200)
SCC1[["percent.mt"]] <- PercentageFeatureSet(SCC1, pattern = "^MT-")
SCC1 <- subset(SCC1, subset = percent.mt < 10)
SCC1 <- NormalizeData(SCC1)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
mtx <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917938_SCC_2_matrix.mtx.gz"
cells <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917938_SCC_2_barcodes.tsv.gz"
features <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917938_SCC_2_features.tsv.gz"
SCC2.data <- ReadMtx(mtx = mtx, cells = cells, features = features)
SCC2 <- CreateSeuratObject(counts = SCC2.data, project = "scc", min.cells = 3, min.features = 200)
SCC2[["percent.mt"]] <- PercentageFeatureSet(SCC2, pattern = "^MT-")
SCC2 <- subset(SCC2, subset = percent.mt < 10)
SCC2 <- NormalizeData(SCC2)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
mtx <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917939_SCC_3_matrix.mtx.gz"
cells <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917939_SCC_3_barcodes.tsv.gz"
features <- "./Data/cervical_cancer_data/Qiu et al/GSE197461/GSM5917939_SCC_3_features.tsv.gz"
SCC3.data <- ReadMtx(mtx = mtx, cells = cells, features = features)
SCC3 <- CreateSeuratObject(counts = SCC3.data, project = "scc", min.cells = 3, min.features = 200)
SCC3[["percent.mt"]] <- PercentageFeatureSet(SCC3, pattern = "^MT-")
SCC3 <- subset(SCC3, subset = percent.mt < 10)
SCC3 <- NormalizeData(SCC3)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
VlnPlot(object = SCC1,features = "MYB")+ggtitle("SCC1")|VlnPlot(object = SCC2,features = "MYB")+ggtitle("SCC2")|VlnPlot(object = SCC3,features = "MYB")+ggtitle("SCC3")|VlnPlot(object = SCC4,features = "MYB")+ggtitle("SCC4")|VlnPlot(object = SCC5,features = "MYB",group.by = "orig.ident")+ggtitle("SCC5")
SCC5[["percent.mt"]] <- PercentageFeatureSet(SCC5, pattern = "^MT-")
SCC5 <- subset(SCC5, subset = percent.mt < 10)
SCC5 <- NormalizeData(SCC5)
SCC5 <- FindVariableFeatures(SCC5,nfeatures = 2000)
SCC5 <- ScaleData(SCC5,features = VariableFeatures(SCC5),vars.to.regress = c("percent.mt","nCount_RNA"))
SCC5 %<>% RunPCA( features = VariableFeatures(object = SCC5))
ElbowPlot(SCC5,ndims = 50)
SCC5 %<>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.2) %>% RunUMAP(dims = 1:20)
DimPlot(SCC5,label = T)
FeaturePlot(SCC5,features = "MYB")
library(patchwork)
p1 = FeaturePlot(object = SCC5,features = c("KRT18","EPCAM")) + plot_annotation(
title = 'epithelial markers')
p2 = FeaturePlot(object = SCC5,features = c("VWF", "PLVAP"))+ plot_annotation(
title = 'endothelial markers')
p3 = FeaturePlot(object = SCC5,features = c("COL1A1", "LUM"),ncol = 1)+ plot_annotation(
title = 'fibroblasts markers')
p4 = FeaturePlot(object = SCC5,features = c("ACTA2", "TAGLN"),ncol = 1)+ plot_annotation(
title = 'smooth muscle markers')
p5 = FeaturePlot(object = SCC5,features = c("CSF3R"),ncol = 1)+ plot_annotation(
title = 'neutrophils markers')
p6 = FeaturePlot(object = SCC5,features = c("CPA3"),ncol = 1)+ plot_annotation(
title = 'mast markers')
p7 = FeaturePlot(object = SCC5,features = c("FCN1"),ncol = 1)+ plot_annotation(
title = 'monocytes markers')
p8 = FeaturePlot(object = SCC5,features = c("C1QB","LYZ"),ncol = 1)+ plot_annotation(
title = 'macrophages markers')
p9 = FeaturePlot(object = SCC5,features = c("IRF8"),ncol = 1)+ plot_annotation(
title = 'dendritic markers')
p10 = FeaturePlot(object = SCC5,features = c("JCHAIN","MZB1"),ncol = 1)+ plot_annotation(
title = 'B/plasma markers')
p11= FeaturePlot(object = SCC5,features = c("CD2","CD3D"),ncol = 1)+ plot_annotation(
title = 'T markers')
for (p in list(p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11)) {
print_tab(plt = p,title = p$patches$annotation$title)
}
NA
SCC5_cancer <- subset(SCC5, subset = seurat_clusters %in% c(0,10,3))
all_reads_per_cell = c()
runs = c("20330695","20330696","20330697","20330698")
for (run in runs) {
hpv_bam = read_tsv(file = "./Data/cervical_cancer_data/Qiu et al/SCC5/HPV/" %s+% run %s+% "_reads.tsv",col_select = 1,col_names = F)
reads_per_cell = str_extract(pattern = "CR:Z:.*$",string = hpv_bam$X1) %>% str_sub( start = 6)
all_reads_per_cell = c(reads_per_cell,all_reads_per_cell)
}
Warning: One or more parsing issues, see `problems()` for details
Rows: 4716 Columns: 1
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, see `problems()` for details
Rows: 4656 Columns: 1
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, see `problems()` for details
Rows: 5799 Columns: 1
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, see `problems()` for details
Rows: 4392 Columns: 1
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_reads_per_cell = all_reads_per_cell%>% table() %>% as.data.frame() %>% column_to_rownames(".")
rownames(all_reads_per_cell) = paste0(rownames(all_reads_per_cell),"-1")
all_reads_per_cell
zero_cells = colnames(SCC5_cancer)[!colnames(SCC5_cancer) %in% rownames(all_reads_per_cell)]
zero_cells_freq = data.frame(row.names = zero_cells,Freq = rep(0,length(zero_cells)))
reads_per_cell_all_cells = rbind(all_reads_per_cell,zero_cells_freq)
SCC5_cancer %<>% AddMetaData(metadata = reads_per_cell_all_cells,col.name = "HPV_reads")
hpv_reads = FetchData(object = SCC5_cancer,vars = "HPV_reads")
data = hpv_reads %>% mutate("0 reads" = if_else(condition = HPV_reads == 0,true = 1,false = 0))
data = data %>% mutate("1 reads" = if_else(condition = HPV_reads == 1,true = 1,false = 0))
data = data %>% mutate("2 reads" = if_else(condition = HPV_reads == 2,true = 1,false = 0))
data = data %>% mutate("3-23 reads" = if_else(condition = HPV_reads >=3 &HPV_reads <24,true = 1,false = 0))
data = data %>% mutate("24+ reads" = if_else(condition = HPV_reads >=24,true = 1,false = 0))
data = colSums(data[,2:ncol(data)]) %>% as.data.frame()
names(data)[1] = "count"
data = rownames_to_column(data,var = "bin")
ggplot(data=data, aes(x=factor(bin,levels = c("0 reads","1 reads","2 reads","3-23 reads","24+ reads")), y=count)) +
geom_bar(stat="identity", fill="steelblue") + xlab("HPV Reads")+ theme_minimal()+
geom_text(aes(label=count), vjust=-0.5, color="black", size=3.5)
hpv_positive = hpv_reads %>% dplyr::mutate(hpv_positive = case_when(HPV_reads >= 2 ~ "positive",
HPV_reads < 2 ~ "negative")
)
SCC5_cancer = AddMetaData(object = SCC5_cancer,metadata = hpv_positive)
myb_data = FetchData(object = SCC5_cancer,vars = ("MYB"))%>% dplyr::mutate(myb_positive = case_when(MYB > 0 ~ "positive",
MYB == 0 ~ "negative")) %>% dplyr::select("myb_positive")
SCC5_cancer = AddMetaData(object = SCC5_cancer,metadata = myb_data,col.name = "MYB_positive")
DimPlot(object = SCC5_cancer,group.by = c("hpv_positive"),pt.size = 0.5)
DimPlot(object = SCC5_cancer,group.by = c("MYB_positive"),pt.size = 0.5)
FeaturePlot(object = SCC5_cancer,features = c("HPV_reads"),pt.size = 0.5)
library(ggrepel)
data = FetchData(object = SCC5_cancer,vars = c("MYB","HPV_reads"))
ggplot(data,
aes(x = MYB, y = HPV_reads)) + geom_smooth(method = lm) +
geom_point() + stat_cor(method = "pearson")
`geom_smooth()` using formula 'y ~ x'
myb_vs_hpv = FetchData(object = SCC5_cancer, vars = c("hpv_positive", "MYB"))
myb_vs_hpv$hpv_positive = paste("HPV", myb_vs_hpv$hpv_positive)
p1 = ggplot(myb_vs_hpv,aes( x = hpv_positive, y = MYB,, fill=hpv_positive))+
geom_violin(trim=FALSE) + theme_minimal()+
stat_compare_means(method = "wilcox.test",comparisons = list(c("HPV positive","HPV negative")))+
stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("log2(gene)")+ggtitle(gene) +
geom_boxplot(width=.1, outlier.shape=NA)
print_tab(p1,title = gene)
## MYB {.unnumbered }
NA
myb_vs_hpv = FetchData(object = SCC5_cancer, vars = c("HPV_reads", "MYB_positive"))
p2 = ggplot(myb_vs_hpv,aes( x = MYB_positive, y =HPV_reads , fill=MYB_positive))+
geom_violin(trim=FALSE) + theme_minimal()+
stat_compare_means(method = "wilcox.test",comparisons = list(c("positive","negative")))+
stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text")+
geom_boxplot(width=.1, outlier.shape=NA)
print_tab(p2,title = gene)
## MYB {.unnumbered }
NA
p1+p2 +plt
library(ggstatsplot)
df = FetchData(object = SCC5_cancer,vars = c("hpv_positive","MYB_positive")) %>% droplevels()
test = fisher.test(table(df))
plt = ggbarstats(
df,
MYB_positive,
hpv_positive,
results.subtitle = FALSE,
subtitle = paste0("Fisher's exact test", ", p-value = ",
round(test$p.value, 13))
)
print_tab(plt = plt,title = "fisher")
## fisher {.unnumbered }
NA