opscc = readRDS("./Rendered_notebooks/HPV_OPSCC_Analysis_PMC10191634/01_preprocess/opscc_suerat.RDS")
library(readxl)
metadata = read_excel(path = "./Input_data/HPV_OPSCC_Analysis_PMC10191634/NIHMS1892753-supplement-Supp_Tables.xlsx",
sheet = "S4 - Cell Table",progress = T,skip = 1,col_names = T)
-
/
/
-
# remove HPV- patients
hpv_neg<-c("OP10","OP12","OP16","OP19","OP8")
hpv_pos = unique(metadata$Patient)[! unique(metadata$Patient) %in% hpv_neg]
opscc_hpvPos = subset(opscc,subset = patient %in% hpv_pos)
name = "5Kvargenes"
opscc_hpvPos = SetIdent(object = opscc_hpvPos,value = "hpv")
opscc_hpvPos = FindVariableFeatures(object = opscc_hpvPos,nfeatures = 5000)
# find DEG
features = VariableFeatures(object = opscc_hpvPos)
deg = FindMarkers(object = opscc_hpvPos,ident.1 = "HPV+",ident.2 = "HPV-",
features = features,test.use = "LR",latent.vars = "patient",
logfc.threshold = 0,min.pct = 0.1,
mean.fxn = function(x) {
return((rowMeans(x)+1)) # change func to calculate logFC in log space data (default to exponent data)
})
deg$fdr<-p.adjust(p = as.vector(deg$p_val) ,method = "fdr")
# save
data_to_save = deg %>% dplyr::rename(avg_diff = avg_log2FC) #rename avg_log2FC because here we calculate diff
saveRDS(object = data_to_save,file = "./Data_out/opscc_deg_5Kvargenes.rds")
MYB-HPV cor
hpv_16_genes = rownames(opscc_hpvPos)[ startsWith(x =rownames(opscc_hpvPos),prefix = "HPV16")]
hpv_score = FetchData(object = opscc_hpvPos,vars = hpv_16_genes) %>% rowMeans()
hpv_score_df = data.frame(hpv_score,row.names = names(hpv_score))
opscc_hpvPos = AddMetaData(object = opscc_hpvPos,metadata = hpv_score_df)
data = opscc_hpvPos
myb_vs_hpv = FetchData(object = data, vars = c("hpv_score", "MYB","patient"),slot = "data")
sp <- ggscatter(myb_vs_hpv, x = "hpv_score", y = "MYB",
add = "reg.line", # Add regressin line
add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
conf.int = TRUE # Add confidence interval
) + facet_wrap(~ patient, scales="free")
# Add correlation coefficient
sp + stat_cor(method = "pearson")
`geom_smooth()` using formula 'y ~ x'

genes_by_tp = FetchData(object = opscc_hpvPos,vars = c("hpv", "MYB"))
formula <- as.formula("MYB ~ hpv")
#plot and split by patient:
stat.test = compare_means(formula = formula ,data = genes_by_tp,method = "wilcox.test",p.adjust.method = "fdr") # Add pairwise comparisons p-value
plt = ggplot(genes_by_tp, aes(x = hpv, y = MYB))+
geom_violin()+ geom_boxplot(width=0.1,outlier.shape = NA) +
ylim(min(genes_by_tp$MYB),max(genes_by_tp$MYB)*1.25)
plt = plt +stat_pvalue_manual(stat.test, label = "{p.adj}", #add p value
y.position = max(genes_by_tp$MYB)*1.08) # set position at the top value
plt

# plot by patients
genes_by_tp = FetchData(object = opscc,vars = c("MYB","patient")) %>%
dplyr::group_by(patient) %>%
summarise(MYB = mean(MYB)) %>%
mutate (hpv = if_else(
condition = patient %in% hpv_neg ,true = "HPV-",false = "HPV+"))
formula <- as.formula("MYB ~ hpv")
#plot and split by patient:
stat.test = compare_means(
formula = formula ,
data = genes_by_tp,
method = "wilcox.test",
p.adjust.method = "fdr"
) # Add pairwise comparisons p-value
plt = ggplot(genes_by_tp, aes(x = hpv, y = MYB)) +
geom_boxplot(width = 0.1,outlier.shape = NA) + geom_jitter(shape=16, position=position_jitter(0.2))
plt = plt + stat_pvalue_manual(stat.test,
label = paste0(stat.test$method, ", p = {p.adj}"),
#add p value
y.position = 0.2)+ ggtitle("By sample") # set position at the top value
plt

hpv_16_genes = rownames(opscc_hpvPos)[ startsWith(x =rownames(opscc_hpvPos),prefix = "HPV16")]
a = filter.umi(matrix = opscc_hpvPos@assays$RNA@counts,cells = colnames(opscc_hpvPos %>% subset(patient == "OP14")),whitelist = HPVgenes)
genes = hpv_16_genes[hpv_16_genes %in% rownames(a)]
score = a[genes,] %>% colMeans()
myb_vs_hpv = data.frame(hpv_score = score, MYB= a["MYB",],row.names = colnames(a))
sp <- ggscatter(myb_vs_hpv, x = "hpv_score", y = "MYB",
add = "reg.line", # Add regressin line
add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
conf.int = TRUE # Add confidence interval
)
# Add correlation coefficient
sp + stat_cor(method = "pearson")
LS0tCnRpdGxlOiAnYHIgcnN0dWRpb2FwaTo6Z2V0U291cmNlRWRpdG9yQ29udGV4dCgpJHBhdGggJT4lIGJhc2VuYW1lKCkgJT4lIGdzdWIocGF0dGVybiA9ICJcXC5SbWQiLHJlcGxhY2VtZW50ID0gIiIpYCcgCmF1dGhvcjogIkF2aXNoYWkgV2l6ZWwiCmRhdGU6ICdgciBTeXMudGltZSgpYCcKb3V0cHV0OiAKICBodG1sX25vdGVib29rOiAKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdG9jOiB5ZXMKICAgIHRvY19jb2xsYXBzZTogeWVzCiAgICB0b2NfZmxvYXQ6IAogICAgICBjb2xsYXBzZWQ6IEZBTFNFCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIHRvY19kZXB0aDogMQotLS0KCgoKCgpgYGB7cn0Kb3BzY2MgPSByZWFkUkRTKCIuL1JlbmRlcmVkX25vdGVib29rcy9IUFZfT1BTQ0NfQW5hbHlzaXNfUE1DMTAxOTE2MzQvMDFfcHJlcHJvY2Vzcy9vcHNjY19zdWVyYXQuUkRTIikKbGlicmFyeShyZWFkeGwpCm1ldGFkYXRhID0gcmVhZF9leGNlbChwYXRoID0gIi4vSW5wdXRfZGF0YS9IUFZfT1BTQ0NfQW5hbHlzaXNfUE1DMTAxOTE2MzQvTklITVMxODkyNzUzLXN1cHBsZW1lbnQtU3VwcF9UYWJsZXMueGxzeCIsCiAgICAgICAgICBzaGVldCA9ICJTNCAtIENlbGwgVGFibGUiLHByb2dyZXNzID0gVCxza2lwID0gMSxjb2xfbmFtZXMgPSBUKQoKIyByZW1vdmUgSFBWLSBwYXRpZW50cwpocHZfbmVnPC1jKCJPUDEwIiwiT1AxMiIsIk9QMTYiLCJPUDE5IiwiT1A4IikKaHB2X3BvcyA9IHVuaXF1ZShtZXRhZGF0YSRQYXRpZW50KVshIHVuaXF1ZShtZXRhZGF0YSRQYXRpZW50KSAlaW4lIGhwdl9uZWddCm9wc2NjX2hwdlBvcyA9IHN1YnNldChvcHNjYyxzdWJzZXQgPSBwYXRpZW50ICVpbiUgaHB2X3BvcykKYGBgCgpgYGB7cn0KbmFtZSA9ICI1S3ZhcmdlbmVzIgpvcHNjY19ocHZQb3MgPSBTZXRJZGVudChvYmplY3QgPSBvcHNjY19ocHZQb3MsdmFsdWUgPSAiaHB2IikKb3BzY2NfaHB2UG9zID0gRmluZFZhcmlhYmxlRmVhdHVyZXMob2JqZWN0ID0gb3BzY2NfaHB2UG9zLG5mZWF0dXJlcyA9IDUwMDApCiMgZmluZCBERUcKZmVhdHVyZXMgPSBWYXJpYWJsZUZlYXR1cmVzKG9iamVjdCA9IG9wc2NjX2hwdlBvcykKZGVnID0gRmluZE1hcmtlcnMob2JqZWN0ID0gb3BzY2NfaHB2UG9zLGlkZW50LjEgPSAiSFBWKyIsaWRlbnQuMiA9ICJIUFYtIiwKICAgICAgICAgICAgZmVhdHVyZXMgPSBmZWF0dXJlcyx0ZXN0LnVzZSA9ICJMUiIsbGF0ZW50LnZhcnMgPSAicGF0aWVudCIsCiAgICAgICAgICAgIGxvZ2ZjLnRocmVzaG9sZCA9IDAsbWluLnBjdCA9IDAuMSwKICAgICAgICAgICAgbWVhbi5meG4gPSBmdW5jdGlvbih4KSB7CiAgICAgICAgICAgICAgcmV0dXJuKChyb3dNZWFucyh4KSsxKSkgIyBjaGFuZ2UgZnVuYyB0byBjYWxjdWxhdGUgbG9nRkMgaW4gbG9nIHNwYWNlIGRhdGEgKGRlZmF1bHQgdG8gZXhwb25lbnQgZGF0YSkKICAgICAgICAgICAgfSkKZGVnJGZkcjwtcC5hZGp1c3QocCA9IGFzLnZlY3RvcihkZWckcF92YWwpICxtZXRob2QgPSAiZmRyIikKCmBgYAoKYGBge3J9CiMgc2F2ZQpkYXRhX3RvX3NhdmUgPSBkZWcgJT4lIGRwbHlyOjpyZW5hbWUoYXZnX2RpZmYgPSBhdmdfbG9nMkZDKSAjcmVuYW1lIGF2Z19sb2cyRkMgYmVjYXVzZSBoZXJlIHdlIGNhbGN1bGF0ZSBkaWZmCnNhdmVSRFMob2JqZWN0ID0gZGF0YV90b19zYXZlLGZpbGUgPSAiLi9EYXRhX291dC9vcHNjY19kZWdfNUt2YXJnZW5lcy5yZHMiKQpgYGAKCgojIE1ZQi1IUFYgY29yCgpgYGB7cn0KaHB2XzE2X2dlbmVzID0gcm93bmFtZXMob3BzY2NfaHB2UG9zKVsgc3RhcnRzV2l0aCh4ID1yb3duYW1lcyhvcHNjY19ocHZQb3MpLHByZWZpeCA9ICAiSFBWMTYiKV0KaHB2X3Njb3JlID0gRmV0Y2hEYXRhKG9iamVjdCA9IG9wc2NjX2hwdlBvcyx2YXJzID0gaHB2XzE2X2dlbmVzKSAlPiUgcm93TWVhbnMoKQpocHZfc2NvcmVfZGYgPSBkYXRhLmZyYW1lKGhwdl9zY29yZSxyb3cubmFtZXMgPSBuYW1lcyhocHZfc2NvcmUpKQpvcHNjY19ocHZQb3MgPSBBZGRNZXRhRGF0YShvYmplY3QgPSBvcHNjY19ocHZQb3MsbWV0YWRhdGEgPSBocHZfc2NvcmVfZGYpCmBgYAoKYGBge3IgZmlnLmhlaWdodD0xMCwgZmlnLndpZHRoPTEwfQpkYXRhID0gb3BzY2NfaHB2UG9zIApteWJfdnNfaHB2ID0gRmV0Y2hEYXRhKG9iamVjdCA9IGRhdGEsIHZhcnMgPSBjKCJocHZfc2NvcmUiLCAiTVlCIiwicGF0aWVudCIpLHNsb3QgPSAiZGF0YSIpCnNwIDwtIGdnc2NhdHRlcihteWJfdnNfaHB2LCB4ID0gImhwdl9zY29yZSIsIHkgPSAiTVlCIiwKICAgYWRkID0gInJlZy5saW5lIiwgICMgQWRkIHJlZ3Jlc3NpbiBsaW5lCiAgIGFkZC5wYXJhbXMgPSBsaXN0KGNvbG9yID0gImJsdWUiLCBmaWxsID0gImxpZ2h0Z3JheSIpLCAjIEN1c3RvbWl6ZSByZWcuIGxpbmUKICAgY29uZi5pbnQgPSBUUlVFICMgQWRkIGNvbmZpZGVuY2UgaW50ZXJ2YWwKICAgKSArICBmYWNldF93cmFwKH4gcGF0aWVudCwgc2NhbGVzPSJmcmVlIikKIyBBZGQgY29ycmVsYXRpb24gY29lZmZpY2llbnQKc3AgKyBzdGF0X2NvcihtZXRob2QgPSAicGVhcnNvbiIpCgpgYGAKYGBge3J9CmdlbmVzX2J5X3RwID0gRmV0Y2hEYXRhKG9iamVjdCA9IG9wc2NjX2hwdlBvcyx2YXJzID0gYygiaHB2IiwgIk1ZQiIpKSAKCiAgICBmb3JtdWxhIDwtIGFzLmZvcm11bGEoIk1ZQiB+IGhwdiIpCiAgICAKICAgICNwbG90IGFuZCBzcGxpdCBieSBwYXRpZW50OiAgIAogICAgc3RhdC50ZXN0ID0gY29tcGFyZV9tZWFucyhmb3JtdWxhID0gZm9ybXVsYSAsZGF0YSA9IGdlbmVzX2J5X3RwLG1ldGhvZCA9ICJ3aWxjb3gudGVzdCIscC5hZGp1c3QubWV0aG9kID0gImZkciIpICMgQWRkIHBhaXJ3aXNlIGNvbXBhcmlzb25zIHAtdmFsdWUKICAgIAogICAgCgogICAgcGx0ID0gZ2dwbG90KGdlbmVzX2J5X3RwLCBhZXMoeCA9IGhwdiwgeSA9IE1ZQikpKyAKICAgICAgZ2VvbV92aW9saW4oKSsgZ2VvbV9ib3hwbG90KHdpZHRoPTAuMSxvdXRsaWVyLnNoYXBlID0gTkEpICsKICAgICAgeWxpbShtaW4oZ2VuZXNfYnlfdHAkTVlCKSxtYXgoZ2VuZXNfYnlfdHAkTVlCKSoxLjI1KQogICAgcGx0ID0gcGx0ICtzdGF0X3B2YWx1ZV9tYW51YWwoc3RhdC50ZXN0LCBsYWJlbCA9ICJ7cC5hZGp9IiwgICNhZGQgcCB2YWx1ZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeS5wb3NpdGlvbiA9IG1heChnZW5lc19ieV90cCRNWUIpKjEuMDgpICMgc2V0IHBvc2l0aW9uIGF0IHRoZSB0b3AgdmFsdWUKICAgIHBsdApgYGAKYGBge3J9CiMgcGxvdCBieSBwYXRpZW50cwpnZW5lc19ieV90cCA9IEZldGNoRGF0YShvYmplY3QgPSBvcHNjYyx2YXJzID0gYygiTVlCIiwicGF0aWVudCIpKSAlPiUgCiAgZHBseXI6Omdyb3VwX2J5KHBhdGllbnQpICU+JSAKICBzdW1tYXJpc2UoTVlCID0gbWVhbihNWUIpKSAlPiUKICBtdXRhdGUgKGhwdiA9IGlmX2Vsc2UoCiAgY29uZGl0aW9uID0gcGF0aWVudCAlaW4lIGhwdl9uZWcgLHRydWUgPSAiSFBWLSIsZmFsc2UgPSAiSFBWKyIpKQoKZm9ybXVsYSA8LSBhcy5mb3JtdWxhKCJNWUIgfiBocHYiKQoKI3Bsb3QgYW5kIHNwbGl0IGJ5IHBhdGllbnQ6CnN0YXQudGVzdCA9IGNvbXBhcmVfbWVhbnMoCiAgZm9ybXVsYSA9IGZvcm11bGEgLAogIGRhdGEgPSBnZW5lc19ieV90cCwKICBtZXRob2QgPSAid2lsY294LnRlc3QiLAogIHAuYWRqdXN0Lm1ldGhvZCA9ICJmZHIiCikgIyBBZGQgcGFpcndpc2UgY29tcGFyaXNvbnMgcC12YWx1ZQoKCgpwbHQgPSBnZ3Bsb3QoZ2VuZXNfYnlfdHAsIGFlcyh4ID0gaHB2LCB5ID0gTVlCKSkgKwogIGdlb21fYm94cGxvdCh3aWR0aCA9IDAuMSxvdXRsaWVyLnNoYXBlID0gTkEpICsgZ2VvbV9qaXR0ZXIoc2hhcGU9MTYsIHBvc2l0aW9uPXBvc2l0aW9uX2ppdHRlcigwLjIpKQpwbHQgPSBwbHQgKyBzdGF0X3B2YWx1ZV9tYW51YWwoc3RhdC50ZXN0LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGFiZWwgPSBwYXN0ZTAoc3RhdC50ZXN0JG1ldGhvZCwgIiwgcCA9IHtwLmFkan0iKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICNhZGQgcCB2YWx1ZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeS5wb3NpdGlvbiA9IDAuMikrIGdndGl0bGUoIkJ5IHNhbXBsZSIpICMgc2V0IHBvc2l0aW9uIGF0IHRoZSB0b3AgdmFsdWUKcGx0CmBgYAoKYGBge3J9Cmhwdl8xNl9nZW5lcyA9IHJvd25hbWVzKG9wc2NjX2hwdlBvcylbIHN0YXJ0c1dpdGgoeCA9cm93bmFtZXMob3BzY2NfaHB2UG9zKSxwcmVmaXggPSAgIkhQVjE2IildCmEgPSBmaWx0ZXIudW1pKG1hdHJpeCA9IG9wc2NjX2hwdlBvc0Bhc3NheXMkUk5BQGNvdW50cyxjZWxscyA9IGNvbG5hbWVzKG9wc2NjX2hwdlBvcyAlPiUgc3Vic2V0KHBhdGllbnQgPT0gIk9QMTQiKSksd2hpdGVsaXN0ID0gSFBWZ2VuZXMpCmdlbmVzID0gaHB2XzE2X2dlbmVzW2hwdl8xNl9nZW5lcyAlaW4lIHJvd25hbWVzKGEpXQpzY29yZSA9IGFbZ2VuZXMsXSAlPiUgY29sTWVhbnMoKQoKbXliX3ZzX2hwdiA9IGRhdGEuZnJhbWUoaHB2X3Njb3JlID0gc2NvcmUsIE1ZQj0gYVsiTVlCIixdLHJvdy5uYW1lcyA9IGNvbG5hbWVzKGEpKQoKc3AgPC0gZ2dzY2F0dGVyKG15Yl92c19ocHYsIHggPSAiaHB2X3Njb3JlIiwgeSA9ICJNWUIiLAogICBhZGQgPSAicmVnLmxpbmUiLCAgIyBBZGQgcmVncmVzc2luIGxpbmUKICAgYWRkLnBhcmFtcyA9IGxpc3QoY29sb3IgPSAiYmx1ZSIsIGZpbGwgPSAibGlnaHRncmF5IiksICMgQ3VzdG9taXplIHJlZy4gbGluZQogICBjb25mLmludCA9IFRSVUUgIyBBZGQgY29uZmlkZW5jZSBpbnRlcnZhbAogICApCiMgQWRkIGNvcnJlbGF0aW9uIGNvZWZmaWNpZW50CnNwICsgc3RhdF9jb3IobWV0aG9kID0gInBlYXJzb24iKQpgYGA=