Functions
# modify dotplot to be without exponent of the expression
newDef <- deparse(DotPlot)
newDef[57] = " return(mean(x = x))"
newDef[136] = " yes = \"Identity\", no = \"Split Identity\")) + cowplot::theme_cowplot()" #add package name
newDef[95] = " data.use <- data.use" # remove log
DotPlot.2 <- eval(parse(text=newDef))
Data
library("readxl")
GSE17708 <- read_excel("./Data/EMT_pathways/GSE17708_Keshamouni_TGFB1_logs.xls",sheet = 1,progress = T,cell_cols("BC")) %>% dplyr::slice(3:n())
-
\
/
-
GSE17708_genes <- read_excel("./Data/EMT_pathways/GSE17708_Keshamouni_TGFB1_logs.xls",sheet = 1,progress = T,cell_cols("B"))
-
\
/
-
GSE17708 = cbind(GSE17708,GSE17708_genes)
GSE17708_72H_UP_df = GSE17708 %>% set_names(c("deg","gene")) %>% filter(deg > 1) %>% dplyr::select(gene) #take up genes
GSE17708_72H_UP = GSE17708_72H_UP_df %>% filter(!gene == "---") %>% pull(gene) # remove na gene symbols
GSE17708_72H_UP = GSE17708_72H_UP %>% strsplit(split = " /// ") %>% unlist() %>% unique() # split ambiguous genes
bayers_EMT = c("ZEB1 LIX1L VIM AXL MMPT ANTXR2 C3orf21 FN1 NRP1 TGFBI GALNT5 PPARG FN1 HNMT CARD6 RBPMS TNFRSF21 TMEM45B MPP7 SSH3 MTAC2D1 MUC1 EPPK1 SHROOM3 EPN3 PRSS22 AP1M2 SH3YL1 KLC3 SERINC2 EVPL FXYD3 CLDN4 CRB3 LRRC54 MAPK13 EPPK1 GALNT3 STAP2 AP1M2 DSP ELMO3 KRTCAP3 MAL2 F11R GPR110 GPT56 KRT19 GRHL1 BSPRY C1orf116 S100A14 SPINT2 ANKRD22 ST14 GRHL2 PRR5 BSPRY TJP3 TACSTD2 CDH3 C1orf172 CDS1 PRR5 MPZL2 INADL EPN3 RBM35A TMC4 ITGB6 TMEM125 EPHA1 CDS1 ENPP5 ST14 EPB41L5 ERBB3 RAB25 PRSS8 TMEM30B CLDN7 RBM35A TACSTD1 CDS1 SCNN1A CDH1") %>% strsplit(split = "\t") %>% unlist() %>% unique()
bayers_mes = bayers_EMT[1:16]
bayers_epi = bayers_EMT[17:length(bayers_EMT)]
hEMT = c("PDPN, ITGA5, ITGA6, TGFBI, LAMC2, MMP10, LAMA3, CDH13, SERPINE1, P4HA2, TNC, MMP1")%>% strsplit(split = ", ") %>% unlist() %>% unique()
epi_genes = c("CDH1, DSP, OCLN, CRB3")%>% strsplit(split = ", ") %>% unlist() %>% unique()
mes_genes = c("VIM, CDH2, FOXC2, SNAI1, SNAI2, TWIST1, FN1, ITGB6, MMP2, MMP3, MMP9, SOX10, GSC, ZEB1, ZEB2, TWIST2")%>% strsplit(split = ", ") %>% unlist() %>% unique()
hallmark_emt = genesets$HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION
calculate score- scaled
data
gene_list = list(hallmark_emt = hallmark_emt, GSE17708_72H_UP = GSE17708_72H_UP,bayers_mes = bayers_mes,bayers_epi = bayers_epi,Tagli_hEMT = hEMT,Tagli_epi = epi_genes,Tagli_mes = mes_genes)
idx=1
for (dataset in list(xeno,lung)) {
# dataset = ScaleData(object = dataset,features = unlist(gene_list))
for (i in seq_along(gene_list)) {
genes = gene_list[[i]]
genes = genes[genes %in% rownames(dataset)]
name = names(gene_list)[i]
scores = dataset@assays$RNA@scale.data[genes,] %>% colMeans()
dataset %<>% AddMetaData(metadata = scores,col.name = name)
}
dataset$Tagli_emt = dataset$Tagli_mes - dataset$Tagli_epi
dataset$bayers_EMT = dataset$bayers_mes - dataset$bayers_epi
if (idx == 1 ) {
xeno = dataset
}
if (idx==2) {
lung = dataset
}
idx = idx+1
}
x_names <-emt_pathways <- c("hallmark_emt","GSE17708_72H_UP","bayers_EMT","Tagli_emt","Tagli_hEMT")
x_names[1] = "Hallmark \n EMT"
x_names[2] = "GSE17708\n72H_UP"
p1 = DotPlot.2(object = xeno, features = emt_pathways,group.by = 'treatment',scale = T,scale.by = "size")+
guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Models")+ scale_size_binned()
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
p2 = DotPlot.2(object = lung, features = emt_pathways,group.by = 'time.point',scale = T,scale.by = "size")+
guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Patients")+ scale_size_binned()
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
p1 + p2

DotPlot.2(object = xeno, features = emt_pathways,group.by = 'orig.ident',split.by = "treatment", scale = T,scale.by = "size",cols = "Blues")+
guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Models")+ scale_size_binned()
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.

data = FetchData(object = lung,vars = c("time.point",emt_pathways))
df = reshape2::melt(data,value.name = "logTPM") %>% dplyr::rename(gene = variable)
stat.test <- df %>%
group_by(gene) %>%
wilcox_test(logTPM ~ time.point,comparisons = list(c("pre-treatment","on-treatment"))) %>%
mutate(y.position = 5)
stat.test
stat.test <- stat.test %>%
add_xy_position(x = "gene", dodge = 0.8)
ggboxplot(
df,
x = "gene",
y = "logTPM",
color = "time.point",
palette = "jco",
add = "jitter"
)+ stat_pvalue_manual(stat.test,y.position = 4, label = "{p.adj.signif}",remove.bracket =F)+ geom_violin(aes(fill = time.point), trim = FALSE)

calculate score- log
tpm
gene_list = list(hallmark_emt = hallmark_emt, GSE17708_72H_UP = GSE17708_72H_UP,bayers_mes = bayers_mes,bayers_epi = bayers_epi,Tagli_hEMT = hEMT,Tagli_epi = epi_genes,Tagli_mes = mes_genes)
idx=1
for (dataset in list(xeno,lung)) {
# dataset = ScaleData(object = dataset,features = unlist(gene_list))
for (i in seq_along(gene_list)) {
genes = gene_list[[i]]
genes = genes[genes %in% rownames(dataset)]
name = names(gene_list)[i]
scores = dataset@assays$RNA@data[genes,] %>% colMeans()
dataset %<>% AddMetaData(metadata = scores,col.name = name)
}
dataset$Tagli_emt = dataset$Tagli_mes - dataset$Tagli_epi
dataset$bayers_EMT = dataset$bayers_mes - dataset$bayers_epi
if (idx == 1 ) {
xeno = dataset
}
if (idx==2) {
lung = dataset
}
idx = idx+1
}
x_names <-emt_pathways <- c("hallmark_emt","GSE17708_72H_UP","bayers_EMT","Tagli_emt","Tagli_hEMT")
x_names[1] = "Hallmark \n EMT"
x_names[2] = "GSE17708\n72H_UP"
p1 = DotPlot.2(object = xeno, features = emt_pathways,group.by = 'treatment',scale = T,scale.by = "size")+
guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Models")+ scale_size_binned()
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
p2 = DotPlot.2(object = lung, features = emt_pathways,group.by = 'time.point',scale = T,scale.by = "size")+
guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Patients")+ scale_size_binned()
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
p1 + p2

all_pathways = list()
for (pathway in emt_pathways) {
data = FetchData(object = lung,vars = c(pathway, "time.point", "patient.ident"))
all_patients = list()
for (patient in unique(lung$patient.ident)) {
mean1 = data %>% filter(patient.ident == patient, time.point == "pre-treatment") %>% pull(1) %>% mean()
mean2 = data %>% filter(patient.ident == patient, time.point == "on-treatment") %>% pull(1) %>% mean()
fc = mean1 / mean2
all_patients[[patient]] = fc
}
all_pathways[[pathway]] = all_patients
}
mat = as.data.frame(lapply(all_pathways, unlist))
mat = log2(t(mat) %>% as.data.frame())
breaks <- c(seq(-1,1,by=0.1))
colors_for_plot <- colorRampPalette(colors = c("blue", "white", "red"))(n = length(breaks))
pheatmap::pheatmap(mat,color = colors_for_plot,breaks = breaks,display_numbers = T,main = "log2(FC) pre/on")

---
title: '`r rstudioapi::getSourceEditorContext()$path %>% basename() %>% gsub(pattern = "\\.Rmd",replacement = "")`' 
author: "Avishai Wizel"
date: '`r Sys.time()`'
output: 
  html_notebook: 
    code_folding: hide
    toc: yes
    toc_collapse: yes
    toc_float: 
      collapsed: FALSE
    number_sections: true
    toc_depth: 1
---



# Functions

```{r warning=FALSE}
# modify dotplot to be without exponent of the expression
newDef <- deparse(DotPlot)
newDef[57] =  "            return(mean(x =  x))"
newDef[136] =  "            yes = \"Identity\", no = \"Split Identity\")) + cowplot::theme_cowplot()" #add package name
newDef[95] = "                data.use <- data.use" # remove log

DotPlot.2 <- eval(parse(text=newDef))
```

# Data

```{r}

```


```{r}
library("readxl")
GSE17708 <- read_excel("./Data/EMT_pathways/GSE17708_Keshamouni_TGFB1_logs.xls",sheet = 1,progress = T,cell_cols("BC")) %>% dplyr::slice(3:n())
GSE17708_genes <- read_excel("./Data/EMT_pathways/GSE17708_Keshamouni_TGFB1_logs.xls",sheet = 1,progress = T,cell_cols("B"))
GSE17708 = cbind(GSE17708,GSE17708_genes) 
GSE17708_72H_UP_df = GSE17708 %>% set_names(c("deg","gene")) %>% filter(deg > 1) %>% dplyr::select(gene) #take up genes
GSE17708_72H_UP = GSE17708_72H_UP_df %>% filter(!gene == "---") %>% pull(gene) # remove na gene symbols
GSE17708_72H_UP = GSE17708_72H_UP %>% strsplit(split = " /// ") %>% unlist() %>% unique() # split ambiguous genes
```

```{r}
bayers_EMT = c("ZEB1	LIX1L	VIM	AXL	MMPT	ANTXR2	C3orf21	FN1	NRP1	TGFBI	GALNT5	PPARG	FN1	HNMT	CARD6	RBPMS	TNFRSF21	TMEM45B	MPP7	SSH3	MTAC2D1	MUC1	EPPK1	SHROOM3	EPN3	PRSS22	AP1M2	SH3YL1	KLC3	SERINC2	EVPL	FXYD3	CLDN4	CRB3	LRRC54	MAPK13	EPPK1	GALNT3	STAP2	AP1M2	DSP	ELMO3	KRTCAP3	MAL2	F11R	GPR110	GPT56	KRT19	GRHL1	BSPRY	C1orf116	S100A14	SPINT2	ANKRD22	ST14	GRHL2	PRR5	BSPRY	TJP3	TACSTD2	CDH3	C1orf172	CDS1	PRR5	MPZL2	INADL	EPN3	RBM35A	TMC4	ITGB6	TMEM125	EPHA1	CDS1	ENPP5	ST14	EPB41L5	ERBB3	RAB25	PRSS8	TMEM30B	CLDN7	RBM35A	TACSTD1	CDS1	SCNN1A	CDH1") %>% strsplit(split = "\t") %>% unlist() %>% unique()
bayers_mes = bayers_EMT[1:16]
bayers_epi = bayers_EMT[17:length(bayers_EMT)]

```

```{r}
hEMT = c("PDPN, ITGA5, ITGA6, TGFBI, LAMC2, MMP10, LAMA3, CDH13, SERPINE1, P4HA2, TNC, MMP1")%>% strsplit(split = ", ") %>% unlist() %>% unique()
epi_genes = c("CDH1, DSP, OCLN, CRB3")%>% strsplit(split = ", ") %>% unlist() %>% unique()
mes_genes = c("VIM, CDH2, FOXC2, SNAI1, SNAI2, TWIST1, FN1, ITGB6, MMP2, MMP3, MMP9, SOX10, GSC, ZEB1, ZEB2, TWIST2")%>% strsplit(split = ", ") %>% unlist() %>% unique()
```

```{r}
hallmark_emt = genesets$HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION
```


# calculate score- scaled data
```{r }
gene_list = list(hallmark_emt = hallmark_emt, GSE17708_72H_UP = GSE17708_72H_UP,bayers_mes = bayers_mes,bayers_epi = bayers_epi,Tagli_hEMT = hEMT,Tagli_epi = epi_genes,Tagli_mes = mes_genes)

idx=1
for (dataset in list(xeno,lung)) {
  # dataset = ScaleData(object = dataset,features = unlist(gene_list))
  for (i in seq_along(gene_list)) {
    genes = gene_list[[i]]
    genes = genes[genes %in% rownames(dataset)]
    name = names(gene_list)[i]
    scores = dataset@assays$RNA@scale.data[genes,] %>% colMeans()
    dataset %<>% AddMetaData(metadata = scores,col.name = name)
  }
  dataset$Tagli_emt = dataset$Tagli_mes - dataset$Tagli_epi
  dataset$bayers_EMT = dataset$bayers_mes - dataset$bayers_epi
  if (idx == 1 ) {
    xeno = dataset
  } 
  if (idx==2) {
    lung = dataset
  }
  idx = idx+1
}

x_names <-emt_pathways <- c("hallmark_emt","GSE17708_72H_UP","bayers_EMT","Tagli_emt","Tagli_hEMT")
x_names[1] = "Hallmark \n EMT"
x_names[2] = "GSE17708\n72H_UP"


```


```{r fig.height=5, fig.width=14}
p1 = DotPlot.2(object = xeno, features =  emt_pathways,group.by  = 'treatment',scale = T,scale.by = "size")+
  guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
  theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Models")+ scale_size_binned()


p2 = DotPlot.2(object = lung, features =  emt_pathways,group.by  = 'time.point',scale = T,scale.by = "size")+
  guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
  theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Patients")+ scale_size_binned()
p1 + p2
```











```{r}
DotPlot.2(object = xeno, features =  emt_pathways,group.by  = 'orig.ident',split.by = "treatment", scale = T,scale.by = "size",cols = "Blues")+
  guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
  theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Models")+ scale_size_binned()
```


```{r }

data = FetchData(object = lung,vars = c("time.point",emt_pathways))
df = reshape2::melt(data,value.name = "logTPM") %>% dplyr::rename(gene = variable)


stat.test <- df %>%
    group_by(gene) %>%
  wilcox_test(logTPM ~ time.point,comparisons =  list(c("pre-treatment","on-treatment"))) %>%
  mutate(y.position = 5)

stat.test

stat.test <- stat.test %>% 
  add_xy_position(x = "gene", dodge = 0.8)

ggboxplot(
  df,
  x = "gene",
  y = "logTPM",
  color = "time.point",
  palette = "jco",
  add = "jitter"
)+ stat_pvalue_manual(stat.test,y.position = 4, label = "{p.adj.signif}",remove.bracket =F)+ geom_violin(aes(fill = time.point), trim = FALSE)


```

```{r fig.height=6, fig.width=15}
library(rstatix)
data = FetchData(object = lung,vars = c("time.point",emt_pathways,"patient.ident"))
df = reshape2::melt(data,value.name = "logTPM") %>% dplyr::rename(pathway = variable)

stat.test <- df %>%
    group_by(pathway) %>%
  wilcox_test(logTPM ~ time.point,comparisons =  list(c("pre-treatment","on-treatment"))) %>% 
  add_xy_position(x = "pathway") 


stat.test



p = ggplot(df,aes( x = patient.ident, y = logTPM))+ 
    geom_violin(trim = F,aes(fill = time.point)) +
  geom_boxplot(aes(fill = time.point), width=.2, outlier.shape=NA,
    position = position_dodge(0.9))   +theme_minimal() + stat_pvalue_manual(stat.test,y.position = 3, label = "{p.adj.signif}",remove.bracket = F)

p+facet_wrap(vars(pathway))

```






# calculate score- log tpm


```{r }
#calculate score
gene_list = list(hallmark_emt = hallmark_emt, GSE17708_72H_UP = GSE17708_72H_UP,bayers_mes = bayers_mes,bayers_epi = bayers_epi,Tagli_hEMT = hEMT,Tagli_epi = epi_genes,Tagli_mes = mes_genes)

idx=1
for (dataset in list(xeno,lung)) {
  # dataset = ScaleData(object = dataset,features = unlist(gene_list))
  for (i in seq_along(gene_list)) {
    genes = gene_list[[i]]
    genes = genes[genes %in% rownames(dataset)]
    name = names(gene_list)[i]
    scores = dataset@assays$RNA@data[genes,] %>% colMeans()
    dataset %<>% AddMetaData(metadata = scores,col.name = name)
  }
  dataset$Tagli_emt = dataset$Tagli_mes - dataset$Tagli_epi
  dataset$bayers_EMT = dataset$bayers_mes - dataset$bayers_epi
  if (idx == 1 ) {
    xeno = dataset
  } 
  if (idx==2) {
    lung = dataset
  }
  idx = idx+1
}

x_names <-emt_pathways <- c("hallmark_emt","GSE17708_72H_UP","bayers_EMT","Tagli_emt","Tagli_hEMT")
x_names[1] = "Hallmark \n EMT"
x_names[2] = "GSE17708\n72H_UP"


```

```{r fig.height=5, fig.width=14}
p1 = DotPlot.2(object = xeno, features =  emt_pathways,group.by  = 'treatment',scale = T,scale.by = "size")+
  guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
  theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Models")+ scale_size_binned()


p2 = DotPlot.2(object = lung, features =  emt_pathways,group.by  = 'time.point',scale = T,scale.by = "size")+
  guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score")) +
  theme(axis.text.x = element_text( size = 10))+ scale_x_discrete(labels= x_names)+ggtitle("Patients")+ scale_size_binned()
p1 + p2
```

```{r}
all_pathways = list()
for (pathway in emt_pathways) {
  data = FetchData(object = lung,vars = c(pathway, "time.point", "patient.ident"))
  all_patients = list()
  for (patient in unique(lung$patient.ident)) {
    mean1 = data %>% filter(patient.ident == patient, time.point == "pre-treatment") %>% pull(1) %>% mean()
    mean2 = data %>% filter(patient.ident == patient, time.point == "on-treatment") %>% pull(1) %>% mean()
    fc = mean1 / mean2
    all_patients[[patient]] = fc
  }
  all_pathways[[pathway]] = all_patients
}


mat = as.data.frame(lapply(all_pathways, unlist))
mat = log2(t(mat) %>% as.data.frame())
breaks <- c(seq(-1,1,by=0.1))
colors_for_plot <- colorRampPalette(colors = c("blue", "white", "red"))(n = length(breaks))

pheatmap::pheatmap(mat,color = colors_for_plot,breaks = breaks,display_numbers = T,main = "log2(FC) pre/on")
```




<script src="https://hypothes.is/embed.js" async></script>

