Load packages

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

Load gene expression matrices and melt them

# Read a wide gene-expression matrix from a TSV file and reshape it to long
# format: one row per (gene, sample) pair.
#
# The first two columns are kept as identifier columns (later code filters on
# `gene_id` and `gene_symbol`). Every remaining column is treated as a sample:
# its name goes into `Sample` and its values into `Expression`.
#
# pivot_longer() replaces the superseded gather(). The resulting columns are
# the same; rows come out ordered gene-by-gene instead of sample-by-sample,
# which the filtering and plotting below do not depend on.
read_expr_long <- function(path) {
    read_tsv(path) %>%
        pivot_longer(
            cols = -(1:2),
            names_to = "Sample",
            values_to = "Expression"
        )
}

df_blgsp <- read_expr_long("blgsp_gene_expr_matrix.tsv")
df_centro <- read_expr_long("centro_gene_expr_matrix.tsv")

Plot MYC expression

# Samples to highlight in red on top of the boxplots.
interesting_samples <- c("BLGSP-71-06-00081")

# Keep only the MYC rows from each long-format table and tag every row with
# the cohort it came from, then stack the two cohorts into one table.
myc_blgsp <- mutate(
    filter(df_blgsp, gene_symbol == "MYC"),
    Cohort = "BLGSP"
)
myc_centro <- mutate(
    filter(df_centro, gene_symbol == "MYC"),
    Cohort = "Centroblasts"
)
myc_all <- rbind(myc_blgsp, myc_centro)

# One boxplot per cohort (Centroblasts first, then BLGSP), with the
# highlighted samples overlaid as labelled red points.
p <- ggplot(myc_all, aes(Cohort, Expression)) +
    geom_boxplot() +
    scale_x_discrete(limits = c("Centroblasts", "BLGSP")) +
    geom_point(
        data = myc_all %>% filter(Sample %in% interesting_samples),
        size = 3,
        color = "red"
    ) +
    geom_text(
        aes(label = Sample),
        data = myc_all %>% filter(Sample %in% interesting_samples),
        size = 4,
        color = "red",
        hjust = -0.05,
        vjust = 0.5
    )
p

Plot IGKV2D (ENSG00000242534) expression

# Gene to plot; matched against either the gene id or the gene symbol column.
gene <- "ENSG00000242534"
# No samples are highlighted for this gene (c() evaluates to NULL), so the
# point/text layers below are given zero-row data.
interesting_samples <- NULL

# Keep only the rows for the requested gene from each long-format table and
# tag every row with the cohort it came from, then stack the two cohorts.
blgsp_expr <- mutate(
    filter(df_blgsp, gene_id == gene | gene_symbol == gene),
    Cohort = "BLGSP"
)
centro_expr <- mutate(
    filter(df_centro, gene_id == gene | gene_symbol == gene),
    Cohort = "Centroblasts"
)
all_expr <- rbind(blgsp_expr, centro_expr)

# One boxplot per cohort (Centroblasts first, then BLGSP), with any
# highlighted samples overlaid as labelled red points.
p <- ggplot(all_expr, aes(Cohort, Expression)) +
    geom_boxplot() +
    scale_x_discrete(limits = c("Centroblasts", "BLGSP")) +
    geom_point(
        data = all_expr %>% filter(Sample %in% interesting_samples),
        size = 3,
        color = "red"
    ) +
    geom_text(
        aes(label = Sample),
        data = all_expr %>% filter(Sample %in% interesting_samples),
        size = 4,
        color = "red",
        hjust = -0.05,
        vjust = 0.5
    )
p