Load packages

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

Load gene expression matrix

# Read the tab-separated expression matrix. Later steps reference its
# `gene_id` and `gene_symbol` columns and treat every other column as a sample.
df <- read_tsv(file = "gene_expr_matrix.tsv")

Calculate the mean and SD of each gene across samples, normalize the SD by the mean (coefficient of variation), and drop genes whose mean is zero

# Build a long table of per-gene summary statistics.
#   1. Reshape the matrix to one row per (gene, sample): every column except
#      `gene_id` and `gene_symbol` is gathered into `sample` / `expr`.
#   2. Per gene, compute the mean expression and the SD divided by that mean
#      (coefficient of variation). `mean` in the second expression refers to
#      the column created just before it in the same summarise() call.
#   3. summarise() only drops the last grouping variable, so ungroup()
#      explicitly; otherwise the result stays grouped by `gene_id` and that
#      grouping silently carries over into every later step.
#   4. Drop genes whose mean is exactly 0 (their sd / mean is not finite).
#   5. Stack `mean` and `sd_norm` into `statistic` / `value`, giving one row
#      per gene and statistic.
df.summarised <- df %>%
  gather(sample, expr, -gene_id, -gene_symbol) %>%
  group_by(gene_id, gene_symbol) %>%
  summarise(mean = mean(expr), sd_norm = sd(expr) / mean) %>%
  ungroup() %>%
  filter(mean != 0) %>%
  gather(statistic, value, mean, sd_norm)

Show the distribution of the mean and normalized SD across all genes (log scale), highlighting TBP, PRKG1 and CDKN1A

# Genes to call out on top of the overall distributions.
genes <- c("TBP", "PRKG1", "CDKN1A")

# Rows of the summary table belonging to the highlighted genes; shared by the
# point layer and the label layer below.
df.highlight <- filter(df.summarised, gene_symbol %in% genes)

# One box per statistic on a log10 y axis, with the highlighted genes drawn as
# red points and labelled with their symbol just to the right of each point.
p <- ggplot(df.summarised, aes(x = statistic, y = value)) +
  geom_boxplot() +
  scale_y_log10() +
  geom_point(data = df.highlight, color = "red", size = 3) +
  geom_text(
    data = df.highlight,
    aes(label = gene_symbol),
    hjust = -0.2,
    vjust = 0.5
  )
p

Find other genes with a high mean (above the third quartile, Q3) and a low normalized SD (below one quarter of the first quartile, Q1 / 4)

# NOTE(review): this chunk is disabled. As written it reads `mean` and
# `sd_norm` from `df`, but in this file those statistics are computed into
# `df.summarised` and then gathered into `statistic` / `value` columns.
# Presumably it needs a wide per-gene table (one row per gene with `mean` and
# `sd_norm` columns) before it can be re-enabled -- TODO confirm.
# mean_quants <- quantile(df$mean)
# sd_quants <- quantile(df$sd_norm)
# df.candidates <- filter(df, mean > mean_quants["75%"], sd_norm < sd_quants["25%"] / 4)
# print(df.candidates)