Load packages
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
Load gene expression matrix
# Read the tab-separated expression table from the working directory.
df <- read_tsv(file = "gene_expr_matrix.tsv")
Calculate the mean and SD of each gene, normalize the SD by the mean (coefficient of variation), and drop genes whose mean is zero
# Per-gene summary statistics over every column after the first two
# (the first two columns are carried through unchanged as identifiers).
# Convert to a matrix once instead of letting apply() coerce the data frame
# on every call, and avoid naming variables `mean` / `sd`, which would shadow
# the functions of the same name for the rest of the session.
expr_values <- as.matrix(df[, -(1:2)])
gene_mean <- rowMeans(expr_values)
gene_sd <- apply(expr_values, 1, sd)

# Keep the identifier columns plus the mean and the coefficient of variation
# (SD divided by mean). check.names = FALSE leaves the original column names
# untouched, as cbind() on a data frame does.
df <- data.frame(
  df[, 1:2],
  mean = gene_mean,
  sd_norm = gene_sd / gene_mean,
  check.names = FALSE
)

# Genes with a mean of exactly zero have an undefined coefficient of variation.
df <- filter(df, mean != 0)
Show the distribution of the mean and the normalized SD across all genes, highlighting TBP and PRKG1
# Genes to highlight on top of the overall distributions.
genes <- c("TBP", "PRKG1")

# Long-format table with one row per gene and statistic: `stat` holds the
# statistic name ("mean" or "sd_norm") and `value` holds its value. The plot
# below reads `df.melted`, which was not defined anywhere before this point.
df.melted <- pivot_longer(
  df,
  cols = all_of(c("mean", "sd_norm")),
  names_to = "stat",
  values_to = "value"
)

# Rows for the highlighted genes, shared by the point and label layers.
highlighted <- filter(df.melted, gene_symbol %in% genes)

# One box per statistic on a log10 y axis; highlighted genes are drawn as
# red points with their symbol printed to the right of the point.
p <- ggplot(data = df.melted, aes(x = stat, y = value)) +
  geom_boxplot() +
  scale_y_log10() +
  geom_point(data = highlighted, color = "red", size = 3) +
  geom_text(
    data = highlighted,
    aes(label = gene_symbol),
    hjust = -0.2, vjust = 0.5
  )
p

Find other genes with a high mean (above the third quartile, Q3) and a low normalized SD (below one quarter of the first quartile, Q1/4)
# Quartiles (0%, 25%, 50%, 75%, 100%) of the per-gene mean and of the
# normalized SD.
mean_quants <- quantile(df[["mean"]])
sd_quants <- quantile(df[["sd_norm"]])

# Cut-offs: mean above the 75% quantile, normalized SD below a quarter of
# the 25% quantile.
high_mean_cutoff <- mean_quants[["75%"]]
low_sd_norm_cutoff <- sd_quants[["25%"]] / 4

df.candidates <- df %>%
  filter(
    mean > high_mean_cutoff,
    sd_norm < low_sd_norm_cutoff
  )
print(df.candidates)
## gene_id gene_symbol mean sd_norm
## 1 ENSG00000054116 TRAPPC3 13.768193 0.09789388
## 2 ENSG00000055070 SZRD1 40.041360 0.08744261
## 3 ENSG00000084073 ZMPSTE24 15.297467 0.07863105
## 4 ENSG00000116266 STXBP3 7.272953 0.09462641
## 5 ENSG00000116863 ADPRHL2 16.912207 0.09060969
## 6 ENSG00000117682 DHDDS 5.502700 0.08192232
## 7 ENSG00000132716 DCAF8 12.190433 0.09292998
## 8 ENSG00000134644 PUM1 19.814127 0.09842207
## 9 ENSG00000143756 FBXO28 7.791573 0.09665547
## 10 ENSG00000143761 ARF1 97.702393 0.06199841
## 11 ENSG00000162368 CMPK1 40.979193 0.09175348
## 12 ENSG00000162735 PEX19 11.607807 0.09898423
## 13 ENSG00000119977 TCTN3 8.171367 0.08111902
## 14 ENSG00000138107 ACTR1A 27.225293 0.09650163
## 15 ENSG00000148719 DNAJB12 8.241787 0.08946064
## 16 ENSG00000095139 ARCN1 55.190913 0.08017499
## 17 ENSG00000110108 TMEM109 51.584807 0.09487398
## 18 ENSG00000149357 LAMTOR1 27.133293 0.09602785
## 19 ENSG00000162300 ZFPL1 5.944993 0.09730011
## 20 ENSG00000173039 RELA 16.451847 0.09742261
## 21 ENSG00000174903 RAB1B 82.475573 0.09716332
## 22 ENSG00000121749 TBC1D15 5.708540 0.09006590
## 23 ENSG00000100926 TM9SF1 15.965993 0.08002558
## 24 ENSG00000125952 MAX 12.200567 0.09690444
## 25 ENSG00000139977 NAA30 4.079393 0.08279056
## 26 ENSG00000165389 SPTSSA 13.769860 0.09703782
## 27 ENSG00000165525 NEMF 6.904740 0.09841555
## 28 ENSG00000254692 RP11-468E2.1 23.020773 0.07924947
## 29 ENSG00000028528 SNX1 13.060400 0.08332927
## 30 ENSG00000138592 USP8 9.358260 0.08566687
## 31 ENSG00000166946 CCNDBP1 8.603873 0.09873220
## 32 ENSG00000102910 LONP2 8.848120 0.09221575
## 33 ENSG00000067596 DHX8 10.655440 0.09349787
## 34 ENSG00000072849 DERL2 4.114447 0.09584595
## 35 ENSG00000108587 GOSR1 5.651380 0.08764943
## 36 ENSG00000108588 CCDC47 26.065880 0.08765599
## 37 ENSG00000109111 SUPT6H 21.764687 0.09527951
## 38 ENSG00000126581 BECN1 10.894547 0.08018598
## 39 ENSG00000129255 MPDU1 10.699840 0.09556836
## 40 ENSG00000132581 SDF2 6.792480 0.09314164
## 41 ENSG00000141551 CSNK1D 10.832193 0.09634965
## 42 ENSG00000173812 EIF1 86.447020 0.08970647
## 43 ENSG00000185722 ANKFY1 8.514947 0.08198510
## 44 ENSG00000101558 VAPA 11.987320 0.09743620
## 45 ENSG00000079805 DNM2 16.489687 0.07884378
## 46 ENSG00000130175 PRKCSH 41.623493 0.07167409
## 47 ENSG00000077380 DYNC1I2 10.233953 0.09199223
## 48 ENSG00000135930 EIF4E2 9.660193 0.09783050
## 49 ENSG00000100991 TRPC4AP 31.403160 0.07126738
## 50 ENSG00000101084 C20orf24 39.074220 0.09284207
## 51 ENSG00000101150 TPD52L2 31.855780 0.09049130
## 52 ENSG00000101310 SEC23B 20.479713 0.08095560
## 53 ENSG00000118705 RPN2 97.062673 0.09027935
## 54 ENSG00000132670 PTPRA 24.531893 0.09166466
## 55 ENSG00000170471 RALGAPB 7.721227 0.09774953
## 56 ENSG00000259399 RP5-977B1.10 56.240460 0.07589385
## 57 ENSG00000160194 NDUFV3 7.710433 0.09799988
## 58 ENSG00000131381 ZFYVE20 5.155667 0.09260688
## 59 ENSG00000161203 AP2M1 58.964580 0.09797287
## 60 ENSG00000170248 PDCD6IP 18.520807 0.09410393
## 61 ENSG00000181789 COPG1 37.738913 0.09748887
## 62 ENSG00000138768 USO1 26.484053 0.09310918
## 63 ENSG00000127022 CANX 138.598047 0.09629881
## 64 ENSG00000145715 RASA1 9.087413 0.08597600
## 65 ENSG00000174695 TMEM167A 15.917953 0.09153770
## 66 ENSG00000185129 PURA 7.054867 0.09333442
## 67 ENSG00000025796 SEC63 8.066320 0.07189370
## 68 ENSG00000112308 C6orf62 41.887307 0.09891743
## 69 ENSG00000112584 FAM120B 6.167353 0.09880634
## 70 ENSG00000107021 TBC1D13 9.618180 0.08997362
## 71 ENSG00000155827 RNF20 18.136067 0.09217287
## 72 ENSG00000078061 ARAF 20.024633 0.08423786
## 73 ENSG00000126945 HNRNPH2 40.952733 0.09899855