# Load the TGAC sample sheet (tab-separated). The file is read WITHOUT a
# header row, so any header lines arrive as ordinary data rows.
sampledata <- read.csv(
  "/data2/rnaseq/Cg_Ch_LCM/SampleAlias.txt",
  sep = "\t",
  header = FALSE
)
# Drop the first two rows and columns 2 and 4.
# NOTE(review): presumably rows 1-2 are header/title lines -- confirm against
# the file. Because they were read as data, the remaining columns (including
# `insert`) are not parsed as numeric.
sampledata <- sampledata[-c(1, 2), -c(2, 4)]
# Assumes exactly three columns remain after the drop above.
names(sampledata) <- c("lib", "sample", "insert")
# Load the rRNA contamination log (tab-separated, no header row). Columns are
# kept as plain character/numeric vectors (as.is = TRUE), never factors.
rrna <- read.csv(
  "/data2/rnaseq/Cg_Ch_LCM/reads/facsall.log",
  sep = "\t",
  header = FALSE,
  row.names = NULL,
  as.is = TRUE
)
# Keep every second row (2, 4, 6, ...) and only columns 1 and 5. A logical
# mask is used instead of seq(2, nrow, by = 2) so that a log with fewer than
# two rows gives an empty table rather than a "wrong sign in 'by'" error.
# NOTE(review): presumably the odd rows are repeated header lines -- confirm.
rrna <- rrna[seq_len(nrow(rrna)) %% 2 == 0, c(1, 5)]
names(rrna) <- c("lib", "rRNA")
# Reduce the first column to the bare library ID (the "LIB<digits>" token).
rrna$lib <- gsub(".*(LIB[0-9]*).*", "\\1", rrna$lib)
# Coerce the value to numeric and round to two decimals.
rrna$rRNA <- round(as.numeric(rrna$rRNA), 2)
# Load the chloroplast contamination log (tab-separated, no header row).
# Columns are kept as plain character/numeric vectors (as.is = TRUE).
chloro <- read.csv(
  "/data2/rnaseq/Cg_Ch_LCM/reads/facsall_chloroplast.log",
  sep = "\t",
  header = FALSE,
  row.names = NULL,
  as.is = TRUE
)
# Keep all rows, but only columns 1 and 5.
chloro <- chloro[, c(1, 5)]
names(chloro) <- c("lib", "chloro")
# Reduce the first column to the bare library ID (the "LIB<digits>" token).
chloro$lib <- gsub(".*(LIB[0-9]*).*", "\\1", chloro$lib)
# Coerce the value to numeric and round to two decimals.
chloro$chloro <- round(as.numeric(chloro$chloro), 2)
# Merge the sample sheet with both contamination tables on the library ID.
# With merge()'s defaults plus all.y = FALSE this is an inner join: only
# libraries present in every table are kept.
data <- merge(sampledata, rrna, by = "lib", all.y = FALSE)
data <- merge(data, chloro, by = "lib", all.y = FALSE)
# A library that occurs more than once in an input table fans out into
# several rows during merge(); keep only the first row for each library.
data <- data[!duplicated(data$lib), ]
# Print the merged table.
data
## lib sample insert rRNA chloro
## 1 LIB5486 ChGC1 220 0.33 0.58
## 2 LIB5487 ChGC2 280 0.34 0.55
## 3 LIB5488 ChGC3 303 0.36 0.56
## 7 LIB5489 ChME1 286 0.44 0.56
## 11 LIB5490 ChME2 351 0.35 0.73
## 15 LIB5491 ChME3 316 0.30 0.77
## 19 LIB5492 CgGC1 337 0.26 0.43
## 23 LIB5493 CgGC2 326 0.43 0.37
## 27 LIB5494 CgGC3 334 0.30 0.48
## 31 LIB5495 CgME1 388 0.34 0.51
## 35 LIB5496 CgME2 395 0.34 0.48
## 39 LIB5497 CgME3 453 0.28 0.57
# plot
library(reshape2)
library(ggplot2)
# Reshape to long format: one row per (library, contaminant) pair. Every
# column of `data` other than the id columns becomes a level of `contaminant`,
# with its value stored in `proportion`.
df <- melt(
  data,
  id.vars = c("lib", "sample", "insert"),
  variable.name = "contaminant",
  value.name = "proportion"
)
# One bar per sample, stacked by contaminant (geom_bar's default position),
# using the values as they are rather than counting rows.
ggplot(
  df,
  aes(x = sample, y = proportion, fill = contaminant, group = sample)
) +
  geom_bar(stat = "identity")