Tarenaya - Gynandropsis M/GC LCM RNA-seq contamination (rRNA and chloroplast reads)

# Load the TGAC sample sheet (tab-separated, read without a header row).
sampledata <- read.delim("/data2/rnaseq/Cg_Ch_LCM/SampleAlias.txt",
    header = FALSE)
# Drop the first two rows and columns 2 and 4; the three columns that remain
# are named lib, sample and insert.
sampledata <- sampledata[-c(1, 2), -c(2, 4)]
names(sampledata) <- c("lib", "sample", "insert")

# Load the read rRNA contamination log (tab-separated, read without a header
# row, character columns kept as character).
rrna <- read.delim("/data2/rnaseq/Cg_Ch_LCM/reads/facsall.log", header = FALSE,
    row.names = NULL, as.is = TRUE)
# Keep every second row (2, 4, 6, ...) and only columns 1 and 5.
rrna <- rrna[seq(2, nrow(rrna), by = 2), c(1, 5)]
names(rrna) <- c("lib", "rRNA")
# Reduce the first column to the library ID embedded in it ("LIB" + digits);
# entries without a match are left unchanged.
rrna$lib <- gsub(".*(LIB[0-9]*).*", "\\1", rrna$lib)
rrna$rRNA <- round(as.numeric(rrna$rRNA), 2)

# Load the read chloroplast log (tab-separated, read without a header row,
# character columns kept as character).
chloro <- read.delim("/data2/rnaseq/Cg_Ch_LCM/reads/facsall_chloroplast.log",
    header = FALSE, row.names = NULL, as.is = TRUE)
# Keep all rows but only columns 1 and 5.
chloro <- chloro[, c(1, 5)]
names(chloro) <- c("lib", "chloro")
# Reduce the first column to the library ID embedded in it ("LIB" + digits);
# entries without a match are left unchanged.
chloro$lib <- gsub(".*(LIB[0-9]*).*", "\\1", chloro$lib)
chloro$chloro <- round(as.numeric(chloro$chloro), 2)

# Merge: inner-join the sample sheet with the rRNA and chloroplast tables on
# the library ID.
data <- merge(sampledata, rrna, by = "lib", all.y = FALSE)
data <- merge(data, chloro, by = "lib", all.y = FALSE)
# The joins can produce several rows per library (the printed row names are
# not consecutive); keep only the first row for each library.
data <- data[!duplicated(data$lib), ]

data
##        lib sample insert rRNA chloro
## 1  LIB5486  ChGC1    220 0.33   0.58
## 2  LIB5487  ChGC2    280 0.34   0.55
## 3  LIB5488  ChGC3    303 0.36   0.56
## 7  LIB5489  ChME1    286 0.44   0.56
## 11 LIB5490  ChME2    351 0.35   0.73
## 15 LIB5491  ChME3    316 0.30   0.77
## 19 LIB5492  CgGC1    337 0.26   0.43
## 23 LIB5493  CgGC2    326 0.43   0.37
## 27 LIB5494  CgGC3    334 0.30   0.48
## 31 LIB5495  CgME1    388 0.34   0.51
## 35 LIB5496  CgME2    395 0.34   0.48
## 39 LIB5497  CgME3    453 0.28   0.57
# plot: one stacked bar per sample, filled by contaminant
library(reshape2)
library(ggplot2)
# Reshape to long format: every column other than lib, sample and insert
# becomes a (contaminant, proportion) pair.
df <- melt(data, id.vars = c("lib", "sample", "insert"),
    variable.name = "contaminant", value.name = "proportion")
ggplot(
    df,
    aes(x = sample, y = proportion, fill = contaminant, group = sample)
) +
    geom_bar(stat = "identity")

plot of chunk unnamed-chunk-2