Set up
library(ggplot2)
library(reshape2)
# normalisation function
normalise <- function(df) {
df$count <- 1e+06 * (df$count/sum(df$count))
return(df)
}
setwd("~/hydrogen/sorghum")
fastq <- read.table("raw_read_counts.txt", sep = "\t", header = FALSE)
names(fastq) <- c("count", "cell", "rep") # changing header names
ggplot(fastq, aes(x = paste(cell, rep), y = count, fill = cell)) + geom_bar(stat = "identity") +
ggtitle("Raw read counts") + geom_text(aes(label = count, y = count + 4e+05)) +
labs(x = "Cell and rep") + ylim(0, 3e+07)
Reads trimmed with trimmomatic
setwd("~/hydrogen/sorghum")
fastq <- read.table("t_read_counts.txt", sep = "\t", header = FALSE)
names(fastq) <- c("count", "cell", "rep") # changing header names
ggplot(fastq, aes(x = paste(cell, rep), y = count, fill = cell)) + geom_bar(stat = "identity") +
ggtitle("Trimmed read counts (trimmomatic)") + geom_text(aes(label = count,
y = count + 4e+05)) + labs(x = "Cell and rep") + ylim(0, 3e+07)
Reads trimmed with fastq-mcf
setwd("~/hydrogen/sorghum")
fastq <- read.table("mcf_read_counts.txt", sep = "\t", header = FALSE)
names(fastq) <- c("count", "cell", "rep") # changing header names
ggplot(fastq, aes(x = paste(cell, rep), y = count, fill = cell)) + geom_bar(stat = "identity") +
ggtitle("Trimmed read counts (fastq-mcf)") + geom_text(aes(label = count,
y = count + 4e+05)) + labs(x = "Cell and rep") + ylim(0, 3e+07)
Bundle Sheath (with Trimmomatic)
setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
tmp <- read.table(paste("t_read_length_BS-", i, ".txt", sep = ""), header = FALSE)
names(tmp) <- c("length", "count") # changing header names
tmp$rep <- paste("rep", i, sep = "-")
tmp <- normalise(tmp)
bs_data <- rbind(bs_data, tmp)
}
ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) +
scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Bundle Sheath sRNA read lengths (trimmomatic)") +
xlim(17, 49)
Bundle Sheath (with fastq-mcf)
setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
tmp <- read.table(paste("mcf_read_length_BS-", i, ".txt", sep = ""), header = FALSE)
names(tmp) <- c("length", "count") # changing header names
tmp$rep <- paste("rep", i, sep = "-")
tmp <- normalise(tmp)
bs_data <- rbind(bs_data, tmp)
}
ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) +
scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Bundle Sheath sRNA read lengths (fastq-mcf)") +
xlim(17, 49)
Mesophyll (with Trimmomatic)
setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
tmp <- read.table(paste("t_read_length_M-", i, ".txt", sep = ""), header = FALSE)
names(tmp) <- c("length", "count") # changing header names
tmp$rep <- paste("rep", i, sep = "-")
tmp <- normalise(tmp)
bs_data <- rbind(bs_data, tmp)
}
ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) +
scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Mesophyll sRNA read lengths (trimmomatic)") +
xlim(17, 49)
Bundle Sheath (with fastq-mcf)
setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
tmp <- read.table(paste("mcf_read_length_M-", i, ".txt", sep = ""), header = FALSE)
names(tmp) <- c("length", "count") # changing header names
tmp$rep <- paste("rep", i, sep = "-")
tmp <- normalise(tmp)
bs_data <- rbind(bs_data, tmp)
}
ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) +
scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Mesophyll sRNA read lengths (fastq-mcf)") +
xlim(17, 49)
setwd("~/hydrogen/sorghum")
sam_sizes <- read.table("sam_sizes.txt", header = F, sep = "\t")
names(sam_sizes) <- c("cell", "rep", "size")
ggplot(sam_sizes, aes(x = paste(cell, rep), y = size, fill = cell)) + geom_bar(stat = "identity") +
ggtitle("Reads aligned to genome with bowtie") + geom_text(aes(label = size,
y = size + 4e+05)) + labs(x = "Cell and rep") + ylim(0, 3e+07)
Correlation plot
setwd("~/hydrogen/sorghum")
loci <- read.table("srna_expression.txt", header = F, sep = "\t")
names(loci) <- c("chromosome", "start", "stop", "BS-1", "BS-2", "BS-3", "M-1",
"M-2", "M-3")
counts <- loci[, c(4, 5, 6, 7, 8, 9)]
# set rownames here?
correlation <- cor(counts)
# correlation <- cor(counts, method='spearman')
melted <- melt(correlation)
melted$colour[melted$value < 0.7] <- "white"
melted$colour[melted$value >= 0.7] <- "black"
# melted <- melted[-which(melted$value==1),] # remove 1s so the gradient is
# better
ggplot(data = melted, aes_string(x = names(melted)[1], y = names(melted)[2],
fill = "value")) + geom_tile() + ggtitle("sRNA Count Correlation Matrix") +
geom_text(colour = melted$colour, aes(label = round(value, 3))) + xlab("") +
ylab("")
Ranked Correlation plot
correlation <- cor(counts, method = "spearman")
melted <- melt(correlation)
melted$colour[melted$value < 0.22] <- "white"
melted$colour[melted$value >= 0.22] <- "black"
melted <- melted[-which(melted$value == 1), ] # remove 1s so the gradient is better
ggplot(data = melted, aes_string(x = names(melted)[1], y = names(melted)[2],
fill = "value")) + geom_tile() + ggtitle("sRNA Count Spearman Correlation Matrix") +
geom_text(colour = melted$colour, aes(label = round(value, 2))) + xlab("") +
ylab("")