Sorghum

Set up

library(ggplot2)
library(reshape2)
# normalisation function
normalise <- function(df) {
    df$count <- 1e+06 * (df$count/sum(df$count))
    return(df)
}

Raw Data

setwd("~/hydrogen/sorghum")
fastq <- read.table("raw_read_counts.txt", sep = "\t", header = FALSE)
names(fastq) <- c("count", "cell", "rep")  # changing header names

ggplot(fastq, aes(x = paste(cell, rep), y = count, fill = cell)) + geom_bar(stat = "identity") + 
    ggtitle("Raw read counts") + geom_text(aes(label = count, y = count + 4e+05)) + 
    labs(x = "Cell and rep") + ylim(0, 3e+07)

plot of chunk raw

Reads trimmed with trimmomatic

setwd("~/hydrogen/sorghum")
fastq <- read.table("t_read_counts.txt", sep = "\t", header = FALSE)
names(fastq) <- c("count", "cell", "rep")  # changing header names

ggplot(fastq, aes(x = paste(cell, rep), y = count, fill = cell)) + geom_bar(stat = "identity") + 
    ggtitle("Trimmed read counts (trimmomatic)") + geom_text(aes(label = count, 
    y = count + 4e+05)) + labs(x = "Cell and rep") + ylim(0, 3e+07)

plot of chunk trimmo

Reads trimmed with fastq-mcf

setwd("~/hydrogen/sorghum")
fastq <- read.table("mcf_read_counts.txt", sep = "\t", header = FALSE)
names(fastq) <- c("count", "cell", "rep")  # changing header names

ggplot(fastq, aes(x = paste(cell, rep), y = count, fill = cell)) + geom_bar(stat = "identity") + 
    ggtitle("Trimmed read counts (fastq-mcf)") + geom_text(aes(label = count, 
    y = count + 4e+05)) + labs(x = "Cell and rep") + ylim(0, 3e+07)

plot of chunk mcf

Read lengths

Bundle Sheath (with Trimmomatic)

setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
    tmp <- read.table(paste("t_read_length_BS-", i, ".txt", sep = ""), header = FALSE)
    names(tmp) <- c("length", "count")  # changing header names
    tmp$rep <- paste("rep", i, sep = "-")
    tmp <- normalise(tmp)
    bs_data <- rbind(bs_data, tmp)
}

ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) + 
    scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Bundle Sheath sRNA read lengths (trimmomatic)") + 
    xlim(17, 49)

plot of chunk bs_trimmo

Bundle Sheath (with fastq-mcf)

setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
    tmp <- read.table(paste("mcf_read_length_BS-", i, ".txt", sep = ""), header = FALSE)
    names(tmp) <- c("length", "count")  # changing header names
    tmp$rep <- paste("rep", i, sep = "-")
    tmp <- normalise(tmp)
    bs_data <- rbind(bs_data, tmp)
}

ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) + 
    scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Bundle Sheath sRNA read lengths (fastq-mcf)") + 
    xlim(17, 49)

plot of chunk bs_fastq

Mesophyll (with Trimmomatic)

setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
    tmp <- read.table(paste("t_read_length_M-", i, ".txt", sep = ""), header = FALSE)
    names(tmp) <- c("length", "count")  # changing header names
    tmp$rep <- paste("rep", i, sep = "-")
    tmp <- normalise(tmp)
    bs_data <- rbind(bs_data, tmp)
}

ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) + 
    scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Mesophyll sRNA read lengths (trimmomatic)") + 
    xlim(17, 49)

plot of chunk m_trimmo

Bundle Sheath (with fastq-mcf)

setwd("~/hydrogen/sorghum")
bs_data <- data.frame()
for (i in 1:3) {
    tmp <- read.table(paste("mcf_read_length_M-", i, ".txt", sep = ""), header = FALSE)
    names(tmp) <- c("length", "count")  # changing header names
    tmp$rep <- paste("rep", i, sep = "-")
    tmp <- normalise(tmp)
    bs_data <- rbind(bs_data, tmp)
}

ggplot(bs_data, aes(x = length, y = count, colour = rep)) + geom_line(size = 1) + 
    scale_colour_brewer(type = "qual", palette = 6) + ggtitle("Mesophyll sRNA read lengths (fastq-mcf)") + 
    xlim(17, 49)

plot of chunk bs-fastq

bowtie alignment of reads to the sorghum genome

setwd("~/hydrogen/sorghum")
sam_sizes <- read.table("sam_sizes.txt", header = F, sep = "\t")
names(sam_sizes) <- c("cell", "rep", "size")

ggplot(sam_sizes, aes(x = paste(cell, rep), y = size, fill = cell)) + geom_bar(stat = "identity") + 
    ggtitle("Reads aligned to genome with bowtie") + geom_text(aes(label = size, 
    y = size + 4e+05)) + labs(x = "Cell and rep") + ylim(0, 3e+07)

plot of chunk bowtie

sRNA Expression

Correlation plot

setwd("~/hydrogen/sorghum")
loci <- read.table("srna_expression.txt", header = F, sep = "\t")
names(loci) <- c("chromosome", "start", "stop", "BS-1", "BS-2", "BS-3", "M-1", 
    "M-2", "M-3")
counts <- loci[, c(4, 5, 6, 7, 8, 9)]
# set rownames here?

correlation <- cor(counts)
# correlation <- cor(counts, method='spearman')

melted <- melt(correlation)
melted$colour[melted$value < 0.7] <- "white"
melted$colour[melted$value >= 0.7] <- "black"
# melted <- melted[-which(melted$value==1),] # remove 1s so the gradient is
# better
ggplot(data = melted, aes_string(x = names(melted)[1], y = names(melted)[2], 
    fill = "value")) + geom_tile() + ggtitle("sRNA Count Correlation Matrix") + 
    geom_text(colour = melted$colour, aes(label = round(value, 3))) + xlab("") + 
    ylab("")

plot of chunk correlation

Ranked Correlation plot

correlation <- cor(counts, method = "spearman")
melted <- melt(correlation)
melted$colour[melted$value < 0.22] <- "white"
melted$colour[melted$value >= 0.22] <- "black"
melted <- melted[-which(melted$value == 1), ]  # remove 1s so the gradient is better

ggplot(data = melted, aes_string(x = names(melted)[1], y = names(melted)[2], 
    fill = "value")) + geom_tile() + ggtitle("sRNA Count Spearman Correlation Matrix") + 
    geom_text(colour = melted$colour, aes(label = round(value, 2))) + xlab("") + 
    ylab("")

plot of chunk spearman