Combine multiple ggplot figures - Part II

Here a real-world example taken from a current project. To get the job done, this script re-visits some topics from my previous R-sessions.

Import multiple csv files
Create figures for each of these csv files
Combine the figures on one 'page'

# import libraries
library(plyr)
library(reshape)

## 
## Attaching package: 'reshape'
## 
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any

library(ggplot2)
library(gridExtra)

## Loading required package: grid


# set working directory
setwd("~/Prac/R_demo/plot_arrangements")

# +++++++++++++++++++++++++++++
# functions used in this script
# +++++++++++++++++++++++++++++

# Function to log-transform observations and expectations for QQ plots
# Modified after http://gettinggeneticsdone.blogspot.com.au/2010/07/qq-plots-of-p-values-in-r-using-base.html
My.Qq.log <- function(pvalues) {
    # pvalues has to be a vector (e.g. a column in a data frame)
    observed <- -log10(sort(pvalues, decreasing = FALSE))
    expected <- -log10(1:length(observed) / length(observed) )
    out <- data.frame(observed = observed,
                      expected = expected)
}

# create a re-usable theme for ggplot
# removing the grid lines and other components from theme_bw()
# call via p + theme_my, no "()" needed, it's not a function.
theme_my <- theme_bw() + theme(
   axis.line        = element_line(colour = "black"),
   panel.grid.major = element_blank(), 
   panel.grid.minor = element_blank(),
   panel.border     = element_blank(),
   strip.background = element_blank(),
   legend.key       = element_blank()
)

# function to create the plots the qqplots
# this function will be applied to each sub set of data later

MyQQPlots <- function(data, name = "Name") {
  # default for the column that holds the data set name is "Name"
  require(ggplot2)
  current.Name <- unique(data[, names(data) == name]) # get the Name of the current subset

  # axes are hard-coded to "expected" and "observed"
  p <- ggplot(data, aes(x = expected, y = observed))
        p <- p + geom_abline(intercept  = 0, 
                                 slope  = 1, 
                                 colour = "black", linetype = "dashed")
        p <- p + geom_point(aes(colour  = variable))
        p <- p + scale_colour_manual(
                       name = "", 
                     values = c("red", "green", "blue"),
                     labels = c("LM", "LM (X)", "GLM (X+K)"))
        p <- p + labs(x = expression(Expected~~-log[10](italic(p))),
                      y = expression(Observed~~-log[10](italic(p))),
                      title = current.Name)
        p <- p + theme_my 
        p <- p + theme(legend.position = "none")
  return(p)  
}


# +++++++++++++++++++++++++++++
# Now the actual data processing
# +++++++++++++++++++++++++++++

# create a list of all csv files in the current working directory
my.files <- list.files(pattern = ".csv")

# import one example file
df <- read.csv(my.files[1])
head(df)

##   No.Q.or.K Only.Q    Q.K
## 1    0.6966 0.6932 0.7313
## 2    0.5867 0.8123 0.7970
## 3    0.6327 0.9907 0.9821
## 4    0.6912 0.7726 0.8094
## 5    0.4616 0.7333 0.7521
## 6    0.6035 0.9613 0.9728

str(df)

## 'data.frame':    881 obs. of  3 variables:
##  $ No.Q.or.K: num  0.697 0.587 0.633 0.691 0.462 ...
##  $ Only.Q   : num  0.693 0.812 0.991 0.773 0.733 ...
##  $ Q.K      : num  0.731 0.797 0.982 0.809 0.752 ...


# import all csv files
# based on my example script at http://rpubs.com/MarkusLoew/9496
# now modified to use lapply instead of a "for" loop.
# returns a list of data frames  
my.data <- lapply(my.files, function(x) {
        print(x) ## progress indicator
        cur.file      <- read.csv(file = x)
        cur.file$Name <- x
        return(cur.file)
})

## [1] "Eucalypt.csv"
## [1] "Fagus.csv"
## [1] "Globulus.csv"
## [1] "Grandis.csv"
## [1] "Kryptonite.csv"
## [1] "Shale.csv"


# rbind the list elements together into one data frame
my.data <- do.call("rbind", my.data)

# get rid of the ".csv" in the names
my.data$Name <- gsub("\\.csv", "", my.data$Name)
my.data$Name <- as.factor(my.data$Name)

# reshape the data into long format to have them ready for plyr
my.melt <- melt(my.data, id = "Name")

# now the split-apply-combine processing
# apply the log-transform-function to each subgroup
my.qq.log <- ddply(my.melt,
                   .(Name, variable),
                   function(x) My.Qq.log(x$value))

# this creates a list of plots
# one plot for each data set
my.plots <- dlply(my.qq.log,
            .(Name),        
            function(x) MyQQPlots(x))

# arrange all plots on one "page"
do.call("grid.arrange", c(my.plots, ncol = 2))

plot of chunk unnamed-chunk-1

Alternative method using grid.draw to have all y-axes of the plots aligned

# There is an unsolved problem (at least for me unsolved) with the above grid.arrage: How to aling the y-axes of all plots?
# below a work around via ggplotGrob and grid.draw that takes care of that, however I have not found a way (yet) to feed the list directly into grid.draw

# arranging the plots
# continue ad infinitum, is currently done for each individual plot
# have to think about how to make it more elegant - any ideas? Please leave a comment.
# ggplotGrob does not like lists, unfortunately
a <- ggplotGrob(my.plots[[1]])
b <- ggplotGrob(my.plots[[2]])
c <- ggplotGrob(my.plots[[3]])
d <- ggplotGrob(my.plots[[4]])

# arrange several plots on one page with aligned y-axes
# not sure about a 3x2 rows  and column layout
grid.draw(rbind(a, b, c, d, size = "first"))

plot of chunk unnamed-chunk-2