Here a real-world example taken from a current project. To get the job done, this script re-visits some topics from my previous R-sessions.
# import libraries
library(plyr)
library(reshape)
##
## Attaching package: 'reshape'
##
## The following objects are masked from 'package:plyr':
##
## rename, round_any
library(ggplot2)
library(gridExtra)
## Loading required package: grid
# set working directory
setwd("~/Prac/R_demo/plot_arrangements")
# +++++++++++++++++++++++++++++
# functions used in this script
# +++++++++++++++++++++++++++++
# Function to log-transform observations and expectations for QQ plots
# Modified after http://gettinggeneticsdone.blogspot.com.au/2010/07/qq-plots-of-p-values-in-r-using-base.html
My.Qq.log <- function(pvalues) {
# pvalues has to be a vector (e.g. a column in a data frame)
observed <- -log10(sort(pvalues, decreasing = FALSE))
expected <- -log10(1:length(observed) / length(observed) )
out <- data.frame(observed = observed,
expected = expected)
}
# create a re-usable theme for ggplot
# removing the grid lines and other components from theme_bw()
# call via p + theme_my, no "()" needed, it's not a function.
theme_my <- theme_bw() + theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
strip.background = element_blank(),
legend.key = element_blank()
)
# function to create the plots the qqplots
# this function will be applied to each sub set of data later
MyQQPlots <- function(data, name = "Name") {
# default for the column that holds the data set name is "Name"
require(ggplot2)
current.Name <- unique(data[, names(data) == name]) # get the Name of the current subset
# axes are hard-coded to "expected" and "observed"
p <- ggplot(data, aes(x = expected, y = observed))
p <- p + geom_abline(intercept = 0,
slope = 1,
colour = "black", linetype = "dashed")
p <- p + geom_point(aes(colour = variable))
p <- p + scale_colour_manual(
name = "",
values = c("red", "green", "blue"),
labels = c("LM", "LM (X)", "GLM (X+K)"))
p <- p + labs(x = expression(Expected~~-log[10](italic(p))),
y = expression(Observed~~-log[10](italic(p))),
title = current.Name)
p <- p + theme_my
p <- p + theme(legend.position = "none")
return(p)
}
# +++++++++++++++++++++++++++++
# Now the actual data processing
# +++++++++++++++++++++++++++++
# create a list of all csv files in the current working directory
my.files <- list.files(pattern = ".csv")
# import one example file
df <- read.csv(my.files[1])
head(df)
## No.Q.or.K Only.Q Q.K
## 1 0.6966 0.6932 0.7313
## 2 0.5867 0.8123 0.7970
## 3 0.6327 0.9907 0.9821
## 4 0.6912 0.7726 0.8094
## 5 0.4616 0.7333 0.7521
## 6 0.6035 0.9613 0.9728
str(df)
## 'data.frame': 881 obs. of 3 variables:
## $ No.Q.or.K: num 0.697 0.587 0.633 0.691 0.462 ...
## $ Only.Q : num 0.693 0.812 0.991 0.773 0.733 ...
## $ Q.K : num 0.731 0.797 0.982 0.809 0.752 ...
# import all csv files
# based on my example script at http://rpubs.com/MarkusLoew/9496
# now modified to use lapply instead of a "for" loop.
# returns a list of data frames
my.data <- lapply(my.files, function(x) {
print(x) ## progress indicator
cur.file <- read.csv(file = x)
cur.file$Name <- x
return(cur.file)
})
## [1] "Eucalypt.csv"
## [1] "Fagus.csv"
## [1] "Globulus.csv"
## [1] "Grandis.csv"
## [1] "Kryptonite.csv"
## [1] "Shale.csv"
# rbind the list elements together into one data frame
my.data <- do.call("rbind", my.data)
# get rid of the ".csv" in the names
my.data$Name <- gsub("\\.csv", "", my.data$Name)
my.data$Name <- as.factor(my.data$Name)
# reshape the data into long format to have them ready for plyr
my.melt <- melt(my.data, id = "Name")
# now the split-apply-combine processing
# apply the log-transform-function to each subgroup
my.qq.log <- ddply(my.melt,
.(Name, variable),
function(x) My.Qq.log(x$value))
# this creates a list of plots
# one plot for each data set
my.plots <- dlply(my.qq.log,
.(Name),
function(x) MyQQPlots(x))
# arrange all plots on one "page"
do.call("grid.arrange", c(my.plots, ncol = 2))
# There is an unsolved problem (at least for me unsolved) with the above grid.arrage: How to aling the y-axes of all plots?
# below a work around via ggplotGrob and grid.draw that takes care of that, however I have not found a way (yet) to feed the list directly into grid.draw
# arranging the plots
# continue ad infinitum, is currently done for each individual plot
# have to think about how to make it more elegant - any ideas? Please leave a comment.
# ggplotGrob does not like lists, unfortunately
a <- ggplotGrob(my.plots[[1]])
b <- ggplotGrob(my.plots[[2]])
c <- ggplotGrob(my.plots[[3]])
d <- ggplotGrob(my.plots[[4]])
# arrange several plots on one page with aligned y-axes
# not sure about a 3x2 rows and column layout
grid.draw(rbind(a, b, c, d, size = "first"))