library(XML)
library(ggplot2)

# Location of the pdepend summary report that the rest of the script analyses.
file <- "http://dl.dropbox.com/u/361076/d8-pdepend-summary.xml"

# Parse the report once; later steps query this document with XPath.
doc <- xmlParse(file)

# Build one data frame row per <method> node. Each row holds the node's tag
# name, the method's own attributes, and the attributes of its parent and
# grandparent nodes (prefixed "class." and "namespace." respectively).
methods.df <- data.frame(t(xpathSApply(doc, "//method", function(method.node) {
    class.node <- xmlParent(method.node)
    namespace.node <- xmlParent(class.node)
    c(
        name = xmlName(method.node),
        xmlAttrs(method.node),
        class = xmlAttrs(class.node),
        namespace = xmlAttrs(namespace.node)
    )
})))

# Create a sub.namespace key to group by: the first two backslash-separated
# segments of each namespace.
#
# head(segments, 2) keeps at most two segments, so a namespace with a single
# segment is left as-is instead of being padded with a literal "NA".
# vapply() guarantees a character vector even when there are no rows.
methods.df$sub.namespace <- vapply(
    strsplit(as.character(methods.df$namespace.name), "\\", fixed = TRUE),
    function(segments) paste(head(segments, 2), collapse = "\\"),
    character(1)
)

# Keep only rows whose namespace mentions "Drupal". The pdepend --ignore
# option was meant to exclude everything else up front, but it never worked,
# so the filtering is done here instead.
methods.df <- methods.df[grepl("Drupal", methods.df$namespace.name), ]

# Filter out test namespaces.
#
# A logical mask is used rather than negative indices from grep(): when no
# namespace matches "_test", -grep() produces an empty index and the subset
# would drop every row instead of keeping them all.
methods.df <- methods.df[!grepl("_test", methods.df$sub.namespace), ]

# Convert the line-count metrics to numbers. Going through as.character()
# first means a factor column yields its labels rather than its level codes.
methods.df[c("loc", "lloc", "eloc")] <- lapply(
    methods.df[c("loc", "lloc", "eloc")],
    function(column) as.numeric(as.character(column))
)

All methods

# Horizontal box plot of the eloc metric across every remaining method.
ggplot(methods.df, aes(x = name, y = eloc)) +
    geom_boxplot() +
    labs(y = "lines of code") +
    coord_flip()

plot of chunk unnamed-chunk-2

Methods by namespace

# Horizontal box plots of eloc, one per sub.namespace. reorder() sorts the
# namespaces by their mean eloc, and each namespace gets its own colour.
ggplot(
    methods.df,
    aes(x = reorder(sub.namespace, eloc), y = eloc, colour = sub.namespace)
) +
    geom_boxplot() +
    labs(x = "namespace", y = "lines of code") +
    coord_flip()

plot of chunk unnamed-chunk-3

Longest methods


# Order methods from largest to smallest eloc.
methods.df.sorted <- methods.df[order(-methods.df$eloc), ]

# Bar chart of the 20 longest methods, labelled namespace\class\method.
# NOTE(review): name.1 is presumably the method's own "name" attribute,
# renamed by data.frame() because it collides with the tag-name column --
# confirm against the parsed columns.
#
# stat = "identity" makes each bar's height the eloc value itself. The
# default stat counts rows, and current ggplot2 raises an error when a y
# aesthetic is supplied alongside it.
ggplot(
    head(methods.df.sorted, 20),
    aes(paste(sub.namespace, class.name, name.1, sep = "\\"), eloc, fill = eloc)
) +
    geom_bar(stat = "identity") +
    coord_flip()

plot of chunk unnamed-chunk-4