library("ggplot2")
library("RCurl")
library("dplyr")

# Fetch the CSV text from the gist, parse it into a data frame, keep only rows
# with at least one author, and add a `bucket` factor: author counts 1-9 keep
# their own level, while every count of 10 or more falls into the "10+" level.
observations <- getURL("https://gist.githubusercontent.com/mrb/ea9a2aa3f41e36f37035/raw/159380f9658e47569fd048a3c6baee858e65ce8d/gistfile1.txt") %>%
    read.csv(text = .) %>%
    filter(AuthorCount > 0) %>%
    mutate(
        bucket = factor(
            pmin(AuthorCount, 10),
            levels = 1:10,
            labels = c(as.character(1:9), "10+")
        )
    )

# Box plot of GPA for each individual author count, restricted to rows with
# fewer than 120 authors. Colour is mapped to the author count as a factor,
# and the resulting legend is hidden.
observations %>%
    filter(AuthorCount < 120) %>%
    ggplot(aes(x = AuthorCount, y = GPA, colour = as.factor(AuthorCount))) +
    geom_boxplot() +
    labs(x = "Author Count") +
    theme(legend.position = "none")

plot of chunk unnamed-chunk-1

With AuthorCount >= 10 grouped into a single "10+" bucket

# Box plot of GPA for each author-count bucket, coloured by bucket.
ggplot(
    data = observations,
    mapping = aes(x = bucket, y = GPA, colour = bucket)
) +
    geom_boxplot() +
    labs(x = "Author Count")

plot of chunk unnamed-chunk-2

Sorted by median GPA (highest first)

# Box plot of GPA for each author-count bucket, with the buckets ordered along
# the x axis by descending median GPA: reorder() sorts levels in ascending
# order of the score, and the score here is the median of -GPA.
# na.rm = TRUE is forwarded to median() so that a missing GPA value cannot turn
# a bucket's score into NA and scramble the ordering.
ggplot(
    observations,
    aes(
        reorder(bucket, -GPA, FUN = median, na.rm = TRUE),
        GPA,
        colour = bucket
    )
) +
    xlab("Author Count") +
    geom_boxplot()

plot of chunk unnamed-chunk-3