library("ggplot2")
library("RCurl")
library("dplyr")
# Download the CSV from the gist, parse it, keep only rows with a positive
# AuthorCount, and add a `bucket` factor: author counts 1-9 keep their own
# level, while every count of 10 or more is collapsed into the "10+" level.
observations <- read.csv(
  text = getURL(
    "https://gist.githubusercontent.com/mrb/ea9a2aa3f41e36f37035/raw/159380f9658e47569fd048a3c6baee858e65ce8d/gistfile1.txt"
  )
) %>%
  filter(AuthorCount > 0) %>%
  mutate(
    bucket = factor(
      pmin(AuthorCount, 10), # cap at 10 so the top level absorbs larger counts
      levels = 1:10,
      labels = c(1:9, "10+")
    )
  )
# One GPA boxplot per distinct AuthorCount value, restricted to rows with
# AuthorCount below 120. Colouring by the factor version of AuthorCount is
# what splits the continuous x axis into one box per count; the legend is
# hidden.
ggplot(
  data = filter(observations, AuthorCount < 120),
  mapping = aes(x = AuthorCount, y = GPA, colour = as.factor(AuthorCount))
) +
  geom_boxplot() +
  labs(x = "Author Count") +
  theme(legend.position = "none")
# GPA boxplots per author-count bucket ("1" .. "9", "10+"), shown in the
# factor's level order and coloured by bucket.
ggplot(
  data = observations,
  mapping = aes(x = bucket, y = GPA, colour = bucket)
) +
  geom_boxplot() +
  labs(x = "Author Count")
# Same per-bucket GPA boxplots, but with the x axis ordered by each bucket's
# median GPA, highest first (reorder() sorts ascending on the median of -GPA).
ggplot(
  data = observations,
  mapping = aes(
    x = reorder(bucket, -GPA, FUN = median),
    y = GPA,
    colour = bucket
  )
) +
  geom_boxplot() +
  labs(x = "Author Count")