ggplot2: Toolbox

## knitr configuration: http://yihui.name/knitr/options#chunk_options
## Chunk defaults: unprefixed output, errors shown in the document instead of
## stopping the run, warnings and messages hidden, source echoed, no caching,
## figures 6 x 6 inches.
## Logical options use TRUE/FALSE; T and F are ordinary, reassignable variables.
opts_chunk$set(comment = "", error = TRUE, warning = FALSE, message = FALSE,
               tidy = FALSE, cache = FALSE, echo = TRUE,
               fig.width = 6, fig.height = 6)

## R session settings: wide console output, and a penalty (scipen) that makes
## R favour fixed over scientific notation when printing numbers
options(
    scipen = 5,
    width = 116
)

References

Load ggplot2

library(ggplot2)

Try out various geoms

## Three-row toy dataset: numeric coordinates plus a text label per row
df <- data.frame(
    x     = c(3, 1, 5),
    y     = c(2, 4, 6),
    label = c("a", "b", "c")
)

## Base plot reused by the geom demos; NULL removes both axis titles
p <- ggplot(df, aes(x = x, y = y, label = label)) +
    labs(x = NULL, y = NULL)

## Scatterplot: one point per row of df
p +
    layer(geom = "point") +
    labs(title = "geom_point")

plot of chunk unnamed-chunk-3

## Barchart: stat = "identity" uses the y values directly as bar heights
## instead of counting rows per x value
p +
    layer(geom = "bar", stat = "identity") +
    labs(title = "geom bar, stat identity")

plot of chunk unnamed-chunk-3

## Line plot: observations are joined in order of their x values
p +
    layer(geom = "line") +
    labs(title = "geom line")

plot of chunk unnamed-chunk-3

## Area plot: the region between y = 0 and the line is filled
p +
    layer(geom = "area") +
    labs(title = "geom area")

plot of chunk unnamed-chunk-3

## Path plot: observations are joined in the order they appear in df
p +
    layer(geom = "path") +
    labs(title = "geom path")

plot of chunk unnamed-chunk-3

## Text plot: each row's label is drawn at its (x, y) position
p +
    layer(geom = "text") +
    labs(title = "geom text")

plot of chunk unnamed-chunk-3

## Tile plot: one rectangle centred on each (x, y)
p +
    layer(geom = "tile") +
    labs(title = "geom tile")

plot of chunk unnamed-chunk-3

## Polygon plot: the rows of df are the vertices of one filled shape
p +
    layer(geom = "polygon") +
    labs(title = "geom polygon")

plot of chunk unnamed-chunk-3

Display distributions

1d distribution: the default histogram is not useful.

## Rebind p to the diamonds data with depth on the x axis
p <- ggplot(diamonds, aes(x = depth))

## Default: histogram without any bin width specified
p +
    layer(geom = "histogram")

plot of chunk unnamed-chunk-4

## Tuned: 0.1-wide bins, x axis restricted to depths between 55 and 70.
## The scale argument is spelled out as `limits`; the shorter `limit` only
## worked through partial argument matching.
p +
    layer(geom = "histogram", binwidth = 0.1) +
    scale_x_continuous(limits = c(55, 70))

plot of chunk unnamed-chunk-4

1d distribution grouped by another variable

## Shared base for the grouped plots: depth restricted to 58-68.
## `limits` is written in full instead of relying on partial argument matching.
p2 <- p + scale_x_continuous(limits = c(58, 68))

## One histogram panel per cut, arranged in rows
p2 +
    layer(geom = "histogram", binwidth = 0.1) +
    facet_grid(cut ~ .)

plot of chunk unnamed-chunk-5


## Histogram filled by cut; position "fill" stretches every bin to full
## height so the bars show the share of each cut
p2 + layer(
    geom = "histogram",
    binwidth = 0.1,
    mapping = aes(fill = cut),
    position = "fill"
)

plot of chunk unnamed-chunk-5


## Frequency polygons, one coloured line per cut, with y mapped to the
## computed density instead of the raw count
p2 + layer(
    geom = "freqpoly",
    binwidth = 0.1,
    mapping = aes(y = ..density.., color = cut)
)

plot of chunk unnamed-chunk-5


## Kernel density estimate of depth, one coloured outline per cut.
## No binwidth here: density estimation does not bin the data, so the
## parameter carried over from the histogram layers had no effect.
p2 + layer(
    geom = "density",
    mapping = aes(color = cut)
)

plot of chunk unnamed-chunk-5

Boxplot

## Boxplot base: depth on y, one box for every level of cut on x
p.box <- ggplot(diamonds, aes(x = cut, y = depth))
p.box +
    layer(geom = "boxplot")

plot of chunk unnamed-chunk-6

If the x variable is continuous, use round_any() from plyr to bin it into groups.

library(plyr)

## Boxplot over continuous carat: round_any(..., f = floor) rounds carat down
## to a multiple of 0.1, and the group aesthetic draws one box per rounded value.
## `limits` is written in full instead of relying on partial argument matching.
p.box +
    layer(geom = "boxplot",
          mapping = aes(x = carat,
              group = round_any(x = carat, accuracy = 0.1, f = floor))) +
    scale_x_continuous(limits = c(0, 3))

plot of chunk unnamed-chunk-7

Jitter to avoid overlaps

## Base plot: city mileage (cty) against vehicle class from the mpg data
p.jitter <- ggplot(mpg, aes(x = class, y = cty))

## Plain points: rows with the same class and cty land on the same spot
p.jitter +
    layer(geom = "point")

plot of chunk unnamed-chunk-8

## Jitter geom: points get a small random offset so coincident ones separate
p.jitter +
    layer(geom = "jitter")

plot of chunk unnamed-chunk-8

The jitter geom is just the point geom combined with the jitter position.

## Same plot spelled out: point geom plus the jitter position adjustment
p.jitter +
    layer(
        geom = "point",
        position = "jitter"
    )

plot of chunk unnamed-chunk-9

Density plot

## NOTE(review): the two calls below look like usage signatures quoted from the
## ggplot2 help pages (every argument shown with a default, plus a bare `...`);
## they appear to be reference material rather than code to run -- confirm.
## As listed, geom_density uses stat "density" with position "identity".
geom_density(mapping = NULL, data = NULL,
             stat = "density", position = "identity", na.rm = FALSE,
             ...)

## As listed, stat_density uses geom "area" with position "stack".
stat_density(mapping = NULL, data = NULL, geom = "area",
             position = "stack", adjust = 1, kernel = "gaussian",
             trim = FALSE, na.rm = FALSE, ...)
## Density base plot: diamonds depth with the x axis limited to 54-70
p.density <- ggplot(diamonds, aes(x = depth)) +
    xlim(54, 70)

## Print a text summary of the plot object
summary(p.density)
data: carat, cut, color, clarity, depth, table, price, x, y, z [53940x10]
mapping:  x = depth
scales:   x, xmin, xmax, xend, xintercept 
faceting: facet_null() 

The density stat combined with the area geom gives a density plot.

## Area geom fed by the density stat, drawn with a black outline and no fill
p.density + layer(
    geom = "area",
    stat = "density",
    fill = NA,
    color = "black"
)

plot of chunk unnamed-chunk-12

Density geom

## Density geom with a constant red fill at 20% opacity
p.density +
    layer(
        geom = "density",
        fill = "red",
        alpha = 0.2
    )

plot of chunk unnamed-chunk-13

## Fill mapped to cut: one translucent density curve per cut
p.density +
    layer(
        geom = "density",
        mapping = aes(fill = cut),
        alpha = 0.2
    )

plot of chunk unnamed-chunk-13

Deal with overplotting

## 2000 random (x, y) pairs, each coordinate drawn with rnorm() defaults
df <- data.frame(
    x = rnorm(2000),
    y = rnorm(2000)
)

## Scatterplot base for the overplotting demos, then its text summary
norm <- ggplot(df, aes(x = x, y = y))
summary(norm)
data: x, y [2000x2]
mapping:  x = x, y = y
faceting: facet_null() 

Plot with different shapes

## Default point shape
norm +
    geom_point()

plot of chunk unnamed-chunk-15

## shape = 1: hollow circles
norm +
    geom_point(shape = 1)

plot of chunk unnamed-chunk-15

## shape = ".": very small dots
norm +
    geom_point(shape = ".")

plot of chunk unnamed-chunk-15

Plot with transparency

## One-third opacity
norm +
    geom_point(alpha = 1 / 3)

plot of chunk unnamed-chunk-16

## One-fifth opacity
norm +
    geom_point(alpha = 1 / 5)

plot of chunk unnamed-chunk-16

## One-tenth opacity
norm +
    geom_point(alpha = 1 / 10)

plot of chunk unnamed-chunk-16