ggfortify

This covers the following classes:

• base::matrix

• base::table (supports fortify only)

• cluster::clara

• cluster::fanny

• cluster::pam

• changepoint::cpt

• fGarch::fGARCH

• forecast::bats

• forecast::forecast

• forecast::ets

• fracdiff::fracdiff

• MASS::isoMDS (indirectly, see the doc)

• MASS::sammon (indirectly, see the doc)

• stats::acf

• stats::ar

• stats::Arima

• stats::cmdscale (indirectly, see the doc)

• stats::decomposed.ts

• stats::density

• stats::factanal

• stats::HoltWinters

• stats::kmeans

• stats::lm

• stats::prcomp

• stats::princomp

• stats::spec

• stats::stl

• stats::ts

• survival::survfit

• survival::survfit.cox

• strucchange::breakpoints

• strucchange::breakpointsfull

• timeSeries::timeSeries

• tseries::irts

• vars::varprd

• xts::xts

• zoo::zooreg

Helper Functions

• ggdistribution to plot PDF/CDF

• ggcpgram to plot cpgram

• gglagplot to plot lag.plot

• ggtsdiag to plot tsdiag

• ggfreqplot to generalize monthplot


library(devtools)
install_github('sinhrks/ggfortify')

Plotting ts objects

ggfortify lets ggplot2 know how to interpret ts objects. After loading ggfortify, you can use the ggplot2::autoplot function for ts objects.

library(ggplot2)
library(ggfortify)
autoplot(AirPassengers)

autoplot(AirPassengers, ts.colour = 'red', ts.linetype = 'dashed')

library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
data(Canada)
head(Canada,5)
## [1] 929.6105 929.8040 930.3184 931.4277 932.6620
summary(Canada)
##        e              prod             rw              U         
##  Min.   :928.6   Min.   :401.3   Min.   :386.1   Min.   : 6.700  
##  1st Qu.:935.4   1st Qu.:404.8   1st Qu.:423.9   1st Qu.: 7.782  
##  Median :946.0   Median :406.5   Median :444.4   Median : 9.450  
##  Mean   :944.3   Mean   :407.8   Mean   :440.8   Mean   : 9.321  
##  3rd Qu.:950.0   3rd Qu.:410.7   3rd Qu.:461.1   3rd Qu.:10.607  
##  Max.   :961.8   Max.   :418.0   Max.   :470.0   Max.   :12.770
str(Canada)
##  mts [1:84, 1:4] 930 930 930 931 933 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "e" "prod" "rw" "U"
##  - attr(*, "tsp")= num [1:3] 1980 2001 4
##  - attr(*, "class")= chr [1:2] "mts" "ts"
autoplot(Canada)

autoplot(Canada, facet = FALSE)

autoplot(Canada, facet = TRUE)

#zoo::zooreg
#xts::xts
#timeSeries::timeSeries
#tseries::irts

library(xts)
autoplot(as.xts(AirPassengers), ts.colour = 'green')

library(timeSeries)
## Loading required package: timeDate
## 
## Attaching package: 'timeSeries'
## 
## The following object is masked from 'package:zoo':
## 
##     time<-
autoplot(as.timeSeries(AirPassengers), ts.colour = ('red'))

#Plotting with forecast package

library(forecast)
## This is forecast 5.6
d.arima <- auto.arima(AirPassengers)
d.forecast <- forecast(d.arima, level = c(95), h = 50)
autoplot(d.forecast)

autoplot(d.forecast, ts.colour = 'green', predict.colour = 'red',
         predict.linetype = 'dashed', conf.int = FALSE)

Plotting with vars package

ggfortify supports the varprd object in the vars package.

library(vars)
d.vselect <- VARselect(Canada, lag.max = 5, type = 'const')$selection[1]
d.var <- VAR(Canada, p = d.vselect, type = 'const')

#Available options are the same as forecast.
autoplot(predict(d.var, n.ahead = 50), ts.colour = 'green',
         predict.colour = 'blue', predict.linetype = 'dashed')

#Plotting with changepoint package

ggfortify supports cpt object in changepoint package

library(changepoint)
## Successfully loaded changepoint package version 1.1.5
## Created on 2014-06-25
##  Substantial changes to the structure of the package have occured from version 1.0.  Please see the package NEWS for details.
autoplot(cpt.meanvar(AirPassengers))

#You can change some options for cpt.
autoplot(cpt.meanvar(AirPassengers), cpt.colour = 'blue', cpt.linetype = 'solid')

Plotting with strucchange package

ggfortify supports breakpoints object in strucchange package. Same plotting options as changepoint are available.

library(strucchange)
autoplot(breakpoints(Nile ~ 1), cpt.colour = 'green', cpt.linetype = 'solid')

#################################################

Plotting time series statistics

ggfortify supports following time series related statistics in stats package:

stl, decomposed.ts

acf, pacf, ccf

spec.ar, spec.pgram

cpgram (covered by ggcpgram)

library(ggplot2)
library(ggfortify)
autoplot(stl(AirPassengers, s.window = 'periodic'), ts.colour = 'blue')

#NOTE With acf and spec.*, specify plot = FALSE to suppress default plotting outputs.

autoplot(acf(AirPassengers, plot = FALSE))

autoplot(acf(AirPassengers, plot = FALSE), conf.int.fill = '#0000FF', conf.int.value = 0.8, conf.int.type = 'ma')

autoplot(spec.ar(AirPassengers, plot = FALSE))

#ggcpgram should output the same cumulative periodogram as cpgram. Because cpgram doesn’t have a return value, we cannot use autoplot(cpgram(...)).

ggcpgram(arima.sim(list(ar = c(0.7, -0.5)), n = 50))

#ggtsdiag should output a diagram similar to the one produced by tsdiag.

library(forecast)
ggtsdiag(auto.arima(AirPassengers))
## Loading required package: grid

#gglagplot is for lag.plot.
gglagplot(AirPassengers, lags = 4)

#ggfreqplot is a generalized monthplot. You can pass freq if you want; otherwise the time series’s own frequency will be used.
ggfreqplot(AirPassengers)

ggfreqplot(AirPassengers, freq = 4)

Plotting PCA (Principal Component Analysis)

ggfortify lets ggplot2 know how to interpret PCA objects. After loading ggfortify, you can use the ggplot2::autoplot function for stats::prcomp and stats::princomp objects.

library(ggplot2)
library(ggfortify)
df <- iris[c(1, 2, 3, 4)]
autoplot(prcomp(df))

#A PCA result should only contain numeric values. If you want to colorize by non-numeric values which the original data has, pass the original data using the original keyword and then specify the column name with the colour keyword. Use help(autoplot.prcomp) (or help(autoplot.*) for any other objects) to check available options.
autoplot(prcomp(df), original = iris, colour = 'Species')

#Passing label = TRUE draws each data label using rownames
autoplot(prcomp(df), original = iris, colour = 'Species', label = TRUE)

#Passing loadings = TRUE draws eigenvectors.
autoplot(prcomp(df), original = iris, colour = 'Species', loadings = TRUE)

#You can attach eigenvector labels and change some options.
autoplot(prcomp(df), original = iris, colour = 'Species',
         loadings = TRUE,loadings.colour = 'blue',
         loadings.label = TRUE,  loadings.label.size = 3)

Plotting Factor Analysis

ggfortify supports the stats::factanal object in the same manner as PCAs. Available options are the same as for PCAs.

Important: You must specify the scores option when calling factanal so that scores are calculated (with the default, scores = 'none', no scores are computed). Otherwise, plotting will fail.

d.factanal <- factanal(state.x77, factors = 3, scores = 'regression')
autoplot(d.factanal, original = state.x77, colour = 'Income')

autoplot(d.factanal, label = TRUE, loadings = TRUE, loadings.label = TRUE)

#Plotting K-means

ggfortify supports the stats::kmeans object. You must explicitly pass the original data to the autoplot function via the original keyword, because a kmeans object doesn’t store the original data. The result will be automatically colorized by cluster.

df <- iris[c(1, 2, 3, 4)]
autoplot(stats::kmeans(df, 3), original = iris)

autoplot(stats::kmeans(df, 4), original = iris)

Plotting Survival Curves using ggplot2 and ggfortify

ggfortify lets ggplot2 know how to draw survival curves. After loading ggfortify, you can use the ggplot2::autoplot function for survfit objects.

library(ggplot2)
library(ggfortify)
library(survival)
## Loading required package: splines
data(lung)
head(lung)
##   inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1    3  306      2  74   1       1       90       100     1175      NA
## 2    3  455      2  68   1       0       90        90     1225      15
## 3    3 1010      1  56   1       0       90        90       NA      15
## 4    5  210      2  57   1       1       90        60     1150      11
## 5    1  883      2  60   1       0      100        90       NA       0
## 6   12 1022      1  74   1       1       50        80      513       0
summary(lung)
##       inst            time            status           age       
##  Min.   : 1.00   Min.   :   5.0   Min.   :1.000   Min.   :39.00  
##  1st Qu.: 3.00   1st Qu.: 166.8   1st Qu.:1.000   1st Qu.:56.00  
##  Median :11.00   Median : 255.5   Median :2.000   Median :63.00  
##  Mean   :11.09   Mean   : 305.2   Mean   :1.724   Mean   :62.45  
##  3rd Qu.:16.00   3rd Qu.: 396.5   3rd Qu.:2.000   3rd Qu.:69.00  
##  Max.   :33.00   Max.   :1022.0   Max.   :2.000   Max.   :82.00  
##  NA's   :1                                                       
##       sex           ph.ecog          ph.karno        pat.karno     
##  Min.   :1.000   Min.   :0.0000   Min.   : 50.00   Min.   : 30.00  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.: 75.00   1st Qu.: 70.00  
##  Median :1.000   Median :1.0000   Median : 80.00   Median : 80.00  
##  Mean   :1.395   Mean   :0.9515   Mean   : 81.94   Mean   : 79.96  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.: 90.00   3rd Qu.: 90.00  
##  Max.   :2.000   Max.   :3.0000   Max.   :100.00   Max.   :100.00  
##                  NA's   :1        NA's   :1        NA's   :3       
##     meal.cal         wt.loss       
##  Min.   :  96.0   Min.   :-24.000  
##  1st Qu.: 635.0   1st Qu.:  0.000  
##  Median : 975.0   Median :  7.000  
##  Mean   : 928.8   Mean   :  9.832  
##  3rd Qu.:1150.0   3rd Qu.: 15.750  
##  Max.   :2600.0   Max.   : 68.000  
##  NA's   :47       NA's   :14
str(lung)
## 'data.frame':    228 obs. of  10 variables:
##  $ inst     : num  3 3 3 5 1 12 7 11 1 7 ...
##  $ time     : num  306 455 1010 210 883 ...
##  $ status   : num  2 2 1 2 2 1 2 2 2 2 ...
##  $ age      : num  74 68 56 57 60 74 68 71 53 61 ...
##  $ sex      : num  1 1 1 1 1 1 2 2 1 1 ...
##  $ ph.ecog  : num  1 0 0 1 0 1 2 2 1 2 ...
##  $ ph.karno : num  90 90 90 90 100 50 70 60 70 70 ...
##  $ pat.karno: num  100 90 90 60 90 80 60 80 80 70 ...
##  $ meal.cal : num  1175 1225 NA 1150 NA ...
##  $ wt.loss  : num  NA 15 15 11 0 0 10 1 16 34 ...
d.survfit <- survfit(Surv(time, status) ~ sex, data = lung)
autoplot(d.survfit)

#There are some options to change survival curve output. Use help(autoplot.survfit) (or help(autoplot.*) for any other objects) to check available options.

autoplot(d.survfit, surv.linetype = 'dashed', conf.int = FALSE,
         censor.shape = '*', censor.size = 5)

################################################### ##################################################

Plotting Probability Distributions

ggdistribution is a helper function that makes it easier to plot distributions from the stats package using ggplot2.

For example, plot standard normal distribution from -3 to +3:

library(ggplot2)
library(ggfortify)
ggdistribution(dnorm, seq(-3, 3, 0.1), mean = 0, sd = 1)

#ggdistribution accepts PDF/CDF function, sequence, and options passed to PDF/CDF function. Also, it has some options to configure how plot looks. Use help(ggdistribution) to check available options.

ggdistribution(pnorm, seq(-3, 3, 0.1), mean = 0, sd = 1, colour = 'red')

ggdistribution(dpois, seq(0, 20), lambda = 9, fill = 'blue')

#If you want to plot several distributions overlapped, use the p keyword to pass a ggplot instance.

p <- ggdistribution(dchisq, seq(0, 20, 0.1), df = 7, colour = 'blue')
p <- ggdistribution(dchisq, seq(0, 20, 0.1), df = 9, colour = 'green', p = p)
ggdistribution(dchisq, seq(0, 20, 0.1), df = 11, colour = 'red', p = p)

Plotting Density

Also, autoplot can accept stats::density

# Draw 50 standard-normal samples and plot their kernel density estimate.
# rnorm() takes the sample size as its first argument; the previous rnorm(1:50)
# only produced 50 values because rnorm() uses length(n) when n is a vector.
autoplot(density(rnorm(50)), fill = 'green')

Plotting cluster package

ggfortify supports the cluster::clara, cluster::fanny and cluster::pam classes. Because these instances contain the original data as one of their properties, there is no need to pass the original data explicitly.

#library(devtools)
#install_github('sinhrks/ggfortify')

library(ggplot2)
#library(ggplot)
library(ggfortify)
library(cluster)

set.seed(1)
df <- iris[-5]

autoplot(clara(df, 3))

autoplot(fanny(df, 3), frame = TRUE)

#If you want a probability ellipse, ggplot2 1.0.0 or later is required. Specify any value supported by ggplot2::stat_ellipse’s type keyword via the frame.type option.

autoplot(pam(df, 3), frame = TRUE, frame.type = 'norm')

Plotting Diagnostics for Linear Models

ggfortify lets ggplot2 know how to interpret lm objects. After loading ggfortify, you can use the ggplot2::autoplot function for lm objects.

library(ggplot2)
library(ggfortify)
autoplot(lm(Petal.Width ~ Petal.Length, data = iris))

par(mfrow = c(1, 2))
m <- lm(Petal.Width ~ Petal.Length, data = iris)

autoplot(m, which = 1:2, ncol = 2)

# Standard plot (for comparison)
plot(m, which = 1:2)

autoplot(m, which = 3:4, ncol = 2)

# Standard plot (for comparison)
plot(m, which = 3:4)

autoplot(m, which = 5:6, ncol = 2)

# Standard plot (for comparison)
plot(m, which = 5:6)

Plotting Diagnostics for Generalized Linear Models

It also supports glm instances.

par(mfrow = c(1, 2))
utils::data(anorexia, package = "MASS")
head(anorexia)
##   Treat Prewt Postwt
## 1  Cont  80.7   80.2
## 2  Cont  89.4   80.1
## 3  Cont  91.8   86.4
## 4  Cont  74.0   86.3
## 5  Cont  78.1   76.1
## 6  Cont  88.3   78.1
tail(anorexia)
##    Treat Prewt Postwt
## 67    FT  82.1   95.5
## 68    FT  77.6   90.7
## 69    FT  83.5   92.5
## 70    FT  89.9   93.8
## 71    FT  86.0   91.7
## 72    FT  87.3   98.0
m <- glm(Postwt ~ Prewt + Treat + offset(Prewt),
         family = gaussian, data = anorexia)

autoplot(m, which = 1:2, ncol = 2)

# Standard plot (for comparison)
plot(m, which = 1:2)

autoplot(m, which = 3:4, ncol = 2)

# Standard plot (for comparison)
plot(m, which = 3:4)

autoplot(m, which = 5:6, ncol = 2)

# Standard plot
plot(m, which = 5:6)

Specifying Plotting Options

Some properties can be changed by passing the corresponding keywords. For example, the colour keyword is for data points, smooth.colour is for smoothing lines and ad.colour is for additional auxiliary lines. Also, ncol and nrow control the facet layout. Use help(autoplot.lm) (or help(autoplot.*) for any other objects) to check available options.

autoplot(m, which = 1:6, colour = 'red',
         smooth.colour = 'black', smooth.linetype = 'dashed',
         ad.colour = 'blue',
         label.size = 3, label.n = 5, label.colour = 'blue',
         ncol = 3)

################################################# ################################################# #################################################
### Adding objects to a ggplot object

library(gridExtra)

p <- qplot(wt, mpg, colour = hp, data = mtcars)
p1 <- p + coord_cartesian(ylim = c(0, 40))
p2 <- p + scale_colour_continuous(breaks = c(100, 300))
p3 <- p + guides(colour = "colourbar")
# Use a different data frame
m <- mtcars[1:10, ]
p4 <- p %+% m
grid.arrange(p1,p2,p3,p4,ncol=2)

### Adding objects to a theme object
# Compare these results of adding theme objects to other theme objects
add_el <- theme_grey() + theme(text = element_text(family = "Times"))
rep_el <- theme_grey() %+replace% theme(text = element_text(family = "Times"))
add_el$text
## List of 8
##  $ family    : chr "Times"
##  $ face      : chr "plain"
##  $ colour    : chr "black"
##  $ size      : num 12
##  $ hjust     : num 0.5
##  $ vjust     : num 0.5
##  $ angle     : num 0
##  $ lineheight: num 0.9
##  - attr(*, "class")= chr [1:2] "element_text" "element"
##
aes_all(names(mtcars))
## List of 11
##  $ mpg : symbol mpg
##  $ cyl : symbol cyl
##  $ disp: symbol disp
##  $ hp  : symbol hp
##  $ drat: symbol drat
##  $ wt  : symbol wt
##  $ qsec: symbol qsec
##  $ vs  : symbol vs
##  $ am  : symbol am
##  $ gear: symbol gear
##  $ carb: symbol carb
aes_all(c("x", "y", "col", "pch"))
## List of 4
##  $ x     : symbol x
##  $ y     : symbol y
##  $ colour: symbol col
##  $ shape : symbol pch

aes_auto()

df <- data.frame(x = 1, y = 1, colour = 1, label = 1, pch = 1)
aes_auto(df)
## List of 5
##  $ colour: symbol colour
##  $ label : symbol label
##  $ shape : symbol pch
##  $ x     : symbol x
##  $ y     : symbol y
aes_auto(names(df))
## List of 5
##  $ colour: symbol colour
##  $ label : symbol label
##  $ shape : symbol pch
##  $ x     : symbol x
##  $ y     : symbol y
df <- data.frame(xp = 1, y = 1, colour = 1, txt = 1, foo = 1)
aes_auto(df, x = xp, label = txt)
## List of 4
##  $ colour: symbol colour
##  $ y     : symbol y
##  $ x     : symbol xp
##  $ label : symbol txt
aes_auto(names(df), x = xp, label = txt)
## List of 4
##  $ colour: symbol colour
##  $ y     : symbol y
##  $ x     : symbol xp
##  $ label : symbol txt
df <- data.frame(foo = 1:3)
aes_auto(df, x = xp, y = yp)
## List of 2
##  $ x: symbol xp
##  $ y: symbol yp
aes_auto(df)
##  Named list()
rep_el$text
## List of 8
##  $ family    : chr "Times"
##  $ face      : NULL
##  $ colour    : NULL
##  $ size      : NULL
##  $ hjust     : NULL
##  $ vjust     : NULL
##  $ angle     : NULL
##  $ lineheight: NULL
##  - attr(*, "class")= chr [1:2] "element_text" "element"

aes_colour_fill_alpha

# Bar chart example
c <- ggplot(mtcars, aes(factor(cyl)))
# Default plotting
p1 <- c + geom_bar()
# To change the interior colouring use fill aesthetic
p2 <- c + geom_bar(fill = "red")+ggtitle("geom_bar(fill = red)")
# Compare with the colour aesthetic which changes just the bar outline
p3 <- c + geom_bar(colour = "red")+ggtitle("geom_bar(colour = red)")
# Combining both, you can see the changes more clearly
p4 <- c + geom_bar(fill = "white", colour = "red")+ggtitle("geom_bar(fill = white, colour = red)")

grid.arrange(p1,p2,p3,p4,ncol=2)

# The fill aesthetic also accepts different colour scales.
# Setting fill equal to a factor variable uses a discrete colour scale.
k <- ggplot(mtcars, aes(factor(cyl), fill = factor(vs)))
# The title mirrors the aes() call above, so its parentheses must balance
# (the previous title string was missing the closing parenthesis).
p5 <- k + geom_bar() + ggtitle("aes(factor(cyl), fill = factor(vs))")
# Fill aesthetic can also be used with a continuous variable
m <- ggplot(movies, aes(x = rating))
p6 <- m + geom_histogram()
p7 <- m + geom_histogram(aes(fill = ..count..))

grid.arrange(p5,p6,p7,ncol=2)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Some geoms don't use both aesthetics (i.e. geom_point or geom_line)
b <- ggplot(economics, aes(x = date, y = unemploy))
p8 <- b + geom_line()
p9 <- b + geom_line(colour = "green")
p10 <- b + geom_point()
p11 <- b + geom_point(colour = "red")
grid.arrange(p8,p9,p10,p11,ncol=2)

# For large datasets with overplotting the alpha
# aesthetic will make the points more transparent
df <- data.frame(x = rnorm(5000), y = rnorm(5000))
h <- ggplot(df, aes(x,y))

p12 <- h + geom_point()
p13 <- h + geom_point(alpha = 0.5)+ggtitle("alpha = 0.5")
p14 <- h + geom_point(alpha = 1/10)+ggtitle("alpha = 1/10")
#If a geom uses both fill and colour, alpha will only modify the fill colour
c <- ggplot(mtcars, aes(factor(cyl)))
p15 <- c + geom_bar(fill = "dark grey", colour = "black") 
p16 <- c + geom_bar(fill = "dark grey", colour = "black", alpha = 1/3)+ggtitle("alpha = 1/3")
grid.arrange(p12,p13,p14,p15,p16,ncol=2)

# Alpha can also be used to add shading
j <- b + geom_line()
j

yrng <- range(economics$unemploy)
j <- j + geom_rect(aes(NULL, NULL, xmin = start, xmax = end, fill = party),
ymin = yrng[1], ymax = yrng[2], data = presidential)
j

library(scales) # to access the alpha function
j + scale_fill_manual(values = alpha(c("blue", "red"), .3))

aes_group_order Aesthetics: group, order

# By default, the group is set to the interaction of all discrete variables in the
# plot. This often partitions the data correctly, but when it does not, or when
# no discrete variable is used in the plot, you will need to explicitly define the
# grouping structure, by mapping group to a variable that has a different value
# for each group.
# For most applications you can simply specify the grouping with
# various aesthetics (colour, shape, fill, linetype) or with facets.

p <- ggplot(mtcars, aes(wt, mpg))
# A basic scatter plot
p1 <- p + geom_point(size = 4)+ggtitle("size = 4")
# The colour aesthetic
p2 <- p + geom_point(aes(colour = factor(cyl)), size = 4)+ggtitle("aes(colour = factor(cyl))")
# Or you can use shape to distinguish the data
p3 <- p + geom_point(aes(shape = factor(cyl)), size = 4)+ggtitle("aes(shape = factor(cyl))")

# Using fill
a <- ggplot(mtcars, aes(factor(cyl)))
p4 <- a + geom_bar()
p5 <- a + geom_bar(aes(fill = factor(cyl)))+ggtitle("aes(fill = factor(cyl))")
p6 <- a + geom_bar(aes(fill = factor(vs)))+ggtitle("aes(fill = factor(vs))")
grid.arrange(p1,p2,p3,p4,p5,p6,ncol=2)

# Using linetypes
library(reshape2) # for melt
library(plyr) # for colwise

# Linearly rescale `x` so that its minimum maps to 0 and its maximum to 1.
# No NA removal is performed: any NA in `x` makes the whole result NA.
rescale01 <- function(x) {
  lowest <- min(x)
  span <- diff(range(x))
  (x - lowest) / span
}
ec_scaled <- data.frame(
  date = economics$date,
  colwise(rescale01)(economics[, -(1:2)])
)

ecm <- melt(ec_scaled, id = "date")
head(ecm,5)
##         date variable       value
## 1 1967-06-30      pop 0.000000000
## 2 1967-07-31      pop 0.001928276
## 3 1967-08-31      pop 0.003885621
## 4 1967-09-30      pop 0.005804207
## 5 1967-10-31      pop 0.007616205
f <- ggplot(ecm, aes(date, value))
f + geom_line(aes(linetype = variable))

# Using facets
k <- ggplot(diamonds, aes(carat, ..density..)) + geom_histogram(binwidth = 0.2)
k + facet_grid(. ~ cut)

# There are three common cases where the default is not enough, and we
# will consider each one below. In the following examples, we will use a simple
# longitudinal dataset, Oxboys, from the nlme package. It records the heights
# (height) and centered ages (age) of 26 boys (Subject), measured on nine
# occasions (Occasion).
# Multiple groups with one aesthetic
library(nlme)
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:forecast':
## 
##     getResponse
data(Oxboys)
head(Oxboys,5)
## Grouped Data: height ~ age | Subject
##   Subject     age height Occasion
## 1       1 -1.0000  140.5        1
## 2       1 -0.7479  143.4        2
## 3       1 -0.4630  144.8        3
## 4       1 -0.1643  147.1        4
## 5       1 -0.0027  147.7        5
h <- ggplot(Oxboys, aes(age, height))
# A single line tries to connect all the observations
h + geom_line()

# The group aesthetic maps a different line for each subject
h + geom_line(aes(group = Subject))

# Different groups on different layers
h <- h + geom_line(aes(group = Subject))
# Using the group aesthetic with both geom_line() and geom_smooth()
# groups the data the same way for both layers
h + geom_smooth(aes(group = Subject), method = "lm", se = FALSE)

# Changing the group aesthetic for the smoother layer
# fits a single line of best fit across all boys
h + geom_smooth(aes(group = 1), size = 2, method = "lm", se = FALSE)

# Overriding the default grouping
# The plot has a discrete scale but you want to draw lines that connect across
# groups. This is the strategy used in interaction plots, profile plots, and parallel
# coordinate plots, among others. For example, we draw boxplots of height at
# each measurement occasion
boysbox <- ggplot(Oxboys, aes(Occasion, height))
boysbox + geom_boxplot()

# There is no need to specify the group aesthetic here; the default grouping
# works because occasion is a discrete variable. To overlay individual trajectories
# we again need to override the default grouping for that layer with aes(group = Subject)
boysbox <- boysbox + geom_boxplot()
boysbox + geom_line(aes(group = Subject), colour = "blue")

# Use the order aesthetic to change stacking order of bar charts
w <- ggplot(diamonds, aes(clarity, fill = cut))
w + geom_bar()

w + geom_bar(aes(order = desc(cut)))#!!!!!!!

# Can also be used to change plot order of scatter plots

aes_linetype_size_shape

Differentiation related aesthetics: linetype, size, shape

# Line types can be specified with an integer, a name, or a string of
# an even number (up to eight) of hexadecimal digits which give the lengths in
# consecutive positions in the string.
# 0 = blank, 1 = solid, 2 = dashed, 3 = dotted, 4 = dotdash, 5 = longdash, 6 = twodash
# Data
df <- data.frame(x = 1:10 , y = 1:10)
f <- ggplot(df, aes(x = x, y = y))
f + geom_line(linetype = 2)

f + geom_line(linetype = "dotdash")

# An example with hex strings, the string "33" specifies three units on followed
# by three off and "3313" specifies three units on followed by three off followed
# by one on and finally three off.
f + geom_line(linetype = "3313")

# Mapping line type from a variable
library(plyr)
library(reshape2)
# Linearly rescale `x` so that its minimum maps to 0 and its maximum to 1.
# No NA removal is performed: any NA in `x` makes the whole result NA.
rescale01 <- function(x) {
  lowest <- min(x)
  span <- diff(range(x))
  (x - lowest) / span
}
ec_scaled <- data.frame(
date = economics$date,
colwise(rescale01)(economics[, -(1:2)]))
ecm <- melt(ec_scaled, id = "date")
qplot(date, value, data = ecm, geom = "line", linetype = variable)

# Size examples
# Should be specified with a numerical value (in millimetres),
# or from a variable source
p <- ggplot(mtcars, aes(wt, mpg))
p + geom_point(size = 4)

p + geom_point(aes(size = qsec))

p + geom_point(size = 2.5) + geom_hline(yintercept = 25, size = 3.5)

# Shape examples
# Shape takes four types of values: an integer in [0, 25],
# a single character-- which uses that character as the plotting symbol,
# a . to draw the smallest rectangle that is visible (i.e., about one pixel)
# an NA to draw nothing
p + geom_point()

p + geom_point(shape = 5)

p + geom_point(shape = "k", size = 3)

p + geom_point(shape = ".")

p + geom_point(shape = NA)

# Shape can also be mapped from a variable
p + geom_point(aes(shape = factor(cyl)))

# A look at all 25 symbols
df2 <- data.frame(x = 1:5 , y = 1:25, z = 1:25)
s <- ggplot(df2, aes(x = x, y = y))
s + geom_point(aes(shape = z), size = 4) + scale_shape_identity()

# While all symbols have a foreground colour, symbols 21-25 also take a
# background colour (fill)
s + geom_point(aes(shape = z), size = 4, colour = "Red") +
scale_shape_identity()

s + geom_point(aes(shape = z), size = 4, colour = "Red", fill = "Black") +
scale_shape_identity()

aes_position

Position related aesthetics: x, y, xmin, xmax, ymin, ymax, xend, yend

#Examples
# Generate data: fitted mean price and the standard error of that mean
# for each type of cut.
dmod <- lm(price ~ cut, data = diamonds)
# `se.fit` is spelled out in full; the previous `se = TRUE` only worked through
# partial argument matching. predict() then returns a list from which the
# "fit" and "se.fit" components become columns of `cuts`.
cuts <- data.frame(
  cut = unique(diamonds$cut),
  predict(dmod, data.frame(cut = unique(diamonds$cut)),
          se.fit = TRUE)[c("fit", "se.fit")]
)
# Point = fitted mean, range = one standard error either side of it.
se <- ggplot(cuts, aes(x = cut, y = fit, ymin = fit - se.fit,
                       ymax = fit + se.fit, colour = cut))
se + geom_pointrange()

# Boxplot with precomputed statistics
# Generate sample data: the five quantiles of each column of a random matrix.

library(plyr)
abc <- adply(matrix(rnorm(100), ncol = 5), 2, quantile, c(0, .25, .5, .75, 1))
# quantile() names its results "0%", "25%", ... and those names are not
# syntactic, so they must be back-quoted inside aes(). A double-quoted "0%"
# is mapped as a constant string rather than as a reference to the column.
b <- ggplot(abc, aes(x = X1, ymin = `0%`, lower = `25%`,
                     middle = `50%`, upper = `75%`, ymax = `100%`))
# stat = "identity" tells geom_boxplot() to use the supplied statistics as-is.
b + geom_boxplot(stat = "identity")

# Using annotate
p <- ggplot(mtcars, aes(wt, mpg)) + geom_point()
p + annotate("rect", xmin = 2, xmax = 3.5, ymin = 2, ymax = 25,
fill = "dark grey", alpha = .5)

# Geom_segment examples
library(grid)
p + geom_segment(aes(x = 2, y = 15, xend = 2, yend = 25),
arrow = arrow(length = unit(0.5, "cm")))

p + geom_segment(aes(x = 2, y = 15, xend = 3, yend = 15),
arrow = arrow(length = unit(0.5, "cm")))

p + geom_segment(aes(x = 5, y = 30, xend = 3.5, yend = 25),
arrow = arrow(length = unit(0.5, "cm")))

# You can also use geom_segment to recreate plot(type = "h") :
counts <- as.data.frame(table(x = rpois(100, 5)))
counts$x <- as.numeric(as.character(counts$x))
with(counts, plot(x, Freq, type = "h", lwd = 10))

qplot(x, Freq, data = counts, geom = "segment", yend = 0, xend = x,
size = I(10))

#aes_string

Generate aesthetic mappings from a string/quoted objects

# Three ways of generating the same aesthetics
aes(mpg, wt, col = cyl, fill = NULL)
## List of 4
##  $ x     : symbol mpg
##  $ y     : symbol wt
##  $ colour: symbol cyl
##  $ fill  : NULL
aes_string("mpg", "wt", col = "cyl", fill = NULL)
## List of 4
##  $ x     : symbol mpg
##  $ y     : symbol wt
##  $ colour: symbol cyl
##  $ fill  : NULL
aes_q(quote(mpg), quote(wt), col = quote(cyl), fill = NULL)
## List of 4
##  $ x     : symbol mpg
##  $ y     : symbol wt
##  $ colour: symbol cyl
##  $ fill  : NULL
aes(col = cyl, fill = NULL)
## List of 2
##  $ colour: symbol cyl
##  $ fill  : NULL
aes_string(col = "cyl", fill = NULL)
## List of 2
##  $ colour: symbol cyl
##  $ fill  : NULL
aes_q(col = quote(cyl), fill = NULL)
## List of 2
##  $ colour: symbol cyl
##  $ fill  : NULL

annotate

Create an annotation layer.

p <- ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
p1 <- p + annotate("text", x = 4, y = 25, label = "Some text")
p2 <- p + annotate("text", x = 2:5, y = 25, label = "Some text")
p3 <- p + annotate("rect", xmin = 3, xmax = 4.2, ymin = 12, ymax = 21,
alpha = .2)
p4 <- p + annotate("segment", x = 2.5, xend = 4, y = 15, yend = 25,
colour = "blue")
p5 <- p + annotate("pointrange", x = 3.5, y = 20, ymin = 12, ymax = 28,
colour = "red", size = 1.5)
p6 <- p + annotate("text", x = 2:3, y = 20:21, label = c("my label", "label 2"))
grid.arrange(p1,p2,p3,p4,p5,p6,ncol=2)

#annotation_custom

Annotation: Custom grob.