How to re-arrange/re-order a plot?

Created on Aug 15 2013
Revised on Thu Aug 15 13:14:06 2013

original post is here and here

1. Reorder your factor levels!!!

Example

options(warn = -1)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.0.1

head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

as.factor(mtcars$carb)

##  [1] 4 4 1 1 2 1 4 2 2 4 4 3 3 3 4 4 4 1 2 1 1 2 2 4 2 1 2 2 4 6 8 2
## Levels: 1 2 3 4 6 8

# Baseline: carb is still numeric here, so it is plotted as-is on the y axis
ggplot(mtcars, aes(x = mpg, y = carb, colour = hp)) +
    geom_point()

plot of chunk unnamed-chunk-1


# Same plot, with carb converted to a factor inside the aesthetic mapping
ggplot(mtcars, aes(x = mpg, y = as.factor(carb), colour = hp)) +
    geom_point()

plot of chunk unnamed-chunk-1


# Rearrange_Guy: But I want 2 to come first and 8 last
# Helpful_Gal: OK use rev with levels

# Reverse the default (sorted) level order of carb
mtcars$carb2 <- factor(mtcars$carb, levels = rev(sort(unique(mtcars$carb))))

# carb2 is a factor, so its level order decides the order along the y axis
ggplot(mtcars, aes(x = mpg, y = carb2, colour = hp)) +
    geom_point()

plot of chunk unnamed-chunk-1


# Rearrange_Guy: Well I just want to specify the order
# Helpful_Gal: OK type it in by hand then

# Hand-picked level order: 4 is moved to the end
mtcars$carb2 <- factor(mtcars$carb, levels = as.character(c(1, 2, 3, 6, 8, 4)))
# Re-draw the scatter using the hand-specified carb2 level order
ggplot(mtcars, aes(x = mpg, y = carb2, colour = hp)) +
    geom_point()

plot of chunk unnamed-chunk-1


# Rearrange_Guy: What about faceting?  I bet it doesn't work for that.
# Helpful_Gal: Um yes it does.

# One facet row per value of cyl; carb2 keeps its custom order in each panel
ggplot(mtcars, aes(x = mpg, y = carb2, colour = hp)) +
    geom_point() +
    facet_grid(cyl ~ .)

plot of chunk unnamed-chunk-1


# Rearrange_Guy: OK Helpful_Gal I want it to go 6, 4, and then 8
# Helpful_Gal: OK

# Store cyl as a factor whose levels run 6, 4, 8
mtcars$cyl2 <- factor(mtcars$cyl, levels = as.character(c(6, 4, 8)))
# Facet on the re-levelled cyl2 instead of the raw cyl column
ggplot(mtcars, aes(x = mpg, y = carb2, colour = hp)) +
    geom_point() +
    facet_grid(cyl2 ~ .)

plot of chunk unnamed-chunk-1


# Rearrange_Guy: Why do you keep making new variables?
# Helpful_Gal: It's probably not the best idea to overwrite variables just
# for the sake of plotting.
# Rearrange_Guy: Thank you for showing me the way of re-ordering and
# re-arranging.
# Helpful_Gal: You're welcome.

2. sorting bars/points on another variable

# SECTION 1: REORDERING BY BAR/POINT SIZE
# Create a data set we can alter

head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb carb2
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4     4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4     4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1     1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1     1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2     2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1     1
##                   cyl2
## Mazda RX4            6
## Mazda RX4 Wag        6
## Datsun 710           4
## Hornet 4 Drive       6
## Hornet Sportabout    8
## Valiant              6

# Move the row names into a real `car` column and turn cyl into a factor.
# mtcars3 starts out as an exact copy of mtcars2.
mtcars2 <- data.frame(car = rownames(mtcars), mtcars, row.names = NULL)
mtcars2$cyl <- as.factor(mtcars2$cyl)
mtcars3 <- mtcars2
head(mtcars2)

##                 car  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
##   carb2 cyl2
## 1     4    6
## 2     4    6
## 3     1    4
## 4     1    6
## 5     2    8
## 6     1    6


## An Example of Unordered Bars/Points

library(ggplot2)
library(gridExtra)

## Warning: package 'gridExtra' was built under R version 3.0.1

## Loading required package: grid

# x: dot plot of mpg per car.
# y: bar chart of the same data; coord_flip() swaps the axes so the bars
#    run horizontally and line up with the dot plot.
x <- ggplot(mtcars2, aes(x = mpg, y = car)) +
    geom_point(stat = "identity")
y <- ggplot(mtcars2, aes(x = car, y = mpg)) +
    geom_bar(stat = "identity") +
    coord_flip()
# Show both plots side by side
grid.arrange(x, y, ncol = 2)

plot of chunk unnamed-chunk-2


## An Example of Ordered Bars/Points
## Re-level the cars by mpg. The ordering is computed from mtcars2 itself,
## the same data frame the car names are taken from, so the names and the
## ordering always refer to the same rows.
mtcars3$car <- factor(mtcars2$car, levels = mtcars2$car[order(mtcars2$mpg)])
x <- ggplot(mtcars3, aes(y = car, x = mpg)) + geom_point(stat = "identity")
y <- ggplot(mtcars3, aes(x = car, y = mpg)) + geom_bar(stat = "identity") + 
    coord_flip()
grid.arrange(x, y, ncol = 2)

plot of chunk unnamed-chunk-2


## An Example of Points Ordered by a Summary Statistic
## Re-level carb by average mpg
ag_mtcars <- aggregate(mpg ~ carb, data = mtcars3, FUN = mean)
mtcars3$carb <- factor(mtcars2$carb, levels = ag_mtcars$carb[order(ag_mtcars$mpg)])
ggplot(mtcars3, aes(x = mpg, y = carb)) +
    geom_point(aes(color = carb), stat = "identity", size = 2)

plot of chunk unnamed-chunk-2


## An Example of Ordered and Faceted Bars/Points
## One facet row per cyl value, with free scales and free panel space
ggplot(mtcars3, aes(x = mpg, y = car)) +
    geom_point(stat = "identity") +
    facet_grid(cyl ~ ., scales = "free", space = "free")

plot of chunk unnamed-chunk-2


# SECTION 2: SPEEDING UP THE WORKFLOW WITH THE PLOTFLOW PACKAGE
# Getting the 'plotflow' package
# browseURL('https://github.com/trinker/plotflow')

# install.packages('devtools')
library(devtools)

## Warning: package 'devtools' was built under R version 3.0.1

# Install plotflow from GitHub using the "user/repo" form. Passing the
# user name as a separate second argument is the old calling convention and
# is not interpreted as a user name by current devtools/remotes.
install_github("trinker/plotflow")

## Installing github repo(s) plotflow/master from trinker

## Downloading plotflow.zip from
## https://github.com/trinker/plotflow/archive/master.zip

## Installing package from
## C:\Users\alice\AppData\Local\Temp\RtmpaW8UiU/plotflow.zip

## Installing plotflow

## "C:/PROGRA~1/R/R-30~1.0/bin/x64/R" --vanilla CMD INSTALL \
## "C:\Users\alice\AppData\Local\Temp\RtmpaW8UiU\plotflow-master" \
## --library="C:/Users/alice/Documents/R/win-library/3.0" \
## --with-keep.source --install-tests

## ```

```r

## What Does `order_by` do?
library(plotflow)
# Mean mpg, hp and disp for each carb value; carb is then stored as a factor
dat <- aggregate(cbind(mpg, hp, disp) ~ carb, data = mtcars, FUN = mean)
dat[["carb"]] <- factor(dat[["carb"]])

## compare levels (data set looks the same though)
dat$carb

## [1] 1 2 3 4 6 8
## Levels: 1 2 3 4 6 8

# Pull the carb column out of the data frame that order_by() returns; in the
# printed result the values are unchanged and only the level order differs
order_by(carb, ~ -hp + -mpg, data = dat)[["carb"]]

## [1] 1 2 3 4 6 8
## Levels: 8 4 3 6 2 1


## Return just the vector with new levels (df = FALSE)
order_by(carb, ~ -hp + -mpg, data = dat, df = FALSE)

## [1] 1 2 3 4 6 8
## Levels: 8 4 3 6 2 1


## Use `order_by` to Order Bars
library(ggplot2)

## Reset the data from Section 1
dat2 <- data.frame(car = rownames(mtcars), mtcars, row.names = NULL)

## Bars ordered by mpg: order_by() re-levels car before the data reaches ggplot
ggplot(order_by(car, ~mpg, dat2), aes(x = car, y = mpg)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    ggtitle("Order Pretty Easy")

plot of chunk unnamed-chunk-2



## Aggregated by Summary Stat
## Carb ordered by a summary (mean) of mpg

## Ordered points with the order_by function
a <- ggplot(order_by(carb, ~mpg, dat2, mean), aes(x = carb, y = mpg)) +
    geom_point(aes(colour = carb), stat = "identity") +
    coord_flip() +
    ggtitle("Ordered Dot Plots Made Easy")

## Reverse the ordered points (note the minus sign in the formula)
b <- ggplot(order_by(carb, ~-mpg, dat2, mean), aes(x = carb, y = mpg)) +
    geom_point(aes(colour = carb), stat = "identity") +
    coord_flip() +
    ggtitle("Reverse Order Too!")

## Stack the two plots in a single column
grid.arrange(a, b, ncol = 1)

plot of chunk unnamed-chunk-2


## Nested Usage (order_by on an order by dataframe)
## NOTE(review): only one order_by() call is visible here even though the
## heading says "nested" -- confirm against the original post.
ggplot(order_by(gear, ~mpg, dat2, mean), aes(x = mpg, y = carb)) +
    geom_point(aes(color = factor(cyl))) +
    facet_grid(gear ~ ., scales = "free") +
    ggtitle("I'm Nested (Yay for me!)")

plot of chunk unnamed-chunk-2


# SECTION 3: USING ORDER_BY ON REAL DATA #
library(RCurl)

## Loading required package: bitops

library(XML)

## Warning: package 'XML' was built under R version 3.0.1

library(rjson)
library(ggplot2)
library(qdap)

## Warning: package 'qdap' was built under R version 3.0.1

## Loading required package: gdata

## Warning: package 'gdata' was built under R version 3.0.1

## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

## ```

gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

Attaching package: 'gdata'

The following object is masked from 'package:stats':

nobs

The following object is masked from 'package:utils':

object.size

Loading required package: reports

Warning: package 'reports' was built under R version 3.0.1

Loading required package: scales

Warning: package 'scales' was built under R version 3.0.1

Warning: replacing previous import 'rescale' when loading 'scales'


```r
library(reshape2)

## Warning: package 'reshape2' was built under R version 3.0.1

library(gridExtra)

## Grab the data
## NOTE(review): the script index [[19]] and the line window 5:27 are tied to
## the layout of the page when this was written; re-check both if the page
## has changed.
URL <- "http://www.payscale.com/top-tech-employers-compared-2012/job-satisfaction-survey-data"
doc <- htmlTreeParse(URL, useInternalNodes = TRUE)
nodes <- getNodeSet(doc, "//script[@type='text/javascript']")[[19]][[1]]
## Drop the trailing ";" after the closing bracket so the text parses as JSON
dat <- gsub("];", "]", capture.output(nodes)[5:27])
## Bind the parsed records row-wise and drop the second column
ndat <- data.frame(do.call(rbind, fromJSON(paste(dat, collapse = ""))))[, -2]
## Flatten every column with unlist(), however many columns there are,
## instead of hard-coding the column count.
ndat[] <- lapply(ndat, unlist)
## For the IBM row(s), replace the long name with its bracketed part
IBM <- grepl("International Business Machines", ndat[, 1])
ndat[IBM, 1] <- bracketXtract(ndat[IBM, 1])
## Keep only the first token of each employer name (split on whitespace or
## comma). vapply() guarantees a character vector, where sapply() could
## silently return a list.
ndat[, 1] <- vapply(strsplit(ndat[, 1], "\\s|,"), "[", character(1), 1)

## Re-level with order_by: the employer column is replaced by the vector
## that order_by() returns when df = FALSE, ordered by Job.Satisfaction
ndat[["Employer.Name"]] <- order_by(Employer.Name, ~Job.Satisfaction, ndat, df = FALSE)

## Shorten the name of the first column
names(ndat)[1] <- "Employer"

## Melt the data to long format
## (no id.vars are passed; the message printed below shows that melt()
## picked Employer as the id variable)
mdat <- melt(ndat)

## Using Employer as id variables

## Replace the dots in the variable names with spaces, and use the column
## order of ndat (minus the first column) as the level order
mdat[, 2] <- local({
    undot <- function(txt) gsub(".", " ", txt, fixed = TRUE)
    factor(undot(mdat[, 2]), levels = undot(colnames(ndat)[-1]))
})
head(mdat)

##     Employer         variable  value
## 1      Adobe Job Satisfaction 0.6875
## 2 Amazon.com Job Satisfaction 0.7723
## 3        AOL Job Satisfaction 0.7714
## 4      Apple Job Satisfaction 0.7800
## 5       Dell Job Satisfaction 0.6890
## 6       eBay Job Satisfaction 0.7097


## One horizontal bar per employer, one panel per survey variable;
## the legend is hidden because the fill colour only repeats the employer
ggplot(mdat, aes(x = Employer, y = value, fill = factor(Employer))) +
    geom_bar(stat = "identity") +
    coord_flip() +
    ylim(c(0, 1)) +
    facet_wrap(~variable, ncol = 2) +
    theme(legend.position = "none") +
    ggtitle("Plot 3: Employee Job Satisfaction at Top Tech Companies") +
    ylab("Job Satisfaction")

plot of chunk unnamed-chunk-2


## A regression model: job satisfaction explained by the other three measures
mod <- lm(
    Job.Satisfaction ~ Work.Stress + Job.Meaning + Job.Flexibility,
    data = ndat
)
mod

## 
## Call:
## lm(formula = Job.Satisfaction ~ Work.Stress + Job.Meaning + Job.Flexibility, 
##     data = ndat)
## 
## Coefficients:
##     (Intercept)      Work.Stress      Job.Meaning  Job.Flexibility  
##          0.3101           0.1062           0.5241           0.0733

anova(mod)

## Analysis of Variance Table
## 
## Response: Job.Satisfaction
##                 Df Sum Sq Mean Sq F value Pr(>F)    
## Work.Stress      1 0.0069  0.0069    1.45 0.2452    
## Job.Meaning      1 0.0816  0.0816   17.04 0.0007 ***
## Job.Flexibility  1 0.0006  0.0006    0.13 0.7260    
## Residuals       17 0.0814  0.0048                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(mod)

## 
## Call:
## lm(formula = Job.Satisfaction ~ Work.Stress + Job.Meaning + Job.Flexibility, 
##     data = ndat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12043 -0.03002 -0.00263  0.03268  0.11915 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.3101     0.2413    1.29   0.2160    
## Work.Stress       0.1062     0.2147    0.49   0.6273    
## Job.Meaning       0.5241     0.1288    4.07   0.0008 ***
## Job.Flexibility   0.0733     0.2058    0.36   0.7260    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0692 on 17 degrees of freedom
## Multiple R-squared:  0.523,  Adjusted R-squared:  0.438 
## F-statistic: 6.21 on 3 and 17 DF,  p-value: 0.00483


## Satisfaction against meaning, with two smoothers: an explicit linear fit
## (blue band) and the default method (red line, pink band). Point size shows
## Job.Flexibility and point colour shows Work.Stress.
theplot <- ggplot(ndat, aes(x = Job.Meaning, y = Job.Satisfaction)) +
    geom_smooth(method = "lm", fill = "blue", alpha = 0.1, size = 1) +
    geom_smooth(color = "red", fill = "pink", alpha = 0.3, size = 1) +
    xlim(c(0.4, 0.9)) +
    geom_point(aes(size = Job.Flexibility, colour = Work.Stress)) +
    geom_text(aes(label = Employer), size = 3, hjust = -0.1, vjust = -0.1) +
    scale_colour_gradient(low = "gold", high = "red")

theplot

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-2


## Overlay a circle grob placed in the box x 0.47-0.57, y 0.72-0.82
theplot +
    annotation_custom(
        grob = circleGrob(r = unit(0.4, "npc")),
        xmin = 0.47, xmax = 0.57,
        ymin = 0.72, ymax = 0.82
    )

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-2


## Flag AOL, Amazon.com, Nvidia and Sony with 0 and every other employer with 1
ndat$outs <- ifelse(ndat$Employer %in% qcv(AOL, Amazon.com, Nvidia, Sony), 0, 1)

## Same plot as theplot, except that the linear fit is grouped by outs
ggplot(ndat, aes(x = Job.Meaning, y = Job.Satisfaction)) +
    geom_smooth(aes(group = outs), method = "lm", fill = "blue", alpha = 0.1, size = 1) +
    geom_smooth(color = "red", fill = "pink", alpha = 0.3, size = 1) +
    xlim(c(0.4, 0.9)) +
    geom_point(aes(size = Job.Flexibility, colour = Work.Stress)) +
    geom_text(aes(label = Employer), size = 3, hjust = -0.1, vjust = -0.1) +
    scale_colour_gradient(low = "gold", high = "red")

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-2