This is my guide to ggplot2 and related packages. It contains:
Web sites, blogs and books for ggplot2
Some advanced examples of more complex visualizations
R version :
R.Version()$version.string## [1] "R version 3.4.1 (2017-06-30)"
ggplot2 website (part of tidyverse) with many examples. ggthemes has many themes for ggplot2. Here you can see most of the ggthemes functions with examples. It may be introductory but it helps to pick a theme. multiplot() function (uses package grid, a base package in R 3.4.1). RColorBrewer contains beautiful palettes, but the most useful tool is the Colourpicker addin for RStudio. You can pick your colour in a new window and it returns the related ggplot2 code in your script. ggtern is an extension to ggplot2 specifically for the plotting of ternary diagrams.library(ggcorrplot); library(ggExtra); library(scales); library(ggalt); library(ggfortify); library(ggdendro); library(ggmap); library(gridExtra)' ';ggbiplot; require(GGally); require(ggvis)ggplot2 plots.require(ggplot2)
require(RColorBrewer) # many nice palettes
require(dplyr) # data wrangling
require(ggthemes) # additional themes
require(knitr)
require(xtable) #for tables
library(ggcorrplot) # corrplots
library(ggExtra)
library(scales)
library(ggalt)
library(ggfortify)
library(gridExtra) # Multiple plots in one page and more
library(ggdendro) # dendrogram, plot clusters and tree based methods
library(ggmap) # maps
require(GGally)
require(ggvis)
require(ggpubr) # Add Statistical test results into the plot(compare means etc), arrange plots
options(xtable.comment=FALSE)# This removes the comment % above xtable
print(xtable(summary(iris),
digits=2,
align = "cccccc"),
include.rownames=F)# include.rownames=F removes the first column which is just a countkable(summary(iris)) # for html table output to demonstrate this way to tables too| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
|---|---|---|---|---|---|
| Min. :4.300 | Min. :2.000 | Min. :1.000 | Min. :0.100 | setosa :50 | |
| 1st Qu.:5.100 | 1st Qu.:2.800 | 1st Qu.:1.600 | 1st Qu.:0.300 | versicolor:50 | |
| Median :5.800 | Median :3.000 | Median :4.350 | Median :1.300 | virginica :50 | |
| Mean :5.843 | Mean :3.057 | Mean :3.758 | Mean :1.199 | NA | |
| 3rd Qu.:6.400 | 3rd Qu.:3.300 | 3rd Qu.:5.100 | 3rd Qu.:1.800 | NA | |
| Max. :7.900 | Max. :4.400 | Max. :6.900 | Max. :2.500 | NA |
mtcarsdiamondsggplot2 plots in one window.multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
# Draw several plot objects on a single page using a grid layout.
#
# Arguments:
#   ...      : plot objects, passed individually.
#   plotlist : optional list of plot objects, appended after those in `...`.
#   file     : not referenced anywhere in this body.
#   cols     : number of layout columns; only used when `layout` is NULL.
#   layout   : optional matrix whose cell values are plot indices. Plot i is
#              drawn in every cell equal to i, so one plot can span cells.
#
# Called for its side effect of printing the plots to the active device.
# NOTE(review): library() inside a function attaches grid to the search path
# on every call.
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
# matrix() fills column by column by default, so consecutive plot indices
# run down the first column before moving on to the next one.
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
# A single plot needs no layout: print it directly.
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
# NOTE(review): with zero plots, 1:numPlots is c(1, 0) rather than an empty
# sequence; seq_len(numPlots) would skip the loop in that case.
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
# matchidx$row / matchidx$col hold every layout cell assigned to plot i.
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}p1=ggplot(), p2=ggplot(),…., p_n=ggplot(). The multiplot(p1,p2,...,p_n,cols= desires number of columns)p1=ggplot(diamonds, aes(cut))+
geom_bar(
aes(y=(..count..)/sum(..count..)), # Percentage
fill=c(brewer.pal(5,"Paired")))+
coord_flip()+
xlab("")+ylab("Percentage")+
ggtitle("% of cut in diamonds dataset")
# theme_wsj(base_size = 7.8,base_family = "Times", title_family = "Times")
# + scale_colour_wsj("colors6", "")
p2=ggplot(mtcars,
aes(gear))+
geom_bar(
aes(y=(..count..)), #Count
fill=c(brewer.pal(3,"Paired")))+
xlab("")+
ylab("Count")+ggtitle("Count of gears in mtcars dataset")
multiplot(p1,p2,cols = 2)Barplot example with the diamonds and mtcars datasets. The first shows percentages and the second shows counts.
# Bar chart of disp by gear, filled by gear, drawn with theme_bw() and with
# the fill legend hidden. stat = "identity" uses the raw disp values as bar
# heights instead of counting rows.
# The legend is suppressed with "none": passing FALSE to guides() is
# deprecated in current ggplot2 releases and emits a warning.
p3 <- ggplot(mtcars, aes(x = gear, y = disp, fill = as.factor(gear))) +
  geom_bar(stat = "identity") +
  theme_bw() +
  guides(fill = "none")
print(p3)p4=ggMarginal(p = p3,#create a new plot based on an old one
type = 'boxplot', #'boplot','histogram','density'
margins = 'y', # which axis i want marginal
size = 5,
col = '#802E29', # colour of the lines
fill = '#9C8219'
)
print(p4)Boxplot as marginal plot
# Same base plot as before (p3), this time with a density marginal.
p5 <- ggMarginal(
  p = p3,             # build the new plot from an existing one
  type = "density",   # one of "boxplot", "histogram", "density"
  margins = "y",      # axis that receives the marginal plot
  size = 5,
  col = "#802E29",    # line colour of the marginal
  fill = "#9C8219"
)
multiplot(p4,p5) # multiplot now creates a marginal with both density and boxplotNow we created a marginal plot with both boxplot and barplot
RColorBrewer package for colour palettes. This chart shows all the palettes included. ggplot(mtcars,
aes(x=as.factor(vs),y=disp,fill=as.factor(gear)))+
geom_bar(stat="identity",
position=position_dodge())+
theme_bw()+
scale_fill_brewer(palette="Dark2")Three variable barplot
# Two dodged bar charts of disp by vs, filled by gear. They share the same
# data, labels and y-axis scale; they differ in the brewer palette, the
# position of the line break in the title, and the theme() tweaks.

# Variant 1: "Dark2" palette, panel background removed.
p1 <- ggplot(
  mtcars,
  aes(x = as.factor(vs), y = disp, fill = as.factor(gear))
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(palette = "Dark2", name = "Gears") +
  ggtitle("Three variable barplot with\n legends and labels plus limits") +
  ylab("disp variable") +
  xlab("vs") +
  # Axis from 0 to 600 with a tick every 50 units.
  scale_y_continuous(limits = c(0, 600), breaks = seq(0, 600, by = 50)) +
  theme(
    axis.text.y = element_text(angle = 45),
    panel.background = element_blank(),
    legend.position = c(0.96, 0.85)
  )

# Variant 2: "Accent" palette, major grid lines removed, title centred.
p2 <- ggplot(
  mtcars,
  aes(x = as.factor(vs), y = disp, fill = as.factor(gear))
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(palette = "Accent", name = "Gears") +
  ggtitle("Three variable barplot with legends\n and labels plus limits") +
  ylab("disp variable") +
  xlab("vs") +
  scale_y_continuous(limits = c(0, 600), breaks = seq(0, 600, by = 50)) +
  theme(
    axis.text.y = element_text(angle = 45),
    panel.grid.major = element_blank(),
    legend.position = c(0.96, 0.85),
    plot.title = element_text(hjust = 0.5)
  )
multiplot(p1,p2,cols = 2)p1=ggplot(iris,aes(x=Species,y=Sepal.Length))+geom_violin()
p2=ggplot(iris,aes(x=Species,y=Sepal.Length))+geom_boxplot()
p3=ggplot(iris,aes(x=Species,y=Sepal.Length))+geom_tufteboxplot()
multiplot(p1,p2,p3,cols = 3)Boxplot and violin plot for iris dataset
# Four views of Sepal.Length by Species, each with a different theme.
# NOTE(review): stat_summary(fun.y = ...) is deprecated in ggplot2 >= 3.3.0
# in favour of `fun`; it is kept here so the behaviour is unchanged.

# Violin plot with the Economist theme and the legend on the right.
p1 <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_violin() +
  ggtitle("Violin plot for iris data/economist theme") +
  theme_economist(base_size = 6) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right",
    legend.background = element_rect(size = 0.3)
  )

# Plain boxplot with the Google Docs theme and a centred title.
p2 <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot() +
  theme_gdocs(base_size = 6) +
  ggtitle("Boxplot for iris data/gdocs theme") +
  theme(plot.title = element_text(hjust = 0.5))

# Horizontal boxplot with the WSJ theme: outliers drawn as red crosses and
# each group's mean marked with a point (shape 6).
p3 <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 3,
    outlier.size = 2
  ) +
  theme_wsj(base_size = 6) +
  ggtitle("wsj theme with outliers") +
  stat_summary(fun.y = mean, geom = "point", shape = 6, size = 1) +
  scale_fill_brewer(palette = "Dark2", name = "") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.text = element_text(size = 12, colour = "red"),
    legend.position = "bottom"
  ) +
  coord_flip() #+stat_compare_means(label.x=0.7,label.y =7,method = "t.test")#ggpubr package

# Boxplot with the dark theme and a horizontal legend placed inside the
# panel; both axis labels are blanked.
p4 <- ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 3,
    outlier.size = 2
  ) +
  theme_dark(base_size = 8) +
  ggtitle("Dark theme") +
  stat_summary(fun.y = mean, geom = "point", shape = 6, size = 1) +
  scale_fill_brewer(palette = "Set1", name = "Species") +
  theme(
    plot.title = element_text(hjust = 0.5), # hjust centres the plot title
    legend.position = c(0.43, 0.85),
    legend.text = element_text(colour = "blue", size = 5, face = "bold"),
    legend.background = element_rect(fill = "#7A7A7A", size = 0.5),
    legend.direction = "horizontal"
  ) +
  xlab("") +
  ylab("")
multiplot(p1,p2,p3,p4,cols = 2)# another option as found on the internet
# Expand a data frame into the long form needed for a faceted pairs plot.
#
# Arguments:
#   data : a data frame with at least two columns.
#
# Returns a list with two data frames:
#   all       : one stacked copy of `data` per ordered pair of distinct
#               columns, with extra columns `x` / `y` (values of the two
#               paired columns) and factors `xvar` / `yvar` naming them.
#   densities : one stacked block per column, holding its values in `x` and
#               its name in both `xvar` and `yvar` (the diagonal panels).
#
# Stops with an explicit error when `data` is not a data frame or has fewer
# than two columns; such input previously failed with an unrelated message.
makePairs <- function(data) {
  if (!is.data.frame(data) || ncol(data) < 2L) {
    stop("`data` must be a data frame with at least two columns",
         call. = FALSE)
  }
  var_names <- names(data)
  n_vars <- ncol(data)

  # All ordered (x, y) column-index pairs, excluding the diagonal.
  grid <- expand.grid(x = seq_len(n_vars), y = seq_len(n_vars))
  grid <- grid[grid$x != grid$y, , drop = FALSE]

  all <- do.call("rbind", lapply(seq_len(nrow(grid)), function(i) {
    xcol <- grid[i, "x"]
    ycol <- grid[i, "y"]
    # `xvar` carries the name of the column stored in `y`, and `yvar` the
    # name of the column stored in `x`. This crossing is kept exactly as in
    # the original, because callers facet on these labels.
    data.frame(
      xvar = var_names[ycol], yvar = var_names[xcol],
      x = data[[xcol]], y = data[[ycol]], data
    )
  }))
  # Fix the level order to the column order so facets are laid out
  # consistently.
  all$xvar <- factor(all$xvar, levels = var_names)
  all$yvar <- factor(all$yvar, levels = var_names)

  densities <- do.call("rbind", lapply(seq_len(n_vars), function(i) {
    data.frame(xvar = var_names[i], yvar = var_names[i], x = data[[i]])
  }))

  list(all = all, densities = densities)
}
# Expand the iris columns (all but the 5th, Species) into pairs form.
gg1 <- makePairs(iris[, -5])
# Build the plotting data frame: the expanded pairs plus a Species column.
# Species is recycled to the expanded row count. `length.out` is spelled out
# in full instead of relying on partial matching of `length`.
mega_iris <- data.frame(
  gg1$all,
  Species = rep(iris$Species, length.out = nrow(gg1$all))
)
# pairs plot
ggplot(mega_iris, aes_string(x = "x", y = "y")) +
facet_grid(xvar ~ yvar, scales = "free") +
geom_point(aes(colour=Species), na.rm = TRUE, alpha=0.8) +
stat_density(aes(x = x, y = ..scaled.. * diff(range(x)) + min(x)),
data = gg1$densities, position = "identity",
colour = "grey20", geom = "line")ggpairs() creates scatter plots for numerical, boxplot and barplots for factors and even includes correlations. Perfect for exploring small datasets fast. It is a little messy but lets see:ggpairs(iris, title = "",
axisLabels = "show",
aes(colour=Species),
lower = list(combo = wrap(ggally_facethist,
binwidth = 0.4))) - Some more
ggpairs() options to make prettier or different plot. See Documentation:
ggpairs(iris,
columns = c(1:4),
title = "",
aes(colour=Species),
upper = list(continuous="cor",
binwidth=1),
lower=list(continuous="smooth",
binwidth=1),
switch = "y",
axisLabels="none")+
ggtitle("Only numerical values to create scattermatrix and corrplot with reggression lines(smooth) and 'few' theme")+
theme_few(base_size = 8)Only numerical variables
ggpairs(iris,
title = "",
aes(colour=Species),
upper = list(continuous="cor",
binwidth=0.3,
combo="facethist"),
lower=list(continuous="smooth",
combo='box_no_facet',
binwidth=0.3),
switch = "y",
axisLabels="none")+
ggtitle("Good Looking(?) Plot Matrix with theme_calc and smaller letters for iris dataset")+
theme_calc(base_size = 8)All iris data variables
# Scatter plots of the iris and mtcars data with fitted lines and extra
# aesthetics. Changes from the previous version: `se = F` is now
# `se = FALSE` (F can be reassigned), and the first of two consecutive
# ggtitle() calls in `pall` was dropped because the second one overrode it.

# Base scatter plot, reused by p2 and p3.
p1 <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point()

# Linear fit with geom_smooth() defaults otherwise.
p2 <- p1 +
  geom_smooth(method = "lm") +
  ggtitle("Regression Line plus Standar Error")

# Loess fit drawn as a dashed line, with the standard-error band turned off.
p3 <- p1 +
  geom_smooth(method = "loess", linetype = "dashed", se = FALSE) +
  ggtitle("Loess dashed line without Standar error") +
  theme(plot.title = element_text(size = 11))

# Both fits on one plot, with semi-transparent points.
pall <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm") +
  geom_smooth(method = "loess", linetype = "dashed", se = FALSE) +
  ggtitle("Simple Linear Regression line plus dashed loess line") +
  theme(plot.title = element_text(size = 9))

# Three variables: mpg and hp on the axes, cyl as colour.
p4 <- ggplot(
  mtcars,
  aes(x = mpg, y = hp, group = factor(cyl), color = factor(cyl))
) +
  geom_point(size = 1.5) +
  scale_colour_brewer(palette = "Set2", name = "cyl") +
  ggtitle("Three variables") +
  theme(plot.title = element_text(hjust = 0.5))

# Four variables: as p4, with am mapped to point shape.
p5 <- ggplot(
  mtcars,
  aes(x = mpg, y = hp, group = factor(cyl), color = factor(cyl))
) +
  geom_point(size = 2.5, aes(shape = factor(am))) +
  scale_colour_brewer(palette = "Set2", name = "cyl") +
  ggtitle("Four variables") +
  scale_shape(name = "am") +
  ylab("") +
  theme(plot.title = element_text(hjust = 0.5))
multiplot(p1,p2,p3,pall,cols = 2)multiplot(p4,p5,cols=2)theme_bw is a complete theme. If you use the theme option, as many times in previous plots, you can change specific plot elements .# See the legend options of grey theme
theme_grey()$legend.key## List of 5
## $ fill : chr "grey95"
## $ colour : chr "white"
## $ size : NULL
## $ linetype : NULL
## $ inherit.blank: logi TRUE
## - attr(*, "class")= chr [1:2] "element_rect" "element"
# Now lets change a specific element of the legend
new_theme <- theme_grey()+
theme(legend.key = element_rect(colour = "red"))
new_theme$legend.key # It changes only the new defined element and keeps all the other characteristics the same## List of 5
## $ fill : chr "grey95"
## $ colour : chr "red"
## $ size : NULL
## $ linetype : NULL
## $ inherit.blank: logi FALSE
## - attr(*, "class")= chr [1:2] "element_rect" "element"
# Now, with the %+replace% it replaces all options with the new options i define. So the undefined options remain NULL
new_theme <- theme_grey() %+replace% theme(legend.key = element_rect(colour = "red"))
new_theme$legend.key## List of 5
## $ fill : NULL
## $ colour : chr "red"
## $ size : NULL
## $ linetype : NULL
## $ inherit.blank: logi FALSE
## - attr(*, "class")= chr [1:2] "element_rect" "element"
Using plotly to make a ggplot reactive is very easy! It may change the appearance of the plot, though.require(plotly)
ggplotly(p4)require(ggplotgui)## Loading required package: ggplotgui
# Run this to open the UI
## ggplot_shiny(mtcars)