Introduction

Preparing data

# Simulate 12 samples of 100 normally distributed values each.
# Columns are named Obs<observation><group>: observations 1-4 in groups 1-3.
# The first column was previously called "Stat11", which broke that pattern.
data <- data.frame(
  Obs11 = rnorm(100, mean = 3, sd = 2),
  Obs21 = rnorm(100, mean = 4, sd = 1),
  Obs31 = rnorm(100, mean = 6, sd = 0.5),
  Obs41 = rnorm(100, mean = 10, sd = 0.5),
  Obs12 = rnorm(100, mean = 4, sd = 2),
  Obs22 = rnorm(100, mean = 4.5, sd = 2),
  Obs32 = rnorm(100, mean = 7, sd = 0.5),
  Obs42 = rnorm(100, mean = 8, sd = 3),
  Obs13 = rnorm(100, mean = 6, sd = 0.5),
  Obs23 = rnorm(100, mean = 5, sd = 3),
  Obs33 = rnorm(100, mean = 8, sd = 0.2),
  Obs43 = rnorm(100, mean = 4, sd = 4)
)

Plot using standard boxplot (without any parameters except X)

# Default box plot: one box per column of the data frame
boxplot(x = data)

Change the angle of the labels using “las = 2”

# las = 2 draws the axis labels perpendicular to their axis
boxplot(x = data, las = 2)

Change the labels using “names = c("name1", "name2", …, "nameX")”

# Build the 12 box labels: "Observation 1" .. "Observation 4", once per group
nms <- rep(paste("Observation", 1:4), times = 3)
boxplot(x = data, names = nms, las = 2)

Increase the margins around the plot so that the labels are readable and there is room to add labels outside the plot.

# Set the margins (bottom, left, top, right; in lines of text) before plotting.
# par() is a separate call: passing it as an unnamed argument to boxplot()
# only worked as a side effect of argument evaluation.
par(mar = c(12, 5, 4, 2) + 0.1)
boxplot(data, las = 2, names = nms)

Group the boxes together using the “at=” parameter. To create a separator (an empty space between some of the boxes), skip the separator's position in the “at” parameter. For example, if you need a separator after the first 4 boxes, skip position “5”, as in this example.

# Set the margins first; par() must not be passed as an argument to boxplot()
par(mar = c(12, 5, 4, 2) + 0.1)
# Positions 5 and 10 are skipped in `at` to leave a gap between the groups
boxplot(data, las = 2, names = nms,
        at = c(1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14))

To add colors to the boxes, use the “col=” parameter. If the boxes represent groups of observations, supplying as many colors as there are boxes in one group will automatically repeat them across all groups. For example, here we have 3 groups of 4 observations, so we supply 4 colors and they are automatically reused in each of the groups.

# One color per observation; boxplot() recycles them across the three groups
cols <- c("#d73027", "#fdae61", "#1a9850", "#4575b4")
# Set the margins first; par() must not be passed as an argument to boxplot()
par(mar = c(12, 5, 4, 2) + 0.1)
boxplot(data, las = 2, names = nms,
        at = c(1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14), col = cols)

To add labels outside the plot (e.g. axis labels), use the function mtext(). The mtext() function requires 3 parameters: text=“the text you want to print”, side=(1=bottom, 2=left, 3=top, 4=right) and line=the distance from the plot (in margin lines).

# Set the margins first; par() must not be passed as an argument to boxplot()
par(mar = c(12, 5, 4, 2) + 0.1)
boxplot(data, las = 2, names = nms,
        at = c(1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14), col = cols)
# Margin text: side 2 = left, side 1 = bottom, side 3 = top
mtext("Groups", side = 2, line = 2)
mtext("Observations", side = 1, line = 7)
mtext("Box Plot Example with 3 groups", side = 3, line = 2)

Draw a boxplot with several annotations. Now we are able to draw the box plot, add labels and change the colors of the boxes. Let's see how we can play around some more to change the label colors and add in-plot annotations. We will have one script here that contains all the previous steps as well as the new changes.

# Create data: 12 samples of 100 normal draws each.
# Columns are named Obs<observation><group>: observations 1-4 in groups 1-3.
data <- data.frame(
  Obs11 = rnorm(100, mean = 3, sd = 2),
  Obs21 = rnorm(100, mean = 4, sd = 1),
  Obs31 = rnorm(100, mean = 6, sd = 0.5),
  Obs41 = rnorm(100, mean = 10, sd = 0.5),
  Obs12 = rnorm(100, mean = 4, sd = 2),
  Obs22 = rnorm(100, mean = 4.5, sd = 2),
  Obs32 = rnorm(100, mean = 7, sd = 0.5),
  Obs42 = rnorm(100, mean = 8, sd = 3),
  Obs13 = rnorm(100, mean = 6, sd = 0.5),
  Obs23 = rnorm(100, mean = 5, sd = 3),
  Obs33 = rnorm(100, mean = 8, sd = 0.2),
  Obs43 = rnorm(100, mean = 4, sd = 4)
)

# Create the labels vector ("Observation 1" .. "Observation 4", once per group)
nms <- rep(paste("Observation", 1:4), times = 3)

# Create the colors vector (one color per observation, reused in every group)
cols <- c("#d73027", "#fdae61", "#1a9850", "#4575b4")

# x positions of the boxes; 5 and 10 are skipped to separate the three groups
box_at <- c(1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14)

# Set the margins before plotting. par() is a separate call: passing it as an
# unnamed argument to boxplot() only worked as a side effect.
par(mar = c(12, 5, 4, 2) + 0.1)

# Plot the boxplot without labels
boxplot(data, las = 2, at = box_at, col = cols,
        names = rep("", length(box_at)), ylim = c(-6, 18))

# Add labels outside the plot
mtext("Box Plot Example with 3 groups", side = 3, line = 2, font = 3)
mtext("Groups", side = 1, line = 7, cex = 0, font = 3)
mtext("Observations", side = 2, line = 2, cex = 0, font = 3)

# Use mtext() to add the labels with colors.
# mtext() is vectorized over text, at and col, so one call labels all 12 boxes.
# The font parameter gives a font style (1= normal, 2=bold, 3=italics, 4=bold italics)
mtext(rep(paste0("Observation", 1:4), times = 3), side = 1, line = 1,
      at = box_at, las = 2, font = 1, col = rep(cols, times = 3))

# Add in-plot labels: a "*" under the first box of each group and an "A"
# above the second box of each group
text(c(1, 6, 11), rep(-4, 3), "*")
text(c(2, 7, 12), c(9, 11, 15), "A", cex = 0.8, font = 3)

Convert the boxes to notched boxes. To draw notched boxes, add the “notch=TRUE” parameter to the boxplot() function in the previous code.

Drawing other plots

The Violin Plot

  • The Violin Plots are similar to box plots, except that they also show the kernel probability density of the data at different values. Typically, violin plots will include a marker for the median of the data and a box indicating the interquartile range, as in standard box plots.
  • To draw violin plots we will use the vioplot library and call its vioplot() function, reusing what we learned about the boxplot plotting parameters.
# Attach vioplot, which provides vioplot(). Console output when attaching:
## Loading required package: sm
## Package 'sm', version 2.2-5.4: type help(sm) for summary information
library(vioplot)
# Hand every column of the data frame to vioplot() as a separate, unnamed sample
do.call(vioplot, unname(as.list(data)))

Group and add names. Similar to the boxplot, we will use the “at=” parameter to group the violins and a “names=” parameter of empty strings to remove the default names. Then we will add the names using the mtext() function.

# x positions of the violins; 5 and 10 are skipped to separate the three groups
violin_at <- c(1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14)

# Adjust plot margins BEFORE plotting: par(mar = ...) only affects plots drawn
# after it, so it must precede vioplot() for the labels below to fit
par(mar = c(8, 4, 4, 2) + 0.1)

vioplot(data[[1]], data[[2]], data[[3]], data[[4]],
        data[[5]], data[[6]], data[[7]], data[[8]],
        data[[9]], data[[10]], data[[11]], data[[12]],
        at = violin_at, names = rep("", length(violin_at)))

# Add labels outside the plot
mtext("Violin Plot Example with 3 groups", side = 3, line = 2, font = 3)
mtext("Groups", side = 1, line = 7, cex = 0, font = 3)
mtext("Observations", side = 2, line = 2, cex = 0, font = 3)

# Use mtext() to add the labels with colors.
# mtext() is vectorized over text, at and col, so one call labels all violins.
# The font parameter gives a font style (1= normal, 2=bold, 3=italics, 4=bold italics)
mtext(rep(paste0("Obser", 1:4), times = 3), side = 1, line = 4,
      at = violin_at, las = 2, font = 1, col = rep(cols, times = 3))

Color the groups. In the boxplot, we can provide a vector of colors to the “col=” parameter to change the box colors. However, the “col=” parameter in the vioplot() function takes one color only. Thus, we cannot draw multicolor violin plots using the vioplot() function. To solve this, here is a modified version of the vioplot() function, where the “col=” parameter takes a vector of colors. You will need to run this function locally to generate the colored plot (copy and paste it into the R console, or save it to a .R file and source() the file).

# Multi-colour violin plot: a variant of vioplot() whose `col` argument may
# hold one fill colour per violin.
#
# Arguments:
#   x, ...      Numeric vectors; one violin is drawn per vector.
#   range       Whisker length as a multiple of the interquartile range.
#   h           Optional smoothing parameter forwarded to sm.density().
#   ylim        Limits of the data axis; defaults to the overall range of the
#               density evaluation points.
#   names       Axis labels, one per violin; defaults to 1..n.
#   horizontal  If TRUE, the violins are drawn horizontally.
#   col         Fill colour(s). A single colour is repeated for every violin;
#               otherwise element i fills violin i.
#   border      Outline colour of the violins.
#   lty, lwd    Line type and width of the violin outlines and whisker lines.
#   rectCol     Fill colour of the interquartile box.
#   colMed      Colour of the median point.
#   pchMed      Plotting symbol of the median point.
#   at          Positions of the violins; defaults to 1..n.
#   add         If TRUE, draw onto the current plot instead of starting a new one.
#   wex         Relative expansion of the violin width (and of the IQR box).
#   drawRect    If TRUE, draw the whisker line, the IQR box and the median.
#
# Returns (invisibly) a list of numeric vectors: upper, lower, median, q1, q3.
#
# sm.density() must be visible when this function is called, e.g. after
# library(sm), or library(vioplot) where that attaches sm.
mc.vioplot <- function(x, ..., range = 1.5, h = NULL, ylim = NULL, names = NULL,
                       horizontal = FALSE, col = "magenta", border = "black",
                       lty = 1, lwd = 1, rectCol = "black", colMed = "white",
                       pchMed = 19, at, add = FALSE, wex = 1,
                       drawRect = TRUE) {
  datas <- list(x, ...)
  # n must be known before a single colour can be expanded to one per violin
  # (the expansion used to run first and failed with "object 'n' not found").
  n <- length(datas)
  if (length(col) == 1) {
    col <- rep(col, n)
  }
  if (missing(at)) {
    at <- seq_len(n)
  }

  # Per-violin summary statistics and density outlines
  upper <- vector(mode = "numeric", length = n)
  lower <- vector(mode = "numeric", length = n)
  q1 <- vector(mode = "numeric", length = n)
  q3 <- vector(mode = "numeric", length = n)
  med <- vector(mode = "numeric", length = n)
  eval.pts <- vector(mode = "list", length = n)
  height <- vector(mode = "list", length = n)
  baserange <- c(Inf, -Inf)

  # Arguments forwarded to sm.density(); display = "none" suppresses its plot
  args <- list(display = "none")
  if (!is.null(h)) {
    args <- c(args, h = h)
  }

  for (i in seq_len(n)) {
    values <- datas[[i]]
    values.min <- min(values)
    values.max <- max(values)
    q1[i] <- quantile(values, 0.25)
    q3[i] <- quantile(values, 0.75)
    med[i] <- median(values)
    iqd <- q3[i] - q1[i]
    # Whiskers reach `range` IQRs beyond the box, clipped to the data extremes
    upper[i] <- min(q3[i] + range * iqd, values.max)
    lower[i] <- max(q1[i] - range * iqd, values.min)
    est.xlim <- c(min(lower[i], values.min), max(upper[i], values.max))
    smout <- do.call("sm.density", c(list(values, xlim = est.xlim), args))
    # Scale so the widest point of each violin is 0.4 * wex on either side
    hscale <- 0.4 / max(smout$estimate) * wex
    eval.pts[[i]] <- smout$eval.points
    height[[i]] <- smout$estimate * hscale
    baserange[1] <- min(baserange[1], min(eval.pts[[i]]))
    baserange[2] <- max(baserange[2], max(eval.pts[[i]]))
  }

  if (!add) {
    xlim <- if (n == 1) {
      at + c(-0.5, 0.5)
    } else {
      c(min(at), max(at)) + min(diff(at)) / 2 * c(-1, 1)
    }
    if (is.null(ylim)) {
      ylim <- baserange
    }
  }

  label <- if (is.null(names)) seq_len(n) else names
  boxwidth <- 0.05 * wex

  if (!add) {
    plot.new()
  }

  if (!horizontal) {
    if (!add) {
      plot.window(xlim = xlim, ylim = ylim)
      axis(2)
      axis(1, at = at, labels = label)
    }
    box()
    for (i in seq_len(n)) {
      # Mirror the density around at[i] to form the violin outline
      polygon(c(at[i] - height[[i]], rev(at[i] + height[[i]])),
              c(eval.pts[[i]], rev(eval.pts[[i]])),
              col = col[i], border = border, lty = lty, lwd = lwd)
      if (drawRect) {
        lines(at[c(i, i)], c(lower[i], upper[i]), lwd = lwd, lty = lty)
        rect(at[i] - boxwidth / 2, q1[i], at[i] + boxwidth / 2, q3[i],
             col = rectCol)
        points(at[i], med[i], pch = pchMed, col = colMed)
      }
    }
  } else {
    if (!add) {
      # Axes are swapped for horizontal violins
      plot.window(xlim = ylim, ylim = xlim)
      axis(1)
      axis(2, at = at, labels = label)
    }
    box()
    for (i in seq_len(n)) {
      polygon(c(eval.pts[[i]], rev(eval.pts[[i]])),
              c(at[i] - height[[i]], rev(at[i] + height[[i]])),
              col = col[i], border = border, lty = lty, lwd = lwd)
      if (drawRect) {
        lines(c(lower[i], upper[i]), at[c(i, i)], lwd = lwd, lty = lty)
        rect(q1[i], at[i] - boxwidth / 2, q3[i], at[i] + boxwidth / 2,
             col = rectCol)
        points(med[i], at[i], pch = pchMed, col = colMed)
      }
    }
  }

  invisible(list(upper = upper, lower = lower, median = med,
                 q1 = q1, q3 = q3))
}
  • We will run the function in exactly the same way, but we will add the “col=” parameter. Unlike the boxplot() function, here the length of the color vector needs to be equal to the number of boxes. Since we have 3 groups with 12 boxes (4 boxes/group) and we need to give the same set of colors to the members of each group, we will pass the “cols” vector, which contains 4 colors, to the “col=” parameter 3 times.
# x positions of the violins; 5 and 10 are skipped to separate the three groups
violin_at <- c(1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14)

# Adjust plot margins BEFORE plotting: par(mar = ...) only affects plots drawn
# after it, so it must precede mc.vioplot() for the labels below to fit
par(mar = c(8, 4, 4, 2) + 0.1)

# One fill color per violin: the 4 group colors repeated for the 3 groups
mc.vioplot(data[[1]], data[[2]], data[[3]], data[[4]],
           data[[5]], data[[6]], data[[7]], data[[8]],
           data[[9]], data[[10]], data[[11]], data[[12]],
           at = violin_at, names = rep("", length(violin_at)),
           col = c(cols, cols, cols))

# Add labels outside the plot
mtext("Violin Plot Example with 3 groups", side = 3, line = 2, font = 3)
mtext("Groups", side = 1, line = 7, cex = 0, font = 3)
mtext("Observations", side = 2, line = 2, cex = 0, font = 3)

# Use mtext() to add the labels with colors.
# mtext() is vectorized over text, at and col, so one call labels all violins.
# The font parameter gives a font style (1= normal, 2=bold, 3=italics, 4=bold italics)
mtext(rep(paste0("Obser", 1:4), times = 3), side = 1, line = 4,
      at = violin_at, las = 2, font = 1, col = rep(cols, times = 3))