1. Two variables: Continuous function

In this section, we’ll see how to connect observations with a line. The economics data set (included in ggplot2) is used.

library(ggplot2)
# Load the economics data set and preview its first six rows
data("economics")
head(economics, n = 6L)
## # A tibble: 6 × 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018
# Base plot shared by the area and line examples: unemploy against date
d <- ggplot(economics, aes(x = date, y = unemploy))

# Area plot
d + geom_area()

# Line plot: connecting observations, ordered by x
d + geom_line()

# Connecting observations by stairs
# A random subset of 20 rows of the economics data set is used; the seed is
# fixed so that the same rows are drawn on every run.
set.seed(1234)
ss <- economics[sample(seq_len(nrow(economics)), 20), ]
ggplot(ss, aes(x = date, y = unemploy)) +
  geom_step()

2. Two variables: Discrete X, Continuous Y

# Load ToothGrowth and convert dose to a factor so it acts as a discrete x
data("ToothGrowth")
ToothGrowth[["dose"]] <- as.factor(ToothGrowth[["dose"]])
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
# Base plot reused by the examples below: len against dose
e <- ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len))

2.1 Boxplot

# Box plot with default settings
e + geom_boxplot()

# Same box plot, drawn with notches
e + geom_boxplot(notch = TRUE)

# Outline color mapped to the dose group
e + geom_boxplot(mapping = aes(color = dose))

# Fill color mapped to the dose group
e + geom_boxplot(mapping = aes(fill = dose))

2.2 Violin plot

# Violin plot; trim = FALSE leaves the tails of the violins untrimmed
e + geom_violin(trim = FALSE)

# Violin plot overlaid with the group mean +/- 1 SD as a red point range
e +
  geom_violin(trim = FALSE) +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "pointrange",
    color = "red"
  )

# Violin plot with a narrow box plot drawn inside
e +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2)

# Outline color mapped to the dose group
e + geom_violin(mapping = aes(color = dose), trim = FALSE)

2.3 Dotplot

# Dot plot binned along y, with dots stacked symmetrically around the center
e + geom_dotplot(stackdir = "center", binaxis = "y")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Dot plot overlaid with the group mean +/- 1 SD as a red point range
e +
  geom_dotplot(stackdir = "center", binaxis = "y") +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "pointrange",
    color = "red"
  )
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

2.4 Combine with box plot

# Box plot with the dot plot layered on top
e +
  geom_boxplot() +
  geom_dotplot(stackdir = "center", binaxis = "y")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Violin plot with the dot plot layered on top
e +
  geom_violin(trim = FALSE) +
  geom_dotplot(stackdir = "center", binaxis = "y")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Dot outline and fill colors both mapped to the dose group
e + geom_dotplot(
  mapping = aes(color = dose, fill = dose),
  stackdir = "center",
  binaxis = "y"
)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

2.5 Jitter plot

# Default plot: points jittered horizontally by 0.2 to reduce overplotting
e + geom_jitter(position = position_jitter(0.2))

# Strip charts with mean points (+/- SD)
e + geom_jitter(position = position_jitter(0.2)) +
  stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "pointrange", color = "red")

# Combine with box plot
# geom_boxplot() has no `binaxis`/`stackdir` parameters (they belong to
# geom_dotplot()); passing them only triggered an "Ignoring unknown
# parameters" warning, so they are omitted here.
e + geom_jitter(position = position_jitter(0.2)) +
  geom_boxplot()

# Add violin plot
e + geom_violin(trim = FALSE) +
  geom_jitter(position = position_jitter(0.2))

# Change color and shape by group (dose)
e + geom_jitter(aes(color = dose, shape = dose),
                position = position_jitter(0.2))

2.6 geom_line(): Line plot

# Data derived from the ToothGrowth data set is used.

# Six rows: one len value per supplement (supp) and dose level
df <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), times = 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)
head(df)
##   supp dose  len
## 1   VC D0.5  6.8
## 2   VC   D1 15.0
## 3   VC   D2 33.0
## 4   OJ D0.5  4.2
## 5   OJ   D1 10.0
## 6   OJ   D2 29.5
# One line per supplement, distinguished by line type
ggplot(data = df, mapping = aes(x = dose, y = len, group = supp)) +
  geom_line(mapping = aes(linetype = supp)) +
  geom_point()

# Line type, point shape and color all mapped to the supplement
ggplot(data = df, mapping = aes(x = dose, y = len, group = supp)) +
  geom_line(mapping = aes(linetype = supp, color = supp)) +
  geom_point(mapping = aes(shape = supp, color = supp))

2.7 geom_bar(): Bar plot

# Data derived from the ToothGrowth data set is used.

# Small example table: one len value per dose label
df <- data.frame(
  dose = c("D0.5", "D1", "D2"),
  len = c(4.2, 10, 29.5)
)
head(df)
##   dose  len
## 1 D0.5  4.2
## 2   D1 10.0
## 3   D2 29.5
# Replace df with the full ToothGrowth data, treating dose as a factor
df <- ToothGrowth
df[["dose"]] <- as.factor(df[["dose"]])
head(df)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
# Calculate the mean and the SD in each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of the variable to be summarized
# grps : column names to be used as grouping variables
# Returns a data frame with one row per group: the grouping columns, the
# group mean stored in a column named after varname, and a column sd.
# Missing values in the summarized variable are ignored (na.rm = TRUE).
data_summary <- function(data, varname, grps) {
  # Fail with a clear message when plyr is unavailable; require() would only
  # return FALSE and let the ddply() call fail later with an unrelated error.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required by data_summary().", call. = FALSE)
  }
  # Per-group summary: mean and standard deviation of column `col`
  summary_func <- function(x, col) {
    c(mean = mean(x[[col]], na.rm = TRUE),
      sd = sd(x[[col]], na.rm = TRUE))
  }
  data_sum <- plyr::ddply(data, grps, .fun = summary_func, varname)
  # Namespace-qualified so an attached package that also exports rename()
  # (e.g. dplyr) cannot mask the plyr version used here.
  plyr::rename(data_sum, c("mean" = varname))
}

# Example table with one len value per supplement and dose label
df2 <- data.frame(supp = rep(c("VC", "OJ"), each = 3),
                  dose = rep(c("D0.5", "D1", "D2"), 2),
                  len = c(6.8, 15, 33, 4.2, 10, 29.5))
head(df2)
##   supp dose  len
## 1   VC D0.5  6.8
## 2   VC   D1 15.0
## 3   VC   D2 33.0
## 4   OJ D0.5  4.2
## 5   OJ   D1 10.0
## 6   OJ   D2 29.5
library(plyr)

# Replace df2 with the per-dose mean and SD of len computed from df
df2 <- data_summary(df, varname = "len", grps = "dose")
# Convert dose to a factor variable
df2$dose <- as.factor(df2$dose)
head(df2)
##   dose    len       sd
## 1  0.5 10.605 4.499763
## 2    1 19.735 4.415436
## 3    2 26.100 3.774150
# Use the per-dose summary df2 (one row per dose, len = group mean) so that
# each bar shows a single value. Plotting the raw observations in df with
# stat = "identity" would stack every observation of a dose into one bar and
# print one overlapping label per observation.
f <- ggplot(df2, aes(x = dose, y = len))
# Basic bar plot
f + geom_bar(stat = "identity")

# Change fill color and add labels
f + geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = len), vjust = -0.6, size = 3.5) +
  theme_minimal()

# Change bar plot line colors by groups
f + geom_bar(aes(color = dose),
             stat = "identity", fill = "white")

# Change bar plot fill colors by groups
f + geom_bar(aes(fill = dose), stat = "identity")

# geom_errorbar(): Error bars

# Base plot for the error-bar examples: the bars span len - sd to len + sd
f <- ggplot(
  data = df2,
  mapping = aes(x = dose, y = len, ymin = len - sd, ymax = len + sd)
)
# Error bars with color mapped to the dose group
f + geom_errorbar(mapping = aes(color = dose), width = 0.2)

# Error bars over a line joining all doses (group = 1 puts them in one group)
f +
  geom_line(mapping = aes(group = 1)) +
  geom_errorbar(width = 0.2)

# Error bars over white-filled bars, outlines colored by the dose group
f +
  geom_bar(mapping = aes(color = dose), stat = "identity", fill = "white") +
  geom_errorbar(mapping = aes(color = dose), width = 0.2)

# geom_errorbarh(): Horizontal error bars

# Recompute the per-dose mean and SD of len directly from ToothGrowth
df2 <- data_summary(ToothGrowth, varname = "len", grps = "dose")
head(df2)
##   dose    len       sd
## 1  0.5 10.605 4.499763
## 2    1 19.735 4.415436
## 3    2 26.100 3.774150
# Base plot with len on the x axis and the horizontal range len - sd to len + sd
f <- ggplot(
  data = df2,
  mapping = aes(x = len, y = dose, xmin = len - sd, xmax = len + sd)
)