2. Two variables: Discrete X, Continuous Y
# Load the ToothGrowth data set and store dose as a factor so that it is
# handled as a discrete variable.
data("ToothGrowth")
ToothGrowth[["dose"]] <- as.factor(ToothGrowth[["dose"]])
head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Base plot object reused by the examples below: dose on x, len on y.
e <- ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len))
2.1 Boxplot
# Box plot with default settings
e +
  geom_boxplot()

# Box plot drawn with notches
e +
  geom_boxplot(notch = TRUE)

# Outline color mapped to dose
e +
  geom_boxplot(mapping = aes(color = dose))

# Fill color mapped to dose
e +
  geom_boxplot(mapping = aes(fill = dose))

2.2 Violin plot
# Violin plot with untrimmed tails
e +
  geom_violin(trim = FALSE)

# Violin plot overlaid with a red point range summarised by "mean_sdl"
# (mult = 1)
e +
  geom_violin(trim = FALSE) +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "pointrange",
    color = "red"
  )

# Violin plot with a narrow box plot drawn on top
e +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2)

# Outline color mapped to dose
e +
  geom_violin(mapping = aes(color = dose), trim = FALSE)

2.3 Dotplot
# Dot plot binned along y, with dots stacked around the center
e +
  geom_dotplot(binaxis = "y", stackdir = "center")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Dot plot overlaid with a red point range summarised by "mean_sdl" (mult = 1)
e +
  geom_dotplot(binaxis = "y", stackdir = "center") +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "pointrange",
    color = "red"
  )
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

2.4 Dotplot combined with box plot and violin plot
# Box plot with a centered dot plot layered on top
e +
  geom_boxplot() +
  geom_dotplot(binaxis = "y", stackdir = "center")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Violin plot with a centered dot plot layered on top
e +
  geom_violin(trim = FALSE) +
  geom_dotplot(binaxis = "y", stackdir = "center")
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Dot outline and fill both mapped to dose
e +
  geom_dotplot(
    mapping = aes(color = dose, fill = dose),
    binaxis = "y",
    stackdir = "center"
  )
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

2.5 Jitter plot
# Jittered points (jitter width 0.2)
e +
  geom_jitter(position = position_jitter(width = 0.2))

# Jittered points overlaid with a red point range summarised by "mean_sdl"
# (mult = 1)
e +
  geom_jitter(position = position_jitter(width = 0.2)) +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "pointrange",
    color = "red"
  )

# Combine with box plot
# `binaxis` and `stackdir` are geom_dotplot() parameters; geom_boxplot() does
# not know them and only warned that they were ignored, so it is called with
# its defaults here.
e + geom_jitter(position = position_jitter(0.2)) +
  geom_boxplot()

# Violin plot with jittered points layered on top
e +
  geom_violin(trim = FALSE) +
  geom_jitter(position = position_jitter(width = 0.2))

# Point color and point shape both mapped to dose
e +
  geom_jitter(
    mapping = aes(color = dose, shape = dose),
    position = position_jitter(width = 0.2)
  )

2.6 geom_line(): Line plot
# Small example data frame with one len value per supp/dose combination.
df <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), times = 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)
head(df)
## supp dose len
## 1 VC D0.5 6.8
## 2 VC D1 15.0
## 3 VC D2 33.0
## 4 OJ D0.5 4.2
## 5 OJ D1 10.0
## 6 OJ D2 29.5
# One line per supp, distinguished by line type
ggplot(data = df, mapping = aes(x = dose, y = len, group = supp)) +
  geom_line(mapping = aes(linetype = supp)) +
  geom_point()

# Line type, point shape and color all mapped to supp
ggplot(data = df, mapping = aes(x = dose, y = len, group = supp)) +
  geom_line(mapping = aes(linetype = supp, color = supp)) +
  geom_point(mapping = aes(shape = supp, color = supp))

2.7 geom_bar(): Bar plot
# Small example data frame with one len value per dose.
df <- data.frame(
  dose = c("D0.5", "D1", "D2"),
  len = c(4.2, 10, 29.5)
)
head(df)
## dose len
## 1 D0.5 4.2
## 2 D1 10.0
## 3 D2 29.5
# Replace df with the full ToothGrowth data, keeping dose as a factor.
df <- ToothGrowth
df[["dose"]] <- as.factor(df[["dose"]])
head(df)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Calculate the mean and the SD in each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of the variable to be summarized
# grps : column names to be used as grouping variables
#
# Returns a data frame with one row per group: the grouping columns, a column
# named after `varname` holding the group mean, and a column `sd` holding the
# group standard deviation. NA values are removed before both are computed.
data_summary <- function(data, varname, grps) {
  # Fail loudly when plyr is missing; require() would only return FALSE and
  # leave the later ddply() call to fail with a less helpful message.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required by data_summary()", call. = FALSE)
  }
  summary_func <- function(x, col) {
    c(
      mean = mean(x[[col]], na.rm = TRUE),
      sd = sd(x[[col]], na.rm = TRUE)
    )
  }
  data_sum <- plyr::ddply(data, grps, .fun = summary_func, varname)
  # Rename the "mean" column directly instead of calling rename(): an
  # unqualified rename() resolves to dplyr::rename() when dplyr is attached
  # after plyr, and that function does not accept this calling convention.
  names(data_sum)[names(data_sum) == "mean"] <- varname
  data_sum
}
# Small example data frame with one len value per supp/dose combination.
df2 <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), times = 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)
head(df2)
## supp dose len
## 1 VC D0.5 6.8
## 2 VC D1 15.0
## 3 VC D2 33.0
## 4 OJ D0.5 4.2
## 5 OJ D1 10.0
## 6 OJ D2 29.5
library(plyr)
# Replace df2 with the per-dose mean and SD of len computed from df.
df2 <- data_summary(df, varname = "len", grps = "dose")
# Make sure dose is stored as a factor
df2[["dose"]] <- as.factor(df2[["dose"]])
head(df2)
## dose len sd
## 1 0.5 10.605 4.499763
## 2 1 19.735 4.415436
## 3 2 26.100 3.774150
# Build the bar plots on the per-dose summary df2 (one row per dose, `len` is
# the group mean). `df` holds the raw ToothGrowth observations, several per
# dose, so stat = "identity" would stack them into one bar per dose and
# geom_text() would print one label per observation.
f <- ggplot(df2, aes(x = dose, y = len))
# Basic bar plot
f + geom_bar(stat = "identity")

# Change fill color and add labels
f + geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = len), vjust = -0.6, size = 3.5) +
  theme_minimal()

# Change bar plot line colors by groups
f + geom_bar(aes(color = dose),
             stat = "identity", fill = "white")

# Change bar plot fill colors by groups
f + geom_bar(aes(fill = dose), stat = "identity")

# geom_errorbar(): Error bars
# Base plot on the summary data; the bars span len - sd to len + sd.
f <- ggplot(
  data = df2,
  mapping = aes(x = dose, y = len, ymin = len - sd, ymax = len + sd)
)
# Error bars with color mapped to dose
f +
  geom_errorbar(mapping = aes(color = dose), width = 0.2)

# A single line through all doses, with error bars on top
f +
  geom_line(mapping = aes(group = 1)) +
  geom_errorbar(width = 0.2)

# White bars outlined by dose color, with matching error bars on top
f +
  geom_bar(mapping = aes(color = dose), stat = "identity", fill = "white") +
  geom_errorbar(mapping = aes(color = dose), width = 0.2)

# geom_errorbarh(): Horizontal error bars
# Recompute the per-dose mean and SD of len directly from ToothGrowth.
df2 <- data_summary(ToothGrowth, varname = "len", grps = "dose")
head(df2)
## dose len sd
## 1 0.5 10.605 4.499763
## 2 1 19.735 4.415436
## 3 2 26.100 3.774150
# Base plot with len on x and dose on y; the bars span len - sd to len + sd.
f <- ggplot(
  data = df2,
  mapping = aes(x = len, y = dose, xmin = len - sd, xmax = len + sd)
)