Visual Comparison

In this module, we review the most common visual tools for comparing two or more univariate samples.

Box Plot

Data From Same Distribution

# Two independent standard-normal samples drawn under a fixed seed, shown as
# side-by-side box plots.
set.seed(123)
x <- rnorm(1000)
y <- rnorm(1000)
# Passing a named list labels the boxes "Data 1" / "Data 2".
boxplot(
  list("Data 1" = x, "Data 2" = y),
  col = c("light blue", "light green"),
  cex.axis = 1.5,
  cex.lab = 1.5
)

Data From Different Distributions

# One gamma sample (shape 2, rate 1) and one Weibull sample (shape 2, scale 1),
# compared with side-by-side box plots.
set.seed(123)
x2 <- rgamma(1000, 2, 1)
y2 <- rweibull(1000, 2, 1)
# Passing a named list labels the boxes "Data 1" / "Data 2".
boxplot(
  list("Data 1" = x2, "Data 2" = y2),
  col = c("red", "blue"),
  cex.axis = 1.5,
  cex.lab = 1.5
)

Histogram

Data From Same Distribution

# Two independent standard-normal samples drawn under a fixed seed.
set.seed(123)
x3 <- rnorm(1000)
y3 <- rnorm(1000)
# Overplot: draw the first density histogram, then add the second on top.
# The alpha of 0.75 in rgb() keeps the overlap visible. TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
hist(x3, probability = TRUE, col = rgb(1, 0, 0, 0.75),
     xlim = c(-4, 4), ylim = c(0, 0.5), xlab = "Data",
     cex.axis = 1.5, cex.lab = 1.5, main = "Histograms")
hist(y3, add = TRUE, probability = TRUE, col = rgb(0, 0, 1, 0.75))
box()
legend(1.1, 0.5, c("Data 1", "Data 2"),
       col = c(rgb(1, 0, 0, 0.75), rgb(0, 0, 1, 0.75)),
       text.col = "black", lty = c(1, 1), lwd = c(2, 2),
       merge = TRUE, bg = "gray90", cex = 1.1)

Data From Different Distributions

# One Beta(2, 1) sample (supported on [0, 1]) and one logistic sample
# (location 2, scale 1), overlaid as density histograms.
set.seed(123)
x4 <- rbeta(1000, 2, 1)
y4 <- rlogis(1000, 2, 1)
# Compute both histograms without plotting so the axis limits can be taken
# from the data. Fixed limits of c(-4, 4) / c(0, 0.5) would clip the logistic
# tails and cut off the tall Beta bars.
hist_x4 <- hist(x4, plot = FALSE)
hist_y4 <- hist(y4, plot = FALSE)
hist_xlim <- range(hist_x4$breaks, hist_y4$breaks)
hist_ylim <- c(0, max(hist_x4$density, hist_y4$density))
# Overplot: first histogram sets up the axes, second is added on top.
hist(x4, probability = TRUE, col = rgb(1, 0, 0, 0.75),
     xlim = hist_xlim, ylim = hist_ylim, xlab = "Data",
     cex.axis = 1.5, cex.lab = 1.5, main = "Histograms")
hist(y4, add = TRUE, probability = TRUE, col = rgb(0, 0, 1, 0.75))
box()
# Keyword placement keeps the legend inside the plot whatever the limits are.
legend("topright", c("Data 1", "Data 2"),
       col = c(rgb(1, 0, 0, 0.75), rgb(0, 0, 1, 0.75)),
       text.col = "black", lty = c(1, 1), lwd = c(2, 2),
       merge = TRUE, bg = "gray90", cex = 1.1)

Histogram (Back to Back)

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
# Back-to-back density histograms of two standard-normal samples.
set.seed(123)
x5 <- rnorm(1000)
y5 <- rnorm(1000)
# Build the data frame from this chunk's own samples, not from globals left
# over from earlier chunks.
df <- data.frame(x5 = x5, y5 = y5)
# The first histogram is drawn upwards and the second mirrored below zero by
# negating its density. after_stat(density) replaces the deprecated
# ..density.. notation.
bb <- ggplot(df) +
  geom_histogram(aes(x = x5, y = after_stat(density)),
                 fill = "red", bins = 20) +
  geom_histogram(aes(x = y5, y = -after_stat(density)),
                 fill = "green", bins = 20) +
  xlab("Data") +
  ylab("Density") +
  theme(axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))
bb

library(ggplot2)
# Back-to-back density histograms of a Weibull sample (shape 2, scale 1) and a
# Beta(2, 1) sample.
set.seed(123)
x6 <- rweibull(1000, 2, 1)
y6 <- rbeta(1000, 2, 1)
# Build the data frame from x6 / y6. Using the globals x / y would plot
# samples from an earlier chunk instead of the ones generated here.
df <- data.frame(x6 = x6, y6 = y6)
# The first histogram is drawn upwards and the second mirrored below zero by
# negating its density. after_stat(density) replaces the deprecated
# ..density.. notation.
bb <- ggplot(df) +
  geom_histogram(aes(x = x6, y = after_stat(density)),
                 fill = "red", bins = 20) +
  geom_histogram(aes(x = y6, y = -after_stat(density)),
                 fill = "blue", bins = 20) +
  xlab("Data") +
  ylab("Density") +
  theme(axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))
bb

Bean Plots

library(beanplot)
## Warning: package 'beanplot' was built under R version 4.1.3
# Two Weibull samples (shape 2, scale 2) drawn under a fixed seed.
set.seed(123)
x7 <- rweibull(1000, 2, 2)
y7 <- rweibull(1000, 2, 2)
# col is a list with one colour vector per bean: the first bean is filled
# green, the second is filled red with blue as its second colour (per the
# beanplot docs, the lines inside the bean).
beanplot(x7, y7, col = list("green", c("red", "blue")))
# The legend swatches use the bean fill colours so they match the plot.
legend("topright", fill = c("green", "red"),
       legend = c("Data 1", "Data 2"), box.lty = 0)

# Single bean with a red fill and a blue outline.
beanplot(x7, col = "red", border = "blue")

# Back to Back
# Stack both samples into long format: one column of values and one group
# indicator (1 for x7, 2 for y7).
df <- data.frame(
  Values = c(x7, y7),
  Group = rep(c(1, 2), times = c(length(x7), length(y7)))
)
Values <- df[["Values"]]
Group <- df[["Group"]]
# side = "both" draws the two groups as the two halves of a single bean.
# Beans are vertical by default (horizontal = FALSE), so the data values are
# on the y axis and the density extends along the x axis; the axis labels
# follow that orientation.
beanplot(Values ~ Group, ll = 0.04, main = "Bean Plot", side = "both",
         xlab = "Density", ylab = "Value",
         col = list("red", c("blue", "black")), axes = FALSE)
box()
legend("bottomright", fill = c("red", "blue"),
       legend = c("Data 1", "Data 2"), box.lty = 0)

Stem and Leaf Diagrams

# Stem-and-leaf display of the integers 1 to 1200 with default settings.
x <- seq_len(1200L)
stem(x)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##    0 | 00001111111111222222222233333333334444444444555555555566666666667777+14
##    1 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    2 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    3 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    4 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    5 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    6 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    7 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    8 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##    9 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##   10 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##   11 | 00000000001111111111222222222233333333334444444444555555555566666666+20
##   12 | 000000
# Same data with scale = 2: each stem is split across two lines.
stem(x, scale = 2)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##    0 | 00001111111111222222222233333333334444444444
##    0 | 55555555556666666666777777777788888888889999999999
##    1 | 00000000001111111111222222222233333333334444444444
##    1 | 55555555556666666666777777777788888888889999999999
##    2 | 00000000001111111111222222222233333333334444444444
##    2 | 55555555556666666666777777777788888888889999999999
##    3 | 00000000001111111111222222222233333333334444444444
##    3 | 55555555556666666666777777777788888888889999999999
##    4 | 00000000001111111111222222222233333333334444444444
##    4 | 55555555556666666666777777777788888888889999999999
##    5 | 00000000001111111111222222222233333333334444444444
##    5 | 55555555556666666666777777777788888888889999999999
##    6 | 00000000001111111111222222222233333333334444444444
##    6 | 55555555556666666666777777777788888888889999999999
##    7 | 00000000001111111111222222222233333333334444444444
##    7 | 55555555556666666666777777777788888888889999999999
##    8 | 00000000001111111111222222222233333333334444444444
##    8 | 55555555556666666666777777777788888888889999999999
##    9 | 00000000001111111111222222222233333333334444444444
##    9 | 55555555556666666666777777777788888888889999999999
##   10 | 00000000001111111111222222222233333333334444444444
##   10 | 55555555556666666666777777777788888888889999999999
##   11 | 00000000001111111111222222222233333333334444444444
##   11 | 55555555556666666666777777777788888888889999999999
##   12 | 000000
# A very small width leaves no room for leaves, so only the per-stem counts
# are printed.
stem(x, atom = 100, width = 0.1, scale = 1)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##    0 | +94
##    1 | +100
##    2 | +100
##    3 | +100
##    4 | +100
##    5 | +100
##    6 | +100
##    7 | +100
##    8 | +100
##    9 | +100
##   10 | +100
##   11 | +100
##   12 | +6
# Stem-and-leaf display of a small hand-entered sample.
z <- c(
  12, 12, 13, 12,
  123, 112, 121, 121, 121,
  321, 333, 125, 45
)
stem(z)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   0 | 11115
##   1 | 122223
##   2 | 
##   3 | 23
# Stem-and-leaf display of 1000 standard-normal draws under a fixed seed.
# Note that this reassigns the global y.
set.seed(123)
y <- rnorm(n = 1000)
stem(y)
## 
##   The decimal point is at the |
## 
##   -2 | 876655
##   -2 | 32222111111000000
##   -1 | 99999999988888877777777777666666665555555555555555
##   -1 | 44444444444444444333333333333333322222222222222222222111111111111111+21
##   -0 | 99999999999999888888888888888888888888777777777777777777777777777777+60
##   -0 | 44444444444444444444444444444444443333333333333333333333333333333333+101
##    0 | 00000000000000000111111111111111111111111111111111111111111111111222+104
##    0 | 55555555555555555555555555555555566666666666666666666666666666666677+71
##    1 | 00000000000000000000000000011111111111111111111111122222222222222222+12
##    1 | 555555555555566666666677777777777788888999999
##    2 | 0000000011112222333344444
##    2 | 5566677
##    3 | 2

Bee Swarm

library(beeswarm)
# Two independent standard-normal samples drawn under a fixed seed.
set.seed(123)
x9 <- rnorm(1000)
y9 <- rnorm(1000)

# Bee Swarm plots for the two populations
# Stack this chunk's own samples (x9 / y9) into long format. The globals
# x / y have been reassigned by earlier chunks and would not match the box
# plots drawn below.
df <- data.frame(
  Values = c(x9, y9),
  Group = rep(c(1, 2), times = c(length(x9), length(y9)))
)
Values <- df[["Values"]]
Group <- df[["Group"]]
# Draw the box plots first, then overlay the swarm on the same axes.
boxplot(x9, y9, ylim = c(-4, 4), lwd = 2, names = c(1, 2),
        cex.axis = 1.5, cex.lab = 1.5)
beeswarm(Values ~ Group, data = df, method = "swarm",
         col = c("blue", "green"), cex = 0.75,
         cex.axis = 1.5, cex.lab = 1.5, add = TRUE)
legend("topright", fill = c("blue", "green"),
       legend = c("Data 1", "Data 2"), box.lty = 0)
box()