Bivariate Densities

Bivariate scatterplots

First prepare the Data (DAT):

library(MASS)  # provides mvrnorm() for multivariate normal draws
set.seed(137)  # fixed seed so the simulated mixture is reproducible

# Each of the three components is a bivariate normal sample. Its covariance
# matrix is assembled from per-axis standard deviations and a correlation
# matrix: cov = diag(sd) %*% cor %*% diag(sd).

# Set 1
n1 <- 1000  # number of cases
m1 <- c(-2, 2)  # mean
sd1 <- c(0.5, 2)  # standard deviation
cor1 <- matrix(c(1, 0.3, 0.3, 1), ncol = 2)  # correlation matrix
cov1 <- diag(sd1) %*% cor1 %*% diag(sd1)
# `mu` is spelled out in full; the abbreviated `m =` only worked through
# partial argument matching.
set1 <- mvrnorm(n1, mu = m1, Sigma = cov1)

# Set 2
n2 <- 2000
m2 <- c(2, 2)
sd2 <- c(2.5, 1)
cor2 <- matrix(c(1, -0.25, -0.25, 1), ncol = 2)
cov2 <- diag(sd2) %*% cor2 %*% diag(sd2)
set2 <- mvrnorm(n2, mu = m2, Sigma = cov2)

# Set 3
n3 <- 3000
m3 <- c(0, -2)
sd3 <- c(2.5, 1)
cor3 <- matrix(c(1, 0, 0, 1), ncol = 2)  # uncorrelated axes
cov3 <- diag(sd3) %*% cor3 %*% diag(sd3)
set3 <- mvrnorm(n3, mu = m3, Sigma = cov3)

# Combine the data rowwise into one 6000 x 2 matrix.
# Based on the number of cases the mixture is 1/6 set 1, 2/6 set 2 and
# 3/6 set 3.
DAT <- rbind(set1, set2, set3)

And now we present the plots:

# gPlot() is not base R; it is expected to come from this sourced project
# file (presumably a plot() wrapper -- confirm in gridPlotFunctions.R).
source("gridPlotFunctions.R")
# Baseline scatterplot of the full mixture: column 1 on x, column 2 on y.
# The title previously misspelled "Overplotted".
gPlot(DAT[, 1], DAT[, 2], xlab = "X values simulated", ylab = "Y values simulated", 
    main = "An Overplotted Scatterplot")

plot of chunk unnamed-chunk-2


# Same scatterplot, now drawn with solid dots (pch = 19).
gPlot(
    DAT[, 1], DAT[, 2],
    pch = 19,
    xlab = "X values simulated",
    ylab = "Y values simulated",
    main = "I. More overplotting with filled dots"
)

plot of chunk unnamed-chunk-2

We can instead call plot(y, x), which flips the plot about the diagonal running from low to high values:

# Axes swapped: column 2 goes on the horizontal axis and column 1 on the
# vertical axis, so the axis labels are swapped to match.
gPlot(
    DAT[, 2], DAT[, 1],
    pch = 19,
    xlab = "Y values simulated",
    ylab = "X values simulated",
    main = "II. More overplotting with filled dots"
)

plot of chunk unnamed-chunk-3

Primitive emulation of Gaussian splatting by overplotting dots of different sizes

Below we overplot circles of four different sizes to provide a crude emulation of splatting Gaussians.

x <- DAT[, 1]
y <- DAT[, 2]
alpha <- 0.01  # opacity of each individual dot
# type = "n" sets up the axes and titles without drawing any points.
gPlot(x, y, type = "n",
    xlab = "X Values Simulated: Unitless",
    ylab = "Y Values Simulated: Unitless",
    main = "III. Primitive Splatting: Fuzzy, Further Processing Would Help")
# Stack four borderless, nearly transparent discs on every point, from the
# largest to the smallest, so that the overlap builds up a soft blob.
for (dot_cex in c(2, 1.5, 1, 0.5)) {
    points(x, y, pch = 21, bg = rgb(0.4, 0.2, 0.8, alpha), col = NA,
        cex = dot_cex, lwd = NA)
}

plot of chunk unnamed-chunk-4

With a different **alpha**

x <- DAT[, 1]
y <- DAT[, 2]
alpha <- 0.09  # more opaque dots than in plot III
# type = "n" sets up the axes and titles without drawing any points.
gPlot(x, y, type = "n",
    xlab = "X Values Simulated: Unitless",
    ylab = "Y Values Simulated: Unitless",
    main = "IV. Primitive Splatting: Fuzzy, Further Processing Would Help")
# Stack four borderless translucent discs on every point, from the largest
# to the smallest.
for (dot_cex in c(2, 1.5, 1, 0.5)) {
    points(x, y, pch = 21, bg = rgb(0.4, 0.2, 0.8, alpha), col = NA,
        cex = dot_cex, lwd = NA)
}

plot of chunk unnamed-chunk-5

*Plotting contours and points*

library("ash")  # supplies bin2() and ash2()
# Bin the points on a 30 x 30 grid, then smooth the bin counts into a
# density estimate.
bin2d <- bin2(DAT, nbin = c(30, 30))
bin2d.sm <- ash2(bin2d)
# The title, axis labels and las belong on plot(): with add = TRUE,
# contour() only overlays the contour lines and silently ignores
# main/xlab/ylab/las/cex, so they were never drawn.
plot(DAT, col = "#B0B0B0", xlab = "X With No Units",
  ylab = "Y With No Units", main = "V. Mixture of Correlated Normals",
  las = 1)
contour(bin2d.sm, nlevels = 12, add = TRUE, col = "#A00000", lwd = 2)

plot of chunk unnamed-chunk-6

Plotting filled contours. The colorRampPalette function creates a color function that will interpolate to return a given number of colors. Here the sequential colors used come from Cindy Brewer.

# Filled-contour view of the smoothed density; no palette is passed, so
# filled.contour() uses its default colors.
filled.contour(
    bin2d.sm,
    main = "VI. Average Shift Histogram Bivariate Density",
    xlab = "X Values Simulated: Unitless",
    ylab = "Y Values Simulated: Unitless",
    key.title = title("Key\nDensity")
)

plot of chunk unnamed-chunk-7

Modifying the contour levels and color palette

Another view with a bias closer to 1 and few levels specified explicitly

# Five-step sequential yellow-orange-brown palette (hex colors).
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
# The argument is named `color.palette` in full; the abbreviated `color =`
# only worked through partial argument matching.
# Explicit levels 0, 0.003, ..., 0.021 give seven color bands.
filled.contour(bin2d.sm,
    color.palette = colorRampPalette(YlOrBr, space = "Lab", bias = 0.8), 
    xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless", 
    levels = seq(0, 0.021, by = 0.003), main = "VII. Average Shift Histogram Bivariate Density", 
    key.title = title("Key\nDensity"))  # \n uses a second line

plot of chunk unnamed-chunk-8

Plotting a perspective view of the density surface

Note that the light comes from the left. Different orientations of the density surface would be helpful. Some software puts a contour plot beneath a density surface plot.

# Single perspective rendering of the smoothed density surface, with the
# viewing parameters r, phi and shade set explicitly.
persp(
    bin2d.sm,
    main = "VIII. A Single Perspective View Can Hide Features",
    xlab = "X: No Units",
    ylab = "Y: No Units",
    zlab = "Density",
    col = "lightblue",
    r = 4,
    phi = 40,
    shade = 0.2
)

plot of chunk unnamed-chunk-9

Changing the binning grid

The grid resolution when binning and the grid shifting details for averaging grid densities provide two smoothing parameters. What happens when we use a grid resolution of 40 by 40 rather than 30 by 30?

# Re-bin on a finer 40 x 40 grid and smooth again. Both objects are
# overwritten, so subsequent plots use the new estimate.
bin2d <- bin2(DAT, nbin = rep(40, 2))
bin2d.sm <- ash2(bin2d)

Now, let's rerun plot VII:

# Five-step sequential yellow-orange-brown palette (hex colors).
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
# The argument is named `color.palette` in full; the abbreviated `color =`
# only worked through partial argument matching.
# The levels are the same as in plot VII (top level 0.021).
filled.contour(bin2d.sm,
    color.palette = colorRampPalette(YlOrBr, space = "Lab", bias = 0.8), 
    xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless", 
    levels = seq(0, 0.021, by = 0.003), main = "VIIa. Average Shift Histogram Bivariate Density", 
    key.title = title("Key\nDensity"))

plot of chunk unnamed-chunk-11

As we can see, the finer grid cells do less smoothing, so higher density values appear. Now, in the contour levels seq() function, let's increase the upper value so a white hole doesn't appear:

# Five-step sequential yellow-orange-brown palette (hex colors).
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
# The argument is named `color.palette` in full; the abbreviated `color =`
# only worked through partial argument matching.
# NOTE: with by = 0.003 the sequence stops at 0.024, not 0.025, so the top
# contour level is 0.024; densities above that would still be left blank.
filled.contour(bin2d.sm,
    color.palette = colorRampPalette(YlOrBr, space = "Lab", bias = 0.8), 
    xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless", 
    levels = seq(0, 0.025, by = 0.003), main = "VIIb. Average Shift Histogram Bivariate Density", 
    key.title = title("Key\nDensity"))

plot of chunk unnamed-chunk-12

Sequential Color Encoding

# hbin() is not base R; it is expected to come from this sourced project
# file.
source("hbinFunctions.R")
# Bin the mixture with xbins = 25 and list the components of the result.
ans <- hbin(DAT[, 1], DAT[, 2], xbins = 25)
names(ans)
## [1] "cell"  "count" "xMean" "yMean"
# Draw the binned counts, asking for 5 classes.
plot(ans, main = "Hexagon Binning", nclass = 5)

plot of chunk unnamed-chunk-13

Hexagon Size Encoding

The “lattice” style centers the hexagon symbols in the hexagon cell. The “centroid” style moves the symbol closer to the center of mass in the cell but stops when necessary to keep the symbol completely inside the cell. The big symbols cannot move very far.

# Re-plot the same binning result using the "centroid" style.
plot(
    ans,
    style = "centroid",
    border = "#505050",
    col.one = "#0080FF"
)

plot of chunk unnamed-chunk-14

Nest size inside color contours to show more density details

# A much larger sample: 400,000 independent standard-normal (x, y) pairs.
n <- 4e5
x <- rnorm(n)
y <- rnorm(n)
# Bin with xbins = 30 and draw using the "nested.lat" style.
bigger <- hbin(x, y, xbins = 30)
plot(bigger, main = "Hexagon Binning", style = "nested.lat")

plot of chunk unnamed-chunk-15