Bivariate scatterplots
First prepare the Data (DAT):
library(MASS) # for mvrnorm()
# Fix the seed so the simulated mixture is reproducible.
set.seed(137)

# Each component is a bivariate normal described by a mean vector, per-axis
# standard deviations and a correlation matrix. The covariance matrix is
# D %*% R %*% D, where D = diag(sd) and R is the correlation matrix.

# Set 1
n1 <- 1000 # number of cases
m1 <- c(-2, 2) # mean
sd1 <- c(0.5, 2) # standard deviation
cor1 <- matrix(c(1, 0.3, 0.3, 1), ncol = 2) # correlation matrix
cov1 <- diag(sd1) %*% cor1 %*% diag(sd1)
# `mu` is spelled out in full rather than relying on partial matching of `m`.
set1 <- mvrnorm(n1, mu = m1, Sigma = cov1)

# Set 2
n2 <- 2000
m2 <- c(2, 2)
sd2 <- c(2.5, 1)
cor2 <- matrix(c(1, -0.25, -0.25, 1), ncol = 2)
cov2 <- diag(sd2) %*% cor2 %*% diag(sd2)
set2 <- mvrnorm(n2, mu = m2, Sigma = cov2)

# Set 3 (uncorrelated: the correlation matrix is the identity)
n3 <- 3000
m3 <- c(0, -2)
sd3 <- c(2.5, 1)
cor3 <- matrix(c(1, 0, 0, 1), ncol = 2)
cov3 <- diag(sd3) %*% cor3 %*% diag(sd3)
set3 <- mvrnorm(n3, mu = m3, Sigma = cov3)

# Combine the data rowwise. Based on the number of cases the mixture is
# 1/6 set 1, 2/6 set 2 and 3/6 set 3.
DAT <- rbind(set1, set2, set3)
And now we present the plots:
# Load the plotting helpers; gPlot() is expected to be defined in this file.
source("gridPlotFunctions.R")
# Scatterplot of all 6000 simulated points with the helper's default symbol.
gPlot(DAT[, 1], DAT[, 2], xlab = "X values simulated", ylab = "Y values simulated",
  main = "An Overplotted Scatterplot")
# Same data with pch = 19 (a solid dot in base graphics; presumably forwarded
# unchanged by gPlot -- confirm against gridPlotFunctions.R).
gPlot(DAT[, 1], DAT[, 2], pch = 19, xlab = "X values simulated", ylab = "Y values simulated",
  main = "I. More overplotting with filled dots")
We can also call plot(y, x), swapping the axes, which reflects the plot about the diagonal running from low to high values:
# Axes swapped: column 2 of DAT is drawn horizontally and column 1 vertically,
# so the axis labels are swapped to match.
gPlot(
  DAT[, 2], DAT[, 1],
  xlab = "Y values simulated",
  ylab = "X values simulated",
  main = "II. More overplotting with filled dots",
  pch = 19
)
Primitive emulation of Gaussian splatting by overplotting dots of different sizes
Below we overplot circles of four sizes to provide a crude emulation of splatting Gaussians.
# Coordinates to splat: column 1 of DAT horizontally, column 2 vertically.
x <- DAT[, 1]
y <- DAT[, 2]
# Opacity of each individual dot; colour only builds up where dots overlap.
alpha <- 0.01
# type = "n" is presumably forwarded to the base plot so that only the frame,
# labels and title are drawn here -- confirm against gridPlotFunctions.R.
gPlot(x, y, type = "n", xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless",
  main = "III. Primitive Splatting: Fuzzy, Further Processing Would Help")
# One translucent fill colour shared by every layer.
splat_fill <- rgb(0.4, 0.2, 0.8, alpha)
# Overplot borderless (col = NA) filled circles at four sizes, largest first,
# so each point contributes more colour near its centre than at its edge.
for (splat_cex in c(2, 1.5, 1, 0.5)) {
  points(x, y, pch = 21, bg = splat_fill, col = NA, cex = splat_cex, lwd = NA)
}
With a different **alpha**
# Coordinates to splat: column 1 of DAT horizontally, column 2 vertically.
x <- DAT[, 1]
y <- DAT[, 2]
# A higher per-dot opacity than in plot III, so overlaps saturate sooner.
alpha <- 0.09
# type = "n" is presumably forwarded to the base plot so that only the frame,
# labels and title are drawn here -- confirm against gridPlotFunctions.R.
gPlot(x, y, type = "n", xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless",
  main = "IV. Primitive Splatting: Fuzzy, Further Processing Would Help")
# One translucent fill colour shared by every layer.
splat_fill <- rgb(0.4, 0.2, 0.8, alpha)
# Overplot borderless (col = NA) filled circles at four sizes, largest first,
# so each point contributes more colour near its centre than at its edge.
for (splat_cex in c(2, 1.5, 1, 0.5)) {
  points(x, y, pch = 21, bg = splat_fill, col = NA, cex = splat_cex, lwd = NA)
}
*Plotting contours and points*
library("ash")
# Bin the points on a 30 x 30 grid, then smooth the bin counts with ash2()
# to obtain the bivariate density estimate that is contoured below.
bin2d <- bin2(DAT, nbin = c(30, 30))
bin2d.sm <- ash2(bin2d)
# Axis labels, the title and las must be given to plot(): with add = TRUE,
# contour() only draws the contour lines and silently ignores annotation
# arguments, so they would never be shown if passed there.
plot(DAT, col = "#B0B0B0", xlab = "X With No Units", ylab = "Y With No Units",
  main = "V. Mixture of Correlated Normals", las = 1)
# Overlay 12 density contour levels in dark red on top of the grey points.
# (A `cex` argument has no effect on an added contour; `labcex` is the
# argument that controls contour label size.)
contour(bin2d.sm, nlevels = 12, add = TRUE, col = "#A00000", lwd = 2)
Plotting filled contours. The colorRampPalette function creates a color function that interpolates to return a given number of colors. The sequential colors used here come from Cindy Brewer.
# Filled-contour view of the smoothed density with filled.contour()'s default
# levels and palette. key.title is evaluated when the key is drawn, which
# places the two-line heading above the colour key.
filled.contour(
  bin2d.sm,
  main = "VI. Average Shift Histogram Bivariate Density",
  xlab = "X Values Simulated: Unitless",
  ylab = "Y Values Simulated: Unitless",
  key.title = title(main = "Key\nDensity")
)
Modifying the contour levels and color palette
Another view, with a bias closer to 1 and a few levels specified explicitly.
# Five-step sequential yellow-orange-brown scheme, light to dark.
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
# color.palette is named in full instead of relying on partial matching of
# `color`. colorRampPalette() returns a function of n that interpolates the
# five anchors (in Lab space) to as many colours as there are level intervals.
# The explicit levels run from 0 to 0.021 in steps of 0.003.
filled.contour(bin2d.sm,
  color.palette = colorRampPalette(YlOrBr, space = "Lab", bias = 0.8),
  xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless",
  levels = seq(0, 0.021, by = 0.003), main = "VII. Average Shift Histogram Bivariate Density",
  key.title = title("Key\nDensity")) # \n uses a second line
Plotting a perspective view of the density surface
Note that the light comes from the left. Different orientations of the density surface would be helpful. Some software puts a contour plot beneath a density surface plot.
# Perspective (wireframe surface) view of the smoothed density.
# phi is the viewing colatitude, r the eye-point distance, and shade adds
# light-source shading to the facets.
persp(
  bin2d.sm,
  main = "VIII. A Single Perspective View Can Hide Features",
  xlab = "X: No Units",
  ylab = "Y: No Units",
  zlab = "Density",
  col = "lightblue",
  shade = 0.2,
  phi = 40,
  r = 4
)
Changing the binning grid
The grid resolution when binning and the grid shifting details for averaging grid densities provide two smoothing parameters. What happens when we use a grid resolution of 40 by 40 rather than 30 by 30?
# Re-bin on a finer 40 x 40 grid and re-smooth; both objects overwrite the
# 30 x 30 versions used by the earlier plots.
bin2d <- bin2(DAT, nbin = rep(40, 2))
bin2d.sm <- ash2(bin2d)
Now, let's rerun the plot VII:
# Five-step sequential yellow-orange-brown scheme, light to dark.
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
# Same settings as plot VII, applied to the current bin2d.sm.
# color.palette is named in full instead of relying on partial matching of
# `color`. The levels still stop at 0.021.
filled.contour(bin2d.sm,
  color.palette = colorRampPalette(YlOrBr, space = "Lab", bias = 0.8),
  xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless",
  levels = seq(0, 0.021, by = 0.003), main = "VIIa. Average Shift Histogram Bivariate Density",
  key.title = title("Key\nDensity"))
As we can see, the finer grid cells do less smoothing, and higher density values appear. Now, in the contour levels seq() function, let's increase the upper value so that a white hole doesn't appear:
# Five-step sequential yellow-orange-brown scheme, light to dark.
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
# color.palette is named in full instead of relying on partial matching of
# `color`. Note that seq(0, 0.025, by = 0.003) ends at 0.024, the last
# multiple of 0.003 not exceeding 0.025, so the top contour level is 0.024.
filled.contour(bin2d.sm,
  color.palette = colorRampPalette(YlOrBr, space = "Lab", bias = 0.8),
  xlab = "X Values Simulated: Unitless", ylab = "Y Values Simulated: Unitless",
  levels = seq(0, 0.025, by = 0.003), main = "VIIb. Average Shift Histogram Bivariate Density",
  key.title = title("Key\nDensity"))
Sequential Color Encoding
# Load the hexagon-binning helpers; hbin() and the plot method used below
# are expected to come from this file.
source("hbinFunctions.R")
# Bin the simulated points into hexagons, with 25 bins requested via xbins.
ans <- hbin(DAT[, 1], DAT[, 2], xbins = 25)
# Inspect the components of the binning result.
names(ans)
## [1] "cell" "count" "xMean" "yMean"
# Draw the hexagons using five classes.
plot(ans, main = "Hexagon Binning", nclass = 5)
Hexagon Size Encoding
The “lattice” style centers the hexagon symbols in the hexagon cell. The “centroid” style moves the symbol closer to the center of mass in the cell but stops when necessary to keep the symbol completely inside the cell. The big symbols cannot move very far.
# Size-encoded hexagons in the "centroid" style: one fill colour (col.one)
# with a grey border.
plot(
  ans,
  style = "centroid",
  border = "#505050",
  col.one = "#0080FF"
)
Nest size inside color contours to show more density details
# A larger example: 400,000 independent standard-normal points.
# x is drawn before y so the random stream is consumed in the same order.
n <- 4e5
x <- rnorm(n, mean = 0, sd = 1)
y <- rnorm(n, mean = 0, sd = 1)
# Bin with 30 bins requested via xbins and draw in the "nested.lat" style.
bigger <- hbin(x, y, xbins = 30)
plot(bigger, main = "Hexagon Binning", style = "nested.lat")