# Created on Aug 19 2013
# Revised on Mon Aug 19 17:33:25 2013
suppressPackageStartupMessages(library(gplots))
library(gplots)
# bandplot() was created to look for changes in the mean or variance of
# scatter plots, particularly plots of regression residuals.
# Example 1: constant mean, standard deviation that grows with x.
x <- seq_len(1000)
y <- rnorm(length(x), mean = 1, sd = 1 + x / 1000)
bandplot(x, y)

# Example 2: constant variance, mean that grows with x.
x <- seq_len(1000)
y <- rnorm(length(x), mean = x / 1000, sd = 1)
bandplot(x, y)
# Example 3: both the mean and the variance increase with x.
x <- abs(rnorm(500))
y <- rnorm(length(x), mean = 2 * x, sd = 2 + 2 * x)

# A raw scatter plot makes the shift in mean and dispersion hard to see.
plot(x, y)

# A linear fit captures the trend in the mean but says nothing about the
# change in variance.
reg <- lm(y ~ x)
summary(reg)
## Sample output from one run (no seed is set, so the numbers vary):
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -13.849  -2.324  -0.006   2.255  11.347
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    0.167      0.292    0.57     0.57
## x              1.676      0.296    5.67  2.5e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.9 on 498 degrees of freedom
## Multiple R-squared:  0.0605, Adjusted R-squared:  0.0587
## F-statistic: 32.1 on 1 and 498 DF,  p-value: 2.48e-08

# bandplot on the raw data makes both the mean trend and the variance trend
# visible.
bandplot(x, y)

# bandplot of residuals against fitted values: the regression has removed the
# mean trend, but the trend in variability remains.
bandplot(predict(reg), resid(reg))
# heatmap.2 examples on the mtcars data set, converted to a numeric matrix.
data("mtcars")
x <- as.matrix(mtcars)

# One colour per row and per column, taken from the red-to-green part of the
# rainbow palette. They are not used by the calls below.
rc <- rainbow(n = nrow(x), start = 0, end = 0.3)
cc <- rainbow(n = ncol(x), start = 0, end = 0.3)

## Effect of the row/column dendrogram options.

## Default: both dendrograms plotted, rows and columns reordered.
heatmap.2(x)

## No dendrogram plotted, but reordering is still done.
heatmap.2(x, dendrogram = "none")

## Row dendrogram plotted and row reordering done.
heatmap.2(x, dendrogram = "row")

## Column dendrogram plotted and column reordering done.
heatmap.2(x, dendrogram = "col")

## Default dendrograms and reordering, with keysize set to 2.
heatmap.2(x, keysize = 2)
# lowess smoothing of stopping distance against speed (cars data set).
data("cars")

# Default (x, y) method: scatter plot plus two smooths with different spans.
plot(cars, main = "lowess(cars)")
lines(lowess(cars), col = 2)           # default smoother span f = 2/3
lines(lowess(cars, f = 0.2), col = 3)  # smaller span, follows the data closer
legend(5, 120, paste("f = ", c("2/3", ".2")), lty = 1, col = 2:3)

# Formula method: same plot and smooths, specified as dist ~ speed.
plot(dist ~ speed, data = cars, main = "lowess(cars)")
lines(lowess(dist ~ speed, data = cars), col = 2)
lines(lowess(dist ~ speed, data = cars, f = 0.2), col = 3)
legend(5, 120, paste("f = ", c("2/3", ".2")), lty = 1, col = 2:3)
# Plot group means with 95% confidence intervals: US state area by region.
data(state)
tmp <- split(state.area, state.region)

# Per-region summary statistics. vapply()/lengths() are used instead of
# sapply() so the result type is fixed regardless of the input.
means <- vapply(tmp, mean, numeric(1))
stdev <- vapply(tmp, sd, numeric(1))
n <- lengths(tmp)

# Half-width of the two-sided 95% t interval for each mean. The standard
# deviation is estimated from the sample, so the t quantile uses n - 1
# degrees of freedom (not n).
ciw <- qt(0.975, df = n - 1) * stdev / sqrt(n)

# plain
plotCI(x = means, uiw = ciw)

# prettier
plotCI(x = means, uiw = ciw, col = "black", barcol = "blue", lwd = 1)

# give mean values (rounded to the nearest thousand) as point labels and
# draw a custom x axis carrying the region names
plotCI(
  x = means, uiw = ciw, col = "black", barcol = "blue",
  labels = round(means, -3), xaxt = "n", xlim = c(0, 5)
)
axis(side = 1, at = 1:4, labels = names(tmp), cex = 0.7)

# better yet, just use plotmeans ...
plotmeans(state.area ~ state.region)