Basic plots: bar plots, pie charts, histograms and density plots, scatterplots

Created on Aug 23 2013
Revised on Mon Aug 26 13:14:49 2013

The original posts are here, here, here, here, and here.

Bar plot

Simple Bar Plot

# Tabulate the number of cars for each value of mtcars$gear, then draw one
# vertical bar per value.
counts <- table(mtcars$gear)
barplot(
  height = counts,
  xlab = "Number of Gears",
  main = "Car Distribution"
)

plot of chunk unnamed-chunk-1

Simple Horizontal Bar Plot with Added Labels

# Horizontal version of the bar plot of `counts`; names.arg replaces the raw
# table names with descriptive bar labels.
barplot(
  height = counts,
  horiz = TRUE,
  names.arg = c("3 Gears", "4 Gears", "5 Gears"),
  main = "Car Distribution"
)

plot of chunk unnamed-chunk-2

Stacked Bar Plot

# Cross-tabulate mtcars$vs (rows) against mtcars$gear (columns) and draw one
# stacked bar per gear value, with one colour per vs value.
counts <- table(mtcars$vs, mtcars$gear)
barplot(
  counts,
  main = "Car Distribution by Gears and VS",
  xlab = "Number of Gears",
  col = c("darkblue", "red"),
  # Spell the argument out: `legend =` only worked through partial matching
  # of barplot()'s `legend.text` parameter.
  legend.text = rownames(counts)
)

plot of chunk unnamed-chunk-3

Grouped Bar Plot

# Same table as the stacked plot, but beside = TRUE draws the rows of `counts`
# as side-by-side bars within each column instead of stacking them.
barplot(
  counts,
  main = "Car Distribution by Gears and VS",
  xlab = "Number of Gears",
  col = c("darkblue", "red"),
  # Spell the argument out: `legend =` only worked through partial matching
  # of barplot()'s `legend.text` parameter.
  legend.text = rownames(counts),
  beside = TRUE
)

plot of chunk unnamed-chunk-4

Pie Charts

Simple Pie Chart

# Slice sizes and their labels, in matching order.
lbls <- c("US", "UK", "Australia", "Germany", "France")
slices <- c(10, 12, 4, 16, 8)
pie(
  x = slices,
  labels = lbls,
  main = "Pie Chart of Countries"
)

plot of chunk unnamed-chunk-5

Pie Chart with Annotated Percentages

# Share of each slice as a whole-number percentage of the total.
pct <- round(slices / sum(slices) * 100)
# Append the percentage and a % sign to each label, e.g. "US 20%".
# NOTE: this overwrites `lbls`, so re-running the chunk without resetting
# `lbls` appends the percentage a second time.
lbls <- paste0(lbls, " ", pct, "%")
pie(
  slices,
  labels = lbls,
  col = rainbow(length(lbls)),
  main = "Pie Chart of Countries"
)

plot of chunk unnamed-chunk-6

3D Pie Chart

# pie3D() comes from the plotrix package.
library(plotrix)
# Reset the labels and sizes so the chart shows plain country names.
lbls <- c("US", "UK", "Australia", "Germany", "France")
slices <- c(10, 12, 4, 16, 8)
pie3D(
  slices,
  labels = lbls,
  explode = 0.1,
  main = "Pie Chart of Countries "
)

plot of chunk unnamed-chunk-7

Scatterplots

Simple Scatterplot

# Put the mtcars columns on the search path so they can be used by bare name.
# Later chunks in this file (e.g. plot(wt, mpg, ...) under "combine figures")
# reference columns without their own attach() and so depend on this call.
attach(mtcars)
plot(
  x = wt,
  y = mpg,
  pch = 19,
  xlab = "Car Weight ",
  ylab = "Miles Per Gallon ",
  main = "Scatterplot Example"
)

plot of chunk unnamed-chunk-8

Add fit lines

# Scatterplot of weight against mileage with a linear fit and a lowess
# smoother. Columns are taken from mtcars explicitly: attaching the data frame
# again only stacked a duplicate on the search path and printed masking
# messages.
plot(
  mtcars$wt, mtcars$mpg,
  main = "Scatterplot Example",
  xlab = "Car Weight ",
  ylab = "Miles Per Gallon ",
  pch = 19
)
abline(lm(mpg ~ wt, data = mtcars), col = "red")  # regression line (y ~ x)
lines(lowess(mtcars$wt, mtcars$mpg), col = "blue")  # lowess line (x, y)

plot of chunk unnamed-chunk-9

Scatterplot Matrices

Basic Scatterplot Matrix

# All pairwise scatterplots of the four variables named in the formula.
pairs(
  ~ mpg + disp + drat + wt,
  data = mtcars,
  main = "Simple Scatterplot Matrix"
)

plot of chunk unnamed-chunk-10

Scatterplot Matrices from the lattice Package

The lattice package provides options to condition the scatterplot matrix on a factor.

library(lattice)
# Current symbol settings for superposed groups; reused below so the key
# shows the same plotting characters and colours as the panels.
super.sym <- trellis.par.get("superpose.symbol")
splom(
  mtcars[c(1, 3, 5, 6)],
  # Pass the grouping vector explicitly. When the first argument is a data
  # frame, splom() ignores `data =` (it warned about exactly that), so a bare
  # `cyl` was only found because mtcars happened to be attached.
  groups = mtcars$cyl,
  panel = panel.superpose,
  key = list(
    title = "Three Cylinder Options",
    columns = 3,
    points = list(pch = super.sym$pch[1:3], col = super.sym$col[1:3]),
    text = list(c("4 Cylinder", "6 Cylinder", "8 Cylinder"))
  )
)

plot of chunk unnamed-chunk-11

Scatterplot Matrices from the car Package

The car package can condition the scatterplot matrix on a factor, and optionally include lowess and linear best fit lines, and boxplot, densities, or histograms in the principal diagonal, as well as rug plots in the margins of the cells.

library(car)
## Loading required package: MASS Loading required package: nnet
# Scatterplot matrix conditioned on cyl. scatterplotMatrix() is the supported
# name; the dotted alias scatterplot.matrix() is deprecated and only emitted a
# deprecation warning pointing here.
scatterplotMatrix(
  ~ mpg + disp + drat + wt | cyl,
  data = mtcars,
  main = "Three Cylinder Options"
)

plot of chunk unnamed-chunk-12

Scatterplot Matrices from the gclus Package

The gclus package provides options to rearrange the variables so that those with higher correlations are closer to the principal diagonal. It can also color code the cells to reflect the size of the correlations.

library(gclus)
## Loading required package: cluster
# Work on the same four variables used in the other scatterplot matrices.
dta <- mtcars[c("mpg", "disp", "drat", "wt")]
# Absolute pairwise correlations drive both the colouring and the ordering.
dta.r <- abs(cor(dta))
dta.col <- dmat.color(dta.r)  # one colour per cell, derived from dta.r
# Variable order computed from the correlations, so that highly correlated
# variables end up next to the diagonal.
dta.o <- order.single(dta.r)
cpairs(
  dta, dta.o,
  panel.colors = dta.col,
  gap = 0.5,
  main = "Variables Ordered and Colored by Correlation"
)

plot of chunk unnamed-chunk-13

Histograms

Simple Histogram

# Histogram of mileage with all defaults (automatic breaks, title and labels).
hist(x = mtcars$mpg)

plot of chunk unnamed-chunk-14

Colored Histogram with Different Number of Bins

# Request roughly 12 bins and fill the bars in red.
hist(
  x = mtcars$mpg,
  col = "red",
  breaks = 12
)

plot of chunk unnamed-chunk-15

Add a Normal Curve

# Histogram of mileage with a normal curve of matching mean and sd overlaid.
x <- mtcars$mpg
h <- hist(
  x,
  breaks = 10,
  col = "red",
  xlab = "Miles Per Gallon",
  main = "Histogram with Normal Curve"
)
# 40 evenly spaced points across the data range. `length.out` is written in
# full; `length =` only worked through partial argument matching.
xfit <- seq(min(x), max(x), length.out = 40)
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
# Rescale the density to the histogram's count axis:
# density * bin width * number of observations.
yfit <- yfit * diff(h$mids[1:2]) * length(x)
lines(xfit, yfit, col = "blue", lwd = 2)

plot of chunk unnamed-chunk-16

Kernel Density Plots

Kernel density plots are usually a much more effective way to view the distribution of a variable. Create the plot using plot(density(x)), where x is a numeric vector.

Simple Kernel Density Plot

# Estimate the kernel density of mileage and plot it with default settings.
plot(
  density(x = mtcars$mpg)
)

plot of chunk unnamed-chunk-17

Filled Density Plot

# Keep the density estimate so it can be both plotted and filled.
d <- density(x = mtcars$mpg)
plot(
  d,
  main = "Kernel Density of Miles Per Gallon"
)
# Fill the area under the curve in red and outline it in blue.
polygon(
  d,
  border = "blue",
  col = "red"
)

plot of chunk unnamed-chunk-18

Comparing Groups via Kernel Density

The sm.density.compare() function in the sm package allows you to superimpose the kernel density plots of two or more groups. The format is sm.density.compare(x, factor), where x is a numeric vector and factor is the grouping variable.

# Compare MPG distributions for cars with 4, 6, or 8 cylinders
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information
## 
## Attaching package: 'sm'
## 
## The following object is masked from 'package:MASS':
## 
## muscle

# Create value labels. Columns are read from mtcars directly: attaching the
# data frame yet again only added another copy to the search path and printed
# masking messages.
cyl.f <- factor(
  mtcars$cyl,
  levels = c(4, 6, 8),
  labels = c("4 cylinder", "6 cylinder", "8 cylinder")
)

# plot one density curve per cylinder group
sm.density.compare(mtcars$mpg, cyl.f, xlab = "Miles Per Gallon")
title(main = "MPG Distribution by Car Cylinders")

# One fill colour per group. The previous expression 2:(2 + n) produced n + 1
# colours for n groups. Starting at colour 2 is kept from the original,
# presumably to match sm.density.compare()'s default group colours - confirm.
colfill <- seq_along(levels(cyl.f)) + 1L
# Fixed position instead of locator(1): a mouse click is not available when
# the script is run non-interactively, which made legend() fail with
# "invalid coordinate lengths".
legend("topright", legend = levels(cyl.f), fill = colfill)

plot of chunk unnamed-chunk-19

par()

Setting xaxt = "n" suppresses plotting of the x axis.

# par()  # uncomment to print the current settings
# Snapshot of the current graphical parameters, kept for restoring later.
opar <- par()
# Draw x and y axis labels in red from now on.
par(col.lab = "red")
# This plot is drawn with the modified setting.
hist(x = mtcars$mpg)

plot of chunk unnamed-chunk-20

# Restore the saved settings. Only the parameters that R reports as settable
# are passed back: a full par() snapshot also holds read-only entries (cin,
# cra, csi, cxy, din), and handing those to par() raises one
# "graphical parameter ... cannot be set" warning per entry.
par(opar[names(par(no.readonly = TRUE))])

text() and mtext()

text() places text within the graph while mtext() places text in one of the four margins.

# Scatterplot with every point labelled by its row name (the car model).
# with() scopes the column lookup to this chunk; attaching mtcars again only
# stacked another copy on the search path and printed masking messages.
with(mtcars, {
  plot(
    wt, mpg,
    main = "Mileage vs. Car Weight",  # fixed spelling of "Mileage"
    xlab = "Weight",
    ylab = "Mileage",
    pch = 18,
    col = "blue"
  )
  # pos = 4 places each label to the right of its point.
  text(wt, mpg, row.names(mtcars), cex = 0.6, pos = 4, col = "red")
})

plot of chunk unnamed-chunk-21

Combine Figures

# Four plots on one page in a 2 x 2 grid. par() returns the previous value of
# the parameter it changes, so the old layout can be put back afterwards.
old_mfrow <- par(mfrow = c(2, 2))
# with() makes the chunk self-contained: the bare column names no longer
# depend on mtcars having been attached by an earlier chunk.
with(mtcars, {
  plot(wt, mpg, main = "Scatterplot of wt vs. mpg")
  plot(wt, disp, main = "Scatterplot of wt vs disp")
  hist(wt, main = "Histogram of wt")
  boxplot(wt, main = "Boxplot of wt")
})
par(old_mfrow)  # restore the previous layout

plot of chunk unnamed-chunk-22

# One figure in row 1 (spanning both columns) and two figures in row 2.
# heights = c(1, 2): row 1 is half the height of row 2.
# widths = c(3, 1): column 2 is one third the width of column 1.
layout(
  matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE),
  widths = c(3, 1),
  heights = c(1, 2)
)
# with() makes the chunk self-contained: the bare column names no longer
# depend on mtcars having been attached by an earlier chunk.
with(mtcars, {
  hist(wt)
  hist(mpg)
  hist(disp)
})

plot of chunk unnamed-chunk-23

# Add boxplots to a scatterplot
# The first region is set without new = TRUE so the scatterplot starts a fresh
# page; new = TRUE here had no existing plot to draw over and only produced a
# "calling par(new=TRUE) with no plot" warning.
par(fig = c(0, 0.8, 0, 0.8))
# Weight is on the x axis and mileage on the y axis; the axis labels were
# previously swapped.
plot(mtcars$wt, mtcars$mpg, xlab = "Car Weight", ylab = "Miles Per Gallon")
# Top margin: horizontal boxplot of the x variable, drawn over the same page.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
boxplot(mtcars$wt, horizontal = TRUE, axes = FALSE)
# Right margin: vertical boxplot of the y variable.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
boxplot(mtcars$mpg, axes = FALSE)
mtext("Enhanced Scatterplot", side = 3, outer = TRUE, line = -3)

plot of chunk unnamed-chunk-24

To understand this graph, think of the full graph area as going from (0,0) in the lower left corner to (1,1) in the upper right corner. The format of the fig= parameter is a numerical vector of the form c(x1, x2, y1, y2). The first fig= sets up the scatterplot going from 0 to 0.8 on the x axis and 0 to 0.8 on the y axis. The top boxplot goes from 0 to 0.8 on the x axis and 0.55 to 1 on the y axis. I chose 0.55 rather than 0.8 so that the top figure will be pulled closer to the scatter plot. The right hand boxplot goes from 0.65 to 1 on the x axis and 0 to 0.8 on the y axis. Again, I chose a value to pull the right hand boxplot closer to the scatterplot. You have to experiment to get it just right.

legend()

# Horizontal boxplots of mileage, one per cylinder count. The group axis is
# suppressed (yaxt = "n") and a colour legend identifies the groups instead.
# `data = mtcars` replaces yet another attach(), which only printed masking
# messages.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  # The plot groups by cylinder count, not weight; also fixes "Milage".
  main = "Mileage by Number of Cylinders",
  yaxt = "n",
  xlab = "Mileage",
  horizontal = TRUE,
  col = terrain.colors(3)
)
legend(
  "topright",
  inset = 0.05,
  title = "Number of Cylinders",
  legend = c("4", "6", "8"),  # named instead of positional after named args
  fill = terrain.colors(3),
  horiz = TRUE
)

plot of chunk unnamed-chunk-25