Base Plotting System

Title: Main procedures and graphs to perform in exploratory data analysis using base plotting System

Synopsis: This document is aimed at helping to remember the main graphs to produce when performing exploratory data analysis.

What do my data look like?

Graphics

1. Plots

Standard plot

# Sort the mpg column (first column of mtcars) and plot the values against
# their rank. ylim restricts the visible range to 10-22, so larger values
# fall outside the plotting region.
varA <- sort(mtcars$mpg)
plot(varA, xlab = "w", ylab = "Values", ylim = c(10, 22))

Pinpoint a specific graph within a collection

Plotting a linear regression, for example, produces a collection of graphs that can be accessed individually with the `which` parameter.

Let's see:

library(ggplot2)

# Keep only the "Good" cut diamonds and regress price on carat.
diamantes <- subset(diamonds, cut == "Good")
diamond_lm <- lm(price ~ carat, data = diamantes)

# plot() on an lm object offers several diagnostic panels; `which` selects
# one at a time. Draw panels 1 to 4 in order.
invisible(lapply(1:4, function(panel) plot(diamond_lm, which = panel)))

Coloring the dots

# Two independent standard-normal samples of size 100.
x <- rnorm(100)
y <- rnorm(100)
# pch = 23 is a fillable diamond: `bg` sets its fill colour, `cex` its size.
plot(x, y, pch = 23, cex = 3, bg = "blue")

Controlling the range of the x and y axis

# A noisy series with one deliberate extreme value in y.
testdat <- data.frame(x = 1:100, y = rnorm(100))
testdat[50, 2] <- 100 ## Intentional outlier
# The outlier lives on the y axis, so it is the y range that has to be
# clamped. Restricting x to c(-3, 3) would hide everything except x = 1..3.
plot(testdat$x, testdat$y, type = "l", ylim = c(-3, 3))

Controlling the transparency of the marker

# With 10,000 points, overplotting hides the density of the cloud.
x <- rnorm(10000)
y <- rnorm(10000)
# rgb(red, green, blue, alpha): black at 20% opacity, so dense regions
# appear darker than sparse ones.
plot(x, y, col = rgb(0, 0, 0, 0.2), pch = 19)

Mimicking the barplot() function

# Work on a copy of mtcars whose cyl column is a factor.
xyz <- transform(mtcars, cyl = factor(cyl))
# plot() on a factor draws a bar chart of the level counts.
plot(xyz$cyl)

Using Pairs

        # install.packages("ISLR")  # run once if the package is missing
        library(ISLR)
        # Scatterplot matrix of Smarket with its ninth column left out.
        pairs(Smarket[-9])

2. Boxplots

Changing the orientation of a boxplot

# Sorted mpg values (first column of mtcars).
varA <- sort(mtcars$mpg)
# horizontal = TRUE lays the box on its side.
boxplot(
  varA,
  horizontal = TRUE,
  main = "BoxPlotExample",
  border = rainbow(1),
  ylab = "Seconds",
  ylim = c(10, 40)
)

Plotting n boxplots from a dataset

# Split mtcars into one data frame per cylinder count, then pull the mpg
# column (column 1) out of each piece.
mtcarssplit <- split(mtcars, mtcars$cyl)
mpg4 <- mtcarssplit[["4"]][, 1]
mpg6 <- mtcarssplit[["6"]][, 1]
mpg8 <- mtcarssplit[["8"]][, 1]
# One box per vector, each with its own border colour and label.
boxplot(
  mpg4, mpg6, mpg8,
  border = rainbow(3),
  names = c("mpg4", "mpg6", "mpg8")
)

Plotting all possible boxplots from a dataset

# One box per spray type. The formula + `data =` form looks the columns up
# inside InsectSprays, so nothing has to be attach()ed to the search path.
# Colours are generated per factor level so each box gets a distinct fill.
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = rainbow(nlevels(InsectSprays$spray))
)

Suppressing outliers from a boxplot

nysalaries <- c(
  33750, 33750, 33750, 33750, 44000, 44000, 44000, 44000, 45566.67, 65000,
  95000, 103500, 112495.5, 138188, 141666.67, 181500, 185000, 190000,
  194375, 195000, 205000, 292500, 301999, 4600000, 5600000
)
# Two panels side by side. par() returns the previous settings, which are
# restored at the end so later plots are not squeezed into the 1x2 layout.
old_par <- par(mfrow = c(1, 2))
boxplot(nysalaries) # with outliers
# outline = FALSE stops the outlying points being drawn. (range = 0 would
# instead stretch the whiskers out to the extremes, keeping the box squashed.)
boxplot(nysalaries, outline = FALSE) # without outliers
par(old_par)

3. Histograms

Frequency (Represents the number of times an interval occurs in the variable)

Click here to see a detailed explanation

# Twenty sample observations.
var <- c(
  36, 25, 38, 46, 55, 68, 72, 55, 36, 38,
  67, 45, 22, 48, 91, 46, 52, 61, 58, 55
)
# Default histogram: bar heights are counts per interval.
hist(var, col = rainbow(8))

Density (Represents the percentage of times the interval occurs in the variable)

# Twenty sample observations.
var <- c(
  36, 25, 38, 46, 55, 68, 72, 55, 36, 38,
  67, 45, 22, 48, 91, 46, 52, 61, 58, 55
)
# freq = FALSE switches the y axis from counts to densities.
hist(var, freq = FALSE, col = rainbow(8))

A histogram of dates by month

# 100 evenly spaced timestamps between the two end points.
tmp <- seq(
  from = as.POSIXct("2011-08-01 13:00"),
  to = as.POSIXct("2011-12-05 03:00"),
  length.out = 100
)
# Keep only the leading "YYYY-MM-DD" part of each timestamp's text form.
tmp <- substr(as.character(tmp), 1, 10)
timeinterval <- as.Date(tmp, format = "%Y-%m-%d")
# For Date input, breaks = "month" bins the observations by calendar month.
hist(timeinterval, breaks = "month")

The least squares estimate is the empirical mean

# Read the data from a local CSV file ...
galton <- read.csv("galton.csv")
# ... instead of loading it from the UsingR package:
  # library(UsingR)
  # data(Galton)
  # galton <- Galton
hist(galton$child, col = "blue", breaks = 100)
meanChild <- mean(galton$child)
# Mark the mean with a thick red vertical segment running from y = 0 to 150.
lines(
  x = rep(meanChild, 100),
  y = seq(0, 150, length.out = 100),
  col = "red",
  lwd = 5
)

4. Barplot

Simple barplot

# Count how many cars fall into each cylinder class (second column of mtcars).
varA <- mtcars[[2]]
varAtable <- table(varA)
sum(varAtable)
## [1] 32
cyl_names <- c("4", "6", "8")
barplot(
  varAtable,
  col = rainbow(6),
  names.arg = cyl_names,
  main = "Cylinders distribution"
)

5. Pieplot

Simple pieplot

# Share of cars per cylinder class (second column of mtcars) as a pie chart.
varA <- mtcars[[2]]
varAtable <- table(varA)
cyl_names <- c("4", "6", "8")
pie(
  varAtable,
  labels = cyl_names,
  col = rainbow(6),
  main = "Cylinders distribution"
)

6. Correlation plots using the corrplot() function

Simple correlation plot

Visit here to see very detailed examples of each parameter of the corrplot() function. Very useful.

      # ?corrplot
      # install.packages("corrplot")  # run once if the package is missing
      library(corrplot)
      # Pairwise correlation matrix of all mtcars columns.
      M <- cor(mtcars)
      corrplot.mixed(M)

Adding graphical aids to a plot

Adding and colouring points to a graph

library(datasets)
# Draw every observation first. The former type = "n" was an argument of
# with(), not plot(), so it was silently ignored; it is dropped here so the
# code matches what is actually rendered.
with(airquality, plot(Wind, Ozone, main = "Ozone and Wind in New York City"))
# Overlay the May observations in red.
with(subset(airquality, Month == 5), points(Wind, Ozone, col = "red"))

Adding a legend to a graph

library(datasets)
# type = "n" must be an argument of plot() (not of with()) to set up the
# axes without drawing any points; the two groups are then added in colour.
with(
  airquality,
  plot(Wind, Ozone, main = "Ozone and Wind in New York City", type = "n")
)
with(subset(airquality, Month == 5), points(Wind, Ozone, col = "blue"))
with(subset(airquality, Month != 5), points(Wind, Ozone, col = "red"))
legend(
  "topright",
  pch = 1,
  col = c("blue", "red"),
  legend = c("May", "Other Months")
)

Baseplot with a regression line

library(datasets)
# pch = 20 has to be passed to plot(); as an argument of with() it was
# silently ignored and the default symbol was drawn.
with(
  airquality,
  plot(Wind, Ozone, main = "Ozone and Wind in New York City", pch = 20)
)
# Fit Ozone as a linear function of Wind and overlay the fitted line.
model <- lm(Ozone ~ Wind, airquality)
abline(model, lwd = 2)

Multiple base plots

library(datasets)
# One row, two columns of plots. par() returns the previous settings so the
# layout can be put back once both panels are drawn.
old_par <- par(mfrow = c(1, 2))
with(airquality, plot(Wind, Ozone, main = "Ozone and Wind"))
with(airquality, plot(Solar.R, Ozone, main = "Ozone and Solar Radiation"))
par(old_par)

Multiple base plots

library(datasets)
# One row, three columns, with custom inner (mar) and outer (oma) margins.
# The previous settings are saved and restored afterwards so they do not
# leak into later plots.
old_par <- par(mfrow = c(1, 3), mar = c(4, 4, 2, 1), oma = c(8, 8, 2, 8))
with(airquality, plot(Wind, Ozone, main = "Ozone and Wind"))
with(airquality, plot(Solar.R, Ozone, main = "Ozone and Solar Radiation"))
with(airquality, plot(Temp, Ozone, main = "Ozone and Temperature"))
# outer = TRUE writes the text into the outer margin, above all panels.
mtext("Ozone and Weather in New York City", outer = TRUE)
par(old_par)

Plotting with different colours by group

x <- rnorm(100)
y <- x + rnorm(100)
# gl(2, 50) builds a two-level factor with 50 consecutive observations per
# level; `labels` names the levels. (The earlier unlabelled gl(2, 50) call
# was overwritten immediately and has been removed.)
g <- gl(2, 50, labels = c("male", "female"))
# Set up empty axes, then add each group in its own colour.
plot(x, y, type = "n")
points(x[g == "male"], y[g == "male"], col = "red")
points(x[g == "female"], y[g == "female"], col = "blue")

Changing the size of the dot (cex)

        #install.packages("UsingR")
        library(UsingR)
## Loading required package: MASS
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
## 
## Attaching package: 'UsingR'
## The following object is masked _by_ '.GlobalEnv':
## 
##     galton
## The following object is masked from 'package:survival':
## 
##     cancer
        # Load the galton data set provided by UsingR into the workspace.
        # (The former `galton = Galton` was overwritten by this call.)
        data(galton)
        # Centre both variables on their means.
        y <- galton$child - mean(galton$child)
        x <- galton$parent - mean(galton$parent)
        # table(x, y) puts x (parent) in the first column and y (child) in
        # the second, so the names must follow that order; with them swapped
        # the plot is transposed relative to its axis labels.
        freqData <- as.data.frame(table(x, y))
        names(freqData) <- c("parent", "child", "freq")
        # Bubble size is proportional to how often each (parent, child)
        # combination occurs.
        plot(
             as.numeric(as.vector(freqData$parent)),
             as.numeric(as.vector(freqData$child)),
             pch = 21, col = "black", bg = "lightblue",
             cex = .15 * freqData$freq,
             xlab = "parent",
             ylab = "child"
              )

Graphics Annotations

1. legend

# Bar chart of cylinder counts (second column of mtcars) ...
varA <- mtcars[[2]]
varAtable <- table(varA)
cyl_names <- c("4", "6", "8")
barplot(
  varAtable,
  col = rainbow(6),
  names.arg = cyl_names,
  main = "Cylinders distribution"
)
# ... with a boxed legend on a white background in the top-left corner.
legend(
  "topleft",
  c("4 Cilindros", "6 Cilindros", "8 Cilindros"),
  pch = 15,
  col = rainbow(6),
  bty = "o",
  bg = "white"
)

library(datasets)
# type = "n" must be an argument of plot() (not of with()) to set up the
# axes without drawing any points; the two groups are then added in colour.
with(
  airquality,
  plot(Wind, Ozone, main = "Ozone and Wind in New York City", type = "n")
)
with(subset(airquality, Month == 5), points(Wind, Ozone, col = "blue"))
with(subset(airquality, Month != 5), points(Wind, Ozone, col = "red"))
legend(
  "topright",
  pch = 1,
  col = c("blue", "red"),
  legend = c("May", "Other Months")
)

x <- rnorm(100)
y <- rnorm(100)
plot(x, y)
# The legend text can be given positionally ...
legend("topleft", "Legend")
# or
# ... or by name through the `legend` argument.
legend("topleft", legend = "Legend") # just the same

2. abline

library(datasets)
# pch = 20 has to be passed to plot(); as an argument of with() it was
# silently ignored and the default symbol was drawn.
with(
  airquality,
  plot(Wind, Ozone, main = "Ozone and Wind in New York City", pch = 20)
)
# abline() accepts a fitted lm object and draws its regression line.
model <- lm(Ozone ~ Wind, airquality)
abline(model, col = "red", lwd = 2)

xyz <- mtcars
hist(xyz$wt, col = "green")
# A horizontal reference line at y = 2 and a vertical one at x = 2.2,
# drawn in a single abline() call.
abline(h = 2, v = 2.2)

3. title

library(datasets)
# Draw the scatterplot without a title ...
with(airquality, plot(Wind, Ozone))
# ... and add the title afterwards as a separate annotation.
title(main = "Ozone and Wind in New York City") ## add a title

x <- rnorm(100)
y <- rnorm(100)
# ann = FALSE suppresses the default title and axis labels; otherwise the
# labels added by title() would be printed on top of the automatic "x" and
# "y" labels.
plot(x, y, ann = FALSE)
title(main = "Scatterplot", xlab = "1", ylab = "2")

4. points

library(datasets)
# Base scatterplot of all observations.
with(
  airquality,
  plot(Wind, Ozone, main = "Ozone and Wind in New York City")
)
# Redraw the May observations in blue on top of it.
with(
  subset(airquality, Month == 5),
  points(Wind, Ozone, col = "blue")
)

5. mtext

# Plotting a title above the titles of multiple graphs
library(datasets)
# Save the previous graphical parameters so the 1x3 layout and the enlarged
# outer margins do not leak into later plots.
old_par <- par(mfrow = c(1, 3), mar = c(4, 4, 2, 1), oma = c(8, 8, 2, 8))
with(airquality, {
  plot(Wind, Ozone, main = "Ozone and Wind")
  plot(Solar.R, Ozone, main = "Ozone and Solar Radiation")
  plot(Temp, Ozone, main = "Ozone and Temperature")
  # outer = TRUE writes into the outer margin, spanning all three panels.
  mtext("Ozone and Weather in New York City", outer = TRUE)
})
par(old_par)

6. text

x <- rnorm(100)
y <- rnorm(100)
plot(x, y)
# Write the string "Label" at plot coordinates (2, 2).
text(x = 2, y = 2, labels = "Label")

# Add labels to each point plotted
set.seed(1234)
# Zero margins for this plot only; the previous margins are restored at the
# end so later plots keep their axes and labels.
old_par <- par(mar = c(0, 0, 0, 0))
# Three clusters of four points each: x centres 1, 2, 3 and y centres
# 1, 2, 1. rep(1, 2, 1) evaluates to the single value 1, which would put
# every y centre at 1.
x <- rnorm(12, mean = rep(1:3, each = 4), sd = 0.2)
y <- rnorm(12, mean = rep(c(1, 2, 1), each = 4), sd = 0.2)
plot(x, y, col = "blue", pch = 19, cex = 2)
# Offset the labels slightly so they do not sit on top of the markers.
text(x + 0.05, y + 0.05, labels = as.character(1:12))
par(old_par)

7. windows

# windows() only exists in Windows builds of R; dev.new() opens the
# platform's default graphics device everywhere else.
if (.Platform$OS.type == "windows") {
  windows()
} else {
  dev.new()
}

8. rug

# Scatterplot of stopping distance against speed ...
with(cars, plot(speed, dist))
# ... with one tick per observed speed value along the x axis.
rug(x = cars$speed)

9. lines

10. axis

Graphics devices

A graphic device is something where you can make a plot appear. To see devices available use ?Devices

1. Create a PDF File

# Open a PDF device; nothing is shown on screen while it is active.
pdf(file = "myplot.pdf")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
# Close the device so the file is written to disk.
dev.off()
## png 
##   2

2. Create a PNG File

# Open a PNG device, draw into it, then close it to write the file.
png(filename = "myplot.png")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

3. Create a JPEG File

# Open a JPEG device, draw into it, then close it to write the file.
jpeg(filename = "myplot.jpg")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

4. Create a SVG File

# Open an SVG device, draw into it, then close it to write the file.
svg(filename = "myplot.svg")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

5. Create a WIN.METAFILE File

# Open a Windows metafile device, draw into it, then close it to write the
# file. NOTE(review): win.metafile() is only available in Windows builds of R.
win.metafile(filename = "myplot.win.metafile")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

6. Create a POSTSCRIPT File

# Open a PostScript device, draw into it, then close it to write the file.
postscript(file = "myplot.ps")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

7. Create a TIFF File

# Open a TIFF device, draw into it, then close it to write the file.
tiff(filename = "myplot.tiff")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

8. Create a BMP File

# Open a BMP device, draw into it, then close it to write the file.
bmp(filename = "myplot.bmp")
with(faithful, plot(eruptions, waiting))
title(main = "Old Faithful Geyser Data")
dev.off()
## png 
##   2

9. dev.copy() copies the current graphics device to a file

Copies a plot you just generated on the screen to a file.

# Report which graphics device is currently active.
dev.cur()
## png 
##   2
dev.set(2) # Switches to another graphics device
## png 
##   2
library(datasets)
with(faithful, plot(eruptions, waiting))
title(main = "Old faithful Geyser Data")

# Copy the plot from the current device into a newly opened PNG device ...
dev.copy(device = png, filename = "geyserplot.png")
## png 
##   4
# ... and close that PNG device so the file is written.
dev.off()
## png 
##   2