ggplot2_tutorial.R

frankdavenport — May 23, 2013, 9:55 AM

#---ggplot2 Tutorial

#Author: Frank Davenport



#-------Set up----------------------------
# NOTE: the workspace is deliberately NOT wiped with rm(list = ls()) here.
# That call silently deletes whatever the person running the script has in
# their global environment, and it still does not give a clean session
# (attached packages and options survive). Restart R for a fresh start.

library(ggplot2) # the ggplot2 package
library(scales)  # complements ggplot2 and also works with other graphics packages

#---Two packages that are great for data manipulation (also by the ggplot2 author)
library(reshape2)
library(plyr)


#=========================================

#------Take a Sample from the Diamond Data-----
# Show the first six rows of the diamonds data to see which columns exist.
head(diamonds, n = 6L)
  carat       cut color clarity depth table price    x    y    z
1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48

# Fix the RNG seed so the same rows are drawn on every run
# (follows pg. 11 of the original ggplot2 book).
set.seed(1410)
# pd ("plot data"): 1000 rows of diamonds picked at random.
pd <- diamonds[sample(nrow(diamonds), size = 1000), ]

#============================================


#----Let's make some basic plots----------

# Base plot object: the data plus the x/y mapping shared by the layers below.
p <- ggplot(data = pd, mapping = aes(x = carat, y = price))
# Printing `p` at this point would show no data, because no layers/geoms
# have been supplied yet.

#--Examine as points
p1 <- p +
  geom_point()
p1

plot of chunk unnamed-chunk-1


#--Examine as a line: same base plot, drawn with a line geom instead
p2 <- p +
  geom_line()
p2

plot of chunk unnamed-chunk-1


#--Plot as points plus a fitted line
# geom_smooth() fits a line; you can specify the method, or it will pick one
# for you (in this case it uses a gam).
p3 <- p +
  geom_point() +
  geom_smooth()
p3
geom_smooth: method="auto" and size of largest group is >=1000, so using
gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the
smoothing method.

plot of chunk unnamed-chunk-1


#=============================================================


#---------Let's specify x and y in the geom, not in the call to ggplot-------

#--In the examples above, the geoms inherited their aesthetics from ggplot()
#---The x/y mapping can instead be given to the geom itself
p <- ggplot(data = pd) # no aesthetics (aes()) specified here
# The mapping now lives in geom_point(); the resulting plot is the same.
p0 <- p +
  geom_point(mapping = aes(x = carat, y = price))
p0

plot of chunk unnamed-chunk-1


#--This is useful, as not all geoms take the same aesthetics
# Draw the density directly instead of assigning it back to `p`: overwriting
# `p` would bake this layer into the base object, so every later
# `p + geom_density(...)` would stack a second density layer on top of it.
p + geom_density(mapping = aes(x = price)) # looks weird

plot of chunk unnamed-chunk-1


# An aesthetic can be set manually to a constant: here `fill` is given
# outside aes(), so it is a fixed colour rather than a mapping to the data.
p1 <- p +
  geom_density(mapping = aes(x = price), fill = "red")
p1 +
  labs(title = "Here we specify the fill color")

plot of chunk unnamed-chunk-1



# Alternatively the fill can follow an attribute of the data; note that
# `fill` is inside aes() this time, so it is mapped to `cut`.
p2 <- p +
  geom_density(mapping = aes(x = price, fill = cut))
p2 +
  labs(title = "Here we Map the fill color \nto an attribute of the data")

plot of chunk unnamed-chunk-1


#The help file for each geom specifies what aesthetics it takes

#==============================================================

#-----Let's quickly explore some common geoms
# Start again from a base plot that carries only the data; each geom below
# supplies its own aesthetics.
p <- ggplot(data = pd)

# Histogram of price, with the bars filled by cut
p +
  geom_histogram(mapping = aes(x = price, fill = cut))
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1


# Violin plot of price for each cut
p +
  geom_violin(mapping = aes(x = cut, y = price))

plot of chunk unnamed-chunk-1


# Box plot of price for each cut
p +
  geom_boxplot(mapping = aes(x = cut, y = price))

plot of chunk unnamed-chunk-1


# The text geom: each observation is drawn as its `cut` label at (carat, price)
p +
  geom_text(mapping = aes(x = carat, y = price, label = cut))

plot of chunk unnamed-chunk-1


#---The hex geom is useful for visualizing large datasets
# Same carat/price plot, but built from the full diamonds dataset, not `pd`.
ggplot(data = diamonds) +
  geom_hex(mapping = aes(x = carat, y = price), bins = 10)

plot of chunk unnamed-chunk-1


#============================================================


#---Different variations on bar graphs, by playing with the 'position' argument
# Base plot for the bar charts below: `color` on the x axis.
p <- ggplot(data = pd, mapping = aes(x = color))

# Standard bar graph
p +
  geom_bar() +
  labs(title = "Standard Bar Graph")

plot of chunk unnamed-chunk-1


# Colour the bars by another variable (cut)
p +
  geom_bar(mapping = aes(fill = cut)) +
  labs(title = "Filled by Cut")

plot of chunk unnamed-chunk-1


# Stacked ratio
p +
  geom_bar(mapping = aes(fill = cut), position = "fill") +
  labs(title = 'A stacked Ratio using "position=fill"')

plot of chunk unnamed-chunk-1


# Put the bars side by side
p +
  geom_bar(mapping = aes(fill = cut), position = "dodge") +
  labs(title = 'Side by Side, using "position=dodge"')

plot of chunk unnamed-chunk-1





#=============================================================


#----------NOW LET'S LOOK AT SCALES AND FACETS----------------

#---Let's explore a few more aesthetics, and facets------------------
# Base plot for this section: carat against price.
p <- ggplot(data = pd, mapping = aes(x = carat, y = price))
# Map the point shape to cut.
p1 <- p +
  geom_point(mapping = aes(shape = cut)) +
  labs(title = "Different Shapes for Different Cuts")
p1

plot of chunk unnamed-chunk-1


# Map the point size to depth.
p2 <- p +
  geom_point(mapping = aes(size = depth)) +
  labs(title = "Different Sizes for Different Depths")
p2

plot of chunk unnamed-chunk-1


# Size still follows depth; alpha is a constant (set outside aes()), which
# makes every point partly transparent.
p2a <- p +
  geom_point(mapping = aes(size = depth), alpha = 0.25) +
  labs(title = "Different Sizes with Transparency")
p2a

plot of chunk unnamed-chunk-1


# Build on p2: one panel per cut, with the title replaced.
p3 <- p2 +
  facet_wrap(~cut) +
  labs(title = "Different Facets for Each Cut")
p3

plot of chunk unnamed-chunk-1


# Build on p2: a grid of panels with cut in rows and color in columns.
p4 <- p2 +
  facet_grid(cut ~ color) +
  labs(title = "A Facet Grid, based on Cut and Color")
p4

plot of chunk unnamed-chunk-1


# Add a linear-model fit line on top of the facet grid from p4.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line thickness - confirm the target ggplot2 version before changing it.
p4a <- p4 +
  geom_smooth(method = "lm", size = 1) +
  labs(title = "We added a Linear Regression Line to each Facet")
p4a