gggplot1.R

ShimadaYoshio — Oct 23, 2013, 11:14 PM

require(ggplot2)
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 3.0.2
library(gridExtra)
Loading required package: grid

# Build the demo data set. The helper vectors x, y, zx, zy and z are created
# first and stay in the workspace, just as the inline assignments left them.
x  <- rnorm(100)
y  <- rnorm(100, mean = x)                 # y is centred on x
zx <- (rep(1:10, times = 10) - 5) / 2      # grid x coordinate
zy <- (rep(1:10, each = 10) - 5) / 2       # grid y coordinate
z  <- zx * zy                              # surface height on the grid
df <- data.frame(
  x = x, y = y, zx = zx, zy = zy, z = z,
  w = seq_len(100),                        # row index
  g = rep(1:2, times = 50),                # alternating group
  h = rep(c(1, 2), each = 50)              # first half / second half
)
head(df,5)
          x        y   zx zy z w g h
1 -0.303181 -0.37569 -2.0 -2 4 1 1 1
2 -0.524697  2.98596 -1.5 -2 3 2 2 1
3  2.038368  1.62565 -1.0 -2 2 3 1 1
4 -0.302334 -1.51847 -0.5 -2 1 4 2 1
5  0.007234 -0.06949  0.0 -2 0 5 1 1
sdf <- subset(df, w <= 10)

NANANA
[1] NA
NANANA
[1] NA

# Six demo plots built from df / sdf; they are drawn two per page.
# Note: the names q and c shadow base functions as variables only; calls such
# as c(...) still resolve to the base function.

# Line plots of y against x and against the row index w.
p1 <- ggplot(df, aes(x = x, y = y)) +
  geom_line() +
  ggtitle("line1")
p2 <- ggplot(df, aes(x = w, y = y)) +
  geom_line() +
  ggtitle("line2")
# Same as line1, but one line per level of g.
p <- ggplot(df, aes(x = x, y = y, colour = factor(g))) +
  geom_line() +
  ggtitle("line3")
# y is mapped to data values, so the bars must use stat = "identity"; the
# default stat = "bin" counts rows and is deprecated together with a y mapping.
q <- ggplot(sdf, aes(x = factor(w), y = y)) +
  geom_bar(stat = "identity") +
  ggtitle("bar")
# Density-scaled histogram of x.
r <- ggplot(df, aes(x = x, y = ..density..)) +
  geom_histogram() +
  ggtitle("histogram")
# Contours of z over the regular zx/zy grid.
c <- ggplot(df, aes(x = zx, y = zy, z = z)) +
  geom_contour() +
  ggtitle("contour")
grid.arrange(p1, p2, ncol = 1)

plot of chunk unnamed-chunk-1

grid.arrange(p,q,ncol=1)
Mapping a variable to y and also using stat="bin".  With stat="bin", it
will attempt to set the y value to the count of cases in each group.  This
can result in unexpected behavior and will not be allowed in a future
version of ggplot2.  If you want y to represent counts of cases, use
stat="bin" and don't map a variable to y.  If you want y to represent
values in the data, use stat="identity".  See ?geom_bar for examples.
(Deprecated; last used in version 0.9.2)
Warning: Stacking not well defined when ymin != 0

plot of chunk unnamed-chunk-1

grid.arrange(r,c,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1


NANANA
[1] NA
data(mtcars)
head(mtcars)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
summary(mtcars)
      mpg            cyl            disp             hp       
 Min.   :10.4   Min.   :4.00   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.4   1st Qu.:4.00   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.2   Median :6.00   Median :196.3   Median :123.0  
 Mean   :20.1   Mean   :6.19   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.8   3rd Qu.:8.00   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.9   Max.   :8.00   Max.   :472.0   Max.   :335.0  
      drat            wt            qsec            vs       
 Min.   :2.76   Min.   :1.51   Min.   :14.5   Min.   :0.000  
 1st Qu.:3.08   1st Qu.:2.58   1st Qu.:16.9   1st Qu.:0.000  
 Median :3.69   Median :3.33   Median :17.7   Median :0.000  
 Mean   :3.60   Mean   :3.22   Mean   :17.8   Mean   :0.438  
 3rd Qu.:3.92   3rd Qu.:3.61   3rd Qu.:18.9   3rd Qu.:1.000  
 Max.   :4.93   Max.   :5.42   Max.   :22.9   Max.   :1.000  
       am             gear           carb     
 Min.   :0.000   Min.   :3.00   Min.   :1.00  
 1st Qu.:0.000   1st Qu.:3.00   1st Qu.:2.00  
 Median :0.000   Median :4.00   Median :2.00  
 Mean   :0.406   Mean   :3.69   Mean   :2.81  
 3rd Qu.:1.000   3rd Qu.:4.00   3rd Qu.:4.00  
 Max.   :1.000   Max.   :5.00   Max.   :8.00  
str(mtcars)
'data.frame':   32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

p <- ggplot(mtcars, aes(wt, mpg))
NANANA
[1] NA
# Add aesthetic mappings
# Each original statement ended in a dangling "+" followed by a garbled token,
# so none of a2..a6 was created and a1 was never defined at all.
# NOTE(review): a1 as the plain scatter plot is assumed from the ggplot2
# geom_point examples -- confirm against the original script.
a1 <- p + geom_point()
a2 <- p + geom_point(aes(colour = qsec))
a3 <- p + geom_point(aes(alpha = qsec))
a4 <- p + geom_point(aes(colour = factor(cyl)))
a5 <- p + geom_point(aes(shape = factor(cyl)))
a6 <- p + geom_point(aes(size = qsec))
grid.arrange(a1, a2, ncol = 1)
grid.arrange(a3, a4, ncol = 1)
grid.arrange(a5, a6, ncol = 1)

# Change scales
# The original a1 and a3 statements ended in a dangling "+" and a garbled
# token, so they failed and grid.arrange() could not find a1.
# NOTE(review): the colour-gradient and shape scales below are taken from the
# ggplot2 geom_point examples this section follows -- confirm they match the
# original script.
a1 <- p + geom_point(aes(colour = cyl)) +
  scale_colour_gradient(low = "blue")
# scale_area() is deprecated; scale_size_area() is its replacement and makes
# the point area proportional to the value by default.
a2 <- p + geom_point(aes(size = qsec)) + scale_size_area()
a3 <- p + geom_point(aes(shape = factor(cyl))) +
  scale_shape(solid = FALSE)
grid.arrange(a1, a2, a3, ncol = 1)


# Fixed (unmapped) aesthetics: every point is red with size 3.
# a1 uses the layered API, a2 the qplot() shortcut, where I() marks a constant.
a1 <- p +
  geom_point(colour = "red", size = 3)
a2 <- qplot(
  wt, mpg,
  data = mtcars,
  colour = I("red"),
  size = I(3)
)
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1



# Lower alpha values make dense regions of a large data set readable.
d <- ggplot(diamonds, aes(carat, price))
b1 <- d +
  geom_point(alpha = 1/10) +
  ggtitle("alpha = 1/10")
b2 <- d +
  geom_point(alpha = 1/20) +
  ggtitle("alpha = 1/20")
b3 <- d +
  geom_point(alpha = 1/100) +
  ggtitle("alpha = 1/100")
grid.arrange(b1, b2, b3, ncol = 1)

plot of chunk unnamed-chunk-1



# Stacking point layers of different sizes produces composite symbols.
p <- ggplot(mtcars, aes(mpg, wt))

# Grey disc underneath a point coloured by cyl.
a1 <- p +
  geom_point(colour = "grey50", size = 4) +
  geom_point(aes(colour = cyl))

# Shape mapped at plot level; large coloured symbol with a pale centre.
a2 <- p +
  aes(shape = factor(cyl)) +
  geom_point(aes(colour = factor(cyl)), size = 4) +
  geom_point(colour = "grey90", size = 1.5)

# Black rim, pink fill, shape by cyl on top.
a3 <- p +
  geom_point(colour = "black", size = 4.5) +
  geom_point(colour = "pink", size = 4) +
  geom_point(aes(shape = factor(cyl)))

# Background layers are normally left out of the legend;
# show_guide = TRUE forces them in.
a4 <- p +
  geom_point(colour = "black", size = 10, show_guide = TRUE) +
  geom_point(colour = "pink", size = 5, show_guide = TRUE) +
  geom_point(aes(shape = factor(cyl)))
grid.arrange(a1, a2, a3, a4, ncol = 2)

plot of chunk unnamed-chunk-1



# Transparent points:
a1 <- qplot(mpg, wt, data = mtcars, size = I(5),
            alpha = I(0.2))

# geom_point warns when missing values have been dropped from the data set
# and not plotted, you can turn this off by setting na.rm = TRUE
# Roughly 20% of mpg is blanked out at random. The number of draws is taken
# from the data instead of being hard-coded as 32.
mtcars2 <- transform(
  mtcars,
  mpg = ifelse(runif(nrow(mtcars)) < 0.2, NA, mpg)
)
a2 <- qplot(wt, mpg, data = mtcars2)
a3 <- qplot(wt, mpg, data = mtcars2, na.rm = TRUE)

# Use qplot instead
a4 <- qplot(wt, mpg, data = mtcars)
a5 <- qplot(wt, mpg, data = mtcars, colour = factor(cyl))
a6 <- qplot(wt, mpg, data = mtcars, colour = I("red"))
grid.arrange(a1, a2, ncol = 1)
Warning: Removed 3 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a5,a6,ncol=1)

plot of chunk unnamed-chunk-1


##^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
data(mpg)
head(mpg)
  manufacturer model displ year cyl      trans drv cty hwy fl   class
1         audi    a4   1.8 1999   4   auto(l5)   f  18  29  p compact
2         audi    a4   1.8 1999   4 manual(m5)   f  21  29  p compact
3         audi    a4   2.0 2008   4 manual(m6)   f  20  31  p compact
4         audi    a4   2.0 2008   4   auto(av)   f  21  30  p compact
5         audi    a4   2.8 1999   6   auto(l5)   f  16  26  p compact
6         audi    a4   2.8 1999   6 manual(m5)   f  18  26  p compact
summary(mpg)
     manufacturer                 model         displ           year     
 dodge     :37    caravan 2wd        : 11   Min.   :1.60   Min.   :1999  
 toyota    :34    ram 1500 pickup 4wd: 10   1st Qu.:2.40   1st Qu.:1999  
 volkswagen:27    civic              :  9   Median :3.30   Median :2004  
 ford      :25    dakota pickup 4wd  :  9   Mean   :3.47   Mean   :2004  
 chevrolet :19    jetta              :  9   3rd Qu.:4.60   3rd Qu.:2008  
 audi      :18    mustang            :  9   Max.   :7.00   Max.   :2008  
 (Other)   :74    (Other)            :177                                
      cyl              trans    drv          cty            hwy      
 Min.   :4.00   auto(l4)  :83   4:103   Min.   : 9.0   Min.   :12.0  
 1st Qu.:4.00   manual(m5):58   f:106   1st Qu.:14.0   1st Qu.:18.0  
 Median :6.00   auto(l5)  :39   r: 25   Median :17.0   Median :24.0  
 Mean   :5.89   manual(m6):19           Mean   :16.9   Mean   :23.4  
 3rd Qu.:8.00   auto(s6)  :16           3rd Qu.:19.0   3rd Qu.:27.0  
 Max.   :8.00   auto(l6)  : 6           Max.   :35.0   Max.   :44.0  
                (Other)   :13                                        
 fl             class   
 c:  1   2seater   : 5  
 d:  5   compact   :47  
 e:  8   midsize   :41  
 p: 52   minivan   :11  
 r:168   pickup    :33  
         subcompact:35  
         suv       :62  
str(mpg)
'data.frame':   234 obs. of  11 variables:
 $ manufacturer: Factor w/ 15 levels "audi","chevrolet",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ model       : Factor w/ 38 levels "4runner 4wd",..: 2 2 2 2 2 2 2 3 3 3 ...
 $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
 $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
 $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
 $ trans       : Factor w/ 10 levels "auto(av)","auto(l3)",..: 4 9 10 1 4 9 1 9 4 10 ...
 $ drv         : Factor w/ 3 levels "4","f","r": 2 2 2 2 2 2 2 1 1 1 ...
 $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
 $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
 $ fl          : Factor w/ 5 levels "c","d","e","p",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ class       : Factor w/ 7 levels "2seater","compact",..: 2 2 2 2 2 2 2 2 2 2 ...

# Highway mileage against engine displacement, with and without jitter.
p <- ggplot(mpg, aes(displ, hwy))
a1 <- p + geom_point()
a2 <- p + geom_point(position = "jitter")

# Jittered points coloured by cylinder count.
a3 <- p + geom_jitter(aes(colour = cyl))

# Jitter restricted to one direction.
a4 <- p +
  geom_jitter(position = position_jitter(width = .5)) +
  ggtitle("width = .5")
a5 <- p +
  geom_jitter(position = position_jitter(height = 1)) +
  ggtitle("height = 1")
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,a5,ncol=1)

plot of chunk unnamed-chunk-1



# The same jitter plots through qplot(); in a2/a3 the order of the geom
# vector decides which layer is drawn on top.
a1 <- qplot(displ, hwy, data = mpg, geom = "jitter")
a2 <- qplot(class, hwy, data = mpg,
            geom = c("boxplot", "jitter"))
a3 <- qplot(class, hwy, data = mpg,
            geom = c("jitter", "boxplot"))
grid.arrange(a1, a2, a3, ncol = 1)

plot of chunk unnamed-chunk-1


NANANA
[1] NA
data(movies)
head(movies)
                     title year length budget rating votes   r1   r2  r3
1                        $ 1971    121     NA    6.4   348  4.5  4.5 4.5
2        $1000 a Touchdown 1939     71     NA    6.0    20  0.0 14.5 4.5
3   $21 a Day Once a Month 1941      7     NA    8.2     5  0.0  0.0 0.0
4                  $40,000 1996     70     NA    8.2     6 14.5  0.0 0.0
5 $50,000 Climax Show, The 1975     71     NA    3.4    17 24.5  4.5 0.0
6                    $pent 2000     91     NA    4.3    45  4.5  4.5 4.5
    r4   r5   r6   r7   r8   r9  r10 mpaa Action Animation Comedy Drama
1  4.5 14.5 24.5 24.5 14.5  4.5  4.5           0         0      1     1
2 24.5 14.5 14.5 14.5  4.5  4.5 14.5           0         0      1     0
3  0.0  0.0 24.5  0.0 44.5 24.5 24.5           0         1      0     0
4  0.0  0.0  0.0  0.0  0.0 34.5 45.5           0         0      1     0
5 14.5 14.5  4.5  0.0  0.0  0.0 24.5           0         0      0     0
6 14.5 14.5 14.5  4.5  4.5 14.5 14.5           0         0      0     1
  Documentary Romance Short
1           0       0     0
2           0       0     0
3           0       0     1
4           0       0     0
5           0       0     0
6           0       0     0
summary(movies)
    title                year          length         budget        
 Length:58788       Min.   :1893   Min.   :   1   Min.   :0.00e+00  
 Class :character   1st Qu.:1958   1st Qu.:  74   1st Qu.:2.50e+05  
 Mode  :character   Median :1983   Median :  90   Median :3.00e+06  
                    Mean   :1976   Mean   :  82   Mean   :1.34e+07  
                    3rd Qu.:1997   3rd Qu.: 100   3rd Qu.:1.50e+07  
                    Max.   :2005   Max.   :5220   Max.   :2.00e+08  
                                                  NA's   :53573     
     rating          votes              r1               r2       
 Min.   : 1.00   Min.   :     5   Min.   :  0.00   Min.   : 0.00  
 1st Qu.: 5.00   1st Qu.:    11   1st Qu.:  0.00   1st Qu.: 0.00  
 Median : 6.10   Median :    30   Median :  4.50   Median : 4.50  
 Mean   : 5.93   Mean   :   632   Mean   :  7.01   Mean   : 4.02  
 3rd Qu.: 7.00   3rd Qu.:   112   3rd Qu.:  4.50   3rd Qu.: 4.50  
 Max.   :10.00   Max.   :157608   Max.   :100.00   Max.   :84.50  

       r3              r4               r5              r6      
 Min.   : 0.00   Min.   :  0.00   Min.   :  0.0   Min.   : 0.0  
 1st Qu.: 0.00   1st Qu.:  0.00   1st Qu.:  4.5   1st Qu.: 4.5  
 Median : 4.50   Median :  4.50   Median :  4.5   Median :14.5  
 Mean   : 4.72   Mean   :  6.37   Mean   :  9.8   Mean   :13.0  
 3rd Qu.: 4.50   3rd Qu.:  4.50   3rd Qu.: 14.5   3rd Qu.:14.5  
 Max.   :84.50   Max.   :100.00   Max.   :100.0   Max.   :84.5  

       r7              r8              r9              r10       
 Min.   :  0.0   Min.   :  0.0   Min.   :  0.00   Min.   :  0.0  
 1st Qu.:  4.5   1st Qu.:  4.5   1st Qu.:  4.50   1st Qu.:  4.5  
 Median : 14.5   Median : 14.5   Median :  4.50   Median : 14.5  
 Mean   : 15.6   Mean   : 13.9   Mean   :  8.95   Mean   : 16.9  
 3rd Qu.: 24.5   3rd Qu.: 24.5   3rd Qu.: 14.50   3rd Qu.: 24.5  
 Max.   :100.0   Max.   :100.0   Max.   :100.00   Max.   :100.0  

    mpaa           Action         Animation          Comedy     
      :53864   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
 NC-17:   16   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
 PG   :  528   Median :0.0000   Median :0.0000   Median :0.000  
 PG-13: 1003   Mean   :0.0797   Mean   :0.0628   Mean   :0.294  
 R    : 3377   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.000  
               Max.   :1.0000   Max.   :1.0000   Max.   :1.000  

     Drama        Documentary        Romance           Short      
 Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
 1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
 Median :0.000   Median :0.0000   Median :0.0000   Median :0.000  
 Mean   :0.371   Mean   :0.0591   Mean   :0.0807   Mean   :0.161  
 3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.000  
 Max.   :1.000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  

str(movies)
'data.frame':   58788 obs. of  24 variables:
 $ title      : chr  "$" "$1000 a Touchdown" "$21 a Day Once a Month" "$40,000" ...
 $ year       : int  1971 1939 1941 1996 1975 2000 2002 2002 1987 1917 ...
 $ length     : int  121 71 7 70 71 91 93 25 97 61 ...
 $ budget     : int  NA NA NA NA NA NA NA NA NA NA ...
 $ rating     : num  6.4 6 8.2 8.2 3.4 4.3 5.3 6.7 6.6 6 ...
 $ votes      : int  348 20 5 6 17 45 200 24 18 51 ...
 $ r1         : num  4.5 0 0 14.5 24.5 4.5 4.5 4.5 4.5 4.5 ...
 $ r2         : num  4.5 14.5 0 0 4.5 4.5 0 4.5 4.5 0 ...
 $ r3         : num  4.5 4.5 0 0 0 4.5 4.5 4.5 4.5 4.5 ...
 $ r4         : num  4.5 24.5 0 0 14.5 14.5 4.5 4.5 0 4.5 ...
 $ r5         : num  14.5 14.5 0 0 14.5 14.5 24.5 4.5 0 4.5 ...
 $ r6         : num  24.5 14.5 24.5 0 4.5 14.5 24.5 14.5 0 44.5 ...
 $ r7         : num  24.5 14.5 0 0 0 4.5 14.5 14.5 34.5 14.5 ...
 $ r8         : num  14.5 4.5 44.5 0 0 4.5 4.5 14.5 14.5 4.5 ...
 $ r9         : num  4.5 4.5 24.5 34.5 0 14.5 4.5 4.5 4.5 4.5 ...
 $ r10        : num  4.5 14.5 24.5 45.5 24.5 14.5 14.5 14.5 24.5 4.5 ...
 $ mpaa       : Factor w/ 5 levels "","NC-17","PG",..: 1 1 1 1 1 1 5 1 1 1 ...
 $ Action     : int  0 0 0 0 0 0 1 0 0 0 ...
 $ Animation  : int  0 0 1 0 0 0 0 0 0 0 ...
 $ Comedy     : int  1 1 0 1 0 0 0 0 0 0 ...
 $ Drama      : int  1 0 0 0 0 1 1 0 1 0 ...
 $ Documentary: int  0 0 0 0 0 0 0 1 0 0 ...
 $ Romance    : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Short      : int  0 0 1 0 0 0 0 1 0 0 ...

# Count movies per year within each rounded rating; one row per
# (rating, year) pair.
mry <- do.call(rbind, by(movies, round(movies$rating), function(grp) {
  # Number of movies released in each year of this rating group.
  counts <- tapply(grp$length, grp$year, length)
  data.frame(
    rating = round(grp$rating[1]),
    year = as.numeric(names(counts)),
    number = as.vector(counts)
  )
}))

# One line per rating.
p <- ggplot(mry, aes(x = year, y = number, group = rating))
a1 <- p + geom_line()

# Rating mapped to line size, then to colour.
a2 <- p + geom_line(aes(size = rating))
a3 <- p + geom_line(aes(colour = rating))

# Same colour mapping with a custom low end of the gradient.
a4 <- p +
  geom_line(aes(colour = rating)) +
  scale_colour_gradient(low = "red")
grid.arrange(a1, a2, a3, a4, ncol = 2)

plot of chunk unnamed-chunk-1


#^^^^^^
# Rating mapped to line size, restricted to a narrow size range.
a1 <- p +
  geom_line(aes(size = rating)) +
  scale_size(range = c(0.1, 3))
# Constant colour and size for every line.
a2 <- p + geom_line(colour = "red", size = 1)
# qplot() equivalent of the basic line plot.
a3 <- qplot(year, number, data = mry, group = rating, geom = "line")
grid.arrange(a1, a2, a3, ncol = 1)

plot of chunk unnamed-chunk-1


# Using a time series
data(economics)
head(economics,3)
        date   pce    pop psavert uempmed unemploy
1 1967-06-30 507.8 198712     9.8     4.5     2944
2 1967-07-31 510.9 198911     9.8     4.7     2945
3 1967-08-31 516.7 199113     9.0     4.6     2958
summary(economics)
      date                 pce            pop            psavert     
 Min.   :1967-06-30   Min.   : 508   Min.   :198712   Min.   :-3.00  
 1st Qu.:1977-06-07   1st Qu.:1272   1st Qu.:220094   1st Qu.: 4.00  
 Median :1987-05-15   Median :3082   Median :242516   Median : 7.60  
 Mean   :1987-05-16   Mean   :3654   Mean   :246349   Mean   : 6.72  
 3rd Qu.:1997-04-22   3rd Qu.:5474   3rd Qu.:272277   3rd Qu.: 9.50  
 Max.   :2007-03-31   Max.   :9705   Max.   :301913   Max.   :14.60  
    uempmed         unemploy    
 Min.   : 4.00   Min.   : 2685  
 1st Qu.: 5.80   1st Qu.: 6052  
 Median : 6.90   Median : 7188  
 Mean   : 7.12   Mean   : 6997  
 3rd Qu.: 8.38   3rd Qu.: 8250  
 Max.   :12.30   Max.   :12051  
str(economics)
'data.frame':   478 obs. of  6 variables:
 $ date    : Date, format: "1967-06-30" "1967-07-31" ...
 $ pce     : num  508 511 517 513 518 ...
 $ pop     : int  198712 198911 199113 199311 199498 199657 199808 199920 200056 200208 ...
 $ psavert : num  9.8 9.8 9 9.8 9.7 9.4 9 9.5 8.9 9.6 ...
 $ uempmed : num  4.5 4.7 4.6 4.9 4.7 4.8 5.1 4.5 4.1 4.6 ...
 $ unemploy: int  2944 2945 2958 3143 3066 3018 2878 3001 2877 2709 ...

# Population over time: linear axis, log y axis, recent subset only, and
# line size mapped to the unemployment share.
a1 <- qplot(date, pop, data = economics, geom = "line")
a2 <- qplot(date, pop, data = economics, geom = "line", log = "y")
a3 <- qplot(
  date, pop,
  data = subset(economics, date > as.Date("2006-1-1")),
  geom = "line"
)
a4 <- qplot(
  date, pop,
  data = economics,
  size = unemploy/pop,
  geom = "line"
)

# The arrow parameter puts an arrow head on the line (see ?grid::arrow).
c <- ggplot(economics, aes(x = date, y = pop))
library(grid)
# arrow() with defaults marks the last point only.
a5 <- c + geom_line(arrow = arrow())
a6 <- c + geom_line(
  arrow = arrow(angle = 15, ends = "both", type = "closed")
)
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a5,a6,ncol=1)

plot of chunk unnamed-chunk-1


# See scale_date for examples of plotting multiple times series on
# a single graph

# A simple pcp example

# Simulated data for a parallel-coordinates plot: 300 units in three groups,
# with one value for 2005 and a randomly rescaled value for 2010.
y2005 <- runif(300, 20, 120)
y2010 <- y2005 * runif(300, -1.05, 1.5)
group <- rep(LETTERS[1:3], each = 100)

df <- data.frame(id = seq_along(group), group, y2005, y2010)
library(reshape2) # for melt
# Long format: one row per (id, year variable). The argument is spelled out as
# id.vars instead of relying on partial matching of "id.var".
dfm <- melt(df, id.vars = c("id", "group"))
head(dfm,5)
  id group variable value
1  1     A    y2005 92.50
2  2     A    y2005 75.95
3  3     A    y2005 47.39
4  4     A    y2005 53.69
5  5     A    y2005 24.68

# All groups together: one path per id, coloured by group.
a1 <- ggplot(dfm, aes(variable, value, group = id, colour = group)) +
  geom_path(alpha = 0.5)

# One panel per group, each drawn in a fixed palette colour. dfm1 is reused
# as scratch space and ends up holding the group "C" rows.
dfm1 <- dfm[dfm$group == "A", ]
a2 <- ggplot(dfm1, aes(variable, value, group = id, colour = group)) +
  geom_path(alpha = 0.5, colour = 2)

dfm1 <- dfm[dfm$group == "B", ]
a3 <- ggplot(dfm1, aes(variable, value, group = id, colour = group)) +
  geom_path(alpha = 0.5, colour = 3)

dfm1 <- dfm[dfm$group == "C", ]
a4 <- ggplot(dfm1, aes(variable, value, group = id, colour = group)) +
  geom_path(alpha = 0.5, colour = 4)
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,ncol=1)

plot of chunk unnamed-chunk-1


NANANA
[1] NA
# Simple quantiles/ECDF from examples(plot)
x <- sort(rnorm(47))
a1 <- qplot(seq_along(x), x, geom = "step") + ggtitle("step")
# Steps go horizontally, then vertically (default)
a2 <- qplot(seq_along(x), x, geom = "step", direction = "hv")
# Base-graphics equivalent for comparison. plot() draws as a side effect and
# returns NULL, so its result is not assigned.
plot(x, type = "s")

plot of chunk unnamed-chunk-1

# Steps go vertically, then horizontally
a4 <- qplot(seq_along(x), x, geom = "step", direction = "vh")
# Base-graphics equivalent; plot() returns NULL, so nothing is assigned.
plot(x, type = "S")

plot of chunk unnamed-chunk-1

grid.arrange(a1,a2,a4,ncol=1)

plot of chunk unnamed-chunk-1


par(mfrow=c(1,1))
plot(x, type = "s")

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1))

# Also works with other aesthetics
# 50 sorted normal draws, each randomly labelled with treatment "a" or "b".
df <- data.frame(
  x = sort(rnorm(50)),
  # replace is spelled out instead of relying on partial matching of "rep"
  trt = sample(c("a", "b"), 50, replace = TRUE)
)
qplot(seq_along(x), x, data = df, geom="step", colour = trt)

plot of chunk unnamed-chunk-1


NANANA
[1] NA
# stat_smooth covers the built-in model fits. When something more flexible
# is needed, fit any model yourself and plot its predictions, as shown here.
a1 <- qplot(
  wt, mpg,
  data = mtcars,
  colour = factor(cyl)
)

# Linear model: mpg explained by weight plus a cylinder-count factor.
model <- lm(mpg ~ wt + factor(cyl), data = mtcars)
summary(model)

Call:
lm(formula = mpg ~ wt + factor(cyl), data = mtcars)

Residuals:
   Min     1Q Median     3Q    Max 
-4.589 -1.236 -0.516  1.384  5.792 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)    33.991      1.888   18.01  < 2e-16 ***
wt             -3.206      0.754   -4.25  0.00021 ***
factor(cyl)6   -4.256      1.386   -3.07  0.00472 ** 
factor(cyl)8   -6.071      1.652   -3.67  0.00100 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.56 on 28 degrees of freedom
Multiple R-squared:  0.837, Adjusted R-squared:  0.82 
F-statistic: 48.1 on 3 and 28 DF,  p-value: 3.59e-11

# Prediction grid: 20 evenly spaced weights for every cylinder level.
# length.out is spelled out instead of relying on partial matching of "length".
grid <- with(mtcars, expand.grid(
  wt = seq(min(wt), max(wt), length.out = 20),
  cyl = levels(factor(cyl))
))

grid$mpg <- stats::predict(model, newdata = grid)  # predicted values

# The original statement ended in a dangling "+" and a garbled token, so a2
# was never rebuilt and the later grid.arrange() drew a stale a2.
# NOTE(review): geom_line(data = grid) is taken from the ggplot2 geom_smooth
# example this section follows -- confirm it matches the original script.
a2 <- qplot(wt, mpg, data = mtcars, colour = factor(cyl)) +
  geom_line(data = grid)

# or with standard errors

# se.fit is spelled out instead of relying on partial matching of "se".
err <- stats::predict(model, newdata = grid, se.fit = TRUE)
err
$fit
    1     2     3     4     5     6     7     8     9    10    11    12 
29.14 28.48 27.82 27.16 26.50 25.84 25.18 24.52 23.86 23.20 22.54 21.88 
   13    14    15    16    17    18    19    20    21    22    23    24 
21.22 20.56 19.90 19.24 18.58 17.92 17.26 16.60 24.89 24.23 23.57 22.91 
   25    26    27    28    29    30    31    32    33    34    35    36 
22.25 21.59 20.93 20.27 19.61 18.95 18.29 17.63 16.97 16.31 15.65 14.99 
   37    38    39    40    41    42    43    44    45    46    47    48 
14.33 13.67 13.01 12.35 23.07 22.41 21.75 21.09 20.43 19.77 19.11 18.45 
   49    50    51    52    53    54    55    56    57    58    59    60 
17.79 17.13 16.47 15.81 15.15 14.49 13.83 13.17 12.51 11.85 11.19 10.53 

$se.fit
     1      2      3      4      5      6      7      8      9     10 
0.9663 0.8815 0.8176 0.7798 0.7719 0.7948 0.8461 0.9209 1.0142 1.1212 
    11     12     13     14     15     16     17     18     19     20 
1.2385 1.3634 1.4939 1.6288 1.7671 1.9079 2.0508 2.1954 2.3413 2.4884 
    21     22     23     24     25     26     27     28     29     30 
1.5481 1.4301 1.3199 1.2195 1.1316 1.0592 1.0057 0.9742 0.9670 0.9844 
    31     32     33     34     35     36     37     38     39     40 
1.0253 1.0870 1.1663 1.2597 1.3645 1.4781 1.5987 1.7249 1.8555 1.9896 
    41     42     43     44     45     46     47     48     49     50 
1.9950 1.8500 1.7068 1.5658 1.4278 1.2937 1.1648 1.0431 0.9314 0.8338 
    51     52     53     54     55     56     57     58     59     60 
0.7556 0.7036 0.6835 0.6982 0.7456 0.8201 0.9151 1.0249 1.1452 1.2731 

$df
[1] 28

$residual.scale
[1] 2.557
# Approximate 95% limits: fitted value plus/minus 1.96 standard errors.
grid[["ucl"]] <- with(err, fit + 1.96 * se.fit)
grid[["lcl"]] <- with(err, fit - 1.96 * se.fit)

# Draw the pre-computed fit and band; stat = "identity" stops geom_smooth
# from fitting its own model.
a3 <- qplot(wt, mpg, data = mtcars, colour = factor(cyl)) +
  geom_smooth(
    aes(ymin = lcl, ymax = ucl),
    data = grid,
    stat = "identity"
  )
grid.arrange(a1, a2, a3, ncol = 1)

plot of chunk unnamed-chunk-1



NANANA
[1] NA
# 2-d rectangular binning of the diamonds x/y dimensions, with both axes
# limited to 4..10.
d <- ggplot(diamonds, aes(x = x, y = y)) +
  xlim(4, 10) +
  ylim(4, 10)
a1 <- d + geom_bin2d()                          # default bins
a2 <- d + geom_bin2d(binwidth = c(0.1, 0.1))    # 0.1 x 0.1 bins
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1


# 2-d binning of price against carat, with different ways to set the bins.
d <- ggplot(diamonds, aes(carat, price))
a1 <- d + stat_bin2d() + ggtitle("stat_bin2d()")
a2 <- d + geom_bin2d() + ggtitle("geom_bin2d()")
# You can control the size of the bins by specifying the number of
# bins in each direction:
a3 <- d + stat_bin2d(bins = 10) + ggtitle("stat_bin2d(bins = 10)")
a4 <- d + stat_bin2d(bins = 30) + ggtitle("stat_bin2d(bins = 30)")
# Or by specifying the width of the bins
# (the two titles below were missing their closing parenthesis)
a5 <- d + stat_bin2d(binwidth = c(1, 1000)) +
  ggtitle("stat_bin2d(binwidth = c(1, 1000))")
a6 <- d + stat_bin2d(binwidth = c(.1, 500)) +
  ggtitle("stat_bin2d(binwidth = c(.1, 500))")
# Or with a list of breaks
# length.out is spelled out instead of relying on partial matching of "length".
x <- seq(min(diamonds$carat), max(diamonds$carat), by = 0.1)
y <- seq(min(diamonds$price), max(diamonds$price), length.out = 50)
a7 <- d + stat_bin2d(breaks = list(x = x, y = y)) +
  ggtitle("breaks = list(x = x, y = y)")
# With qplot
# Here x and y are the diamonds columns, because qplot() looks in data first.
a8 <- qplot(x, y, data = diamonds, geom = "bin2d",
            xlim = c(4, 10), ylim = c(4, 10)) + ggtitle("bin2d")

a9 <- qplot(x, y, data = diamonds, geom = "bin2d", binwidth = c(0.1, 0.1),
            xlim = c(4, 10), ylim = c(4, 10)) + ggtitle("bin2d,binwidth")
grid.arrange(a1, a2, a3, ncol = 1)

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,a6,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a7,a8,a9,ncol=1)

plot of chunk unnamed-chunk-1

NANANA
[1] NA

# Hexagonal binning of price against carat, once via the stat and once via
# the geom.
d <- ggplot(diamonds, aes(carat, price))
a1 <- d +
  stat_binhex()
a2 <- d +
  geom_hex()
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1


# Bin size set through the number of bins per direction.
a4 <- d +
  stat_binhex(bins = 10)
a5 <- d +
  stat_binhex(bins = 30)
grid.arrange(a4, a5, ncol = 1)

plot of chunk unnamed-chunk-1


# Bin size set through explicit widths (carat units, price units).
a6 <- d +
  stat_binhex(binwidth = c(1, 1000))
a7 <- d +
  stat_binhex(binwidth = c(.1, 500))
grid.arrange(a6, a7, ncol = 1)

plot of chunk unnamed-chunk-1


# qplot() versions on the diamonds x/y dimensions, both axes limited to 4..10.
a8 <- qplot(
  x, y,
  data = diamonds,
  geom = "hex",
  xlim = c(4, 10),
  ylim = c(4, 10)
)
a9 <- qplot(
  x, y,
  data = diamonds,
  geom = "hex",
  xlim = c(4, 10),
  ylim = c(4, 10),
  binwidth = c(0.1, 0.1)
)
grid.arrange(a8, a9, ncol = 1)
Warning: Removed 478 rows containing missing values (stat_hexbin).
Warning: Removed 478 rows containing missing values (stat_hexbin).

plot of chunk unnamed-chunk-1


#grid.arrange(a1,a2,a4,a5,a6,a7,a8,a9,ncol=3)

# Hexbin of petal length against sepal length. The columns are referenced by
# name (columns 1, 3 and 5 of iris) so ggplot2 takes them from the data
# argument and labels the axes and legend with readable names.
d <- ggplot(iris, aes(Sepal.Length, Petal.Length, colour = Species))
d + stat_binhex()

plot of chunk unnamed-chunk-1



# Quantile regression of rating on year, using a random sample of 1000 movies.
msamp <- movies[sample(nrow(movies), 1000), ]
m <- ggplot(msamp, aes(year, rating)) +
  geom_point()

# Default quantiles, then the median only.
a1 <- m + stat_quantile()
a2 <- m + stat_quantile(quantiles = 0.5)

# Every 5th percentile from 5% to 95%.
q10 <- seq(0.05, 0.95, by = 0.05)
a3 <- m + stat_quantile(quantiles = q10)

# Smooth quantiles through rqss.
a4 <- m + stat_quantile(method = "rqss")

# rqss does not choose a smoothing constant on its own, so lambda has to be
# tuned by hand.
a5 <- m + stat_quantile(method = "rqss", lambda = 10)
a6 <- m + stat_quantile(method = "rqss", lambda = 100)

# Number of votes used as weights in the quantile fit.
a7 <- m + stat_quantile(aes(weight = votes))

# Lines coloured by quantile, with the default and a diverging colour scale.
a8 <- m + stat_quantile(aes(colour = ..quantile..), quantiles = q10)
a9 <- m +
  stat_quantile(aes(colour = ..quantile..), quantiles = q10) +
  scale_colour_gradient2(midpoint = 0.5)

# Fixed colour, size and line type.
a10 <- m + stat_quantile(colour = "red", size = 2, linetype = 2)
# qplot() version; this one uses the full movies data, not the sample.
a11 <- qplot(year, rating, data = movies, geom = "quantile")

grid.arrange(a1, a2, a3, a4, ncol = 2)
Smoothing formula not specified. Using: y ~ x Smoothing formula not
specified. Using: y ~ x Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ qss(x, lambda = 1)

plot of chunk unnamed-chunk-1

grid.arrange(a5,a6,a7,a8,ncol=2)
Smoothing formula not specified. Using: y ~ qss(x, lambda = 10) Smoothing
formula not specified. Using: y ~ qss(x, lambda = 100) Smoothing formula
not specified. Using: y ~ x Smoothing formula not specified. Using: y ~ x

plot of chunk unnamed-chunk-1

grid.arrange(a9,a10,a11,ncol=2)
Smoothing formula not specified. Using: y ~ x Smoothing formula not
specified. Using: y ~ x Smoothing formula not specified. Using: y ~ x

plot of chunk unnamed-chunk-1

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generate data: means and standard errors of means for prices
# for each type of cut
dmod <- lm(price ~ cut, data = diamonds)
# One row per cut with the fitted mean ("fit") and its standard error
# ("se.fit"). se.fit is spelled out instead of relying on partial matching
# of "se".
cuts <- data.frame(
  cut = unique(diamonds$cut),
  predict(dmod, data.frame(cut = unique(diamonds$cut)),
          se.fit = TRUE)[c("fit", "se.fit")]
)
head(cuts,5)
        cut  fit se.fit
1     Ideal 3458  27.00
2   Premium 4584  33.75
3      Good 3929  56.59
4 Very Good 3982  36.06
5      Fair 4359  98.79
# Point estimate per cut.
a1 <- qplot(cut, fit, data = cuts)

# With a bar chart, we are comparing lengths, so the y-axis is
# automatically extended to include 0
# fit holds data values, so the bars need stat = "identity"; the default
# stat = "bin" combined with a y mapping is deprecated.
a2 <- ggplot(cuts, aes(cut, fit)) +
  geom_bar(stat = "identity")

# Display estimates and standard errors in various ways
se <- ggplot(cuts, aes(cut, fit,
                       ymin = fit - se.fit, ymax = fit + se.fit,
                       colour = cut))
a3 <- se + geom_linerange()
a4 <- se + geom_pointrange()

a5 <- se + geom_errorbar(width = 0.5)
a6 <- se + geom_crossbar(width = 0.5)
# Use coord_flip to flip the x and y axes
a7 <- se + geom_linerange() + coord_flip()
grid.arrange(a1, a2, ncol = 1)
Mapping a variable to y and also using stat="bin".  With stat="bin", it
will attempt to set the y value to the count of cases in each group.  This
can result in unexpected behavior and will not be allowed in a future
version of ggplot2.  If you want y to represent counts of cases, use
stat="bin" and don't map a variable to y.  If you want y to represent
values in the data, use stat="identity".  See ?geom_bar for examples.
(Deprecated; last used in version 0.9.2)

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a5,a6,a7,ncol=1)

plot of chunk unnamed-chunk-1

#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Lake Huron water level by year (1875-1972).
huron <- data.frame(
  year = 1875:1972,
  level = as.vector(LakeHuron)
)

library(plyr) # to access round_any
# Decade of each year, rounded down to a multiple of 10.
huron$decade <- round_any(huron$year, 10, floor)
h <- ggplot(huron, aes(x = year))
# Ribbon from 0 up to the level, next to the geom_area() equivalent.
a1 <- h + geom_ribbon(aes(ymin = 0, ymax = level))
a2 <- h + geom_area(aes(y = level))

# Band of +/- 1 around the level, without and with the level line.
a3 <- h + geom_ribbon(aes(ymin = level - 1, ymax = level + 1))
a4 <- h +
  geom_ribbon(aes(ymin = level - 1, ymax = level + 1)) +
  geom_line(aes(y = level))

# Blank out 1901-1909 to show how missing values are handled.
huron[huron$year > 1900 & huron$year < 1910, "level"] <- NA
h <- ggplot(huron, aes(x = year))
a5 <- h +
  geom_ribbon(aes(ymin = level - 1, ymax = level + 1)) +
  geom_line(aes(y = level))

# A second data set with many y values per x.
m <- ggplot(movies, aes(y = votes, x = year))
a7 <- m + geom_point()

# Ribbon summaries per year: min/max, then median_hilow.
a8 <- m + stat_summary(geom = "ribbon", fun.ymin = "min", fun.ymax = "max")
a9 <- m + stat_summary(geom = "ribbon", fun.data = "median_hilow")
# qplot() version with area and line layers.
a10 <- qplot(year, level, data = huron, geom = c("area", "line"))
grid.arrange(a1, a2, a3, a4, ncol = 2)

plot of chunk unnamed-chunk-1

grid.arrange(a5,a7,a8,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a9,a10,ncol=1)
Warning: Removed 9 rows containing missing values (position_stack).

plot of chunk unnamed-chunk-1

##^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
data(mtcars)
# Base plot shared by the smoother examples: wt against qsec
c <- ggplot(mtcars, aes(qsec, wt))
a1 <- c + stat_smooth()
a2 <- c + stat_smooth() + geom_point()
# The layout argument is `ncol`. A misspelt name is not matched, so its value
# is passed through `...` as if it were a plot and grid.arrange rejects it.
grid.arrange(a1, a2, ncol = 2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method. geom_smooth:
method="auto" and size of largest group is <1000, so using loess. Use
'method = x' to change the smoothing method.
Error: input must be grobs!

# Adjust parameters
a1 <- c + stat_smooth(se = FALSE) + geom_point() + 
  ggtitle("stat_smooth(se = FALSE)")
a1
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

# Smoother with span = 0.9 over a single layer of the raw points
a2 <- c + stat_smooth(span = 0.9) + geom_point() +
  ggtitle("stat_smooth(span = 0.9)")
a2
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

# Smoother with a 0.99 confidence level over a single layer of the raw points
a3 <- c + stat_smooth(level = 0.99) + geom_point() +
  ggtitle("stat_smooth(level = 0.99) ")
a3
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

# Linear-model smoother over a single layer of the raw points
a4 <- c + stat_smooth(method = "lm") + geom_point() +
  ggtitle("stat_smooth(method = lm)")
a4

plot of chunk unnamed-chunk-1

grid.arrange(a1,a2,a3,a4,ncol=2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method. geom_smooth:
method="auto" and size of largest group is <1000, so using loess. Use
'method = x' to change the smoothing method. geom_smooth: method="auto"
and size of largest group is <1000, so using loess. Use 'method = x' to
change the smoothing method.

plot of chunk unnamed-chunk-1


library(splines)
library(MASS)
Warning: package 'MASS' was built under R version 3.0.1
# Smoothers with an explicit formula: an ns(x, 3) spline term fitted by
# lm and by rlm
a1 <- c + stat_smooth(method = "lm", formula = y ~ ns(x, 3)) +
  geom_point()
a2 <- c + stat_smooth(method = rlm, formula = y ~ ns(x, 3)) + geom_point()
# The default confidence band uses a transparent colour.
# This currently only works on a limited number of graphics devices
# (including Quartz, PDF, and Cairo) so you may need to set the
# fill colour to an opaque colour, as shown below
a3 <- c + stat_smooth(fill = "grey50", size = 2, alpha = 1)
a4 <- c + stat_smooth(fill = "blue", size = 2, alpha = 1)
# The colour of the line can be controlled with the colour aesthetic
a5 <- c + stat_smooth(fill = "blue", colour = "darkblue", size = 2)
a6 <- c + stat_smooth(fill = "blue", colour = "darkblue", size = 2,
                      alpha = 0.2)
a7 <- c + geom_point() +
  stat_smooth(fill = "blue", colour = "darkblue", size = 2, alpha = 0.2)
# No empty slot between the plots and ncol: a doubled comma creates a
# missing argument and the call fails.
grid.arrange(a1, a2, a3, a4, ncol = 2)
Error: argument is missing, with no default
grid.arrange(a5,a6,a7,ncol=1)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method. geom_smooth:
method="auto" and size of largest group is <1000, so using loess. Use
'method = x' to change the smoothing method. geom_smooth: method="auto"
and size of largest group is <1000, so using loess. Use 'method = x' to
change the smoothing method.

plot of chunk unnamed-chunk-1

#
#Smoothers for subsets
c <- ggplot(mtcars, aes(y=wt, x=mpg)) + facet_grid(. ~ cyl)
a1<- c + stat_smooth(method=lm) + geom_point()
a2 <- c + stat_smooth(method=lm, fullrange = TRUE) + geom_point()
# Geoms and stats are automatically split by aesthetics that are factors
c <- ggplot(mtcars, aes(y=wt, x=mpg, colour=factor(cyl)))
a3 <- c + stat_smooth(method=lm) + geom_point()
a4 <- c + stat_smooth(method=lm, aes(fill = factor(cyl))) + geom_point()
a5 <- c + stat_smooth(method=lm, fullrange=TRUE, alpha = 0.1) + 
  geom_point()
grid.arrange(a1,a2,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,a5,ncol=1)

plot of chunk unnamed-chunk-1


# Use qplot instead
a1 <- qplot(qsec, wt, data=mtcars, geom=c("smooth", "point"))
# Example with logistic regression
data("kyphosis", package="rpart")
a2 <- qplot(Age, Kyphosis, data=kyphosis)
a3 <- qplot(Age, data=kyphosis, facets = . ~ Kyphosis, binwidth = 10)
a4 <- qplot(Age, Kyphosis, data=kyphosis, position="jitter")

a5 <- qplot(Age, Kyphosis, data=kyphosis, 
            position=position_jitter(height=0.1))
a6 <- qplot(Age, as.numeric(Kyphosis) - 1, data = kyphosis) +
  stat_smooth(method="glm", family="binomial")
a7 <- qplot(Age, as.numeric(Kyphosis) - 1, data=kyphosis) +
  stat_smooth(method="glm", family="binomial", formula = y ~ ns(x, 2))
grid.arrange(a1,a2,a3,a4,ncol=2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

grid.arrange(a5,a6,a7,ncol=1)

plot of chunk unnamed-chunk-1


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

NANANA
[1] NA
p <- ggplot(mtcars, aes(factor(cyl), mpg))
a1 <- p + geom_boxplot()
a2 <- qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot")
a3 <- p + geom_boxplot() + geom_jitter()
a4 <- p + geom_boxplot() + coord_flip()
a5 <- qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot") +
  coord_flip()
a6 <- p + geom_boxplot(notch = TRUE)
a7 <- p + geom_boxplot(notch = TRUE, notchwidth = .3)
a8 <- p + geom_boxplot(outlier.colour = "green", outlier.size = 3)
grid.arrange(a1,a2,a3,a4,a5,a6,a7,a8,ncol=3)
notch went outside hinges. Try setting notch=FALSE. notch went outside
hinges. Try setting notch=FALSE. notch went outside hinges. Try setting
notch=FALSE. notch went outside hinges. Try setting notch=FALSE.

plot of chunk unnamed-chunk-1


# Add aesthetic mappings
# Note that boxplots are automatically dodged when any aesthetic is
# a factor
a1 <- p + geom_boxplot(aes(fill = cyl)) + ggtitle("fill = cyl")
a2 <- p + geom_boxplot(aes(fill = factor(cyl))) + ggtitle("fill = factor(cyl)")
a3 <- p + geom_boxplot(aes(fill = factor(vs))) + ggtitle("fill = factor(vs)")
a4 <- p + geom_boxplot(aes(fill = factor(am))) + ggtitle("fill = factor(am)")
grid.arrange(a1,a2,a3,a4,ncol=2)

plot of chunk unnamed-chunk-1


# Set aesthetics to fixed value
a1 <- p + geom_boxplot(fill = "grey80", colour = "#3366FF") + 
  ggtitle("fill = grey80")
a2 <- qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot",
            colour = I("#3366FF")) + ggtitle("boxplot")
grid.arrange(a1,a2,ncol=1)

plot of chunk unnamed-chunk-1


# Scales vs. coordinate transforms -------
# Scale transformations occur before the boxplot statistics are computed.
# Coordinate transformations occur afterwards.  Observe the effect on the
# number of outliers.
library(plyr) # to access round_any
m <- ggplot(movies, aes(y = votes, x = rating,
                        group = round_any(rating, 0.5)))

a1 <- m + geom_boxplot() + ggtitle("boxplot")
a2 <- m + geom_boxplot() + scale_y_log10() + ggtitle("scale_y_log10")
a3 <- m + geom_boxplot() + coord_trans(y = "log10") + ggtitle("log10")
a4 <- m + geom_boxplot() + scale_y_log10() + coord_trans(y = "log10") + 
  ggtitle("scale_y_log10,log10")
grid.arrange(a1,a2,a3,a4,ncol=1)
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


# Boxplots with continuous x:
# Use the group aesthetic to group observations in boxplots
a1 <- qplot(year, budget, data = movies, geom = "boxplot")
a2 <- qplot(year, budget, data = movies, geom = "boxplot",
            group = round_any(year, 10, floor))
grid.arrange(a1,a2,ncol=2)
Warning: Removed 53573 rows containing non-finite values (stat_boxplot).
Warning: Removed 53573 rows containing non-finite values (stat_boxplot).
Warning: position_dodge requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


# Using precomputed statistics
# generate sample data
abc <- adply(matrix(rnorm(100), ncol = 5), 2, 
             quantile, c(0, .25, .5, .75, 1))
abc
  X1     0%     25%     50%    75%  100%
1  1 -2.498 -0.5968  0.0642 0.7384 1.323
2  2 -1.975 -0.5898  0.2200 0.7591 1.548
3  3 -2.305 -0.9384 -0.1083 0.7961 2.258
4  4 -1.383 -0.5142  0.3853 0.7614 0.967
5  5 -1.339 -0.8860 -0.2558 0.7127 1.384

b <- ggplot(abc, aes(x = X1, ymin = `0%`, lower = `25%`, middle = `50%`, upper = `75%`, ymax = `100%`))
b + geom_boxplot(stat = "identity")

plot of chunk unnamed-chunk-1

b + geom_boxplot(stat = "identity") + coord_flip()

plot of chunk unnamed-chunk-1

b + geom_boxplot(aes(fill = X1), stat = "identity")

plot of chunk unnamed-chunk-1


NANANA
[1] NA

a1 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot() + 
  ggtitle("geom_dotplot")
a2 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot(binwidth = 1.5) +
  ggtitle("binwidth = 1.5")
# Use fixed-width bins
a3 <- ggplot(mtcars, aes(x = mpg)) +
  geom_dotplot(method="histodot", binwidth = 1.5) + 
  ggtitle("histodot,binwidth = 1.5")
# Some other stacking methods
a4 <- ggplot(mtcars, aes(x = mpg)) +
  geom_dotplot(binwidth = 1.5, stackdir = "center") +
  ggtitle("binwidth = 1.5,center")
a5 <- ggplot(mtcars, aes(x = mpg)) +
  geom_dotplot(binwidth = 1.5, stackdir = "centerwhole") +
  ggtitle("binwidth = 1.5,centerwhole")
grid.arrange(a1,a2,a3,a4,a5,ncol=2)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1



# y axis isn't really meaningful, so hide it.
# breaks = NULL removes the breaks; breaks = NA is deprecated.
a1 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot(binwidth = 1.5) +
  scale_y_continuous(name = "", breaks = NULL)

# Overlap dots vertically
a2 <- ggplot(mtcars, aes(x = mpg)) +
  geom_dotplot(binwidth = 1.5, stackratio = .7)

# Expand dot diameter
a3 <- ggplot(mtcars, aes(x = mpg)) +
  geom_dotplot(binwidth = 1.5, dotsize = 1.25)
grid.arrange(a1, a2, a3, ncol = 1)
Warning: breaks = NA is deprecated. Please use breaks = NULL to remove
breaks in the scale. (Deprecated; last used in version 0.8.9)

plot of chunk unnamed-chunk-1


# Examples with stacking along y axis instead of x
a1 <- ggplot(mtcars, aes(x = 1, y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "center")

a2 <- ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "center")
a3 <- ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "centerwhole")
a4 <- ggplot(mtcars, aes(x = factor(vs), fill = factor(cyl), y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "center", position = "dodge")
# binpositions="all" ensures that the bins are aligned between groups.
# The layer has to follow the `+` directly; otherwise the next statement is
# added to the plot and a5 ends up without any layer.
a5 <- ggplot(mtcars, aes(x = factor(am), y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "center", binpositions = "all")
grid.arrange(a1, a2, a3, ncol = 1)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to
adjust this. stat_bindot: binwidth defaulted to range/30. Use 'binwidth =
x' to adjust this.

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,ncol=1)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Error: No layers in plot

# Stacking multiple groups, with different fill
a1 <- ggplot(mtcars, aes(x = mpg, fill = factor(cyl))) +
  geom_dotplot(stackgroups = TRUE, binwidth = 1, binpositions = "all")

a2 <- ggplot(mtcars, aes(x = mpg, fill = factor(cyl))) +
  geom_dotplot(stackgroups = TRUE, binwidth = 1,
               method = "histodot")
# The layer function is geom_dotplot. binaxis = "y" bins the variable mapped
# to y, so mpg is mapped to y with a constant x.
# NOTE(review): the original mapped mpg to x, which cannot work with
# binaxis = "y" -- confirm this is the intended plot.
a3 <- ggplot(mtcars, aes(x = 1, y = mpg, fill = factor(cyl))) +
  geom_dotplot(binaxis = "y", stackdir = "center", binpositions = "all")
Error: could not find function "m_dotplot"

a4 <- ggplot(mtcars, aes(x = 1, y = mpg, fill = factor(cyl))) +
  geom_dotplot(binaxis = "y", stackgroups = TRUE, binwidth = 1, 
               method = "histodot")
grid.arrange(a1,a2,a3,a4,ncol=1)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1


# Violin plot ###
p <- ggplot(mtcars, aes(factor(cyl), mpg))

a1 <- p + geom_violin() + ggtitle("violin")
a2 <- qplot(factor(cyl), mpg, data = mtcars, geom = "violin")
a3 <- p + geom_violin() + geom_jitter(height = 0) +
  ggtitle("violin,height = 0")
a4 <- p + geom_violin() + coord_flip() + ggtitle("violin,coord_flip()")
a5 <- qplot(factor(cyl), mpg, data = mtcars, geom = "violin") +
  coord_flip()
# Scale maximum width proportional to sample size:
a6 <- p + geom_violin(scale = "count") + ggtitle("violin,count")
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,a6,ncol=1)

plot of chunk unnamed-chunk-1


# Scale maximum width to 1 for all violins:
a1 <- p + geom_violin(scale = "width")
# Default is to trim violins to the range of the data. To disable:
a2 <- p + geom_violin(trim = FALSE)
# Use a smaller bandwidth for closer density fit (default is 1).
a3 <- p + geom_violin(adjust = .5)
grid.arrange(a1,a2,a3,ncol=2)

plot of chunk unnamed-chunk-1


# Add aesthetic mappings
# Note that violins are automatically dodged when any aesthetic is
# a factor
a1 <- p + geom_violin(aes(fill = cyl))
a2 <- p + geom_violin(aes(fill = factor(cyl)))
a3 <- p + geom_violin(aes(fill = factor(vs)))
a4 <- p + geom_violin(aes(fill = factor(am)))
# Set aesthetics to fixed value
a5 <- p + geom_violin(fill = "grey80", colour = "#3366FF")
a6 <- qplot(factor(cyl), mpg, data = mtcars, geom = "violin",
            colour = I("#3366FF"))
grid.arrange(a1,a2,a3,a4,a5,a6,ncol=2)

plot of chunk unnamed-chunk-1


# Scales vs. coordinate transforms -------
# Scale transformations occur before the density statistics are computed.
# Coordinate transformations occur afterwards.  Observe the effect on the
# number of outliers.
library(plyr) # to access round_any
m <- ggplot(movies, aes(y = votes, x = rating,
                        group = round_any(rating, 0.5)))
a1 <- m + geom_violin()
a2 <- m + geom_violin() + scale_y_log10()
a3 <- m + geom_violin() + coord_trans(y = "log10")
a4 <- m + geom_violin() + scale_y_log10() + coord_trans(y = "log10")
# Violin plots with continuous x:
# Use the group aesthetic to group observations in violins
a5 <- qplot(year, budget, data = movies, geom = "violin")
a6 <- qplot(year, budget, data = movies, geom = "violin",
            group = round_any(year, 10, floor))
grid.arrange(a1,a2,a3,a4,a5,a6,ncol=2)
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: Removed 53573 rows containing non-finite values (stat_ydensity).
Warning: Removed 53573 rows containing non-finite values (stat_ydensity).
Warning: position_dodge requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


NANANA
[1] NA
#Generate data
c <- ggplot(mtcars, aes(factor(cyl)))

# By default, uses stat="bin", which gives the count in each category
a1 <- c + geom_bar() + ggtitle("bar")
a2 <- c + geom_bar(width=.5) + ggtitle("bar,width=.5")
a3 <- c + geom_bar() + coord_flip() + ggtitle("bar,coord_flip()")
a4 <- c + geom_bar(fill="white", colour="darkgreen") +
  ggtitle("bar,fill=white")
grid.arrange(a1,a2,a3,a4,ncol=2)

plot of chunk unnamed-chunk-1


# Use qplot
a1 <- qplot(factor(cyl), data=mtcars, geom="bar")
a2 <- qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(cyl))
grid.arrange(a1,a2,ncol=1)

plot of chunk unnamed-chunk-1


# When the data contains y values in a column, use stat="identity"
library(plyr)
# Calculate the mean mpg for each level of cyl
mm <- ddply(mtcars, "cyl", summarise, mmpg = mean(mpg))
a1 <- ggplot(mm, aes(x = factor(cyl), y = mmpg)) + 
  geom_bar(stat = "identity")
# Stacked bar charts
a2 <- qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(vs))
a3 <- qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(gear))
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1


# Stacked bar charts are easy in ggplot2, but not effective visually,
# particularly when there are many different things being stacked
a1 <- ggplot(diamonds, aes(clarity, fill=cut)) + geom_bar()
a2 <- ggplot(diamonds, aes(color, fill=cut)) + geom_bar() + 
  coord_flip()

# Faceting is a good alternative:
a3 <- ggplot(diamonds, aes(clarity)) + geom_bar() +
  facet_wrap(~ cut)

# If the x axis is ordered, using a line instead of bars is another

# possibility:
a4 <- ggplot(diamonds, aes(clarity)) +
  geom_freqpoly(aes(group = cut, colour = cut))
# Dodged bar charts
a5 <- ggplot(diamonds, aes(clarity, fill=cut)) + 
  geom_bar(position="dodge")
# compare with
a6 <- ggplot(diamonds, aes(cut, fill=cut)) + geom_bar() +
  facet_grid(. ~ clarity)
grid.arrange(a1,a2,a3,a4,a5,a6,ncol=2)

plot of chunk unnamed-chunk-1


# But again, probably better to use frequency polygons instead:
a1 <- ggplot(diamonds, aes(clarity, colour=cut)) +
  geom_freqpoly(aes(group = cut))

# Often we don't want the height of the bar to represent the
# count of observations, but the sum of some other variable.
# For example, the following plot shows the number of diamonds
# of each colour
a2 <- qplot(color, data=diamonds, geom="bar")

# If, however, we want to see the total number of carats in each colour
# we need to weight by the carat variable
a3 <- qplot(color, data=diamonds, geom="bar", weight=carat, 
            ylab="carat")
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1


# A bar chart used to display means
meanprice <- tapply(diamonds$price, diamonds$cut, mean)
cut <- factor(levels(diamonds$cut), levels = levels(diamonds$cut))
a1 <- qplot(cut, meanprice)
a2 <- qplot(cut, meanprice, geom="bar", stat="identity")
a3 <- qplot(cut, meanprice, geom="bar", stat="identity", 
            fill = I("grey50"))
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1


# Another stacked bar chart example
k <- ggplot(mpg, aes(manufacturer, fill=class))
a1 <- k + geom_bar()

# Use scales to change aesthetics defaults
a2 <- k + geom_bar() + scale_fill_brewer()
a3 <- k + geom_bar() + scale_fill_grey()
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1



# To change plot order of class variable
# use factor() to change order of levels
mpg$class <- factor(mpg$class, levels = c("midsize", "minivan",
                                          "suv", "compact", "2seater", "subcompact", "pickup"))
m <- ggplot(mpg, aes(manufacturer, fill=class))
m + geom_bar()

plot of chunk unnamed-chunk-1




## An interval represented by a vertical line.###
# Generate data: means and standard errors of means for prices
# for each type of cut
dmod <- lm(price ~ cut, data=diamonds)
cuts <- data.frame(cut=unique(diamonds$cut), 
                   predict(dmod, 
                           data.frame(cut = unique(diamonds$cut)), 
                           se=TRUE)[c("fit","se.fit")])
cuts
        cut  fit se.fit
1     Ideal 3458  27.00
2   Premium 4584  33.75
3      Good 3929  56.59
4 Very Good 3982  36.06
5      Fair 4359  98.79

# Fitted value for each cut, drawn as points
a1 <- qplot(cut, fit, data = cuts)

# With a bar chart, we are comparing lengths, so the y-axis is
# automatically extended to include 0.
# `fit` already holds the bar heights, so they are drawn as-is with
# stat = "identity"; mapping y while geom_bar stays on its default
# stat = "bin" is deprecated.
a2 <- ggplot(cuts, aes(cut, fit)) + geom_bar(stat = "identity")

# Display estimates and standard errors in various ways
se <- ggplot(cuts, aes(cut, fit,
                       ymin = fit - se.fit, ymax = fit + se.fit,
                       colour = cut))
a3 <- se + geom_linerange()

a4 <- se + geom_pointrange()
# Each variant gets its own name so the errorbar plot is not overwritten
# by the crossbar plot before it is drawn.
a5 <- se + geom_errorbar(width = 0.5)
a6 <- se + geom_crossbar(width = 0.5)
# Use coord_flip to flip the x and y axes
a7 <- se + geom_linerange() + coord_flip()
grid.arrange(a1, a2, a3, a4, a5, a6, a7, ncol = 2)
Mapping a variable to y and also using stat="bin".  With stat="bin", it
will attempt to set the y value to the count of cases in each group.  This
can result in unexpected behavior and will not be allowed in a future
version of ggplot2.  If you want y to represent counts of cases, use
stat="bin" and don't map a variable to y.  If you want y to represent
values in the data, use stat="identity".  See ?geom_bar for examples.
(Deprecated; last used in version 0.9.2)

plot of chunk unnamed-chunk-1


##   histogram    ##
set.seed(5689)
movies <- movies[sample(nrow(movies), 1000), ]
# Simple examples
a1 <- qplot(rating, data=movies, geom="histogram") + 
  ggtitle("histogram")
a2 <- qplot(rating, data=movies, weight=votes, geom="histogram") +
  ggtitle("histogram,weight=votes")
a3 <- qplot(rating, data=movies, weight=votes, geom="histogram", 
            binwidth=1) + ggtitle("histogram,binwidth=1")
a4 <- qplot(rating, data=movies, weight=votes, geom="histogram", 
            binwidth=0.1) + ggtitle("histogram,binwidth=0.1")
grid.arrange(a1,a2,a3,a4,ncol=2)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


# More complex
m <- ggplot(movies, aes(x=rating))
a1 <- m + geom_histogram()
a2 <- m + geom_histogram(aes(y = ..density..)) + geom_density()####
a3 <- m + geom_histogram(binwidth = 1)
a4 <- m + geom_histogram(binwidth = 0.5)
a5 <- m + geom_histogram(binwidth = 0.1)
# Add aesthetic mappings
a6 <- m + geom_histogram(aes(weight = votes))
a7 <- m + geom_histogram(aes(y = ..count..))

a8 <- m + geom_histogram(aes(fill = ..count..))
# Change scales
a9 <- m + geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "green", high = "red")
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,a6,ncol=1)
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1

grid.arrange(a7,a8,a9,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


# Often we don't want the height of the bar to represent the
# count of observations, but the sum of some other variable.
# For example, the following plot shows the number of movies
# in each rating.
a1 <- qplot(rating, data=movies, geom="bar", binwidth = 0.1)
# If, however, we want to see the number of votes cast in each
# category, we need to weight by the votes variable
a2 <- qplot(rating, data=movies, geom="bar", binwidth = 0.1,
            weight=votes, ylab = "votes")
grid.arrange(a1,a2,ncol=1)
Warning: position_stack requires constant width: output may be incorrect
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1



m <- ggplot(movies, aes(x = votes))
# For transformed scales, binwidth applies to the transformed data.
# The bins have constant width on the transformed scale.
a1 <- m + geom_histogram() + scale_x_log10()
a2 <- m + geom_histogram(binwidth = 1) + scale_x_log10()
a3 <- m + geom_histogram() + scale_x_sqrt()
a4 <- m + geom_histogram(binwidth = 10) + scale_x_sqrt()
grid.arrange(a1,a2,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

grid.arrange(a3,a4,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1


# For transformed coordinate systems, the binwidth applies to the
# raw data.  The bins have constant width on the original scale.

# Using log scales does not work here, because the first
# bar is anchored at zero, and so when transformed becomes negative
# infinity.  This is not a problem when transforming the scales, because
# no observations have 0 ratings.
a1 <- m + geom_histogram(origin = 0) + coord_trans(x = "log10")
# Use origin = 0, to make sure we don't take sqrt of negative values
a2 <- m + geom_histogram(origin = 0) + coord_trans(x = "sqrt")
a3 <- m + geom_histogram(origin = 0, binwidth = 1000) + 
  coord_trans(x = "sqrt")
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to
adjust this.

plot of chunk unnamed-chunk-1


# You can also transform the y axis.  Remember that the base of the bars
# has value 0, so log transformations are not appropriate
m <- ggplot(movies, aes(x = rating))
a1 <- m + geom_histogram(binwidth = 0.5) + scale_y_sqrt()
a2 <- m + geom_histogram(binwidth = 0.5) + scale_y_reverse()
# Set aesthetics to fixed value
a3 <- m + geom_histogram(colour = "darkgreen", fill = "white", 
                         binwidth = 0.5)
grid.arrange(a1,a2,a3,ncol=1)
Warning: Stacking not well defined when ymin != 0

plot of chunk unnamed-chunk-1


#Use facets
a1 <- m <- m + geom_histogram(binwidth = 0.5)
a2 <- m + facet_grid(Action ~ Comedy)
grid.arrange(a1,a2,ncol=1)

plot of chunk unnamed-chunk-1


# Often more useful to use density on the y axis when facetting
a1 <- m <- m + aes(y = ..density..)
a2 <- m + facet_grid(Action ~ Comedy)
a3 <- m + facet_wrap(~ mpaa)
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1


# Multiple histograms on the same graph
# see ?position, ?position_fill, etc for more details.
set.seed(6298)
diamonds_small <- diamonds[sample(nrow(diamonds), 1000), ]
ggplot(diamonds_small, aes(x=price)) + geom_bar()
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1


hist_cut <- ggplot(diamonds_small, aes(x=price, fill=cut))
a1 <- hist_cut + geom_bar() # defaults to stacking
a2 <- hist_cut + geom_bar(position="fill")
a3 <- hist_cut + geom_bar(position="dodge")
# This is easy in ggplot2, but not visually effective.  It's better
# to use a frequency polygon or density plot.  Like this:
a4 <- ggplot(diamonds_small, aes(price, ..density.., colour = cut)) +
  geom_freqpoly(binwidth = 1000)
# Or this:
a5 <- ggplot(diamonds_small, aes(price, colour = cut)) +
  geom_density()
# Or if you want to be fancy, maybe even this:
a6 <- ggplot(diamonds_small, aes(price, fill = cut)) +
  geom_density(alpha = 0.2)
# Which looks better when the distributions are more distinct
a7 <- ggplot(diamonds_small, aes(depth, fill = cut)) +
  geom_density(alpha = 0.2) + xlim(55, 70)
grid.arrange(a1,a2,a3,a4,ncol=2)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to
adjust this. stat_bin: binwidth defaulted to range/30. Use 'binwidth = x'
to adjust this.

plot of chunk unnamed-chunk-1

grid.arrange(a5,a6,a7,ncol=1)
Warning: Removed 2 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-1


NANANA
[1] NA
a1 <- qplot(carat, data = diamonds, geom = "freqpoly")
a2 <- qplot(carat, data = diamonds, geom = "freqpoly", 
            binwidth = 0.1)
a3 <- qplot(carat, data = diamonds, geom = "freqpoly", 
            binwidth = 0.01)
a4 <- qplot(price, data = diamonds, geom = "freqpoly", 
            binwidth = 1000)
a5 <- qplot(price, data = diamonds, geom = "freqpoly", 
            binwidth = 1000,colour = color)
a6 <- qplot(price, ..density.., data = diamonds, 
            geom = "freqpoly",
            binwidth = 1000, colour = color)
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,a6,ncol=1)

plot of chunk unnamed-chunk-1


NANANA
[1] NA
m <- ggplot(movies, aes(x = rating))
a1 <- m + geom_density()
# Adjust parameters
a2 <- m + geom_density(kernel = "rectangular")
a3 <- m + geom_density(kernel = "biweight")
a4 <- m + geom_density(kernel = "epanechnikov")
a5 <- m + geom_density(adjust=1/5) # Very rough
a6 <- m + geom_density(adjust=5) # Very smooth
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,a6,ncol=1)

plot of chunk unnamed-chunk-1


# Adjust aesthetics
a1 <- m + geom_density(aes(fill=factor(Drama)), size=2)
# Scale so peaks have same height:
a2 <- m + geom_density(aes(fill=factor(Drama), y = ..scaled..), size=2)
a3 <- m + geom_density(colour="darkgreen", size=2)
a4 <- m + geom_density(colour="darkgreen", size=2, fill=NA)
a5 <- m + geom_density(colour="darkgreen", size=2, fill="green")
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1

grid.arrange(a4,a5,ncol=1)

plot of chunk unnamed-chunk-1


# Change scales
(m <- ggplot(movies, aes(x=votes)) + geom_density(trim = TRUE))

plot of chunk unnamed-chunk-1

m + scale_x_log10()

plot of chunk unnamed-chunk-1

m + coord_trans(x="log10")

plot of chunk unnamed-chunk-1

m + scale_x_log10() + coord_trans(x="log10")

plot of chunk unnamed-chunk-1


# Also useful with
m + stat_bin()
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

# Make a volcano plot
ggplot(diamonds, aes(x = price)) +
  stat_density(aes(ymax = ..density..,  ymin = -..density..),
               fill = "grey50", colour = "grey50",
               geom = "ribbon", position = "identity") +
  facet_grid(. ~ cut) + coord_flip()

plot of chunk unnamed-chunk-1

# Stacked density plots
# If you want to create a stacked density plot, you need to use
# the 'count' (density * n) variable instead of the default density

# Loses marginal densities
a1 <- qplot(rating, ..density.., data=movies, geom="density", 
            fill=mpaa, position="stack")
# Preserves marginal densities
a2 <- qplot(rating, ..count.., data=movies, geom="density", fill=mpaa, 
            position="stack")
# You can use position="fill" to produce a conditional density estimate
a3 <- qplot(rating, ..count.., data=movies, geom="density", fill=mpaa,
            position="fill")
grid.arrange(a1,a2,a3,ncol=1)

plot of chunk unnamed-chunk-1


# Need to be careful with weighted data
m <- ggplot(movies, aes(x=rating, weight=votes))
a1 <- m + geom_histogram(aes(y = ..count..)) + 
  geom_density(fill=NA)
m <- ggplot(movies, aes(x=rating, weight=votes/sum(votes)))
a3 <- m + geom_histogram(aes(y=..density..)) + 
  geom_density(fill=NA, colour="black")
grid.arrange(a1,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: sum(weights) != 1 -- will not get true density Warning:
position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


library(plyr) # provides round_any
# Round each release year to a multiple of 10 and draw one density per decade
movies$decade <- round_any(movies$year, 10)
m <- ggplot(movies, aes(x = rating, colour = decade, group = decade))
a1 <- m +
  geom_density(fill = NA)
a2 <- m +
  geom_density(fill = NA) +
  aes(y = ..count..)
# a2 <- m + geom_density(fill=NA) + aes(y = ..count..) + colour(group)
grid.arrange(a1, a2, ncol = 2)

plot of chunk unnamed-chunk-1


# Use qplot instead: density of length weighted by rating, first with the raw
# ratings and then with the ratings divided by their sum
a1 <- qplot(length, data = movies, geom = "density", weight = rating)
a2 <- qplot(
  length,
  data = movies, geom = "density", weight = rating / sum(rating)
)
grid.arrange(a1, a2, ncol = 1)
Warning: sum(weights) != 1 -- will not get true density

plot of chunk unnamed-chunk-1



NANANA
[1] NA
# Frequency polygons of carat: default binwidth, then 0.1, then 0.01
a1 <- qplot(carat, data = diamonds, geom = "freqpoly")
a2 <- qplot(carat, data = diamonds, geom = "freqpoly", binwidth = 0.1)
a3 <- qplot(carat, data = diamonds, geom = "freqpoly", binwidth = 0.01)
# Frequency polygons of price in bins of 1000: overall, coloured by
# diamond color, and coloured by diamond color on the density scale
a4 <- qplot(price, data = diamonds, geom = "freqpoly", binwidth = 1000)
a5 <- qplot(
  price,
  data = diamonds, geom = "freqpoly", binwidth = 1000, colour = color
)
a6 <- qplot(
  price, ..density..,
  data = diamonds, geom = "freqpoly", binwidth = 1000, colour = color
)
grid.arrange(a1, a2, a3, ncol = 1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

# Stack the three price frequency polygons in a single column
grid.arrange(a4, a5, a6, ncol = 1)

plot of chunk unnamed-chunk-1


NANANA
[1] NA
# Generate data: melt the volcano matrix to long form and name the three
# resulting columns x, y and z
library(reshape2) # provides melt
volcano3d <- setNames(melt(volcano), c("x", "y", "z"))

# Basic plot
v <- ggplot(volcano3d, aes(x, y, z = z))
a1 <- v + stat_contour()

# Setting bins creates evenly spaced contours in the range of the data
a2 <- v + stat_contour(bins = 2)
a3 <- v + stat_contour(bins = 10)

# Setting binwidth does the same thing, parameterised by the distance
# between contours
a5 <- v + stat_contour(binwidth = 2)
a6 <- v + stat_contour(binwidth = 5)
a7 <- v + stat_contour(binwidth = 10)

# Overlay two contour layers: thin grey lines every 2 units and thicker
# default-coloured lines every 10 units.
# The assignment operator was mistyped as `>-`, which R parses as
# `a8 > (-(v + ...))`; that fails with "invalid argument to unary operator"
# and leaves a8 unassigned.
a8 <- v +
  stat_contour(binwidth = 2, size = 0.5, colour = "grey50") +
  stat_contour(binwidth = 10, size = 1)
# Only a1-a3 are contour plots here: this section never assigns a4, so the
# name still holds the earlier price frequency polygon and is left out.
grid.arrange(a1, a2, a3, ncol = 2)

plot of chunk unnamed-chunk-1

# Lay out the binwidth variants and the two-layer plot in two columns
grid.arrange(a5, a6, a7, a8, ncol = 2)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-1


# Add aesthetic mappings: map the contour level to line size, then to colour
a1 <- v + stat_contour(aes(size = ..level..))
a2 <- v + stat_contour(aes(colour = ..level..))
# Change scale
a3 <- v +
  stat_contour(aes(colour = ..level..), size = 2) +
  scale_colour_gradient(low = "brown", high = "white")
# Set aesthetics to fixed value
a4 <- v + stat_contour(colour = "red")
a5 <- v + stat_contour(size = 2, linetype = 4)

# Try different geoms
a6 <- v + stat_contour(geom = "polygon", aes(fill = ..level..))
a7 <- v +
  geom_tile(aes(fill = z)) +
  stat_contour()
grid.arrange(a1, a2, a3, a4, ncol = 2)

plot of chunk unnamed-chunk-1

# Stack the remaining three contour variants in one column
grid.arrange(a5, a6, a7, ncol = 1)

plot of chunk unnamed-chunk-1


# Use qplot instead: once via geom = "contour", once via stat = "contour"
# drawn with the path geom
a1 <- qplot(x, y, z = z, data = volcano3d, geom = "contour")
a2 <- qplot(
  x, y,
  z = z, data = volcano3d, stat = "contour", geom = "path"
)
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1


## 2d density estimation ##
library("MASS")
# The package must be passed by name: an unnamed "MASS" is taken as a second
# data set to load, which triggers "data set 'MASS' not found".
data(geyser, package = "MASS")
head(geyser, n = 3)
  waiting duration
1      80    4.017
2      71    2.150
3      57    4.000
#Warning message:
NANANA
[1] NA

# Scatterplot of waiting against duration with fixed axis limits; stored as m
# (the base for the following plots) and as a1
m <- ggplot(geyser, aes(x = duration, y = waiting)) +
  geom_point() +
  xlim(0.5, 6) +
  ylim(40, 110)
a1 <- m
a2 <- m + geom_density2d()

# Compute the 2d density with kde2d over the same limits as the plot axes,
# reshape it into a data frame and draw it with geom_contour
dens <- kde2d(
  geyser$duration, geyser$waiting,
  n = 50, lims = c(0.5, 6, 40, 110)
)
densdf <- data.frame(
  expand.grid(duration = dens$x, waiting = dens$y),
  z = as.vector(dens$z)
)
a3 <- m + geom_contour(aes(z = z), data = densdf)

a4 <- m +
  geom_density2d() +
  scale_y_log10()
Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

# Same density contours, with y transformed by the coordinate system
a5 <- m +
  geom_density2d() +
  coord_trans(y = "log10")

# Filled polygons coloured by contour level
a6 <- m + stat_density2d(aes(fill = ..level..), geom = "polygon")
# qplot version: points plus density contours with the same axis limits
a7 <- qplot(
  duration, waiting,
  data = geyser, geom = c("point", "density2d")
) +
  xlim(0.5, 6) +
  ylim(40, 110)
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1

# Lay out the remaining geyser density plots in two columns
grid.arrange(a3, a4, a5, a6, a7, ncol = 2)

plot of chunk unnamed-chunk-1


# Mapping an aesthetic to a categorical variable gives one set of contours
# per value of that variable
set.seed(4393)
# Random sample of 1000 rows from diamonds
dsmall <- diamonds[sample(nrow(diamonds), 1000), ]
a1 <- qplot(x, y, data = dsmall, geom = "density2d", colour = cut)
a2 <- qplot(x, y, data = dsmall, geom = "density2d", linetype = cut)
grid.arrange(a1, a2, ncol = 1)

plot of chunk unnamed-chunk-1



# Density contours of price against carat, one set per cut
a3 <- qplot(carat, price,
  data = dsmall, geom = "density2d", colour = cut
)
# Base plot restricted to carat between 1 and 3
d <- ggplot(dsmall, aes(carat, price)) + xlim(1, 3)
a5 <- d + geom_point() + geom_density2d()
# If we turn contouring off, we can use geoms like tiles:
a6 <- d + stat_density2d(
  geom = "tile", aes(fill = ..density..), contour = FALSE
)
# Build on a6 by name instead of last_plot(): last_plot() reads hidden global
# state, so inserting or reordering any plot statement above would silently
# change which plot gets the new fill scale.
a7 <- a6 + scale_fill_gradient(limits = c(1e-5, 8e-4))
# Or points:
a8 <- d + stat_density2d(
  geom = "point", aes(size = ..density..), contour = FALSE
)
grid.arrange(a3, a5, a6, a7, a8, ncol = 2)
Warning: Removed 684 rows containing non-finite values (stat_density2d).
Warning: Removed 684 rows containing missing values (geom_point). Warning:
Removed 684 rows containing non-finite values (stat_density2d). Warning:
Removed 684 rows containing non-finite values (stat_density2d). Warning:
Removed 684 rows containing non-finite values (stat_density2d).

plot of chunk unnamed-chunk-1



NANANA
[1] NA
# Generate data
#
# Build a square grid over [-r*pi, r*pi] in both x and y and evaluate a
# damped radial ripple on it.
#   n: number of grid values along each axis
#   r: half-width of the grid in units of pi (default 4)
# Returns a data frame with one row per (x, y) combination and the columns
# x, y, r (distance from the origin) and z = cos(r^2) * exp(-r / 6).
pp <- function(n, r = 4) {
  # length.out is spelled out in full; the previous `len` only worked through
  # partial argument matching
  ticks <- seq(-r * pi, r * pi, length.out = n)
  cells <- expand.grid(x = ticks, y = ticks)
  cells$r <- sqrt(cells$x^2 + cells$y^2)
  cells$z <- cos(cells$r^2) * exp(-cells$r / 6)
  cells
}
# Base plot on a 20 x 20 grid produced by pp()
p <- ggplot(pp(20), aes(x = x, y = y))

# Tiles without a fill mapping -- pretty useless!
a1 <- p + geom_tile()

# Add aesthetic mappings
a2 <- p + geom_tile(aes(fill = z))

# Change scale
a3 <- p +
  geom_tile(aes(fill = z)) +
  scale_fill_gradient(low = "green", high = "red")
# Use qplot instead, on a 20 x 20 and then a 100 x 100 grid
a4 <- qplot(x, y, data = pp(20), geom = "tile", fill = z)
a5 <- qplot(x, y, data = pp(100), geom = "tile", fill = z)
# Missing values: keep a random 200 of the 400 grid rows
p <- ggplot(
  pp(20)[sample(20 * 20, size = 200), ],
  aes(x = x, y = y, fill = z)
)
a6 <- p + geom_tile()
grid.arrange(a1, a2, a3, ncol = 1)

plot of chunk unnamed-chunk-1

# Stack the two qplot tile plots and the missing-values plot in one column
grid.arrange(a4, a5, a6, ncol = 1)

plot of chunk unnamed-chunk-1


# Input that works with image: the transposed volcano matrix with its rows
# taken in reverse order
image(t(volcano)[rev(seq_len(ncol(volcano))), ])

plot of chunk unnamed-chunk-1

library(reshape2) # provides melt
# Tile plot of the melted volcano matrix, filled by its value column
a2 <- ggplot(melt(volcano), aes(x = Var1, y = Var2, fill = value)) +
  geom_tile()
# inspired by the image-density plots of Ken Knoblauch
cars <- ggplot(mtcars, aes(y = factor(cyl), x = mpg))
a4 <- cars + geom_point()

# Binned tiles (binwidth 3), filled by count and then by density
a5 <- cars + stat_bin(
  aes(fill = ..count..),
  geom = "tile", binwidth = 3, position = "identity"
)
a6 <- cars + stat_bin(
  aes(fill = ..density..),
  geom = "tile", binwidth = 3, position = "identity"
)
# Density-estimate tiles, filled by density and then by count
a7 <- cars + stat_density(
  aes(fill = ..density..),
  geom = "tile", position = "identity"
)
a8 <- cars + stat_density(
  aes(fill = ..count..),
  geom = "tile", position = "identity"
)
grid.arrange(a2, a4, a5, a6, a7, a8, ncol = 2)
Mapping a variable to y and also using stat="bin".  With stat="bin", it
will attempt to set the y value to the count of cases in each group.  This
can result in unexpected behavior and will not be allowed in a future
version of ggplot2.  If you want y to represent counts of cases, use
stat="bin" and don't map a variable to y.  If you want y to represent
values in the data, use stat="identity".  See ?geom_bar for examples.
(Deprecated; last used in version 0.9.2) Mapping a variable to y and also
using stat="bin".  With stat="bin", it will attempt to set the y value to
the count of cases in each group.  This can result in unexpected behavior
and will not be allowed in a future version of ggplot2.  If you want y to
represent counts of cases, use stat="bin" and don't map a variable to y.
If you want y to represent values in the data, use stat="identity".  See
?geom_bar for examples. (Deprecated; last used in version 0.9.2)

plot of chunk unnamed-chunk-1