ShimadaYoshio — Oct 23, 2013, 11:14 PM
# Attach ggplot2 with library(): it stops immediately when the package is
# missing, whereas require() only returns FALSE and lets the script fail
# later at the first ggplot() call.
library(ggplot2)
Warning: package 'ggplot2' was built under R version 3.0.2
library(gridExtra)
Loading required package: grid
# Build the 100-row demo data frame.
# The helper vectors x, y, zx, zy and z are created in the workspace first
# (they were previously assigned inline inside data.frame()), so they stay
# available after this block, and the random draws happen in the same order.
x <- rnorm(100)
y <- rnorm(100, x)                       # centred on the matching x value
zx <- (rep(1:10, times = 10) - 5) / 2    # 1..10 cycling fastest
zy <- (rep(1:10, each = 10) - 5) / 2     # 1..10 in runs of ten
z <- zx * zy
df <- data.frame(
  x = x,
  y = y,
  zx = zx,
  zy = zy,
  z = z,
  w = 1:100,                      # row counter
  g = rep(1:2, times = 50),       # alternating 1, 2, 1, 2, ...
  h = rep(c(1, 2), each = 50)     # first half 1, second half 2
)
head(df,5)
x y zx zy z w g h
1 -0.303181 -0.37569 -2.0 -2 4 1 1 1
2 -0.524697 2.98596 -1.5 -2 3 2 2 1
3 2.038368 1.62565 -1.0 -2 2 3 1 1
4 -0.302334 -1.51847 -0.5 -2 1 4 2 1
5 0.007234 -0.06949 0.0 -2 0 5 1 1
sdf <- subset(df, w <= 10)
NANANA
[1] NA
NANANA
[1] NA
# Six demo panels, one per basic geom. The object names are kept because the
# grid.arrange() calls in this section refer to them.
p1 <- ggplot(df, aes(x = x, y = y)) +
  geom_line() +
  ggtitle("line1")
p2 <- ggplot(df, aes(x = w, y = y)) +
  geom_line() +
  ggtitle("line2")
p <- ggplot(df, aes(x = x, y = y, colour = factor(g))) +
  geom_line() +
  ggtitle("line3")
# y is mapped to a data column, so the bars must be drawn with
# stat = "identity". The default stat = "bin" tries to count cases per group
# and ggplot2 reports that combination as deprecated.
q <- ggplot(sdf, aes(x = factor(w), y = y)) +
  geom_bar(stat = "identity") +
  ggtitle("bar")
r <- ggplot(df, aes(x = x, y = ..density..)) +
  geom_histogram() +
  ggtitle("histogram")
c <- ggplot(df, aes(x = zx, y = zy, z = z)) +
  geom_contour() +
  ggtitle("contour")
grid.arrange(p1, p2, ncol = 1)
grid.arrange(p, q, ncol = 1)
Warning: Stacking not well defined when ymin != 0
grid.arrange(r,c,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
NANANA
[1] NA
data(mtcars)
head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
summary(mtcars)
mpg cyl disp hp
Min. :10.4 Min. :4.00 Min. : 71.1 Min. : 52.0
1st Qu.:15.4 1st Qu.:4.00 1st Qu.:120.8 1st Qu.: 96.5
Median :19.2 Median :6.00 Median :196.3 Median :123.0
Mean :20.1 Mean :6.19 Mean :230.7 Mean :146.7
3rd Qu.:22.8 3rd Qu.:8.00 3rd Qu.:326.0 3rd Qu.:180.0
Max. :33.9 Max. :8.00 Max. :472.0 Max. :335.0
drat wt qsec vs
Min. :2.76 Min. :1.51 Min. :14.5 Min. :0.000
1st Qu.:3.08 1st Qu.:2.58 1st Qu.:16.9 1st Qu.:0.000
Median :3.69 Median :3.33 Median :17.7 Median :0.000
Mean :3.60 Mean :3.22 Mean :17.8 Mean :0.438
3rd Qu.:3.92 3rd Qu.:3.61 3rd Qu.:18.9 3rd Qu.:1.000
Max. :4.93 Max. :5.42 Max. :22.9 Max. :1.000
am gear carb
Min. :0.000 Min. :3.00 Min. :1.00
1st Qu.:0.000 1st Qu.:3.00 1st Qu.:2.00
Median :0.000 Median :4.00 Median :2.00
Mean :0.406 Mean :3.69 Mean :2.81
3rd Qu.:1.000 3rd Qu.:4.00 3rd Qu.:4.00
Max. :1.000 Max. :5.00 Max. :8.00
str(mtcars)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
p <- ggplot(mtcars, aes(wt, mpg))
NANANA
[1] NA
# Add aesthetic mappings
# NOTE(review): a1 is drawn below but was never defined in the recorded
# session ("object 'a1' not found"). The unmapped scatterplot is assumed as
# the baseline panel -- confirm.
# NOTE(review): each definition originally ended in "+" followed by a line
# that survives only as the garbled token "NANA", which made every assignment
# fail with "Don't know how to add NA to a plot". The lost continuation
# (presumably a non-ASCII title) cannot be recovered and has been dropped.
a1 <- p + geom_point()
a2 <- p + geom_point(aes(colour = qsec))
a3 <- p + geom_point(aes(alpha = qsec))
a4 <- p + geom_point(aes(colour = factor(cyl)))
a5 <- p + geom_point(aes(shape = factor(cyl)))
a6 <- p + geom_point(aes(size = qsec))
grid.arrange(a1, a2, ncol = 1)
grid.arrange(a3, a4, ncol = 1)
grid.arrange(a5, a6, ncol = 1)
# Change scales
# NOTE(review): the continuation lines of a1 and a3 were lost (garbled to
# "NANA"), so both assignments failed. The scale calls below are a
# reconstruction based on the section heading and the ggplot2 geom_point
# reference examples -- confirm against the author's original script.
a1 <- p + geom_point(aes(colour = cyl)) +
  scale_colour_gradient(low = "blue")
# scale_area() is deprecated in favour of scale_size_area(). Note that
# scale_size_area() makes the point area proportional to the numeric value
# by default, so the sizes differ slightly from the old scale.
a2 <- p + geom_point(aes(size = qsec)) +
  scale_size_area()
a3 <- p + geom_point(aes(shape = factor(cyl))) +
  scale_shape(solid = FALSE)
grid.arrange(a1, a2, a3, ncol = 1)
# Set aesthetics to fixed value: the same red, size-3 points built once with
# a layer parameter and once through qplot(), where I() marks a constant.
a1 <- p +
  geom_point(size = 3, colour = "red")
a2 <- qplot(
  wt, mpg,
  data = mtcars,
  size = I(3),
  colour = I("red")
)
grid.arrange(a1, a2, ncol = 1)
# Varying alpha is useful for large datasets: three panels of the same
# carat/price scatter, each with more transparent points than the last.
d <- ggplot(data = diamonds, mapping = aes(x = carat, y = price))
b1 <- d +
  ggtitle("alpha = 1/10") +
  geom_point(alpha = 1 / 10)
b2 <- d +
  ggtitle("alpha = 1/20") +
  geom_point(alpha = 1 / 20)
b3 <- d +
  ggtitle("alpha = 1/100") +
  geom_point(alpha = 1 / 100)
grid.arrange(b1, b2, b3, ncol = 1)
# Layering several point geoms of different sizes gives compound markers.
# Layers are drawn in the order they are added, so the order is preserved.
p <- ggplot(data = mtcars, mapping = aes(x = mpg, y = wt))
a1 <- p +
  geom_point(size = 4, colour = "grey50") +
  geom_point(aes(colour = cyl))
a2 <- p +
  aes(shape = factor(cyl)) +
  geom_point(aes(colour = factor(cyl)), size = 4) +
  geom_point(size = 1.5, colour = "grey90")
a3 <- p +
  geom_point(size = 4.5, colour = "black") +
  geom_point(size = 4, colour = "pink") +
  geom_point(aes(shape = factor(cyl)))
# Fixed-aesthetic layers are normally left out of the legend;
# show_guide = TRUE forces them in.
a4 <- p +
  geom_point(size = 10, colour = "black", show_guide = TRUE) +
  geom_point(size = 5, colour = "pink", show_guide = TRUE) +
  geom_point(aes(shape = factor(cyl)))
grid.arrange(a1, a2, a3, a4, ncol = 2)
# Transparent points:
a1 <- qplot(mpg, wt, data = mtcars, alpha = I(0.2), size = I(5))
# geom_point warns when rows with missing values are dropped from the plot;
# passing na.rm = TRUE silences that warning.
# mtcars2 is mtcars with mpg blanked where a uniform draw falls below 0.2.
mtcars2 <- mtcars
mtcars2$mpg[runif(32) < 0.2] <- NA
a2 <- qplot(wt, mpg, data = mtcars2)
a3 <- qplot(wt, mpg, data = mtcars2, na.rm = TRUE)
# The same scatterplots through qplot()
a4 <- qplot(wt, mpg, data = mtcars)
a5 <- qplot(wt, mpg, data = mtcars, colour = factor(cyl))
a6 <- qplot(wt, mpg, data = mtcars, colour = I("red"))
grid.arrange(a1, a2, ncol = 1)
Warning: Removed 3 rows containing missing values (geom_point).
grid.arrange(a3,a4,ncol=1)
grid.arrange(a5,a6,ncol=1)
##^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
data(mpg)
head(mpg)
manufacturer model displ year cyl trans drv cty hwy fl class
1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
summary(mpg)
manufacturer model displ year
dodge :37 caravan 2wd : 11 Min. :1.60 Min. :1999
toyota :34 ram 1500 pickup 4wd: 10 1st Qu.:2.40 1st Qu.:1999
volkswagen:27 civic : 9 Median :3.30 Median :2004
ford :25 dakota pickup 4wd : 9 Mean :3.47 Mean :2004
chevrolet :19 jetta : 9 3rd Qu.:4.60 3rd Qu.:2008
audi :18 mustang : 9 Max. :7.00 Max. :2008
(Other) :74 (Other) :177
cyl trans drv cty hwy
Min. :4.00 auto(l4) :83 4:103 Min. : 9.0 Min. :12.0
1st Qu.:4.00 manual(m5):58 f:106 1st Qu.:14.0 1st Qu.:18.0
Median :6.00 auto(l5) :39 r: 25 Median :17.0 Median :24.0
Mean :5.89 manual(m6):19 Mean :16.9 Mean :23.4
3rd Qu.:8.00 auto(s6) :16 3rd Qu.:19.0 3rd Qu.:27.0
Max. :8.00 auto(l6) : 6 Max. :35.0 Max. :44.0
(Other) :13
fl class
c: 1 2seater : 5
d: 5 compact :47
e: 8 midsize :41
p: 52 minivan :11
r:168 pickup :33
subcompact:35
suv :62
str(mpg)
'data.frame': 234 obs. of 11 variables:
$ manufacturer: Factor w/ 15 levels "audi","chevrolet",..: 1 1 1 1 1 1 1 1 1 1 ...
$ model : Factor w/ 38 levels "4runner 4wd",..: 2 2 2 2 2 2 2 3 3 3 ...
$ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
$ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
$ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
$ trans : Factor w/ 10 levels "auto(av)","auto(l3)",..: 4 9 10 1 4 9 1 9 4 10 ...
$ drv : Factor w/ 3 levels "4","f","r": 2 2 2 2 2 2 2 1 1 1 ...
$ cty : int 18 21 20 21 16 18 18 18 16 20 ...
$ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
$ fl : Factor w/ 5 levels "c","d","e","p",..: 4 4 4 4 4 4 4 4 4 4 ...
$ class : Factor w/ 7 levels "2seater","compact",..: 2 2 2 2 2 2 2 2 2 2 ...
# Displacement vs highway mileage: plain points first, then jittered
# variants to separate overplotted observations.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
a1 <- p + geom_point()
a2 <- p + geom_point(position = "jitter")
# Add aesthetic mappings
a3 <- p + geom_jitter(aes(colour = cyl))
# Vary parameters: jitter only horizontally, then only vertically
a4 <- p +
  ggtitle("width = .5") +
  geom_jitter(position = position_jitter(width = .5))
a5 <- p +
  ggtitle("height = 1") +
  geom_jitter(position = position_jitter(height = 1))
grid.arrange(a1, a2, ncol = 1)
grid.arrange(a3, a4, a5, ncol = 1)
# Same idea through qplot(). When two geoms are given, they are listed in
# the order shown, so a2 and a3 differ only in which geom comes first.
a1 <- qplot(displ, hwy, geom = "jitter", data = mpg)
a2 <- qplot(class, hwy, geom = c("boxplot", "jitter"), data = mpg)
a3 <- qplot(class, hwy, geom = c("jitter", "boxplot"), data = mpg)
grid.arrange(a1, a2, a3, ncol = 1)
NANANA
[1] NA
data(movies)
head(movies)
title year length budget rating votes r1 r2 r3
1 $ 1971 121 NA 6.4 348 4.5 4.5 4.5
2 $1000 a Touchdown 1939 71 NA 6.0 20 0.0 14.5 4.5
3 $21 a Day Once a Month 1941 7 NA 8.2 5 0.0 0.0 0.0
4 $40,000 1996 70 NA 8.2 6 14.5 0.0 0.0
5 $50,000 Climax Show, The 1975 71 NA 3.4 17 24.5 4.5 0.0
6 $pent 2000 91 NA 4.3 45 4.5 4.5 4.5
r4 r5 r6 r7 r8 r9 r10 mpaa Action Animation Comedy Drama
1 4.5 14.5 24.5 24.5 14.5 4.5 4.5 0 0 1 1
2 24.5 14.5 14.5 14.5 4.5 4.5 14.5 0 0 1 0
3 0.0 0.0 24.5 0.0 44.5 24.5 24.5 0 1 0 0
4 0.0 0.0 0.0 0.0 0.0 34.5 45.5 0 0 1 0
5 14.5 14.5 4.5 0.0 0.0 0.0 24.5 0 0 0 0
6 14.5 14.5 14.5 4.5 4.5 14.5 14.5 0 0 0 1
Documentary Romance Short
1 0 0 0
2 0 0 0
3 0 0 1
4 0 0 0
5 0 0 0
6 0 0 0
summary(movies)
title year length budget
Length:58788 Min. :1893 Min. : 1 Min. :0.00e+00
Class :character 1st Qu.:1958 1st Qu.: 74 1st Qu.:2.50e+05
Mode :character Median :1983 Median : 90 Median :3.00e+06
Mean :1976 Mean : 82 Mean :1.34e+07
3rd Qu.:1997 3rd Qu.: 100 3rd Qu.:1.50e+07
Max. :2005 Max. :5220 Max. :2.00e+08
NA's :53573
rating votes r1 r2
Min. : 1.00 Min. : 5 Min. : 0.00 Min. : 0.00
1st Qu.: 5.00 1st Qu.: 11 1st Qu.: 0.00 1st Qu.: 0.00
Median : 6.10 Median : 30 Median : 4.50 Median : 4.50
Mean : 5.93 Mean : 632 Mean : 7.01 Mean : 4.02
3rd Qu.: 7.00 3rd Qu.: 112 3rd Qu.: 4.50 3rd Qu.: 4.50
Max. :10.00 Max. :157608 Max. :100.00 Max. :84.50
r3 r4 r5 r6
Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.0
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 4.5 1st Qu.: 4.5
Median : 4.50 Median : 4.50 Median : 4.5 Median :14.5
Mean : 4.72 Mean : 6.37 Mean : 9.8 Mean :13.0
3rd Qu.: 4.50 3rd Qu.: 4.50 3rd Qu.: 14.5 3rd Qu.:14.5
Max. :84.50 Max. :100.00 Max. :100.0 Max. :84.5
r7 r8 r9 r10
Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
1st Qu.: 4.5 1st Qu.: 4.5 1st Qu.: 4.50 1st Qu.: 4.5
Median : 14.5 Median : 14.5 Median : 4.50 Median : 14.5
Mean : 15.6 Mean : 13.9 Mean : 8.95 Mean : 16.9
3rd Qu.: 24.5 3rd Qu.: 24.5 3rd Qu.: 14.50 3rd Qu.: 24.5
Max. :100.0 Max. :100.0 Max. :100.00 Max. :100.0
mpaa Action Animation Comedy
:53864 Min. :0.0000 Min. :0.0000 Min. :0.000
NC-17: 16 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
PG : 528 Median :0.0000 Median :0.0000 Median :0.000
PG-13: 1003 Mean :0.0797 Mean :0.0628 Mean :0.294
R : 3377 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.000
Max. :1.0000 Max. :1.0000 Max. :1.000
Drama Documentary Romance Short
Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.000
1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
Median :0.000 Median :0.0000 Median :0.0000 Median :0.000
Mean :0.371 Mean :0.0591 Mean :0.0807 Mean :0.161
3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.000
str(movies)
'data.frame': 58788 obs. of 24 variables:
$ title : chr "$" "$1000 a Touchdown" "$21 a Day Once a Month" "$40,000" ...
$ year : int 1971 1939 1941 1996 1975 2000 2002 2002 1987 1917 ...
$ length : int 121 71 7 70 71 91 93 25 97 61 ...
$ budget : int NA NA NA NA NA NA NA NA NA NA ...
$ rating : num 6.4 6 8.2 8.2 3.4 4.3 5.3 6.7 6.6 6 ...
$ votes : int 348 20 5 6 17 45 200 24 18 51 ...
$ r1 : num 4.5 0 0 14.5 24.5 4.5 4.5 4.5 4.5 4.5 ...
$ r2 : num 4.5 14.5 0 0 4.5 4.5 0 4.5 4.5 0 ...
$ r3 : num 4.5 4.5 0 0 0 4.5 4.5 4.5 4.5 4.5 ...
$ r4 : num 4.5 24.5 0 0 14.5 14.5 4.5 4.5 0 4.5 ...
$ r5 : num 14.5 14.5 0 0 14.5 14.5 24.5 4.5 0 4.5 ...
$ r6 : num 24.5 14.5 24.5 0 4.5 14.5 24.5 14.5 0 44.5 ...
$ r7 : num 24.5 14.5 0 0 0 4.5 14.5 14.5 34.5 14.5 ...
$ r8 : num 14.5 4.5 44.5 0 0 4.5 4.5 14.5 14.5 4.5 ...
$ r9 : num 4.5 4.5 24.5 34.5 0 14.5 4.5 4.5 4.5 4.5 ...
$ r10 : num 4.5 14.5 24.5 45.5 24.5 14.5 14.5 14.5 24.5 4.5 ...
$ mpaa : Factor w/ 5 levels "","NC-17","PG",..: 1 1 1 1 1 1 5 1 1 1 ...
$ Action : int 0 0 0 0 0 0 1 0 0 0 ...
$ Animation : int 0 0 1 0 0 0 0 0 0 0 ...
$ Comedy : int 1 1 0 1 0 0 0 0 0 0 ...
$ Drama : int 1 0 0 0 0 1 1 0 1 0 ...
$ Documentary: int 0 0 0 0 0 0 0 1 0 0 ...
$ Romance : int 0 0 0 0 0 0 0 0 0 0 ...
$ Short : int 0 0 1 0 0 0 0 1 0 0 ...
# Summarise number of movie ratings by year of movie.
# movies is split by rounded rating; inside each slice tapply() counts the
# rows per release year, and the per-rating frames are stacked into mry.
mry <- do.call(
  rbind,
  by(movies, round(movies$rating), function(chunk) {
    per_year <- tapply(chunk$length, chunk$year, length)
    data.frame(
      rating = round(chunk$rating[1]),
      year = as.numeric(names(per_year)),
      number = as.vector(per_year)
    )
  })
)
p <- ggplot(data = mry, mapping = aes(x = year, y = number, group = rating))
a1 <- p + geom_line()
# Add aesthetic mappings
a2 <- p + geom_line(aes(size = rating))
a3 <- p + geom_line(aes(colour = rating))
# Change scale
a4 <- p +
  geom_line(aes(colour = rating)) +
  scale_colour_gradient(low = "red")
grid.arrange(a1, a2, a3, a4, ncol = 2)
#^^^^^^
# Line width mapped to rating, with the size range narrowed to 0.1-3
a1 <- p +
  geom_line(aes(size = rating)) +
  scale_size(range = c(0.1, 3))
# Set aesthetics to fixed value
a2 <- p + geom_line(size = 1, colour = "red")
# The same line chart through qplot()
a3 <- qplot(year, number, data = mry, geom = "line", group = rating)
grid.arrange(a1, a2, a3, ncol = 1)
# Using a time series
data(economics)
head(economics,3)
date pce pop psavert uempmed unemploy
1 1967-06-30 507.8 198712 9.8 4.5 2944
2 1967-07-31 510.9 198911 9.8 4.7 2945
3 1967-08-31 516.7 199113 9.0 4.6 2958
summary(economics)
date pce pop psavert
Min. :1967-06-30 Min. : 508 Min. :198712 Min. :-3.00
1st Qu.:1977-06-07 1st Qu.:1272 1st Qu.:220094 1st Qu.: 4.00
Median :1987-05-15 Median :3082 Median :242516 Median : 7.60
Mean :1987-05-16 Mean :3654 Mean :246349 Mean : 6.72
3rd Qu.:1997-04-22 3rd Qu.:5474 3rd Qu.:272277 3rd Qu.: 9.50
Max. :2007-03-31 Max. :9705 Max. :301913 Max. :14.60
uempmed unemploy
Min. : 4.00 Min. : 2685
1st Qu.: 5.80 1st Qu.: 6052
Median : 6.90 Median : 7188
Mean : 7.12 Mean : 6997
3rd Qu.: 8.38 3rd Qu.: 8250
Max. :12.30 Max. :12051
str(economics)
'data.frame': 478 obs. of 6 variables:
$ date : Date, format: "1967-06-30" "1967-07-31" ...
$ pce : num 508 511 517 513 518 ...
$ pop : int 198712 198911 199113 199311 199498 199657 199808 199920 200056 200208 ...
$ psavert : num 9.8 9.8 9 9.8 9.7 9.4 9 9.5 8.9 9.6 ...
$ uempmed : num 4.5 4.7 4.6 4.9 4.7 4.8 5.1 4.5 4.1 4.6 ...
$ unemploy: int 2944 2945 2958 3143 3066 3018 2878 3001 2877 2709 ...
# Population over time from the economics data: plain, log-scaled y, a
# subset after 2006-01-01, and line width mapped to unemploy / pop.
a1 <- qplot(date, pop, data = economics, geom = "line")
a2 <- qplot(date, pop, data = economics, geom = "line", log = "y")
a3 <- qplot(
  date, pop,
  data = subset(economics, date > as.Date("2006-1-1")),
  geom = "line"
)
a4 <- qplot(
  date, pop,
  data = economics,
  size = unemploy / pop,
  geom = "line"
)
# Use the arrow parameter to add an arrow to the line
# See ?grid::arrow for more details
c <- ggplot(data = economics, mapping = aes(x = date, y = pop))
# arrow() comes from the grid package; its ends argument defaults to "last"
library(grid)
a5 <- c + geom_line(arrow = arrow())
a6 <- c + geom_line(
  arrow = arrow(angle = 15, ends = "both", type = "closed")
)
grid.arrange(a1, a2, ncol = 1)
grid.arrange(a3, a4, ncol = 1)
grid.arrange(a5, a6, ncol = 1)
# See scale_date for examples of plotting multiple time series on
# a single graph
# A simple pcp (parallel coordinates plot) example.
# 300 ids in three groups of 100, each with a 2005 value and a 2010 value
# obtained by scaling the 2005 value with a random factor.
y2005 <- runif(300, 20, 120)
y2010 <- y2005 * runif(300, -1.05, 1.5)
group <- rep(LETTERS[1:3], each = 100)
df <- data.frame(id = seq_along(group), group, y2005, y2010)
library(reshape2) # for melt
# Reshape to long form: one row per (id, year variable). The argument is
# spelled out as id.vars rather than relying on partial matching of id.var.
dfm <- melt(df, id.vars = c("id", "group"))
head(dfm,5)
id group variable value
1 1 A y2005 92.50
2 2 A y2005 75.95
3 3 A y2005 47.39
4 4 A y2005 53.69
5 5 A y2005 24.68
# All three groups in one panel, coloured by group
a1 <- ggplot(
  data = dfm,
  mapping = aes(variable, value, group = id, colour = group)
) +
  geom_path(alpha = 0.5)
# One panel per group. dfm1 is overwritten for each group in turn and the
# path colour is fixed to palette entries 2, 3 and 4.
dfm1 <- subset(dfm, group == "A")
a2 <- ggplot(
  data = dfm1,
  mapping = aes(variable, value, group = id, colour = group)
) +
  geom_path(colour = 2, alpha = 0.5)
dfm1 <- subset(dfm, group == "B")
a3 <- ggplot(
  data = dfm1,
  mapping = aes(variable, value, group = id, colour = group)
) +
  geom_path(colour = 3, alpha = 0.5)
dfm1 <- subset(dfm, group == "C")
a4 <- ggplot(
  data = dfm1,
  mapping = aes(variable, value, group = id, colour = group)
) +
  geom_path(colour = 4, alpha = 0.5)
grid.arrange(a1, a2, ncol = 1)
grid.arrange(a3, a4, ncol = 1)
NANANA
[1] NA
# Simple quantiles/ECDF from examples(plot)
x <- sort(rnorm(47))
a1 <- qplot(seq_along(x), x, geom = "step") + ggtitle("step")
# Steps go horizontally, then vertically (default)
a2 <- qplot(seq_along(x), x, geom = "step", direction = "hv")
# Base-graphics counterpart. plot() draws as a side effect and returns NULL,
# so its result is not stored in a3/a5 (neither was ever arranged below).
plot(x, type = "s")
# Steps go vertically, then horizontally
a4 <- qplot(seq_along(x), x, geom = "step", direction = "vh")
plot(x, type = "S")
grid.arrange(a1, a2, a4, ncol = 1)
par(mfrow = c(1, 1))
plot(x, type = "s")
par(mfrow = c(1, 1))
# Also works with other aesthetics
df <- data.frame(
  x = sort(rnorm(50)),
  # replace is spelled out; "rep" only worked through partial matching
  trt = sample(c("a", "b"), 50, replace = TRUE)
)
qplot(seq_along(x), x, data = df, geom = "step", colour = trt)
NANANA
[1] NA
# See stat_smooth for examples of using built-in model fitting.
# If you need something more flexible, this example shows you how to
# plot the fits from any model of your choosing.
a1 <- qplot(wt, mpg, data=mtcars, colour=factor(cyl))
model <- lm(mpg ~ wt + factor(cyl), data=mtcars)
summary(model)
Call:
lm(formula = mpg ~ wt + factor(cyl), data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-4.589 -1.236 -0.516 1.384 5.792
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.991 1.888 18.01 < 2e-16 ***
wt -3.206 0.754 -4.25 0.00021 ***
factor(cyl)6 -4.256 1.386 -3.07 0.00472 **
factor(cyl)8 -6.071 1.652 -3.67 0.00100 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.56 on 28 degrees of freedom
Multiple R-squared: 0.837, Adjusted R-squared: 0.82
F-statistic: 48.1 on 3 and 28 DF, p-value: 3.59e-11
# Prediction grid: 20 evenly spaced weights across the observed range,
# crossed with every cylinder level.
grid <- with(mtcars, expand.grid(
  wt = seq(min(wt), max(wt), length.out = 20),
  cyl = levels(factor(cyl))
))
# Fitted mpg from the linear model for every wt/cyl combination
grid$mpg <- stats::predict(model, newdata = grid)
# NOTE(review): the continuation of this expression was lost (garbled to
# "NANA"), so the assignment failed. geom_line(data = grid) is a
# reconstruction that overlays the fitted lines, following the ggplot2
# geom_smooth reference example -- confirm.
a2 <- qplot(wt, mpg, data = mtcars, colour = factor(cyl)) +
  geom_line(data = grid)
# or with standard errors
# se.fit is spelled out in full; "se = TRUE" only worked through partial
# argument matching. The result is a list with fit, se.fit, df and
# residual.scale.
err <- stats::predict(model, newdata = grid, se.fit = TRUE)
err
$fit
1 2 3 4 5 6 7 8 9 10 11 12
29.14 28.48 27.82 27.16 26.50 25.84 25.18 24.52 23.86 23.20 22.54 21.88
13 14 15 16 17 18 19 20 21 22 23 24
21.22 20.56 19.90 19.24 18.58 17.92 17.26 16.60 24.89 24.23 23.57 22.91
25 26 27 28 29 30 31 32 33 34 35 36
22.25 21.59 20.93 20.27 19.61 18.95 18.29 17.63 16.97 16.31 15.65 14.99
37 38 39 40 41 42 43 44 45 46 47 48
14.33 13.67 13.01 12.35 23.07 22.41 21.75 21.09 20.43 19.77 19.11 18.45
49 50 51 52 53 54 55 56 57 58 59 60
17.79 17.13 16.47 15.81 15.15 14.49 13.83 13.17 12.51 11.85 11.19 10.53
$se.fit
1 2 3 4 5 6 7 8 9 10
0.9663 0.8815 0.8176 0.7798 0.7719 0.7948 0.8461 0.9209 1.0142 1.1212
11 12 13 14 15 16 17 18 19 20
1.2385 1.3634 1.4939 1.6288 1.7671 1.9079 2.0508 2.1954 2.3413 2.4884
21 22 23 24 25 26 27 28 29 30
1.5481 1.4301 1.3199 1.2195 1.1316 1.0592 1.0057 0.9742 0.9670 0.9844
31 32 33 34 35 36 37 38 39 40
1.0253 1.0870 1.1663 1.2597 1.3645 1.4781 1.5987 1.7249 1.8555 1.9896
41 42 43 44 45 46 47 48 49 50
1.9950 1.8500 1.7068 1.5658 1.4278 1.2937 1.1648 1.0431 0.9314 0.8338
51 52 53 54 55 56 57 58 59 60
0.7556 0.7036 0.6835 0.6982 0.7456 0.8201 0.9151 1.0249 1.1452 1.2731
$df
[1] 28
$residual.scale
[1] 2.557
# Upper and lower limits at 1.96 standard errors either side of the fit
grid$ucl <- with(err, fit + 1.96 * se.fit)
grid$lcl <- with(err, fit - 1.96 * se.fit)
# stat = "identity" makes geom_smooth draw the precomputed line and band
# from grid instead of fitting its own smoother.
a3 <- qplot(wt, mpg, data = mtcars, colour = factor(cyl)) +
  geom_smooth(
    mapping = aes(ymin = lcl, ymax = ucl),
    data = grid,
    stat = "identity"
  )
grid.arrange(a1, a2, a3, ncol = 1)
NANANA
[1] NA
# 2-D binning of the diamonds x/y dimensions, limited to 4-10 on both axes
d <- ggplot(diamonds, aes(x = x, y = y)) + xlim(4, 10) + ylim(4, 10)
a1 <- d + geom_bin2d()
a2 <- d + geom_bin2d(binwidth = c(0.1, 0.1))
grid.arrange(a1, a2, ncol = 1)
# The same on carat vs price; each panel title names the call that built it
d <- ggplot(diamonds, aes(carat, price))
a1 <- d + stat_bin2d() + ggtitle("stat_bin2d()")
a2 <- d + geom_bin2d() + ggtitle("geom_bin2d()")
# You can control the size of the bins by specifying the number of
# bins in each direction:
a3 <- d + stat_bin2d(bins = 10) + ggtitle("stat_bin2d(bins = 10)")
a4 <- d + stat_bin2d(bins = 30) + ggtitle("stat_bin2d(bins = 30)")
# Or by specifying the width of the bins
# (both titles were missing their closing parenthesis)
a5 <- d + stat_bin2d(binwidth = c(1, 1000)) +
  ggtitle("stat_bin2d(binwidth = c(1, 1000))")
a6 <- d + stat_bin2d(binwidth = c(.1, 500)) +
  ggtitle("stat_bin2d(binwidth = c(.1, 500))")
# Or with a list of breaks
x <- seq(min(diamonds$carat), max(diamonds$carat), by = 0.1)
# length.out is spelled out instead of the partially matched "length"
y <- seq(min(diamonds$price), max(diamonds$price), length.out = 50)
a7 <- d + stat_bin2d(breaks = list(x = x, y = y)) +
  ggtitle("breaks = list(x = x, y = y)")
# With qplot
a8 <- qplot(x, y, data = diamonds, geom = "bin2d",
            xlim = c(4, 10), ylim = c(4, 10)) +
  ggtitle("bin2d")
a9 <- qplot(x, y, data = diamonds, geom = "bin2d", binwidth = c(0.1, 0.1),
            xlim = c(4, 10), ylim = c(4, 10)) +
  ggtitle("bin2d,binwidth")
grid.arrange(a1, a2, a3, ncol = 1)
grid.arrange(a4, a5, a6, ncol = 1)
grid.arrange(a7, a8, a9, ncol = 1)
NANANA
[1] NA
# Hexagonal binning of carat vs price, via the stat and via the geom
d <- ggplot(data = diamonds, mapping = aes(x = carat, y = price))
a1 <- d + stat_binhex()
a2 <- d + geom_hex()
grid.arrange(a1, a2, ncol = 1)
# Bin size set through the number of bins in each direction
a4 <- d + stat_binhex(bins = 10)
a5 <- d + stat_binhex(bins = 30)
grid.arrange(a4, a5, ncol = 1)
# Bin size set through the bin width on each axis
a6 <- d + stat_binhex(binwidth = c(1, 1000))
a7 <- d + stat_binhex(binwidth = c(.1, 500))
grid.arrange(a6, a7, ncol = 1)
# The qplot() route, on the x/y dimensions limited to 4-10
a8 <- qplot(
  x, y,
  data = diamonds, geom = "hex",
  xlim = c(4, 10), ylim = c(4, 10)
)
a9 <- qplot(
  x, y,
  data = diamonds, geom = "hex",
  xlim = c(4, 10), ylim = c(4, 10),
  binwidth = c(0.1, 0.1)
)
grid.arrange(a8, a9, ncol = 1)
Warning: Removed 478 rows containing missing values (stat_hexbin).
Warning: Removed 478 rows containing missing values (stat_hexbin).
#grid.arrange(a1,a2,a4,a5,a6,a7,a8,a9,ncol=3)
# Map the columns by name so they are looked up in the plot's data argument.
# Indexing iris[, k] inside aes() bypasses that and labels the axes with the
# indexing expression. Columns 1, 3 and 5 of iris are Sepal.Length,
# Petal.Length and Species.
d <- ggplot(iris, aes(Sepal.Length, Petal.Length, colour = Species))
d + stat_binhex()
# Quantile regression lines over a random sample of 1000 movies
msamp <- movies[sample(nrow(movies), 1000), ]
m <- ggplot(data = msamp, mapping = aes(x = year, y = rating)) +
  geom_point()
a1 <- m + stat_quantile()
a2 <- m + stat_quantile(quantiles = 0.5)
# Nineteen quantiles from 0.05 to 0.95
q10 <- seq(0.05, 0.95, by = 0.05)
a3 <- m + stat_quantile(quantiles = q10)
# You can also use rqss to fit smooth quantiles
a4 <- m + stat_quantile(method = "rqss")
# Note that rqss doesn't pick a smoothing constant automatically, so
# you'll need to tweak lambda yourself
a5 <- m + stat_quantile(lambda = 10, method = "rqss")
a6 <- m + stat_quantile(lambda = 100, method = "rqss")
# Use 'votes' as weights for the quantile calculation
a7 <- m + stat_quantile(aes(weight = votes))
# Change scale: colour each line by the quantile it represents
a8 <- m + stat_quantile(aes(colour = ..quantile..), quantiles = q10)
a9 <- m +
  stat_quantile(aes(colour = ..quantile..), quantiles = q10) +
  scale_colour_gradient2(midpoint = 0.5)
# Set aesthetics to fixed value
a10 <- m + stat_quantile(linetype = 2, size = 2, colour = "red")
# Use qplot instead (this one runs on the full movies data, not the sample)
a11 <- qplot(year, rating, data = movies, geom = "quantile")
grid.arrange(a1, a2, a3, a4, ncol = 2)
Smoothing formula not specified. Using: y ~ x Smoothing formula not
specified. Using: y ~ x Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ qss(x, lambda = 1)
grid.arrange(a5,a6,a7,a8,ncol=2)
Smoothing formula not specified. Using: y ~ qss(x, lambda = 10) Smoothing
formula not specified. Using: y ~ qss(x, lambda = 100) Smoothing formula
not specified. Using: y ~ x Smoothing formula not specified. Using: y ~ x
grid.arrange(a9,a10,a11,ncol=2)
Smoothing formula not specified. Using: y ~ x Smoothing formula not
specified. Using: y ~ x Smoothing formula not specified. Using: y ~ x
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generate data: means and standard errors of means for prices
# for each type of cut
dmod <- lm(price ~ cut, data = diamonds)
# One prediction per distinct cut, keeping only the fit and its standard
# error. se.fit is spelled out in full; "se=TRUE" relied on partial
# argument matching.
cuts <- data.frame(
  cut = unique(diamonds$cut),
  predict(
    dmod,
    data.frame(cut = unique(diamonds$cut)),
    se.fit = TRUE
  )[c("fit", "se.fit")]
)
head(cuts, 5)
cut fit se.fit
1 Ideal 3458 27.00
2 Premium 4584 33.75
3 Good 3929 56.59
4 Very Good 3982 36.06
5 Fair 4359 98.79
a1 <- qplot(cut, fit, data = cuts)
# With a bar chart, we are comparing lengths, so the y-axis is
# automatically extended to include 0.
# fit is already the bar height, so the bars are drawn with
# stat = "identity". qplot(geom = "bar") with y mapped used stat = "bin",
# which ggplot2 reports as deprecated.
a2 <- ggplot(cuts, aes(cut, fit)) + geom_bar(stat = "identity")
# Display estimates and standard errors in various ways
se <- ggplot(cuts, aes(cut, fit,
                       ymin = fit - se.fit, ymax = fit + se.fit,
                       colour = cut))
a3 <- se + geom_linerange()
a4 <- se + geom_pointrange()
a5 <- se + geom_errorbar(width = 0.5)
a6 <- se + geom_crossbar(width = 0.5)
# Use coord_flip to flip the x and y axes
a7 <- se + geom_linerange() + coord_flip()
grid.arrange(a1, a2, ncol = 1)
grid.arrange(a3,a4,ncol=1)
grid.arrange(a5,a6,a7,ncol=1)
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Ribbon and area examples on the LakeHuron series and on the movies data.
# Statement order matters here: a1-a4 are created before the level values
# are blanked, a5 and a10 after.
# Generate data
# One row per year from 1875 to 1972 with the matching LakeHuron level
huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron))
library(plyr) # to access round_any
# Floor each year to the start of its decade
huron$decade <- round_any(huron$year, 10, floor)
h <- ggplot(huron, aes(x=year))
# Ribbon from 0 up to the level, and the equivalent area geom
a1 <- h + geom_ribbon(aes(ymin=0, ymax=level))
a2 <- h + geom_area(aes(y = level))
# Add aesthetic mappings
# Band of +/- 1 around the level, without and with the level line on top
a3 <- h + geom_ribbon(aes(ymin=level-1, ymax=level+1))
a4 <- h + geom_ribbon(aes(ymin=level-1, ymax=level+1)) +
geom_line(aes(y=level))
# Take out some values in the middle for an example of NA handling
# (years 1901 to 1909 inclusive)
huron[huron$year > 1900 & huron$year < 1910, "level"] <- NA
# Rebuild the base plot from the modified data frame
h <- ggplot(huron, aes(x=year))
a5 <- h + geom_ribbon(aes(ymin=level-1, ymax=level+1)) +
geom_line(aes(y=level))
# Another data set, with multiple y's for each x
m <- ggplot(movies, aes(y=votes, x=year))
a7 <- m + geom_point()
# The default summary isn't that useful
# Ribbon spanning the min to max votes, then one from median_hilow
a8 <- m + stat_summary(geom="ribbon", fun.ymin="min", fun.ymax="max")
a9 <- m + stat_summary(geom="ribbon", fun.data="median_hilow")
# Use qplot instead
# huron contains the NA levels assigned above at this point
a10 <- qplot(year, level, data=huron, geom=c("area", "line"))
grid.arrange(a1,a2,a3,a4,ncol=2)
grid.arrange(a5,a7,a8,ncol=1)
grid.arrange(a9,a10,ncol=1)
Warning: Removed 9 rows containing missing values (position_stack).
##^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
data(mtcars)
# Default smoother on quarter-mile time vs weight, without and with points
c <- ggplot(mtcars, aes(qsec, wt))
a1 <- c + stat_smooth()
a2 <- c + stat_smooth() + geom_point()
# The column-count argument is ncol. The misspelt "nol" was not recognised
# as a layout option and the call failed with "input must be grobs!".
grid.arrange(a1, a2, ncol = 2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method. geom_smooth:
method="auto" and size of largest group is <1000, so using loess. Use
'method = x' to change the smoothing method.
# Adjust parameters
# Each panel adds the point layer exactly once. a2, a3 and a4 previously
# added geom_point() twice, drawing every point a second time on top of
# itself.
a1 <- c + stat_smooth(se = FALSE) + geom_point() +
  ggtitle("stat_smooth(se = FALSE)")
a1
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.
a2 <- c + stat_smooth(span = 0.9) + geom_point() +
  ggtitle("stat_smooth(span = 0.9)")
a2
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.
a3 <- c + stat_smooth(level = 0.99) + geom_point() +
  ggtitle("stat_smooth(level = 0.99) ")
a3
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.
a4 <- c + stat_smooth(method = "lm") + geom_point() +
  ggtitle("stat_smooth(method = lm)")
a4
grid.arrange(a1, a2, a3, a4, ncol = 2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method. geom_smooth:
method="auto" and size of largest group is <1000, so using loess. Use
'method = x' to change the smoothing method. geom_smooth: method="auto"
and size of largest group is <1000, so using loess. Use 'method = x' to
change the smoothing method.
library(splines)
library(MASS)
Warning: package 'MASS' was built under R version 3.0.1
# Natural-spline fits (ns) with lm and with the robust rlm fitter
a1 <- c + stat_smooth(method = "lm", formula = y ~ ns(x, 3)) +
  geom_point()
a2 <- c + stat_smooth(method = rlm, formula = y ~ ns(x, 3)) + geom_point()
# The default confidence band uses a transparent colour.
# This currently only works on a limited number of graphics devices
# (including Quartz, PDF, and Cairo) so you may need to set the
# fill colour to an opaque colour, as shown below
a3 <- c + stat_smooth(fill = "grey50", size = 2, alpha = 1)
a4 <- c + stat_smooth(fill = "blue", size = 2, alpha = 1)
# The colour of the line can be controlled with the colour aesthetic
a5 <- c + stat_smooth(fill = "blue", colour = "darkblue", size = 2)
a6 <- c + stat_smooth(fill = "blue", colour = "darkblue", size = 2,
                      alpha = 0.2)
a7 <- c + geom_point() +
  stat_smooth(fill = "blue", colour = "darkblue", size = 2, alpha = 0.2)
# The stray empty argument ("a4,,ncol") made this call fail with
# "argument is missing, with no default"; it has been removed.
grid.arrange(a1, a2, a3, a4, ncol = 2)
grid.arrange(a5,a6,a7,ncol=1)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method. geom_smooth:
method="auto" and size of largest group is <1000, so using loess. Use
'method = x' to change the smoothing method. geom_smooth: method="auto"
and size of largest group is <1000, so using loess. Use 'method = x' to
change the smoothing method.
#
#Smoothers for subsets
c <- ggplot(mtcars, aes(y=wt, x=mpg)) + facet_grid(. ~ cyl)
a1<- c + stat_smooth(method=lm) + geom_point()
a2 <- c + stat_smooth(method=lm, fullrange = TRUE) + geom_point()
# Geoms and stats are automatically split by aesthetics that are factors
c <- ggplot(mtcars, aes(y=wt, x=mpg, colour=factor(cyl)))
a3 <- c + stat_smooth(method=lm) + geom_point()
a4 <- c + stat_smooth(method=lm, aes(fill = factor(cyl))) + geom_point()
a5 <- c + stat_smooth(method=lm, fullrange=TRUE, alpha = 0.1) +
geom_point()
grid.arrange(a1,a2,ncol=1)
grid.arrange(a3,a4,a5,ncol=1)
# Use qplot instead
a1 <- qplot(qsec, wt, data=mtcars, geom=c("smooth", "point"))
# Example with logistic regression
data("kyphosis", package="rpart")
a2 <- qplot(Age, Kyphosis, data=kyphosis)
a3 <- qplot(Age, data=kyphosis, facets = . ~ Kyphosis, binwidth = 10)
a4 <- qplot(Age, Kyphosis, data=kyphosis, position="jitter")
a5 <- qplot(Age, Kyphosis, data=kyphosis,
position=position_jitter(height=0.1))
a6 <- qplot(Age, as.numeric(Kyphosis) - 1, data = kyphosis) +
stat_smooth(method="glm", family="binomial")
a7 <- qplot(Age, as.numeric(Kyphosis) - 1, data=kyphosis) +
stat_smooth(method="glm", family="binomial", formula = y ~ ns(x, 2))
grid.arrange(a1,a2,a3,a4,ncol=2)
geom_smooth: method="auto" and size of largest group is <1000, so using
loess. Use 'method = x' to change the smoothing method.
grid.arrange(a5,a6,a7,ncol=1)
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NANANA
[1] NA
p <- ggplot(mtcars, aes(factor(cyl), mpg))
a1 <- p + geom_boxplot()
a2 <- qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot")
a3 <- p + geom_boxplot() + geom_jitter()
a4 <- p + geom_boxplot() + coord_flip()
a5 <- qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot") +
coord_flip()
a6 <- p + geom_boxplot(notch = TRUE)
a7 <- p + geom_boxplot(notch = TRUE, notchwidth = .3)
a8 <- p + geom_boxplot(outlier.colour = "green", outlier.size = 3)
grid.arrange(a1,a2,a3,a4,a5,a6,a7,a8,ncol=3)
notch went outside hinges. Try setting notch=FALSE. notch went outside
hinges. Try setting notch=FALSE. notch went outside hinges. Try setting
notch=FALSE. notch went outside hinges. Try setting notch=FALSE.
# Add aesthetic mappings
# Note that boxplots are automatically dodged when any aesthetic is
# a factor
a1 <- p + geom_boxplot(aes(fill = cyl)) + ggtitle("fill = cyl")
a2 <- p + geom_boxplot(aes(fill = factor(cyl))) + ggtitle("fill = factor(cyl)")
a3 <- p + geom_boxplot(aes(fill = factor(vs))) + ggtitle("fill = factor(vs)")
a4 <- p + geom_boxplot(aes(fill = factor(am))) + ggtitle("fill = factor(am)")
grid.arrange(a1,a2,a3,a4,ncol=2)
# Set aesthetics to fixed value
a1 <- p + geom_boxplot(fill = "grey80", colour = "#3366FF") +
ggtitle("fill = grey80")
a2 <- qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot",
colour = I("#3366FF")) + ggtitle("boxplot")
grid.arrange(a1,a2,ncol=1)
# Scales vs. coordinate transforms -------
# Scale transformations occur before the boxplot statistics are computed.
# Coordinate transformations occur afterwards. Observe the effect on the
# number of outliers.
library(plyr) # to access round_any
m <- ggplot(movies, aes(y = votes, x = rating,
group = round_any(rating, 0.5)))
a1 <- m + geom_boxplot() + ggtitle("boxplot")
a2 <- m + geom_boxplot() + scale_y_log10() + ggtitle("scale_y_log10")
a3 <- m + geom_boxplot() + coord_trans(y = "log10") + ggtitle("log10")
a4 <- m + geom_boxplot() + scale_y_log10() + coord_trans(y = "log10") +
ggtitle("scale_y_log10,log10")
grid.arrange(a1,a2,a3,a4,ncol=1)
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
# Boxplots with continuous x:
# Use the group aesthetic to group observations in boxplots
a1 <- qplot(year, budget, data = movies, geom = "boxplot")
a2 <- qplot(year, budget, data = movies, geom = "boxplot",
group = round_any(year, 10, floor))
grid.arrange(a1,a2,ncol=2)
Warning: Removed 53573 rows containing non-finite values (stat_boxplot).
Warning: Removed 53573 rows containing non-finite values (stat_boxplot).
Warning: position_dodge requires constant width: output may be incorrect
# Using precomputed statistics
# generate sample data
abc <- adply(matrix(rnorm(100), ncol = 5), 2,
quantile, c(0, .25, .5, .75, 1))
abc
X1 0% 25% 50% 75% 100%
1 1 -2.498 -0.5968 0.0642 0.7384 1.323
2 2 -1.975 -0.5898 0.2200 0.7591 1.548
3 3 -2.305 -0.9384 -0.1083 0.7961 2.258
4 4 -1.383 -0.5142 0.3853 0.7614 0.967
5 5 -1.339 -0.8860 -0.2558 0.7127 1.384
b <- ggplot(abc, aes(x = X1, ymin = `0%`, lower = `25%`, middle = `50%`, upper = `75%`, ymax = `100%`))
b + geom_boxplot(stat = "identity")
b + geom_boxplot(stat = "identity") + coord_flip()
b + geom_boxplot(aes(fill = X1), stat = "identity")
NANANA
[1] NA
a1 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot() +
ggtitle("geom_dotplot")
a2 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot(binwidth = 1.5) +
ggtitle("binwidth = 1.5")
# Use fixed-width bins
a3 <- ggplot(mtcars, aes(x = mpg)) +
geom_dotplot(method="histodot", binwidth = 1.5) +
ggtitle("histodot,binwidth = 1.5")
# Some other stacking methods
a4 <- ggplot(mtcars, aes(x = mpg)) +
geom_dotplot(binwidth = 1.5, stackdir = "center") +
ggtitle("binwidth = 1.5,center")
a5 <- ggplot(mtcars, aes(x = mpg)) +
geom_dotplot(binwidth = 1.5, stackdir = "centerwhole") +
ggtitle("binwidth = 1.5,centerwhole")
grid.arrange(a1,a2,a3,a4,a5,ncol=2)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
# y axis isn't really meaningful, so hide it.
# breaks = NULL removes the breaks; breaks = NA is deprecated for this.
a1 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot(binwidth = 1.5) +
  scale_y_continuous(name = "", breaks = NULL)
#Overlap dots vertically
a2 <- ggplot(mtcars, aes(x = mpg)) + geom_dotplot(binwidth = 1.5,
stackratio = .7)
# Expand dot diameter
a3 <- ggplot(mtcars, aes(x =mpg)) + geom_dotplot(binwidth = 1.5,
dotsize = 1.25)
grid.arrange(a1,a2,a3,ncol=1)
Warning: breaks = NA is deprecated. Please use breaks = NULL to remove
breaks in the scale. (Deprecated; last used in version 0.8.9)
# Examples with stacking along y axis instead of x
a1 <- ggplot(mtcars, aes(x = 1, y = mpg)) +
geom_dotplot(binaxis = "y", stackdir = "center")
a2 <- ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
geom_dotplot(binaxis = "y", stackdir = "center")
a3 <- ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
geom_dotplot(binaxis = "y", stackdir = "centerwhole")
a4 <- ggplot(mtcars, aes(x = factor(vs), fill = factor(cyl), y = mpg)) +
geom_dotplot(binaxis = "y", stackdir = "center", position = "dodge")
# binpositions="all" ensures that the bins are aligned between groups
# The dotplot layer must follow the trailing `+`; without it the next
# statement (grid.arrange) was parsed as the right-hand side and a5 ended
# up with no layers.
a5 <- ggplot(mtcars, aes(x = factor(am), y = mpg)) +
  geom_dotplot(binaxis = "y", stackdir = "center", binpositions = "all")
grid.arrange(a1, a2, a3, ncol = 1)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to
adjust this. stat_bindot: binwidth defaulted to range/30. Use 'binwidth =
x' to adjust this.
grid.arrange(a4,a5,ncol=1)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Error: No layers in plot
# Stacking multiple groups, with different fill
a1 <- ggplot(mtcars, aes(x = mpg, fill = factor(cyl))) +
geom_dotplot(stackgroups = TRUE, binwidth = 1, binpositions = "all")
a2 <- ggplot(mtcars, aes(x = mpg, fill = factor(cyl))) +
geom_dotplot(stackgroups = TRUE, binwidth = 1,
method = "histodot")
# Dots binned along y, centred, with bin positions shared across groups.
# `m_dotplot` was a truncated `geom_dotplot`. Binning on the y axis needs a
# y aesthetic, so mpg is mapped to y here.
# NOTE(review): the intended aesthetics for this panel are a guess - confirm.
a3 <- ggplot(mtcars, aes(x = 1, y = mpg, fill = factor(cyl))) +
  geom_dotplot(binaxis = "y", stackdir = "center", binpositions = "all")
a4 <- ggplot(mtcars, aes(x = 1, y = mpg, fill = factor(cyl))) +
geom_dotplot(binaxis = "y", stackgroups = TRUE, binwidth = 1,
method = "histodot")
grid.arrange(a1,a2,a3,a4,ncol=1)
stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
# Violin plot ###
p <- ggplot(mtcars, aes(factor(cyl), mpg))
a1 <- p + geom_violin() + ggtitle("violin")
a2 <- qplot(factor(cyl), mpg, data = mtcars, geom = "violin")
a3 <- p + geom_violin() + geom_jitter(height = 0) +
ggtitle("violin,height = 0")
a4 <- p + geom_violin() + coord_flip() + ggtitle("violin,coord_flip()")
a5 <- qplot(factor(cyl), mpg, data = mtcars, geom = "violin") +
coord_flip()
# Scale maximum width proportional to sample size:
a6 <- p + geom_violin(scale = "count") + ggtitle("violin,count")
grid.arrange(a1,a2,a3,ncol=1)
grid.arrange(a4,a5,a6,ncol=1)
# Scale maximum width to 1 for all violins:
a1 <- p + geom_violin(scale = "width")
# Default is to trim violins to the range of the data. To disable:
a2 <- p + geom_violin(trim = FALSE)
# Use a smaller bandwidth for closer density fit (default is 1).
a3 <- p + geom_violin(adjust = .5)
grid.arrange(a1,a2,a3,ncol=2)
# Add aesthetic mappings
# Note that violins are automatically dodged when any aesthetic is
# a factor
a1 <- p + geom_violin(aes(fill = cyl))
a2 <- p + geom_violin(aes(fill = factor(cyl)))
a3 <- p + geom_violin(aes(fill = factor(vs)))
a4 <- p + geom_violin(aes(fill = factor(am)))
# Set aesthetics to fixed value
a5 <- p + geom_violin(fill = "grey80", colour = "#3366FF")
a6 <- qplot(factor(cyl), mpg, data = mtcars, geom = "violin",
colour = I("#3366FF"))
grid.arrange(a1,a2,a3,a4,a5,a6,ncol=2)
# Scales vs. coordinate transforms -------
# Scale transformations occur before the density statistics are computed.
# Coordinate transformations occur afterwards. Observe the effect on the
# number of outliers.
library(plyr) # to access round_any
m <- ggplot(movies, aes(y = votes, x = rating,
group = round_any(rating, 0.5)))
a1 <- m + geom_violin()
a2 <- m + geom_violin() + scale_y_log10()
a3 <- m + geom_violin() + coord_trans(y = "log10")
a4 <- m + geom_violin() + scale_y_log10() + coord_trans(y = "log10")
# Violin plots with continuous x:
# Use the group aesthetic to group observations in violins
a5 <- qplot(year, budget, data = movies, geom = "violin")
a6 <- qplot(year, budget, data = movies, geom = "violin",
group = round_any(year, 10, floor))
grid.arrange(a1,a2,a3,a4,a5,a6,ncol=2)
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: position_dodge requires constant width: output may be incorrect
Warning: Removed 53573 rows containing non-finite values (stat_ydensity).
Warning: Removed 53573 rows containing non-finite values (stat_ydensity).
Warning: position_dodge requires constant width: output may be incorrect
NANANA
[1] NA
#Generate data
c <- ggplot(mtcars, aes(factor(cyl)))
# By default, uses stat="bin", which gives the count in each category
a1 <- c + geom_bar() + ggtitle("bar")
a2 <- c + geom_bar(width=.5) + ggtitle("bar,width=.5")
a3 <- c + geom_bar() + coord_flip() + ggtitle("bar,coord_flip()")
a4 <- c + geom_bar(fill="white", colour="darkgreen") +
ggtitle("bar,fill=white")
grid.arrange(a1,a2,a3,a4,ncol=2)
# Use qplot
a1 <- qplot(factor(cyl), data=mtcars, geom="bar")
a2 <- qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(cyl))
grid.arrange(a1,a2,ncol=1)
# When the data contains y values in a column, use stat="identity"
library(plyr)
# Calculate the mean mpg for each level of cyl
mm <- ddply(mtcars, "cyl", summarise, mmpg = mean(mpg))
a1 <- ggplot(mm, aes(x = factor(cyl), y = mmpg)) +
geom_bar(stat = "identity")
# Stacked bar charts
a2 <- qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(vs))
a3 <- qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(gear))
grid.arrange(a1,a2,a3,ncol=1)
# Stacked bar charts are easy in ggplot2, but not effective visually,
# particularly when there are many different things being stacked
a1 <- ggplot(diamonds, aes(clarity, fill=cut)) + geom_bar()
a2 <- ggplot(diamonds, aes(color, fill=cut)) + geom_bar() +
coord_flip()
# Faceting is a good alternative:
a3 <- ggplot(diamonds, aes(clarity)) + geom_bar() +
facet_wrap(~ cut)
# If the x axis is ordered, using a line instead of bars is another
# possibility:
a4 <- ggplot(diamonds, aes(clarity)) +
geom_freqpoly(aes(group = cut, colour = cut))
# Dodged bar charts
a5 <- ggplot(diamonds, aes(clarity, fill=cut)) +
geom_bar(position="dodge")
# compare with
a6 <- ggplot(diamonds, aes(cut, fill=cut)) + geom_bar() +
facet_grid(. ~ clarity)
grid.arrange(a1,a2,a3,a4,a5,a6,ncol=2)
# But again, probably better to use frequency polygons instead:
a1 <- ggplot(diamonds, aes(clarity, colour=cut)) +
geom_freqpoly(aes(group = cut))
# Often we don't want the height of the bar to represent the
# count of observations, but the sum of some other variable.
# For example, the following plot shows the number of diamonds
# of each colour
a2 <- qplot(color, data=diamonds, geom="bar")
# If, however, we want to see the total number of carats in each colour
# we need to weight by the carat variable
a3 <- qplot(color, data=diamonds, geom="bar", weight=carat,
ylab="carat")
grid.arrange(a1,a2,a3,ncol=1)
# A bar chart used to display means
meanprice <- tapply(diamonds$price, diamonds$cut, mean)
cut <- factor(levels(diamonds$cut), levels = levels(diamonds$cut))
a1 <- qplot(cut, meanprice)
a2 <- qplot(cut, meanprice, geom="bar", stat="identity")
a3 <- qplot(cut, meanprice, geom="bar", stat="identity",
fill = I("grey50"))
grid.arrange(a1,a2,a3,ncol=1)
# Another stacked bar chart example
k <- ggplot(mpg, aes(manufacturer, fill=class))
a1 <- k + geom_bar()
# Use scales to change aesthetics defaults
a2 <- k + geom_bar() + scale_fill_brewer()
a3 <- k + geom_bar() + scale_fill_grey()
grid.arrange(a1,a2,a3,ncol=1)
# To change plot order of class variable
# use factor() to change order of levels
mpg$class <- factor(mpg$class, levels = c("midsize", "minivan",
"suv", "compact", "2seater", "subcompact", "pickup"))
m <- ggplot(mpg, aes(manufacturer, fill=class))
m + geom_bar()
## An interval represented by a vertical line.###
# Generate data: means and standard errors of means for prices
# for each type of cut
dmod <- lm(price ~ cut, data=diamonds)
cuts <- data.frame(cut=unique(diamonds$cut),
predict(dmod,
data.frame(cut = unique(diamonds$cut)),
se=TRUE)[c("fit","se.fit")])
cuts
cut fit se.fit
1 Ideal 3458 27.00
2 Premium 4584 33.75
3 Good 3929 56.59
4 Very Good 3982 36.06
5 Fair 4359 98.79
# Point estimates only
a1 <- qplot(cut, fit, data = cuts)
# With a bar chart, we are comparing lengths, so the y-axis is
# automatically extended to include 0.
# `fit` already holds the bar heights, so use stat = "identity"; mapping y
# while leaving the default stat = "bin" is deprecated.
a2 <- qplot(cut, fit, data = cuts, geom = "bar", stat = "identity")
# Display estimates and standard errors in various ways
se <- ggplot(cuts, aes(cut, fit,
  ymin = fit - se.fit, ymax = fit + se.fit, colour = cut))
a3 <- se + geom_linerange()
a4 <- se + geom_pointrange()
a5 <- se + geom_errorbar(width = 0.5)
# Own name for the crossbar plot so it no longer overwrites the errorbar plot
a6 <- se + geom_crossbar(width = 0.5)
# Use coord_flip to flip the x and y axes
a7 <- se + geom_linerange() + coord_flip()
grid.arrange(a1, a2, a3, a4, a5, a6, a7, ncol = 2)
Mapping a variable to y and also using stat="bin". With stat="bin", it
will attempt to set the y value to the count of cases in each group. This
can result in unexpected behavior and will not be allowed in a future
version of ggplot2. If you want y to represent counts of cases, use
stat="bin" and don't map a variable to y. If you want y to represent
values in the data, use stat="identity". See ?geom_bar for examples.
(Deprecated; last used in version 0.9.2)
## histogram ##
set.seed(5689)
movies <- movies[sample(nrow(movies), 1000), ]
# Simple examples
a1 <- qplot(rating, data=movies, geom="histogram") +
ggtitle("histogram")
a2 <- qplot(rating, data=movies, weight=votes, geom="histogram") +
ggtitle("histogram,weight=votes")
a3 <- qplot(rating, data=movies, weight=votes, geom="histogram",
binwidth=1) + ggtitle("histogram,binwidth=1")
a4 <- qplot(rating, data=movies, weight=votes, geom="histogram",
binwidth=0.1) + ggtitle("histogram,binwidth=0.1")
grid.arrange(a1,a2,a3,a4,ncol=2)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
Warning: position_stack requires constant width: output may be incorrect
# More complex
m <- ggplot(movies, aes(x=rating))
a1 <- m + geom_histogram()
a2 <- m + geom_histogram(aes(y = ..density..)) + geom_density()####
a3 <- m + geom_histogram(binwidth = 1)
a4 <- m + geom_histogram(binwidth = 0.5)
a5 <- m + geom_histogram(binwidth = 0.1)
# Add aesthetic mappings
a6 <- m + geom_histogram(aes(weight = votes))
a7 <- m + geom_histogram(aes(y = ..count..))
a8 <- m + geom_histogram(aes(fill = ..count..))
# Change scales
a9 <- m + geom_histogram(aes(fill = ..count..)) +
scale_fill_gradient("Count", low = "green", high = "red")
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
grid.arrange(a4,a5,a6,ncol=1)
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
grid.arrange(a7,a8,a9,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
# Often we don't want the height of the bar to represent the
# count of observations, but the sum of some other variable.
# For example, the following plot shows the number of movies
# in each rating.
a1 <- qplot(rating, data=movies, geom="bar", binwidth = 0.1)
# If, however, we want to see the number of votes cast in each
# category, we need to weight by the votes variable
a2 <- qplot(rating, data=movies, geom="bar", binwidth = 0.1,
weight=votes, ylab = "votes")
grid.arrange(a1,a2,ncol=1)
Warning: position_stack requires constant width: output may be incorrect
Warning: position_stack requires constant width: output may be incorrect
m <- ggplot(movies, aes(x = votes))
# For transformed scales, binwidth applies to the transformed data.
# The bins have constant width on the transformed scale.
a1 <- m + geom_histogram() + scale_x_log10()
a2 <- m + geom_histogram(binwidth = 1) + scale_x_log10()
a3 <- m + geom_histogram() + scale_x_sqrt()
a4 <- m + geom_histogram(binwidth = 10) + scale_x_sqrt()
grid.arrange(a1,a2,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
grid.arrange(a3,a4,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
# For transformed coordinate systems, the binwidth applies to the
# raw data. The bins have constant width on the original scale.
# Using log scales does not work here, because the first
# bar is anchored at zero, and so when transformed becomes negative
# infinity. This is not a problem when transforming the scales, because
# no observations have 0 ratings.
a1 <- m + geom_histogram(origin = 0) + coord_trans(x = "log10")
# Use origin = 0, to make sure we don't take sqrt of negative values
a2 <- m + geom_histogram(origin = 0) + coord_trans(x = "sqrt")
a3 <- m + geom_histogram(origin = 0, binwidth = 1000) +
coord_trans(x = "sqrt")
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to
adjust this.
# You can also transform the y axis. Remember that the base of the bars
# has value 0, so log transformations are not appropriate
m <- ggplot(movies, aes(x = rating))
a1 <- m + geom_histogram(binwidth = 0.5) + scale_y_sqrt()
a2 <- m + geom_histogram(binwidth = 0.5) + scale_y_reverse()
# Set aesthetics to fixed value
a3 <- m + geom_histogram(colour = "darkgreen", fill = "white",
binwidth = 0.5)
grid.arrange(a1,a2,a3,ncol=1)
Warning: Stacking not well defined when ymin != 0
#Use facets
a1 <- m <- m + geom_histogram(binwidth = 0.5)
a2 <- m + facet_grid(Action ~ Comedy)
grid.arrange(a1,a2,ncol=1)
# Often more useful to use density on the y axis when facetting
a1 <- m <- m + aes(y = ..density..)
a2 <- m + facet_grid(Action ~ Comedy)
a3 <- m + facet_wrap(~ mpaa)
grid.arrange(a1,a2,a3,ncol=1)
# Multiple histograms on the same graph
# see ?position, ?position_fill, etc for more details.
set.seed(6298)
diamonds_small <- diamonds[sample(nrow(diamonds), 1000), ]
ggplot(diamonds_small, aes(x=price)) + geom_bar()
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
hist_cut <- ggplot(diamonds_small, aes(x=price, fill=cut))
a1 <- hist_cut + geom_bar() # defaults to stacking
a2 <- hist_cut + geom_bar(position="fill")
a3 <- hist_cut + geom_bar(position="dodge")
# This is easy in ggplot2, but not visually effective. It's better
# to use a frequency polygon or density plot. Like this:
a4 <- ggplot(diamonds_small, aes(price, ..density.., colour = cut)) +
geom_freqpoly(binwidth = 1000)
# Or this:
a5 <- ggplot(diamonds_small, aes(price, colour = cut)) +
geom_density()
# Or if you want to be fancy, maybe even this:
a6 <- ggplot(diamonds_small, aes(price, fill = cut)) +
geom_density(alpha = 0.2)
# Which looks better when the distributions are more distinct
a7 <- ggplot(diamonds_small, aes(depth, fill = cut)) +
geom_density(alpha = 0.2) + xlim(55, 70)
grid.arrange(a1,a2,a3,a4,ncol=2)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this. stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to
adjust this. stat_bin: binwidth defaulted to range/30. Use 'binwidth = x'
to adjust this.
grid.arrange(a5,a6,a7,ncol=1)
Warning: Removed 2 rows containing non-finite values (stat_density).
NANANA
[1] NA
a1 <- qplot(carat, data = diamonds, geom = "freqpoly")
a2 <- qplot(carat, data = diamonds, geom = "freqpoly",
binwidth = 0.1)
a3 <- qplot(carat, data = diamonds, geom = "freqpoly",
binwidth = 0.01)
a4 <- qplot(price, data = diamonds, geom = "freqpoly",
binwidth = 1000)
a5 <- qplot(price, data = diamonds, geom = "freqpoly",
binwidth = 1000,colour = color)
a6 <- qplot(price, ..density.., data = diamonds,
geom = "freqpoly",
binwidth = 1000, colour = color)
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
grid.arrange(a4,a5,a6,ncol=1)
NANANA
[1] NA
m <- ggplot(movies, aes(x = rating))
a1 <- m + geom_density()
# Adjust parameters
a2 <- m + geom_density(kernel = "rectangular")
a3 <- m + geom_density(kernel = "biweight")
a4 <- m + geom_density(kernel = "epanechnikov")
a5 <- m + geom_density(adjust=1/5) # Very rough
a6 <- m + geom_density(adjust=5) # Very smooth
grid.arrange(a1,a2,a3,ncol=1)
grid.arrange(a4,a5,a6,ncol=1)
# Adjust aesthetics
a1 <- m + geom_density(aes(fill=factor(Drama)), size=2)
# Scale so peaks have same height:
a2 <- m + geom_density(aes(fill=factor(Drama), y = ..scaled..), size=2)
a3 <- m + geom_density(colour="darkgreen", size=2)
a4 <- m + geom_density(colour="darkgreen", size=2, fill=NA)
a5 <- m + geom_density(colour="darkgreen", size=2, fill="green")
grid.arrange(a1,a2,a3,ncol=1)
grid.arrange(a4,a5,ncol=1)
# Change scales
(m <- ggplot(movies, aes(x=votes)) + geom_density(trim = TRUE))
m + scale_x_log10()
m + coord_trans(x="log10")
m + scale_x_log10() + coord_trans(x="log10")
# Also useful with
m + stat_bin()
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
# Make a volcano plot
ggplot(diamonds, aes(x = price)) +
stat_density(aes(ymax = ..density.., ymin = -..density..),
fill = "grey50", colour = "grey50",
geom = "ribbon", position = "identity") +
facet_grid(. ~ cut) + coord_flip()
# Stacked density plots
# If you want to create a stacked density plot, you need to use
# the 'count' (density * n) variable instead of the default density
# Loses marginal densities
a1 <- qplot(rating, ..density.., data=movies, geom="density",
fill=mpaa, position="stack")
# Preserves marginal densities
a2 <- qplot(rating, ..count.., data=movies, geom="density", fill=mpaa,
position="stack")
# You can use position="fill" to produce a conditional density estimate
a3 <- qplot(rating, ..count.., data=movies, geom="density", fill=mpaa,
position="fill")
grid.arrange(a1,a2,a3,ncol=1)
# Need to be careful with weighted data
m <- ggplot(movies, aes(x=rating, weight=votes))
a1 <- m + geom_histogram(aes(y = ..count..)) +
geom_density(fill=NA)
m <- ggplot(movies, aes(x=rating, weight=votes/sum(votes)))
a3 <- m + geom_histogram(aes(y=..density..)) +
geom_density(fill=NA, colour="black")
grid.arrange(a1,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: sum(weights) != 1 -- will not get true density Warning:
position_stack requires constant width: output may be incorrect
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
library(plyr) # to access round_any
movies$decade <- round_any(movies$year, 10)
m <- ggplot(movies, aes(x=rating, colour=decade, group=decade))
a1 <- m + geom_density(fill=NA)
a2 <- m + geom_density(fill=NA) + aes(y = ..count..)
#a2 <- m + geom_density(fill=NA) + aes(y = ..count..) + colour(group)
grid.arrange(a1,a2,ncol=2)
# Use qplot instead
a1 <- qplot(length, data=movies, geom="density", weight=rating)
a2 <- qplot(length, data=movies, geom="density",
weight=rating/sum(rating))
grid.arrange(a1,a2,ncol=1)
Warning: sum(weights) != 1 -- will not get true density
NANANA
[1] NA
a1 <- qplot(carat, data = diamonds, geom = "freqpoly")
a2 <- qplot(carat, data = diamonds, geom = "freqpoly",
binwidth = 0.1)
a3 <- qplot(carat, data = diamonds, geom = "freqpoly",
binwidth = 0.01)
a4 <- qplot(price, data = diamonds, geom = "freqpoly",
binwidth = 1000)
a5 <- qplot(price, data = diamonds, geom = "freqpoly",
binwidth = 1000,
colour = color)
a6 <- qplot(price, ..density.., data = diamonds, geom = "freqpoly",
binwidth = 1000, colour = color)
grid.arrange(a1,a2,a3,ncol=1)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
grid.arrange(a4,a5,a6,ncol=1)
NANANA
[1] NA
# Generate data
library(reshape2) # for melt
volcano3d <- melt(volcano)
names(volcano3d) <- c("x", "y", "z")
# Basic plot
v <- ggplot(volcano3d, aes(x, y, z = z))
a1 <- v + stat_contour()
# Setting bins creates evenly spaced contours in the range of the data
a2 <- v + stat_contour(bins = 2)
a3 <- v + stat_contour(bins = 10)
# Setting binwidth does the same thing, parameterised by the distance
# between contours
a5 <- v + stat_contour(binwidth = 2)
a6 <- v + stat_contour(binwidth = 5)
a7 <- v + stat_contour(binwidth = 10)
# Fine grey contours overlaid with coarser, heavier ones.
# (Was written with `>-`, which is a comparison against a negated value,
# not an assignment, and failed with "invalid argument to unary operator".)
a8 <- v + stat_contour(binwidth = 2, size = 0.5, colour = "grey50") +
  stat_contour(binwidth = 10, size = 1)
# No a4 is defined in this contour section, so only a1-a3 are arranged here;
# referencing a4 would pull in a stale plot from an earlier section.
grid.arrange(a1, a2, a3, ncol = 2)
grid.arrange(a5, a6, a7, a8, ncol = 2)
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.
Warning: position_stack requires constant width: output may be incorrect
# Add aesthetic mappings
a1 <- v + stat_contour(aes(size = ..level..))
a2 <- v + stat_contour(aes(colour = ..level..))
# Change scale
a3 <- v + stat_contour(aes(colour = ..level..), size = 2) +
scale_colour_gradient(low = "brown", high = "white")
# Set aesthetics to fixed value
a4 <- v + stat_contour(colour = "red")
a5 <- v + stat_contour(size = 2, linetype = 4)
# Try different geoms
a6 <- v + stat_contour(geom="polygon", aes(fill=..level..))
a7 <- v + geom_tile(aes(fill = z)) + stat_contour()
grid.arrange(a1,a2,a3,a4,ncol=2)
grid.arrange(a5,a6,a7,ncol=1)
# Use qplot instead
a1 <- qplot(x, y, z = z, data = volcano3d, geom = "contour")
a2 <-qplot(x, y, z = z, data = volcano3d, stat = "contour",
geom = "path")
grid.arrange(a1,a2,ncol=1)
## 2d density estimation ##
library("MASS")
# Unnamed arguments to data() are data set names, so "MASS" was looked up as
# a data set. The package has to be passed by name.
data(geyser, package = "MASS")
head(geyser,3)
waiting duration
1 80 4.017
2 71 2.150
3 57 4.000
#Warning message:
NANANA
[1] NA
a1 <- m <- ggplot(geyser, aes(x = duration, y = waiting)) +
geom_point() + xlim(0.5, 6) + ylim(40, 110)
a2 <- m + geom_density2d()
dens <- kde2d(geyser$duration, geyser$waiting, n = 50,
lims = c(0.5, 6, 40, 110))
densdf <- data.frame(expand.grid(duration = dens$x,
waiting = dens$y),
z = as.vector(dens$z))
a3 <- m + geom_contour(aes(z=z), data=densdf)
a4 <- m + geom_density2d() + scale_y_log10()
Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.
a5 <- m + geom_density2d() + coord_trans(y="log10")
a6 <- m + stat_density2d(aes(fill = ..level..), geom="polygon")
a7 <- qplot(duration, waiting, data=geyser, geom=c("point","density2d")) +
xlim(0.5, 6) + ylim(40, 110)
grid.arrange(a1,a2,ncol=1)
grid.arrange(a3,a4,a5,a6,a7,ncol=2)
# If you map an aesthetic to a categorical variable, you will get a
# set of contours for each value of that variable
set.seed(4393)
dsmall <- diamonds[sample(nrow(diamonds), 1000), ]
a1 <- qplot(x, y, data = dsmall, geom = "density2d", colour = cut)
a2 <- qplot(x, y, data = dsmall, geom = "density2d",
linetype = cut)
grid.arrange(a1,a2,ncol=1)
a3 <- qplot(carat, price, data = dsmall, geom = "density2d",
colour = cut)
d <- ggplot(dsmall, aes(carat, price)) + xlim(1,3)
a5 <- d + geom_point() + geom_density2d()
# If we turn contouring off, we can use geoms like tiles:
a6 <- d + stat_density2d(geom = "tile", aes(fill = ..density..),
  contour = FALSE)
# Build on a6 explicitly instead of last_plot(), so the result does not
# depend on which plot was most recently created or printed.
a7 <- a6 + scale_fill_gradient(limits = c(1e-5, 8e-4))
# Or points:
a8 <- d + stat_density2d(geom="point", aes(size = ..density..),
contour = FALSE)
grid.arrange(a3,a5,a6,a7,a8,ncol=2)
Warning: Removed 684 rows containing non-finite values (stat_density2d).
Warning: Removed 684 rows containing missing values (geom_point). Warning:
Removed 684 rows containing non-finite values (stat_density2d). Warning:
Removed 684 rows containing non-finite values (stat_density2d). Warning:
Removed 684 rows containing non-finite values (stat_density2d).
NANANA
[1] NA
# Generate data
# pp(n, r): build an n-by-n grid covering [-r*pi, r*pi] in both x and y.
#   n  number of grid points per axis
#   r  half-width of the grid in multiples of pi (default 4)
# Returns a data frame with columns x, y, r (distance from the origin) and
# z = cos(r^2) * exp(-r / 6).
pp <- function(n, r = 4) {
  # length.out is spelled out in full; `len` relied on partial matching
  x <- seq(-r * pi, r * pi, length.out = n)
  df <- expand.grid(x = x, y = x)
  df$r <- sqrt(df$x^2 + df$y^2)
  df$z <- cos(df$r^2) * exp(-df$r / 6)
  df
}
p <- ggplot(pp(20), aes(x=x,y=y))
a1 <- p + geom_tile() #pretty useless!
# Add aesthetic mappings
a2 <- p + geom_tile(aes(fill=z))
# Change scale
a3 <- p + geom_tile(aes(fill=z)) +
scale_fill_gradient(low="green", high="red")
# Use qplot instead
a4 <- qplot(x, y, data=pp(20), geom="tile", fill=z)
a5 <- qplot(x, y, data=pp(100), geom="tile", fill=z)
# Missing values
p <- ggplot(pp(20)[sample(20*20, size=200),], aes(x=x,y=y,fill=z))
a6 <- p + geom_tile()
grid.arrange(a1,a2,a3,ncol=1)
grid.arrange(a4,a5,a6,ncol=1)
# Input that works with image
image(t(volcano)[ncol(volcano):1,])
library(reshape2) # for melt
a2 <- ggplot(melt(volcano), aes(x=Var1, y=Var2, fill=value)) +
geom_tile()
# inspired by the image-density plots of Ken Knoblauch
cars <- ggplot(mtcars, aes(y=factor(cyl), x=mpg))
a4 <- cars + geom_point()
a5 <- cars + stat_bin(aes(fill=..count..), geom="tile",
binwidth=3, position="identity")
a6 <- cars + stat_bin(aes(fill=..density..), geom="tile",
binwidth=3, position="identity")
a7 <- cars + stat_density(aes(fill=..density..), geom="tile",
position="identity")
a8 <- cars + stat_density(aes(fill=..count..), geom="tile",
position="identity")
grid.arrange(a2,a4,a5,a6,a7,a8,ncol=2)
Mapping a variable to y and also using stat="bin". With stat="bin", it
will attempt to set the y value to the count of cases in each group. This
can result in unexpected behavior and will not be allowed in a future
version of ggplot2. If you want y to represent counts of cases, use
stat="bin" and don't map a variable to y. If you want y to represent
values in the data, use stat="identity". See ?geom_bar for examples.
(Deprecated; last used in version 0.9.2) Mapping a variable to y and also
using stat="bin". With stat="bin", it will attempt to set the y value to
the count of cases in each group. This can result in unexpected behavior
and will not be allowed in a future version of ggplot2. If you want y to
represent counts of cases, use stat="bin" and don't map a variable to y.
If you want y to represent values in the data, use stat="identity". See
?geom_bar for examples. (Deprecated; last used in version 0.9.2)