# Setup and first look at the mpg data set.
# Uncomment the next line to install ggplot2 if it is not available yet.
# install.packages("ggplot2")
library(ggplot2)
# Open the help page for mpg (the recorded run started the help server).
?mpg
## starting httpd help server ... done
# Print the first six rows.
head(mpg)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p
## # ... with 1 more variables: class <chr>
# Compact structure: column names, types and leading values
# (234 observations of 11 variables in the recorded run).
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
# Per-column summary: quantiles and mean for numeric columns,
# length/class/mode for character columns.
summary(mpg)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
# Basic scatterplot: displ on the x-axis, hwy on the y-axis.
qplot(displ, hwy, data = mpg)
Additional variables can be displayed with aesthetics (such as shape, colour, or size) or with faceting (small multiples, each displaying a different subset of the data).
# Show a third variable by mapping class to the colour aesthetic
qplot(x = displ, y = hwy, data = mpg, colour = class)
Faceting draws small multiples, each displaying a different subset of the data. It is useful for exploring conditional relationships and for large data sets.
facet_grid(): 2d grid, rows ~ cols, with . for no split. facet_wrap(): 1d ribbon wrapped into 2d.
# One shared scatterplot; each statement below facets it differently
displ_hwy <- qplot(displ, hwy, data = mpg)

# Columns split by cyl ("." means the rows are not split)
displ_hwy + facet_grid(. ~ cyl)

# Rows split by drv ("." means the columns are not split)
displ_hwy + facet_grid(drv ~ .)

# Full grid: drv in rows, cyl in columns
displ_hwy + facet_grid(drv ~ cyl)

# One panel per class, wrapped into a 2d layout
displ_hwy + facet_wrap(~ class)
# Two integer columns plotted as points, then with jitter applied
qplot(x = cty, y = hwy, data = mpg)
qplot(x = cty, y = hwy, data = mpg, geom = "jitter")

# Categorical x-axis: first in its default order, then with the
# classes reordered by hwy via reorder()
qplot(x = class, y = hwy, data = mpg)
qplot(x = reorder(class, hwy), y = hwy, data = mpg)

# Same reordered axis drawn with different geoms
qplot(x = reorder(class, hwy), y = hwy, data = mpg, geom = "jitter")
qplot(x = reorder(class, hwy), y = hwy, data = mpg, geom = "boxplot")

# Both geoms layered in a single plot
qplot(
  x = reorder(class, hwy), y = hwy, data = mpg,
  geom = c("jitter", "boxplot")
)
The diamonds data set: ~54,000 round diamonds from http://www.diamondse.info/. Variables: carat, colour, clarity, cut; total depth, table, depth, width, height; price.
Histograms and bar charts are used to display the distribution of a variable. Categorical variable → bar chart. Continuous variable → histogram.
# With only one variable, qplot guesses that
# you want a bar chart or histogram
qplot(cut, data = diamonds)
# No binwidth given: the recorded run falls back to 30 bins and says so
qplot(carat, data = diamonds)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same histogram with progressively narrower bins
qplot(carat, data = diamonds, binwidth = 1)
qplot(carat, data = diamonds, binwidth = 0.1)
qplot(carat, data = diamonds, binwidth = 0.01)
# resolution() reports 0.01 for carat, the narrowest binwidth tried above
resolution(diamonds$carat)
## [1] 0.01
# Re-use the most recent plot (the binwidth = 0.01 histogram) and limit
# the x-axis to 0-3; order matters here because last_plot() depends on
# which plot was created last
last_plot() + xlim(0, 3)
## Warning: Removed 32 rows containing non-finite values (stat_bin).
# Histogram of table with bins one unit wide
qplot(x = table, data = diamonds, binwidth = 1)

# xlim() and ylim() zoom in on a region of the plot
qplot(x = table, data = diamonds, binwidth = 1) + xlim(50, 70)
## Warning: Removed 12 rows containing non-finite values (stat_bin).
qplot(x = table, data = diamonds, binwidth = 0.1) + xlim(50, 70)
## Warning: Removed 12 rows containing non-finite values (stat_bin).
qplot(x = table, data = diamonds, binwidth = 0.1) +
  xlim(50, 70) +
  ylim(0, 50)
## Warning: Removed 12 rows containing non-finite values (stat_bin).
## Warning: Removed 15 rows containing missing values (geom_bar).
# Caveat: zooming this way throws away the data that lies
# outside the plot region (hence the warnings above);
# coord_cartesian() is the alternative to look at
# Distribution of depth with bins 0.2 wide
qplot(x = depth, data = diamonds, binwidth = 0.2)

# Same histogram filled by cut, x-axis limited to 55-70
qplot(x = depth, data = diamonds, binwidth = 0.2, fill = cut) +
  xlim(55, 70)
## Warning: Removed 45 rows containing non-finite values (stat_bin).

# One panel per cut instead of a fill colour
qplot(x = depth, data = diamonds, binwidth = 0.2) +
  xlim(55, 70) +
  facet_wrap(~ cut)
## Warning: Removed 45 rows containing non-finite values (stat_bin).

# Price distribution, one panel per cut
qplot(x = price, data = diamonds, binwidth = 500) +
  facet_wrap(~ cut)
Each histogram is far away from the others, but we know stacking is hard to read → use another way of displaying densities. Varying relative abundance makes comparisons difficult → rescale to ensure constant area.
# Faceted panels sit far apart, which makes comparison hard
qplot(x = price, data = diamonds, binwidth = 500) + facet_wrap(~ cut)

# Stacked bars: the heights are hard to compare
qplot(x = price, data = diamonds, binwidth = 500, fill = cut)

# Frequency polygons read much better, but relative abundance
# still differs between the cuts
qplot(
  x = price, data = diamonds, binwidth = 500,
  geom = "freqpoly", colour = cut
)

# Put density on the y-axis in place of count;
# the surrounding .. marks a variable that isn't in the original data
qplot(
  x = price, y = ..density.., data = diamonds, binwidth = 500,
  geom = "freqpoly", colour = cut
)

# For a histogram the geom has to be named explicitly
qplot(
  x = price, y = ..density.., data = diamonds, binwidth = 500,
  geom = "histogram"
) + facet_wrap(~ cut)
# Additional geoms can be added in two ways
# 1) Pass several geom names as one vector:
qplot(x = price, y = carat, data = diamonds, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'gam'
# 2) Append the extra geom with +
qplot(x = price, y = carat, data = diamonds) +
  geom_smooth()
## `geom_smooth()` using method = 'gam'
# Help for a specific geom is available like this:
# ?geom_smooth
# An aesthetic is only set to a particular value when that value
# is wrapped in I(); compare the plot without and with I()
qplot(x = price, y = carat, data = diamonds, colour = "blue")
qplot(x = price, y = carat, data = diamonds, colour = I("blue"))

# Practical use: the same scatterplot at decreasing alpha
qplot(x = price, y = carat, data = diamonds, alpha = I(1/10))
qplot(x = price, y = carat, data = diamonds, alpha = I(1/50))
qplot(x = price, y = carat, data = diamonds, alpha = I(1/100))
qplot(x = price, y = carat, data = diamonds, alpha = I(1/250))
# Price against carat, coloured by clarity: raw scale first
qplot(x = carat, y = price, data = diamonds, colour = clarity)

# Both axes on a log10 scale
qplot(x = log10(carat), y = log10(price), data = diamonds, colour = clarity)

# log10 of the carat / price ratio against log10(carat)
qplot(
  x = log10(carat), y = log10(carat / price),
  data = diamonds, colour = clarity
)
# Hex-binned view of log10(price) against log10(carat), one panel per
# clarity. geom = "hex" depends on the optional hexbin package; without
# it the recorded run emitted "Computation failed in `stat_binhex()`:
# Package `hexbin` required" eight times and no hexagons were computed.
# install.packages("hexbin")
if (requireNamespace("hexbin", quietly = TRUE)) {
  # print() explicitly so the plot is drawn however the script is run
  print(
    qplot(log10(carat), log10(price), data = diamonds,
          geom = "hex", bins = 10) + facet_wrap(~ clarity)
  )
} else {
  # Give one actionable message rather than a warning per panel
  message(
    "Skipping the hex plot: package 'hexbin' is required. ",
    "Run install.packages(\"hexbin\") and try again."
  )
}
# Smoothed log10(price) against log10(carat), coloured by clarity
qplot(
  x = log10(carat), y = log10(price), data = diamonds,
  geom = "smooth", colour = clarity
)
## `geom_smooth()` using method = 'gam'