library(tidyverse)
# Load the diamonds data set and print a compact summary of its structure.
data(list = "diamonds")
str(object = diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Baseline scatterplot: carat on the x axis, price on the y axis.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()

# Mapping colour to a categorical variable gives every level its own colour.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_point()

# Same mapping after coercing the ordered factor to plain character values,
# i.e. colouring by a categorical variable that carries no ordering.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = as.character(clarity))
) +
  geom_point()

# Mapping colour to a numeric variable produces a continuous colour gradient.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = depth)
) +
  geom_point()
# A constant placed inside aes() is treated as data: "blue" becomes a
# one-level discrete variable with its own legend entry, so the points are
# drawn in the default palette colour rather than in blue.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = "blue")
) +
  geom_point()

# To actually paint the points blue, set the colour as a fixed parameter of
# the geom, outside aes().
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(color = "blue")
# Transparency mapped to clarity.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, alpha = clarity)
) +
  geom_point()

# Point shape mapped to clarity. clarity is an ordered factor with 8 levels,
# which is why this call emits the warnings reproduced below.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, shape = clarity)
) +
  geom_point()
## Warning: Using shapes for an ordinal variable is not advised
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 5445 rows containing missing values (geom_point).
# Point size mapped to clarity.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, size = clarity)
) +
  geom_point()
Use facets to explore subsets
# One scatterplot panel per cut level, laid out as columns of a single row.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  facet_grid(. ~ cut)
We have already constructed scatterplots with geom_point();
let’s take a look at other geometries.
# Smoothed trend of price against carat, using geom_smooth()'s defaults.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Draw the smoother without its standard-error ribbon.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned; this also matches the other geom_smooth() calls in this file.
ggplot(diamonds, aes(carat, price)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# A single boxplot of price over the whole data set.
ggplot(data = diamonds, mapping = aes(y = price)) +
  geom_boxplot()

# One box per cut level, placed side by side and told apart by fill colour.
ggplot(data = diamonds, mapping = aes(y = price, fill = cut)) +
  geom_boxplot()

# Bar chart with one bar per cut level, each filled with that level's colour.
ggplot(data = diamonds, mapping = aes(x = cut, fill = cut)) +
  geom_bar()
Here we will take a quick detour from the diamonds dataset to look at data from IMDB on action movies:
# install.packages("ggplot2movies")
library(ggplot2movies)
# Load the movies data set.
data(movies)

# Count the movies for every combination of year and the Action column.
movies_yg <- group_by(movies, year, Action)
# `.groups = "drop"` states the output grouping explicitly: the summary comes
# back ungrouped (instead of silently staying grouped by year) and summarise()
# no longer prints its "regrouping output" message.
movies_sum <- summarise(movies_yg, n_movies = n(), .groups = "drop")

# Yearly movie counts, drawn as one coloured line per value of Action.
ggplot(movies_sum, aes(x = year)) +
  geom_line(aes(y = n_movies, colour = factor(Action)))
# Scatterplot with the default smoother layered on top of the points.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Same plot, with the standard-error ribbon around the smoother switched off.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Replace the default smoother with a straight linear-model fit.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Colour is mapped at the plot level, so both the points and the smoother are
# split by cut: one default smoother per cut level.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Same split by cut, with a linear-model fit per cut level.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Bars per cut, subdivided by clarity; default position stacks the segments.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar()

# position = "fill": every bar is stretched to the same height, so the
# segments show the share of each clarity level within a cut.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# position = "dodge": the clarity levels are drawn next to each other.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")
# The carat/price scatterplot rendered with the minimal theme.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  theme_minimal()

# The same scatterplot rendered with the black-and-white theme.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  theme_bw()