Part I: Review geom_point

0. Basics

Recall our work from last class using geom_point:

Load example data

# Attach the tidyverse and load the diamonds example data
library(tidyverse)
data(diamonds)
# Inspect the structure: column types and the first few values of each column
str(diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
##  $ carat  : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...

Starting with a simple scatterplot:

# Basic scatterplot: carat on the x-axis, price on the y-axis
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()

1. Aesthetic Mapping

A. Color

Categorical vs Continuous Data
# A categorical variable mapped to color gives each category its own color
ggplot(diamonds, aes(x = carat, y = price, color = clarity)) +
  geom_point()

# The same mapping after converting the ordered factor to plain character
ggplot(
  diamonds,
  aes(x = carat, y = price, color = as.character(clarity))
) +
  geom_point()

# A numeric variable mapped to color gives a color gradient
ggplot(diamonds, aes(x = carat, y = price, color = depth)) +
  geom_point()

CAUTION

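Be careful where you place the color argument: inside aes(), color="blue" is treated as a data value to be mapped (so the points are not drawn in blue and a legend appears), whereas outside aes() it sets the color of the points directly.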

# Inside aes(): "blue" is treated as a data value to map, not as a color name
ggplot(diamonds, aes(x = carat, y = price, color = "blue")) +
  geom_point()

# Outside aes(): every point is drawn in the fixed color blue
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(color = "blue")

B. Transparency

# Map point transparency to clarity
ggplot(diamonds, aes(x = carat, y = price, alpha = clarity)) +
  geom_point()

C. Shape

# Map point shape to clarity. clarity has 8 levels but the default shape
# palette handles at most 6, so rows in the remaining levels are dropped
# (see the warnings printed for this plot).
ggplot(diamonds, aes(x = carat, y = price, shape = clarity)) +
  geom_point()
## Warning: Using shapes for an ordinal variable is not advised
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 5445 rows containing missing values (geom_point).

D. Size

# Map point size to clarity
ggplot(diamonds, aes(x = carat, y = price, size = clarity)) +
  geom_point()

2. Facet

Use facets to explore subsets

# One scatterplot panel per level of cut, laid out in columns
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  facet_grid(. ~ cut)

Part II: Geometries

1. Using a single geometry

A. Scatterplot

We have already constructed scatterplots with geom_point(); now let’s take a look at other geometries.

B. Smooth

# Smoothed trend of price against carat; no method is given, so
# geom_smooth() chooses one and reports its choice in a message
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Remove the standard-error band around the smoothed line.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, and the later examples in this file already use se=FALSE.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

C. Boxplot

# Single boxplot of price across all diamonds
ggplot(data = diamonds, mapping = aes(y = price)) +
  geom_boxplot()

# Side-by-side boxplots of price, one per cut, filled by cut
ggplot(data = diamonds, mapping = aes(y = price, fill = cut)) +
  geom_boxplot()

D. Bar Chart

# Bar chart of the number of diamonds in each cut, with bars filled by cut
ggplot(data = diamonds, mapping = aes(x = cut, fill = cut)) +
  geom_bar()

E. Time Series / Line Plots

Here we will take a quick detour from the diamonds dataset to look at data from IMDB on action movies:

# install.packages("ggplot2movies")
library(ggplot2movies)
data(movies)

# Count the movies released each year, separately for each value of Action.
movies_yg <- group_by(movies, year, Action)
# .groups = "drop" returns an ungrouped tibble, so later verbs are not
# silently applied per year, and it suppresses the "grouped output" message
# that summarise() otherwise prints.
movies_sum <- summarise(movies_yg, n_movies = n(), .groups = "drop")

# One line per value of Action showing the number of movies over time.
ggplot(movies_sum, aes(x = year)) +
  geom_line(aes(y = n_movies, colour = factor(Action)))

2. Using multiple geometries at the same time

Point and Smooth

Adding geom_smooth on top of your geom_point can help you see what relationships/trends may exist between your response and explanatory variables.

# Scatterplot with a smoothed trend line drawn on top of the points
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Refining your geom_smooth

1. Removing the Standard Error Band
# Same plot with the standard-error band of geom_smooth() switched off
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

2. Specifying the Model
# Fit the trend with a linear model (method = "lm") and hide the band
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

3. Using Color to Group
# Color mapped to cut in ggplot() applies to both layers, so the points and
# the smoothed lines are grouped and colored by cut
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

4. Combining all the above!
# Grouped by cut, linear-model fit, no standard-error band
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Part III: Position Adjustments (for Bars)

A. Stacked (Default)

# Default position: clarity segments are stacked within each cut bar
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar()

B. Fill

# position = "fill": stacked segments rescaled so every bar has equal height
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

C. Dodge (Side-by-side)

# position = "dodge": clarity bars placed side by side within each cut
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")

Part IV: Themes

A. Minimal

# Scatterplot drawn with the minimal theme
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  theme_minimal()

B. BW

# Scatterplot drawn with the black-and-white theme
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  theme_bw()