# These notes are split into two documents due to size.
# Attach ggplot2 so that ggplot(), aes() and the geoms are available
library(ggplot2)
# Inspect the structure of the built-in mtcars data frame
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Scatter plot of mpg against cyl; cyl is numeric at this point (see the str()
# output above), so it is drawn on a continuous x axis
ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_point()
# Tell ggplot2 that cyl is a categorical variable by wrapping it in factor()
# Attach ggplot2 for this exercise
library(ggplot2)
# Wrap cyl in factor() so that it is treated as a discrete variable on x
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_point()
# Baseline scatter plot: wt on x, mpg on y
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# Same scatter plot with disp mapped to the color aesthetic
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = disp)) +
  geom_point()
# Same scatter plot with disp mapped to the size aesthetic
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point()
# color can be mapped to either a discrete or a continuous variable;
# shape only makes sense on a discrete variable
## Mapping the continuous variable disp to shape fails with the error recorded
## below, so the call is kept commented out to let the rest of the script run.
# ggplot(mtcars, aes(x = wt, y = mpg, shape = disp)) +
#   geom_point()
## Error: A continuous variable can not be mapped to shape
# Inspect the structure of the built-in iris data frame
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Capitalise the three species level names
levels(iris$Species) <- c("Setosa", "Versicolor", "Virginica")
## Data and Aesthetics Layer (essential): sepal length on x, sepal width on y
p <- ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  ## Geometries Layer (essential): jittered, semi-transparent points
  geom_jitter(alpha = 0.6)
# Draw the plot built so far
p
# Extend the existing plot object p with facets, a fitted line per panel,
# labelled axes with fixed limits, and a 1:1 coordinate ratio.
p <- p +
  ## Facets (optional): one column of panels per Species
  facet_grid(. ~ Species) +
  ## Statistics (optional): linear fit without the confidence ribbon.
  ## FALSE is spelled out because F is an ordinary variable that can be reassigned.
  stat_smooth(method = "lm", se = FALSE, col = "red") +
  ## Coordinates Layer (optional): named axes, fixed limits, no padding
  scale_y_continuous("Sepal Width (cm)", limits = c(2, 5), expand = c(0, 0)) +
  scale_x_continuous("Sepal Length (cm)", limits = c(4, 8), expand = c(0, 0)) +
  coord_equal()
# Draw the updated plot
p
# Extend the existing plot object p with a minimal theme: blank backgrounds,
# no grid lines, no facet strip text, black axis lines, ticks and labels.
p <- p +
  ## Theme Layer (optional)
  theme(
    panel.background = element_blank(),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.key = element_blank(),
    strip.background = element_blank(),
    axis.text = element_text(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    strip.text = element_blank(),
    # panel.margin was renamed to panel.spacing in ggplot2 2.2.0; the old name
    # is deprecated
    panel.spacing = unit(1, "lines")
  )
# Draw the themed plot
p
# Inspect the structure of the diamonds data frame
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Scatter plot of price against carat
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()
# Same scatter plot with a smoothed trend layered on top of the points
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth()
# 1 - Plot from the previous exercise, kept for reference only (not run)
# ggplot(diamonds, aes(x = carat, y = price)) +
# geom_point() +
# geom_smooth()
# 2 - Same data and aesthetics, drawing only the smooth line
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_smooth()
# 3 - Smooth lines again, now with clarity mapped to color inside aes()
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = clarity)) +
  geom_smooth()
# 4 - Same color mapping, drawing only semi-transparent points (alpha = .4)
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = clarity)) +
  geom_point(alpha = .4)
# Create the object containing the data and aes layers: dia_plot
dia_plot <- ggplot(diamonds, aes(x = carat, y = price))
# Add a geom layer with + and geom_point()
dia_plot + geom_point()
# Add the same geom layer, but with clarity mapped to color inside aes()
dia_plot + geom_point(aes(color = clarity))
# 1 - Recreate the base dia_plot object (data and aes layers only)
dia_plot <- ggplot(diamonds, aes(x = carat, y = price))
# 2 - Expand dia_plot by adding geom_point() with alpha set to 0.2;
#     the result is stored back into dia_plot, so later layers include the points
dia_plot <- dia_plot + geom_point(alpha = 0.2)
# 3 - Plot dia_plot with an additional geom_smooth() without the standard-error
#     ribbon (FALSE is spelled out; F is a reassignable variable, not a constant)
dia_plot + geom_smooth(se = FALSE)
# 4 - Same as above, with clarity mapped to col for the smooth layer only
dia_plot + geom_smooth(aes(col = clarity), se = FALSE)
# Plot the correct variables of mtcars
# Base-graphics scatter plot of mpg against wt, coloured by the cyl column
plot(x = mtcars$wt, y = mtcars$mpg, col = mtcars$cyl)
# Store a factor version of cyl in a new column, fcyl
mtcars$fcyl <- as.factor(mtcars$cyl)
# Same scatter plot, coloured by the factor column instead
plot(x = mtcars$wt, y = mtcars$mpg, col = mtcars$fcyl)
# In base graphics the lm()s need to be calculated separately and wrapped into the abline() function with lapply().
# Use lm() to calculate a linear model and save it as carModel
# Fit one linear model of mpg on wt over all cars and keep it as carModel
carModel <- lm(mpg ~ wt, data = mtcars)
# Basic plot: convert cyl to a factor and colour the points by it
mtcars$cyl <- as.factor(mtcars$cyl)
plot(mtcars$wt, mtcars$mpg, col = mtcars$cyl)
# Call abline() with carModel as first argument and set lty to 2 (dashed line)
abline(carModel, lty = 2)
# Redraw the scatter plot, then add one regression line per cyl level
plot(mtcars$wt, mtcars$mpg, col = mtcars$cyl)
## Iterate over the factor levels (one fit per level) rather than over every
## element of mtcars$cyl, which refitted and redrew the same lines once per row.
## abline() returns nothing useful, so the list produced by lapply() is wrapped
## in invisible() to keep a list of NULLs from being printed.
invisible(lapply(seq_len(nlevels(mtcars$cyl)), function(i) {
  # i is the integer code of a cyl level; the same code is used as the line
  # colour so that each line matches the colour of its points
  abline(lm(mpg ~ wt, mtcars, subset = (as.integer(cyl) == i)), col = i)
}))
# Draw the legend of the plot: one entry per cyl level, colours by level code
legend(x = 5, y = 33, legend = levels(mtcars$cyl),
       col = seq_len(nlevels(mtcars$cyl)), pch = 1, bty = "n")
# Plot 1: scatter plot of mpg against wt, coloured by cyl
ggplot(mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point()
# Plot 2: add the lines of the linear models, one per cyl group
# (se = FALSE drops the confidence ribbon; FALSE is spelled out instead of F)
ggplot(mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Plot 3: additionally include a dashed lm for the entire dataset;
# aes(group = 1) overrides the per-cyl grouping for that layer only
ggplot(mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_smooth(aes(group = 1), method = "lm", se = FALSE, linetype = 2)
# Attach tidyr for gather(), separate() and the %>% pipe
library(tidyr)
# Structure of iris before reshaping
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "Setosa","Versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Build iris.tidy: stack every column except Species into key/Value pairs,
# then split each key at the dot into Part and Measure
iris.tidy <- iris %>%
  gather(key = "key", value = "Value", -Species) %>%
  separate(col = key, into = c("Part", "Measure"), sep = "\\.")
# Structure of the reshaped data
str(iris.tidy)
## 'data.frame': 600 obs. of 4 variables:
## $ Species: Factor w/ 3 levels "Setosa","Versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Part : chr "Sepal" "Sepal" "Sepal" "Sepal" ...
## $ Measure: chr "Length" "Length" "Length" "Length" ...
## $ Value : num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# Because length and width are in a single column, we can easily split on this variable in facet_grid()
# iris.tidy is the dataset to use here: Species on x, Value on y, coloured by
# Part, with one panel per Measure
ggplot(data = iris.tidy, mapping = aes(x = Species, y = Value, col = Part)) +
  geom_jitter() +
  facet_grid(. ~ Measure)
# Add a column of unique row ids; seq_len() is used instead of 1:nrow() so the
# sequence is empty (not c(1, 0)) if the data frame ever has zero rows
iris$Flower <- seq_len(nrow(iris))
# Build iris.wide: stack the measurement columns (keeping Flower and Species as
# identifiers), split each key at the dot into Part and Measure, then spread
# Measure back out into separate columns
# NOTE(review): gather()/spread() are superseded by pivot_longer()/pivot_wider()
# in current tidyr; they are kept so the recorded output in these notes stays valid
iris.wide <- iris %>%
  gather(key, value, -Flower, -Species) %>%
  separate(key, c("Part", "Measure"), "\\.") %>%
  spread(Measure, value)
# To plot length vs width on the x and y axes we need to have them in separate columns, so we can assign one variable to each of the x and y aesthetics
# Compare the first rows of the three data frames: iris, iris.tidy, iris.wide
# Original iris, now carrying the Flower id column
head(x = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Flower
## 1 5.1 3.5 1.4 0.2 Setosa 1
## 2 4.9 3.0 1.4 0.2 Setosa 2
## 3 4.7 3.2 1.3 0.2 Setosa 3
## 4 4.6 3.1 1.5 0.2 Setosa 4
## 5 5.0 3.6 1.4 0.2 Setosa 5
## 6 5.4 3.9 1.7 0.4 Setosa 6
# Long format: one row per measured value
head(x = iris.tidy)
## Species Part Measure Value
## 1 Setosa Sepal Length 5.1
## 2 Setosa Sepal Length 4.9
## 3 Setosa Sepal Length 4.7
## 4 Setosa Sepal Length 4.6
## 5 Setosa Sepal Length 5.0
## 6 Setosa Sepal Length 5.4
# Wide format: Length and Width as separate columns per flower part
head(x = iris.wide)
## Species Flower Part Length Width
## 1 Setosa 1 Petal 1.4 0.2
## 2 Setosa 1 Sepal 5.1 3.5
## 3 Setosa 2 Petal 1.4 0.2
## 4 Setosa 2 Sepal 4.9 3.0
## 5 Setosa 3 Petal 1.3 0.2
## 6 Setosa 3 Sepal 4.7 3.2
# iris.wide is the dataset to use here: Length on x, Width on y, coloured by
# Part, with one panel per Species
ggplot(data = iris.wide, mapping = aes(x = Length, y = Width, color = Part)) +
  geom_jitter() +
  facet_grid(. ~ Species)
# The aes() function
str(mtcars)
## 'data.frame': 32 obs. of 12 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
## $ fcyl: Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
# 1 - mpg on the x axis, cyl on the y axis
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point()
# 2 - The reverse: cyl on the x axis, mpg on the y axis
ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_point()
# 3 - wt on x, mpg on y, and cyl mapped to col
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point()
# 4 - Same plot with the shape and size of the points changed
## shape and size are set as fixed attributes inside the geom, whereas
## wt, mpg and cyl are mapped to the aesthetics x, y and color inside aes()
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(shape = 1, size = 4)
# Convert the transmission column am to a factor
mtcars$am <- factor(mtcars$am)
# Check the classes: am and cyl are factors, wt is numeric
class(mtcars$am)
## [1] "factor"
class(mtcars$cyl)
## [1] "factor"
class(mtcars$wt)
## [1] "numeric"
# 1 - cyl mapped to fill, points drawn with shape 1 at size 4
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(shape = 1, size = 4)
# 2 - Same mapping with shape 21 and alpha .6
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(shape = 21, size = 4, alpha = .6)
# 3 - Additionally map am to col; stroke sets the outline width
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl, col = am)) +
  geom_point(shape = 21, size = 4, alpha = .6, stroke = 1.5)
# cyl mapped to size
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = cyl)) +
  geom_point()
# cyl mapped to alpha
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, alpha = cyl)) +
  geom_point()
# cyl mapped to shape
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, shape = cyl)) +
  geom_point()
# cyl mapped to label, drawn as text instead of points
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, label = cyl)) +
  geom_text()
# The color aesthetic: some point shapes have both a color and a fill aesthetic.
# 1 - First scatter plot, with col aesthetic:
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point()
# A hexadecimal color, reused as a fixed attribute in the plots below
my_color <- "#4ABEFF"
# 2 - Plot 1 again, but with col set as an attribute in the geom layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(col = my_color)
# 3 - Plot 2 with fill instead of col as the aesthetic, plus shape and size
#     attributes in the geom layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(size = 10, shape = 23, color = my_color, stroke = 1.5)
# Points drawn with alpha 0.5
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(alpha = 0.5, size = 4)
# Points drawn with shape 24 and color yellow
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(shape = 24, color = "yellow", size = 4)
# Text drawn with label rownames(mtcars) and color red
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_text(label = rownames(mtcars), color = "red")
# mtcars variables:
# mpg – Miles/(US) gallon
# cyl – Number of cylinders
# disp – Displacement (cu.in.)
# hp – Gross horsepower
# drat – Rear axle ratio
# wt – Weight (lb/1000)
# qsec – 1/4 mile time
# vs – V/S engine.
# am – Transmission (0 = automatic, 1 = manual)
# gear – Number of forward gears
# carb – Number of carburetors
# Map mpg onto x, qsec onto y and factor(cyl) onto col
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = qsec, col = factor(cyl))
) +
  geom_point()
# Additional mapping: factor(am) onto shape
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = qsec, col = factor(cyl), shape = factor(am))
) +
  geom_point()
# Additional mapping: the ratio hp/wt onto size
ggplot(
  data = mtcars,
  mapping = aes(
    x = mpg, y = qsec,
    col = factor(cyl),
    shape = factor(am),
    size = (hp/wt)
  )
) +
  geom_point()
# Base layer shared by the bar charts below: factor(cyl) on x, factor(am) as fill
cyl.am <- ggplot(mtcars, aes(x = factor(cyl), fill = factor(am)))
# Add geom (position = "stack" by default)
cyl.am +
  geom_bar()
# Fill - show proportion
cyl.am +
  geom_bar(position = "fill")
# Dodging - principles of similarity and proximity
cyl.am +
  geom_bar(position = "dodge")
# Clean up the axes with scale_ functions
val <- c("#E41A1C", "#377EB8")
# Labels are matched to the factor(am) levels in order, i.e. "0" then "1".
# In mtcars, am = 0 is automatic and am = 1 is manual, so "Automatic" must
# come first; the reverse order would mislabel the legend.
lab <- c("Automatic", "Manual")
cyl.am +
  geom_bar(position = "dodge") +
  scale_x_discrete(name = "Cylinders") +
  scale_y_continuous(name = "Number") +
  scale_fill_manual(name = "Transmission",
                    values = val,
                    labels = lab)
## The call below is left commented out: it errors because geom_point() needs
## a y aesthetic and none is supplied
# ggplot(mtcars, aes(x = mpg)) + geom_point()
# 1 - Jittered plot of mtcars with mpg on x and the constant 0 on y
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_jitter()
# 2 - Same plot with the y axis limits set to -2..2
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_jitter() +
  scale_y_continuous(limits = c(-2, 2))
# Basic scatter plot: wt on the x axis, mpg on the y axis, cyl mapped to col
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(size = 4)
# Hollow circles (shape = 1) - an improvement
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(size = 4, shape = 1)
# Semi-transparent points (alpha = .6) - very nice
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(size = 4, alpha = .6)
# Scatter plot: carat (x), price (y), clarity (color)
ggplot(data = diamonds, mapping = aes(x = carat, y = price, col = clarity)) +
  geom_point()
# Same plot with alpha = 0.5 to adjust for overplotting
ggplot(data = diamonds, mapping = aes(x = carat, y = price, col = clarity)) +
  geom_point(alpha = 0.5)
# Scatter plot: clarity (x), carat (y), price (color)
ggplot(data = diamonds, mapping = aes(x = clarity, y = carat, col = price)) +
  geom_point(alpha = 0.5)
# Dot plot with jittering applied through the position argument
ggplot(data = diamonds, mapping = aes(x = clarity, y = carat, col = price)) +
  geom_point(alpha = 0.5, position = "jitter")