# I had to split this up because of the size of the document.
# abline, area, bar, bin2d, blank, boxplot, contour, crossbar, density,
# density2d, dotplot, errorbar, errorbarh, freqpoly, hex, histogram, hline,
# jitter, line, linerange, map, path, point, pointrange, polygon, quantile,
# raster, rect, ribbon, rug, segment, smooth, step, text, tile, violin, vline
# points, jitter, abline
# histogram, bar, errorbar
# line
# Shown in the viewer:
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_point()

# Solutions:
# 1 - With geom_jitter()
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_jitter()

# 2 - Set width in geom_jitter()
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_jitter(width = 0.1)

# 3 - Set position = position_jitter() in geom_point()
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_point(position = position_jitter(width = 0.1))

# Examine the structure of Vocab
library(car)
str(Vocab)
## 'data.frame': 21638 obs. of 4 variables:
## $ year : int 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 2 1 2 2 1 ...
## $ education : int 9 14 14 17 14 14 12 10 11 9 ...
## $ vocabulary: int 3 6 9 8 1 7 6 6 5 1 ...

# Basic scatter plot of vocabulary (y) against education (x). Use geom_point()
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_point()

# Use geom_jitter() instead of geom_point()
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter()

# Using the above plotting command, set alpha to a very low 0.2
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2)

# Using the above plotting command, set the shape to 1
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2, shape = 1)

# 1 - Make a univariate histogram
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram()

# 2 - Plot 1, plus set binwidth to 1 in the geom layer
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(binwidth = 1)

# The histogram stat computes internal variables such as count and density;
# ..density.. refers to the computed density. (Recent ggplot2 releases prefer
# after_stat(density); the dot-dot form is kept here to match the rest of the
# file.)
# 3 - Plot 2, plus MAP ..density.. to the y aesthetic (i.e. in a second aes() function)
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(aes(y = ..density..), binwidth = 1)

# 4 - plot 3, plus SET the fill attribute to "#377EB8"
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(aes(y = ..density..), binwidth = 1, fill = "#377EB8")

# Draw a bar plot of cyl, filled according to am
# fill must be a discrete variable to split the bars into groups. Stock mtcars
# stores am as numeric, so it is wrapped in factor() (harmless if am is
# already a factor).
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar()

# stack is the default
# Change the position argument to stack
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "stack")

# Change the position argument to fill
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "fill")

# Change the position argument to dodge
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "dodge")

# 1 - The last plot from the previous exercise
# ggplot(mtcars, aes(x = cyl, fill = am)) +
# geom_bar(position = "dodge")

# 2 - Define posn_d with position_dodge()
posn_d <- position_dodge(width = 0.2)

# Dodging needs discrete groups: am is wrapped in factor() because stock
# mtcars stores it as numeric (harmless if am is already a factor).
# 3 - Change the position argument to posn_d
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = posn_d)

# 4 - Use posn_d as position and adjust alpha to 0.6
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = posn_d, alpha = 0.6)

# A basic histogram, add coloring defined by cyl
# cyl is wrapped in factor() so that fill/col define one group per cylinder
# count; stock mtcars stores cyl as numeric, which would yield a single group.
ggplot(mtcars, aes(mpg, fill = factor(cyl))) +
  geom_histogram(binwidth = 1)

# Change position to identity
ggplot(mtcars, aes(mpg, fill = factor(cyl))) +
  geom_histogram(binwidth = 1, position = "identity")

# Change geom to freqpoly (position is identity by default)
ggplot(mtcars, aes(mpg, col = factor(cyl))) +
  geom_freqpoly(binwidth = 1)

# Example of how to use a brewed color palette
# scale_fill_brewer() is a discrete scale, so am is mapped as a factor
# (stock mtcars stores am as numeric).
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1")

# Use str() on Vocab to check out the structure
# Convert both integer columns to factors so they can drive discrete scales.
Vocab$education <- as.factor(Vocab$education)
Vocab$vocabulary <- as.factor(Vocab$vocabulary)
str(Vocab)
## 'data.frame': 21638 obs. of 4 variables:
## $ year : int 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 2 1 2 2 1 ...
## $ education : Factor w/ 21 levels "0","1","2","3",..: 10 15 15 18 15 15 13 11 12 10 ...
## $ vocabulary: Factor w/ 11 levels "0","1","2","3",..: 4 7 10 9 2 8 7 7 6 2 ...

# Plot education on x and vocabulary on fill
# Use the default brewed color palette
ggplot(Vocab, aes(x = education, fill = vocabulary)) +
  geom_bar(position = "fill") +
  scale_fill_brewer()

# colorRampPalette() returns a function that interpolates between the given
# colours and produces as many colours as requested.
new_col <- colorRampPalette(c("#FFFFFF", "#0000FF"))
new_col(4) # the newly extrapolated colours
## [1] "#FFFFFF" "#AAAAFF" "#5555FF" "#0000FF"
munsell::plot_hex(new_col(4)) # Quick and dirty plot

library(RColorBrewer)
# Final plot of last exercise
ggplot(Vocab, aes(x = education, fill = vocabulary)) +
  geom_bar(position = "fill") +
  scale_fill_brewer()

# Definition of a set of blue colors
# brewer.pal() is namespace-qualified so this section does not depend on
# RColorBrewer having been attached earlier.
blues <- RColorBrewer::brewer.pal(9, "Blues") # from the RColorBrewer package
blues
## [1] "#F7FBFF" "#DEEBF7" "#C6DBEF" "#9ECAE1" "#6BAED6" "#4292C6" "#2171B5"
## [8] "#08519C" "#08306B"

# 1 - Make a color range using colorRampPalette() and the set of blues
blue_range <- colorRampPalette(blues)

# This is our new palette. We can create it with as many colors as we want.
munsell::plot_hex(blue_range(11))

# 2 - Use blue_range to adjust the color of the bars, use scale_fill_manual()
ggplot(Vocab, aes(x = education, fill = vocabulary)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = blue_range(11))
# Nice. That's much better.
# 1 - Basic histogram plot command
ggplot(mtcars, aes(mpg)) +
  geom_histogram(binwidth = 1)

# am (and cyl in plot 6) is wrapped in factor() below: fill needs a discrete
# variable for the position adjustments to have groups to work on, and stock
# mtcars stores both columns as numeric.
# 2 - Plot 1, Expand aesthetics: am onto fill
ggplot(mtcars, aes(mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1)

# 3 - Plot 2, change position = "dodge"
ggplot(mtcars, aes(mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "dodge")

# 4 - Plot 3, change position = "fill"
## In this case, none of these positions really work well, because it's difficult to compare the distributions directly.
ggplot(mtcars, aes(mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "fill")

# 5 - Plot 4, plus change position = "identity" and alpha = 0.4
ggplot(mtcars, aes(mpg, fill = factor(am))) +
  geom_histogram(
    binwidth = 1,
    position = "identity",
    alpha = 0.4
  )

# 6 - Plot 5, plus change mapping: cyl onto fill
ggplot(mtcars, aes(mpg, fill = factor(cyl))) +
  geom_histogram(
    binwidth = 1,
    position = "identity",
    alpha = 0.4
  )

# Print out head of economics
head(economics)
## # A tibble: 6 x 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <int> <dbl> <dbl> <int>
## 1 1967-07-01 507.4 198712 12.5 4.5 2944
## 2 1967-08-01 510.5 198911 12.5 4.7 2945
## 3 1967-09-01 516.3 199113 11.7 4.6 2958
## 4 1967-10-01 512.9 199311 12.5 4.9 3143
## 5 1967-11-01 518.1 199498 12.5 4.7 3066
## 6 1967-12-01 525.8 199657 12.1 4.8 3018

# Plot unemploy as a function of date using a line plot
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Adjust plot to represent the fraction of total population that is unemployed
ggplot(data = economics, mapping = aes(x = date, y = unemploy / pop)) +
  geom_line()

# Basic line plot
# ggplot(economics, aes(x = date, y = unemploy/pop)) +
# geom_line()

# Expand the following command with geom_rect() to draw the recess periods
# The rectangles take their own data (recess) and ignore the plot-level
# aesthetics; infinite y limits make each band span the full panel height.
ggplot(data = economics, mapping = aes(x = date, y = unemploy / pop)) +
  geom_rect(
    data = recess,
    mapping = aes(xmin = begin, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "red",
    alpha = 0.2
  ) +
  geom_line()

# Check the structure as a starting point
str(fish.species)
## 'data.frame': 61 obs. of 8 variables:
## $ Year : int 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
## $ Pink : int 100600 259000 132600 235900 123400 244400 203400 270119 200798 200085 ...
## $ Chum : int 139300 155900 113800 99800 148700 143700 158480 125377 132407 113114 ...
## $ Sockeye : int 64100 51200 58200 66100 83800 72000 84800 69676 100520 62472 ...
## $ Coho : int 30500 40900 33600 32400 38300 45100 40000 39900 39200 32865 ...
## $ Rainbow : int 0 100 100 100 100 100 100 100 100 100 ...
## $ Chinook : int 23200 25500 24900 25300 24500 27700 25300 21200 20900 20335 ...
## $ Atlantic: int 10800 9701 9800 8800 9600 7800 8100 9000 8801 8700 ...

# Use gather to go from fish.species to fish.tidy
# Every column except Year is stacked into Species (name) / Capture (value).
fish.tidy <- gather(
  data = fish.species,
  key = Species,
  value = Capture,
  -Year
)
str(fish.tidy)
## 'data.frame': 427 obs. of 3 variables:
## $ Year : int 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
## $ Species: chr "Pink" "Pink" "Pink" "Pink" ...
## $ Capture: int 100600 259000 132600 235900 123400 244400 203400 270119 200798 200085 ...

# Recreate the plot shown on the right
ggplot(
  data = fish.tidy,
  mapping = aes(x = Year, y = Capture, col = Species)
) +
  geom_line()
# The old way (shown)
plot(mpg ~ wt, data = mtcars) # formula notation
with(mtcars, plot(wt, mpg)) # x, y notation

# Using ggplot:
ggplot(mtcars, aes(wt, mpg)) +
  geom_point()

# Using qplot:
qplot(wt, mpg, data = mtcars)

# Categorical variable mapped onto size:
# cyl
qplot(wt, mpg, data = mtcars, size = factor(cyl))
# gear
qplot(wt, mpg, data = mtcars, size = factor(gear))

# Continuous variable mapped onto col:
# hp
qplot(wt, mpg, data = mtcars, col = hp)
# qsec
qplot(wt, mpg, data = mtcars, col = qsec)

# qplot() with x only
qplot(x = factor(cyl), data = mtcars)

# qplot() with x and y
qplot(x = factor(cyl), y = factor(vs), data = mtcars)

# qplot() with geom set to jitter manually
qplot(x = factor(cyl), y = factor(vs), data = mtcars, geom = "jitter")

# cyl and am are factors, wt is numeric
class(mtcars$cyl)
## [1] "numeric"
class(mtcars$am)
## [1] "numeric"
class(mtcars$wt)
## [1] "numeric"

# The output above shows cyl and am as numeric in this session, so both are
# wrapped in factor() in the plots below: the dot plots need a discrete x to
# form one stack per cylinder count and a discrete colour/fill for am.
# "Basic" dot plot, with geom_point():
ggplot(mtcars, aes(factor(cyl), wt, col = factor(am))) +
  geom_point(position = position_jitter(0.2, 0))

# 1 - "True" dot plot, with geom_dotplot():
ggplot(mtcars, aes(factor(cyl), wt, fill = factor(am))) +
  geom_dotplot(binaxis = "y", stackdir = "center")

# 2 - qplot with geom "dotplot", binaxis = "y" and stackdir = "center"
qplot(
  factor(cyl), wt,
  data = mtcars,
  fill = factor(am),
  geom = "dotplot",
  binaxis = "y",
  stackdir = "center"
)

# ChickWeight is available in your workspace
# 1 - Check out the head of ChickWeight
head(ChickWeight)
## Grouped Data: weight ~ Time | Chick
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1

# 2 - Basic line plot
# group = Chick draws one line per chick instead of one line through all rows.
ggplot(ChickWeight, aes(x = Time, y = weight)) +
  geom_line(aes(group = Chick))

# 3 - Take plot 2, map Diet onto col.
ggplot(ChickWeight, aes(x = Time, y = weight, col = Diet)) +
  geom_line(aes(group = Chick))

# 4 - Take plot 3, add geom_smooth()
# The smoother inherits col = Diet but not the per-chick grouping, which is
# set only inside geom_line(). se = FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
ggplot(ChickWeight, aes(x = Time, y = weight, col = Diet)) +
  geom_line(aes(group = Chick), alpha = 0.3) +
  geom_smooth(lwd = 2, se = FALSE)

# titanic is available in your workspace
# 1 - Check the structure of titanic
str(titanic)
## 'data.frame': 714 obs. of 4 variables:
## $ Survived: int 0 1 1 1 0 0 0 1 1 1 ...
## $ Pclass : int 3 1 3 1 3 1 3 3 2 3 ...
## $ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 1 1 1 ...
## $ Age : num 22 38 26 35 35 54 2 27 14 4 ...

# 2 - Use ggplot() for the first instruction
ggplot(data = titanic, mapping = aes(x = Pclass, fill = Sex)) +
  geom_bar(position = "dodge")

# 3 - Plot 2, add facet_grid() layer
ggplot(data = titanic, mapping = aes(x = Pclass, fill = Sex)) +
  geom_bar(position = "dodge") +
  facet_grid(. ~ Survived)

# 4 - Define an object for position jitterdodge, to use below
posn.jd <- position_jitterdodge(
  jitter.width = 0.5,
  jitter.height = 0,
  dodge.width = 0.6
)

# 5 - Plot 3, but use the position object from instruction 4
ggplot(data = titanic, mapping = aes(x = Pclass, y = Age, col = Sex)) +
  geom_point(size = 3, alpha = 0.5, position = posn.jd) +
  facet_grid(. ~ Survived)