# load packages
#install.packages("tidyverse")
library(tidyverse)
#install.packages("rsconnect")
#install.packages('shiny')
#library(shiny)
#library(rsconnect)Visualizations STARTER
Example dataset
Aesthetic mapping
ggplot(data = diamonds)Exercise
- Use the code chunk below to create a scatterplot of
`table` vs `depth`.
Other attributes
ggplot(data = diamonds,
aes(x = carat,
y = price)) +
geom_point()ggplot(data = diamonds,
aes(x = carat,
y = price)) +
geom_point(color = purple)Error: object 'purple' not found
Incorporating more variables via aes()
ggplot(data = iris,
aes(x = Sepal.Length,
y = Sepal.Width))Exercise
Using the code chunk below, create a scatterplot using a sample of the diamonds dataset that includes the following features (code for the sample is provided; check the help page to see how it works!):
- Visualizes `table` vs `depth`;
- Each observation is sized based on the weight, `carat`;
- All observations have the shape of a diamond (HINT: Google “geom_point shapes”).
diamonds_sample <- sample_n(diamonds, size = 100)Histograms
geom_histogram()
ggplot(data = diamonds)ggplot(data = iris,
aes(x = Petal.Length)) +
geom_histogram()`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Exercise
- Use the code chunk below to create a histogram for
`carat` that has 20 bins with white bars and black outlines.
Titles and labels
ggplot(data = diamonds,
aes(x = carat)) +
geom_histogram(binwidth = 0.1,
fill = "red",
color = "purple")Exercise
Using the code chunk below, modify the code used to make the previous plot to:
Visualize sepal length instead of petal length;
Change the X-axis label to be more readable;
Give the plot an informative title.
Boxplots
geom_boxplot()
x <- rnorm(50)
summary(x) Min. 1st Qu. Median Mean 3rd Qu. Max.
-3.02828 -0.64452 -0.05638 -0.05394 0.57124 2.16353
ggplot(data = diamonds)Exercise
- Using the code chunk below, create a vertical boxplot of
`Sepal.Length` using the iris dataset.
Comparative boxplots
ggplot(data = diamonds,
aes(x = carat)) +
geom_boxplot()Transformations and more customizations
ggplot(data = diamonds,
aes(x = cut,
y = price)) +
geom_boxplot(fill = "darkblue",
color = "lightblue")Exercise
Using the code chunk below, create a comparative boxplot using the iris dataset that includes the following features:
- Compares `Sepal.Length` for each `Species`;
- Horizontal boxplots (there is more than one way to do this);
- `Sepal.Length` is originally measured in centimeters (cm); convert this to meters (m) for this plot;
- A more informative axis label for `Sepal.Length` based on the new scale;
- A cool theme (try a few!).
Bar graphs
Bar graphs
geom_bar() with raw data
ggplot(data = diamonds,
aes(x = cut))
geom_bar() with count data
table(cut = diamonds$cut)cut
Fair Good Very Good Premium Ideal
1610 4906 12082 13791 21551
cut_table <- count(diamonds, cut)
glimpse(cut_table)Rows: 5
Columns: 2
$ cut <ord> Fair, Good, Very Good, Premium, Ideal
$ n <int> 1610, 4906, 12082, 13791, 21551
ggplot(data = diamonds,
aes(x = cut)) +
geom_bar()ggplot(data = cut_table,
aes(x = cut,
y = n))Exercise
- Using the code chunk below, create a bar graph of
`Species` using the sample from the iris dataset provided below. HINT: Make sure to inspect the data first.
# create a sample of the data and summarize it
# -> rename the count column so the format differs slightly from the example and you have to work with it
iris_sample <- iris %>%
sample_n(size = 50) %>%
count(Species) %>%
rename(Count = n)iris_sample Species Count
1 setosa 18
2 versicolor 14
3 virginica 18
Stacked bar graph
ggplot(data = diamonds,
aes(x = cut)) +
geom_bar()Proportionally stacked bar graph
ggplot(data = diamonds,
aes(x = cut,
fill = clarity)) +
geom_bar()Side-by-side bar graph
ggplot(data = diamonds,
aes(x = cut,
fill = clarity)) +
geom_bar(position = "fill")Line plots
geom_line()
# Pair each year of the built-in `sunspot.year` series with its sunspot count
# in a plain data frame that ggplot() can consume.
data_sun <- data.frame(
  year = 1700:1988,
  sunspots = as.vector(sunspot.year)
)
glimpse(data_sun)Rows: 289
Columns: 2
$ year <int> 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1…
$ sunspots <dbl> 5, 11, 16, 23, 36, 58, 29, 20, 10, 8, 3, 0, 0, 2, 11, 27, 47,…
ggplot(data = data_sun)Facets
Facets
facet_wrap()
ggplot(data = diamonds,
aes(x = table,
y = depth)) +
geom_point()Controlling rows and columns
ggplot(data = diamonds,
aes(x = carat)) +
geom_boxplot() +
facet_wrap(~ cut)ggplot(data = diamonds,
aes(x = price)) +
geom_histogram() +
facet_wrap(~ clarity,
nrow = 1)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Scales for facets
ggplot(data = diamonds,
aes(x = price)) +
geom_histogram() +
facet_wrap(~ cut)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Exercise
Using the code chunk below, create boxplots using the iris dataset that include the following features:
- Plots `Sepal.Length` faceted by `Species`;
- Stack the facets vertically to emphasize the differences between each species;
- Think about whether we want to adjust the scales of the panels: why or why not?
Note that we made this plot previously with comparative boxplots by specifying
`aes(y = Species)`; so this is an alternative.
facet_grid()
ggplot(data = diamonds,
aes(x = table)) +
geom_boxplot()Exercise
- Using the code chunk below, create scatterplots of
`carat` vs `price` that are colored by `cut` and faceted by `color` and `clarity`. Try a few different options for `scales` to see how they affect the plot.
# create a sample of the data with fewer levels (a variety from the ordinal scale of each variable)
diamonds_sample <- diamonds %>%
filter(cut %in% unique(diamonds$cut)[c(1,3,5)],
color %in% unique(diamonds$color)[c(1,4,7)],
clarity %in% unique(diamonds$clarity)[c(1,4,8)]) %>%
sample_n(size = 5000)diamonds_sample# A tibble: 5,000 × 10
carat cut color clarity depth table price x y z
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 1.51 Good E SI2 63.1 58 9343 7.28 7.31 4.6
2 0.53 Ideal D VS2 61.4 56 1956 5.23 5.2 3.2
3 0.5 Fair E VS2 65 58 1188 4.96 4.92 3.21
4 1.03 Ideal D SI2 61.8 56 4679 6.46 6.51 4.01
5 0.3 Ideal D VS2 62 56 911 4.35 4.33 2.69
6 0.33 Ideal H IF 61.6 55 838 4.48 4.51 2.77
7 0.53 Ideal H SI2 61.6 54 1020 5.22 5.26 3.23
8 0.51 Ideal E VS2 61.5 57 1781 5.12 5.09 3.14
9 0.33 Ideal E VS2 61.8 55 723 4.45 4.48 2.76
10 0.93 Ideal H VS2 61.5 57 4218 6.3 6.26 3.86
# ℹ 4,990 more rows
Density histograms and density curves
Density histograms
ggplot(data = diamonds,
aes(x = carat)) +
geom_histogram() +
facet_wrap(~ cut,
scales = "free_y")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Density curves
ggplot(data = diamonds,
aes(x = carat,
y = after_stat(density))) +
geom_histogram(bins = 10)ggplot(data = diamonds,
aes(x = carat))Exercise
Multiple density curves
ggplot(data = diamonds,
aes(x = carat)) +
geom_density()
Amount of smoothing
ggplot(data = diamonds,
aes(x = carat)) +
geom_density()Exercise
Using the code chunk below, use the iris dataset to do the following:
- Create density curves of `Petal.Length` colored by each `Species`;
- Adjust the amount of smoothing to find a level that shows the overall trends well with some detail, but that is not overly exact (it’s a balance);
- Add `fill = Species` in the `aes()` statement to see the result of this;
- Notice how the colors are completely opaque (not “see through”). To make them more transparent, add the following option locally: `geom_density(alpha = 0.5)`. Try a few different values ($0 \le \texttt{alpha} \le 1$) to see the result. Note that this option can also be used with lots of other `geom_*()` functions as well.
- What does this plot tell us about the petal lengths of the different species?
Application
Recreating
# Load the basketball game data used in the application exercises.
# TO DO FIRST:
# -> 1) create a folder named "data" in the same location where you saved this file
#       and save the file "data-bsu-game.RData" in it
# -> 2) remove eval = FALSE from the code chunk header so this chunk runs
# The path is relative, so it resolves against the current working directory.
# NOTE(review): presumably this restores the `data_bsu_game` object printed next -- confirm.
load("data/data-bsu-game.RData")
data_bsu_gameExercise
- Using the code chunk below, copy the final code used to create ‘Visual 1’ from above and modify it to recreate ‘Visual 2’.