Using ggplot2 and Supporting Libraries

Author

David McCabe

Published

December 9, 2025

Grammar of Graphics (GOG)

The GOG offers a powerful, intuitive structure for building Economist-style, two-dimensional, non-interactive plots.

Data:

The data.table containing the information which is rendered graphically by a ggplot.

the housing dataset
library(ggplot2)
library(data.table)
# Read the state-level land/housing CSV into a data.table and preview it
housing <- fread(input = "landdata-states.csv")
head(housing, n = 6L)
    State region  Date Home.Value Structure.Cost Land.Value Land.Share..Pct.
   <char> <char> <int>      <int>          <int>      <int>            <num>
1:     AK   West 20101     224952         160599      64352             28.6
2:     AK   West 20102     225511         160252      65259             28.9
3:     AK   West 20093     225820         163791      62029             27.5
4:     AK   West 20094     224994         161787      63207             28.1
5:     AK   West 20074     234590         155400      79190             33.8
6:     AK   West 20081     233714         157458      76256             32.6
   Home.Price.Index Land.Price.Index
              <num>            <num>
1:            1.481            1.552
2:            1.484            1.576
3:            1.486            1.494
4:            1.481            1.524
5:            1.544            1.885
6:            1.538            1.817

Aesthetic Mapping

An aesthetic is a visual attribute of a geometric object (position, colour, size, shape, …). An aesthetic mapping, specified using the aes() command, is a mapping between some column of the data.table and a scaled aesthetic of the geoms: the aesthetic of each data point is scaled according to that column's value. Aesthetic mappings are controlled using scales.

example scatter plot aesthetics
# Mapped aesthetic: each point's colour is scaled by its Home.Value
myPlot + geom_point(mapping = aes(color = Home.Value))

An aesthetic mapping is distinct from a fixed geom aesthetic, which is set outside aes() and does not vary over data points.

# Fixed aesthetic: one colour, set outside aes(), applied to every point
myPlot + geom_point(color = "blue")

Alternatively, we can specify the data and aesthetic mapping in the ggplot() command; they are then inherited by the associated geom_ calls.

# Data and aesthetics are given once in ggplot(); layers added afterwards
# inherit them unless they supply their own.
ggplot(
  housing[Date == 20011],
  aes(x = log(Land.Value), y = Structure.Cost, color = Home.Value)
) +
  geom_point() # no arguments: uses the mapping from the parent ggplot() call

Geometric Object

A visual artifact in a plot, such as a line, a text box or an error bar.

available geometric objects…
# Search the installed ggplot2 help pages for anything matching "geom_"
help.search(pattern = "geom_", package = "ggplot2")

# Reference list of geoms. Functions are from ggplot2 unless a comment names
# another package. Each geom appears once.

# Basic Geoms
geom_point()         # Scatter plot, points
geom_line()          # Line plot, connects observations in order of the x variable
geom_path()          # Like geom_line, but connects observations in the order they appear in the data
geom_bar()           # Bar plot, height is proportional to the number of cases in each group
geom_col()           # Bar plot, height represents the value in the data directly
geom_histogram()     # Histogram, distribution of a single continuous variable
geom_freqpoly()      # Frequency polygon, like a histogram drawn with lines
geom_area()          # Area plot, shaded region under a line
geom_tile()          # Rectangles given by centre and size, useful for heatmaps
geom_raster()        # Fast special case of geom_tile for equally sized tiles

# Smooth Geoms
geom_smooth()        # Add a smoothed conditional mean (e.g., LOESS, linear models)

# Textual Geoms
geom_text()          # Text annotations
geom_label()         # Label annotations with a background
geom_text_repel()    # Avoid overlapping text labels (from ggrepel package)
geom_label_repel()   # Avoid overlapping label boxes (from ggrepel package)

# Ribbon Geoms
geom_ribbon()        # Shaded area between two lines (useful for confidence intervals)

# Distribution Geoms
geom_boxplot()       # Boxplot for visualizing distributions
geom_violin()        # Violin plot, shows distribution with density estimates

# Error Bars and Range Geoms
geom_errorbar()      # Interval drawn as an error bar with end caps
geom_errorbarh()     # Horizontal error bars
                     # NOTE(review): reported as deprecated in recent ggplot2 in
                     # favour of geom_errorbar(orientation = "y"); confirm for
                     # the installed version
geom_linerange()     # Interval drawn as a plain line, often used for confidence intervals
geom_crossbar()      # Hollow box spanning the interval with a line at the central value
geom_pointrange()    # Interval line with a point at the central value

# Jittered Points
geom_jitter()        # Scatter plot with jitter to reduce overplotting

# Density Plots
geom_density()       # Kernel density estimate plot
geom_density_2d()    # 2D density estimate, contours
geom_density_2d_filled() # 2D density estimate, filled contours

# Function Geoms
geom_function()      # Draw a mathematical function as a continuous curve

# Rug Geoms
geom_rug()           # Marginal tick marks on the x and/or y axis

# Dot Plot
geom_dotplot()       # Dot plot, one dot per observation, stacked within bins

# Step Geoms
geom_step()          # Step function plot

# Hexbin Plot
geom_hex()           # Hexagonal binning for 2D data, useful against overplotting

# Contour Plots
geom_contour()       # Contour lines of a surface z over (x, y)
geom_contour_filled()# Filled contour bands

# Polygon Geoms
geom_polygon()       # Polygon shapes, useful for maps

# Segment and Reference Line Geoms
geom_segment()       # Line segments between specified points
geom_curve()         # Curved lines between specified points
geom_abline()        # Lines with specified slope and intercept (y = mx + b)
geom_hline()         # Horizontal lines
geom_vline()         # Vertical lines

# Spoke Geoms
geom_spoke()         # Line segments defined by a location, an angle and a radius

# Quantile Plot
geom_quantile()      # Quantile regression lines

# QQ Plots
geom_qq()            # Quantile-Quantile plot
geom_qq_line()       # Reference line for a QQ plot

# Interactive Geoms (from ggiraph package)
geom_point_interactive()  # Interactive points
geom_bar_interactive()    # Interactive bars
geom_tile_interactive()   # Interactive tiles

# Map Geoms
geom_map()           # Polygons taken from a reference map
geom_sf()            # Simple features (for maps using sf objects)
geom_sf_label()      # Label points in sf plots
geom_sf_text()       # Text annotations in sf plots

# Blank Geom
geom_blank()         # Draws nothing; useful for empty plots or dummy layers

# Geoms from Other Packages
geom_treemap()       # Treemap, requires treemapify package

Statistical Transformation

Data is typically processed to create a plot e.g. boxplot calculates the median and inter-quartile range points while a smoother calculates an interpolated set of values.

Each geometric object has a default stat_ that may be overridden. Avoid doing this: generally you should instead use a different geom whose default is the statistical transformation you need.

Geometric Object Statistical Transform Statistical Transform Identifier Description
geom_bar stat_count() “count” counts the number of cases at each x position
geom_col stat_identity() “identity” leaves the data as is
geom_freqpoly stat_bin() “bin” display the counts with lines (histogram line)
geom_histogram stat_bin() “bin” display the counts with bars (histogram bars)
geom_smooth stat_smooth() “smooth” aids the eye in seeing patterns
Stat commands
# Reference list of ggplot2 stats. Each stat appears once.

# Basic Stats
stat_identity()         # Leaves data unchanged, useful when raw data should be plotted
stat_count()            # Counts the number of occurrences of each x value (used in bar plots)
stat_bin()              # Bins data along the x-axis (used in histograms)
stat_smooth()           # Computes a smoothed conditional mean (e.g., LOESS or regression lines)
stat_boxplot()          # Computes boxplot statistics
stat_unique()           # Filters out duplicated points (keeps unique points)

# Summary Stats
stat_summary()          # Summarizes y values at unique x positions (e.g., mean, median)
stat_summary_bin()      # Summarizes y values at binned x positions
stat_summary_2d()       # Applies a summary function to z values in rectangular 2D bins
stat_summary_hex()      # Applies a summary function to z values in hexagonal 2D bins

# Density Stats
stat_density()          # Kernel density estimate
stat_density_2d()       # 2D kernel density estimate, computes contour lines
stat_density_2d_filled()# 2D kernel density estimate with filled contours

# Bin Stats
stat_bin_2d()           # Counts observations in rectangular 2D bins (heatmap of counts)
stat_binhex()           # Counts observations in hexagonal 2D bins (see also stat_bin_hex())

# Function Stats
stat_function()         # Computes a function on a range of x values

# Contour Stats
stat_contour()          # Computes contour lines for 3D data
stat_contour_filled()   # Computes filled contour regions

# Ellipse Stats
stat_ellipse()          # Computes confidence ellipses (useful in scatter plots)

# ECDF Stats
stat_ecdf()             # Empirical cumulative distribution function

# Quantile Stats
stat_quantile()         # Quantile regression

# QQ Stats
stat_qq()               # Sample quantiles against theoretical quantiles of a distribution
stat_qq_line()          # Adds a QQ line (a reference line in a QQ plot)

# Unique to `geom_sf()`
stat_sf()               # Simple features geometry computations for maps
stat_sf_coordinates()   # Extracts coordinates from simple feature geometries

# Not available as stat_ functions
# stat_rug()            # ggplot2 has no stat_rug(); use geom_rug(), which plots the raw values
# stat_bindot()         # Not a function in current ggplot2; geom_dotplot() does its own binning

Scale

Scalable aesthetics include position, colour, fill, transparency, size, shape and line type. Scales are modified using the family of functions scale_<aesthetic>_<type>(), e.g. scale_x_continuous() or scale_color_gradient().

Scale commands
# Reference list of scale functions, named scale_<aesthetic>_<type>().
# Functions are from ggplot2 unless a comment names another package.

# Continuous position scales
scale_x_continuous()     # Adjust x-axis for continuous data (name, breaks, labels, limits)
scale_y_continuous()     # Adjust y-axis for continuous data (name, breaks, labels, limits)

# Discrete position scales
scale_x_discrete()       # Adjust x-axis for discrete data
scale_y_discrete()       # Adjust y-axis for discrete data

# Date/time position scales
scale_x_date()           # Adjust x-axis for date data
scale_y_date()           # Adjust y-axis for date data
scale_x_datetime()       # Adjust x-axis for datetime data
scale_y_datetime()       # Adjust y-axis for datetime data
scale_x_time()           # Adjust x-axis for time data
scale_y_time()           # Adjust y-axis for time data

# Logarithmic scales (continuous position scales with a log10 transformation)
scale_x_log10()          # Logarithmic transformation on x-axis
scale_y_log10()          # Logarithmic transformation on y-axis

# Reverse scales
scale_x_reverse()        # Reverse x-axis
scale_y_reverse()        # Reverse y-axis

# Manual scales (you supply the values for each level of a discrete variable)
scale_fill_manual()      # Manually specify fill colors
scale_color_manual()     # Manually specify line/point colors
scale_shape_manual()     # Manually specify shape of points
scale_linetype_manual()  # Manually specify line types
scale_size_manual()      # Manually specify sizes

# Gradient scales (continuous colour)
scale_fill_gradient()         # Two-colour (low-high) gradient for fill
scale_fill_gradient2()        # Diverging (low-mid-high) gradient for fill
scale_fill_gradientn()        # Gradient through n colours for fill
scale_color_gradient()        # Two-colour (low-high) gradient for lines/points
scale_color_gradient2()       # Diverging (low-mid-high) gradient for lines/points
scale_color_gradientn()       # Gradient through n colours for lines/points

# Brewer scales (for discrete palettes)
scale_fill_brewer()      # Color Brewer palettes for fill
scale_color_brewer()     # Color Brewer palettes for lines/points

# Viridis scales (perceptually uniform)
scale_fill_viridis_d()   # Viridis color palette for discrete fill
scale_color_viridis_d()  # Viridis color palette for discrete lines/points
scale_fill_viridis_c()   # Viridis color palette for continuous fill
scale_color_viridis_c()  # Viridis color palette for continuous lines/points

# Alpha transparency scales
scale_alpha()            # Adjust alpha (transparency)
scale_alpha_continuous() # Continuous alpha scale
scale_alpha_discrete()   # Discrete alpha scale

# Identity scales (the data already hold the aesthetic values to use)
scale_fill_identity()    # Use raw values for fill
scale_color_identity()   # Use raw values for color
scale_size_identity()    # Use raw values for size
scale_shape_identity()   # Use raw values for shape
scale_linetype_identity()# Use raw values for line type

# Other aesthetics
scale_size()             # Adjust size of points/lines
scale_shape()            # Adjust shape of points
scale_linetype()         # Adjust line types

# Edge scales for geom_edge_*() layers (not ggplot2; presumably the ggraph package)
# NOTE(review): ggraph documents scale_edge_colour_*() variants such as
# scale_edge_colour_gradient(); confirm a bare scale_edge_color() exists.
scale_edge_color()       # Adjusts edge color
scale_edge_size()        # Adjusts edge size
scale_edge_linetype()    # Adjusts edge linetype

Coordinate System

Use a coord command to choose the coordinate system and set the axis limits (like framing a scene). Limits given to coord_cartesian() zoom the view without discarding any data.

  # Zoom the visible window; the underlying data are left untouched
  p <- p + coord_cartesian(xlim = c(9, 12), ylim = c(75000, 200000))

By contrast, the limits set below with xlim() and ylim() are data limits: they actually remove out-of-range observations (such as outliers) from the data, which can create visible gaps in the plot.

  # Scale limits truncate the data: values outside the range become NA,
  # which produces missing points or gaps in the plot.
  p <- p +
    xlim(9, 12) +
    ylim(75000 + 5000, 200000 - 5000)
Coord commands
# Reference list of ggplot2 coordinate systems.

# Cartesian Coordinates
coord_cartesian()         # Default Cartesian coordinate system; its xlim/ylim zoom without dropping data
coord_fixed()             # Cartesian coordinates with a fixed aspect ratio between x and y axes
coord_equal()             # Alias for coord_fixed(), ensuring equal scaling for x and y

# Flipping Coordinates
coord_flip()              # Flips the x and y axes, useful for horizontal bar plots

# Polar Coordinates
coord_polar()             # Polar coordinates, useful for creating pie charts and circular plots

# Map Projections (for Geospatial Data)
coord_map()               # Projects a portion of the earth onto a flat 2D plane using a map projection
coord_quickmap()          # Similar to coord_map, but faster and less precise (good for quick geospatial plots)
coord_sf()                # Handles simple feature (sf) objects for map projections in ggplot2

# Transformed Coordinates
# NOTE(review): recent ggplot2 releases reportedly rename this to
# coord_transform(); confirm against the installed version.
coord_trans()             # Apply a transformation to the x and y axes (e.g., log or sqrt transformations)

Position Adjustment

Faceting

Lattice-style (small-multiple) graphics are produced by adding a facet layer to the plot, e.g. + facet_wrap(~State, ncol = 10).

Themes

Themes set general appearance
Theme commands
# Reference list of theme functions. Functions are from ggplot2 unless a
# comment names another package.

# Base Theme Functions
theme_gray()           # Default theme: grey background with white grid lines
theme_grey()           # Same theme as theme_gray() (alternative spelling)
theme_bw()             # White background with grey grid lines and a dark panel border
theme_linedraw()       # Only black lines of various widths on a white background
theme_light()          # Similar to theme_linedraw, but with light grey lines and axes
theme_dark()           # Dark counterpart of theme_light, with a dark grey background
theme_minimal()        # A minimalistic theme with no background annotations
theme_classic()        # A classic theme with axis lines, no gridlines and a white background
theme_void()           # A completely empty theme (no axes, gridlines, or text)
theme_test()           # A theme intended for visual unit tests

# Specialized Themes (from ggthemes package)
theme_economist()      # A theme inspired by The Economist style
theme_few()            # A minimal theme based on Stephen Few's principles
theme_tufte()          # Minimalistic theme following Edward Tufte's design principles
theme_map()            # A map-friendly theme (useful for choropleths)
theme_solarized()      # Solarized light and dark theme options
theme_wsj()            # Wall Street Journal inspired theme
theme_excel()          # Excel-style theme
# theme_eco()          # NOTE(review): not found in the ggthemes reference; confirm the source package

# Customization Helpers
theme()                # Adjust individual plot elements of a theme
theme_set()            # Set the default theme for future plots
theme_update()         # Update the current default theme, keeping unspecified element properties
theme_replace()        # Update the current default theme, replacing the named elements entirely

# Theme Element Functions (used inside theme() to customize specific elements)
element_line()         # Customize line elements (e.g., axis lines)
element_rect()         # Customize rectangular elements (e.g., plot background)
element_text()         # Customize text elements (e.g., axis titles, labels)
element_blank()        # Draw nothing for the element

Examples

Scatterplot

Example: Linear Trendline

Let’s add a trend line…

# Fit Structure.Cost against log(Land.Value) on the Date == 20011 rows and
# store the fitted values so the trend can be drawn as a line layer.
housing2 <- housing[Date == 20011]

# Use a formula with `data =`, and a descriptive name so base::t() is not masked.
trend_fit <- lm(Structure.Cost ~ log(Land.Value), data = housing2)
housing2[, pred.structure.cost := predict(trend_fit)]

p <- ggplot(
  data = housing2,
  mapping = aes(y = Structure.Cost, x = log(Land.Value))
) +
  # coord_cartesian() only zooms the view; no data are discarded here.
  coord_cartesian(
    xlim = c(9, 12),
    ylim = c(75000, 200000)
  ) +
  # xlim()/ylim() set scale limits: out-of-range rows are removed with a
  # warning, leaving missing points or gaps in the drawn layers.
  xlim(9, 12) +
  ylim(75000 + 5000, 200000 - 5000)

p <- p + geom_point(aes(color = Home.Value))
p <- p + geom_line(aes(y = pred.structure.cost)) # fitted trend line
p <- p + theme_bw(base_family = "times")
print(p)
Warning: Removed 18 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 17 rows containing missing values or values outside the scale range
(`geom_line()`).

Example: Smoother

# Install ggrepel on first use, then attach it.
# requireNamespace() only checks that the package is available; library()
# does the attaching and raises an error if the install failed.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library(ggrepel)

# Scatter of structure cost against log land value for the Date == 20011
# rows, with non-overlapping state labels and a smoother.
housing2 <- housing[Date == 20011]

p <- ggplot(housing2, aes(x = log(Land.Value), y = Structure.Cost)) +
  geom_point(aes(shape = region), color = "blue") + # shape varies by region, colour is fixed
  geom_text_repel(aes(label = State), size = 3) +   # labels repel each other
  geom_smooth(col = "wheat") +                      # smoothing method left at its default
  theme_bw(base_family = "times")
print(p)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Example: Histogram

# Histogram of home values in bins of width 4000
p <- ggplot(housing, aes(x = Home.Value)) +
  geom_histogram(binwidth = 4000, stat = "bin") # "bin" is already the default stat

print(p)

Example: Pre-Summarised Barchart

# Sum Home.Value within each state before plotting
totHomeValPerState <- housing[
  ,
  .(Total.Home.Value = sum(Home.Value)),
  by = State
]

# The data are already summarised, so the bars must use the values as given:
# stat = "identity" ("bin" wouldn't work).
p <- ggplot(totHomeValPerState, aes(x = State, y = Total.Home.Value)) +
  geom_bar(stat = "identity")

# geom_col() could have been used instead; its stat defaults to "identity"
print(p)

Example: Scale Modifications

library(scales) # for muted()

# Home price index per state: points are jittered horizontally and coloured
# by Date on a diverging green-grey-red gradient.
p <- ggplot(housing, aes(x = State, y = Home.Price.Index)) +
  geom_point(
    aes(color = Date),
    position = position_jitter(width = 0.25, height = 0),
    size = 1.5,
    alpha = 0.5
  ) +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_gradient2(
    name = "",
    low = muted("green"),
    mid = "gray60",
    high = muted("red"),
    midpoint = 19939,
    # Date is an integer code (e.g. 20101), so plain years such as
    # c(1976, 1994, 2013) cannot be used as breaks; these values were
    # presumably read off summary(housing[, Date]).
    # NOTE(review): the first break 19751 is labelled "'76" - confirm.
    breaks = c(19751, 19939, 20131),
    labels = c("'76", "'94", "'13")
  ) +
  theme(
    legend.position = "top",
    axis.text = element_text(size = 6)
  )
print(p)

Example: Horrible Plot

library(scales)

# All states drawn on one panel, one semi-transparent coloured line per state
p <- ggplot(housing, aes(x = Date, y = Home.Value)) +
  geom_line(aes(color = State), alpha = 0.5) +
  theme_bw(base_family = "times")

print(p)

Example: Faceting

library(scales)

# Same series as a grid of small panels: one panel per state, ten per row
p <- ggplot(housing, aes(x = Date, y = Home.Value)) +
  geom_line() +
  facet_wrap(~State, ncol = 10) +
  theme_bw(base_family = "times")

print(p)

GGally

marginal plots

plot3d

Other topics