You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(ggplot2)
library(gcookbook) # Load gcookbook for the pg_mean data set
# Basic bar graph: one bar per group, bar height taken from weight
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col()
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Print the BOD data frame and show its structure
BOD
str(BOD)

# Bar graph of demand against Time, with Time used as-is on the x axis
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_col()
# Convert Time to a factor so that it is treated as a discrete variable
ggplot(BOD, aes(x = factor(Time), y = demand)) +
  geom_col()

# Light blue fill with a black outline
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_col(fill = "lightblue", colour = "black")

library(gcookbook) # Load gcookbook for the cabbage_exp data set
cabbage_exp
We'll map `Date` to the *x* position and map `Cultivar` to the fill color (Figure \@ref(fig:FIG-BAR-GRAPH-GROUPED-BAR)):
```r
library(ggplot2)
# Date on the x axis, one dodged (side-by-side) bar per Cultivar
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = "dodge")
```
Graph with grouped bars
# Keep only the first five rows of cabbage_exp, then print the result
ce <- cabbage_exp[seq_len(5), ]
ce
## Cultivar Date Weight sd n se
## 1 c39 d16 3.18 0.9566144 10 0.30250803
## 2 c39 d20 2.80 0.2788867 10 0.08819171
## 3 c39 d21 2.74 0.9834181 10 0.31098410
## 4 c52 d16 2.26 0.4452215 10 0.14079141
## 5 c52 d20 3.11 0.7908505 10 0.25008887
# Dodged bars with a black outline and the "Pastel1" Brewer fill palette
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(colour = "black", position = "dodge") +
  scale_fill_brewer(palette = "Pastel1")
Graph with a missing bar—the other bar fills the space
# Equivalent to using geom_bar(stat = "count"), which is the default stat
# for geom_bar(): the bar height is the number of rows for each value of cut
ggplot(diamonds, aes(x = cut)) +
  geom_bar()
Bar graph of counts
Bar graph of counts on a continuous axis (left); A histogram (right)
library(gcookbook) # Load gcookbook for the uspopchange data set
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Sort by Change in descending order and keep the first ten rows
upc <- slice(arrange(uspopchange, desc(Change)), seq_len(10))
upc
## State Abb Region Change
## 1 Nevada NV West 35.1
## 2 Arizona AZ West 24.6
## 3 Utah UT West 23.8
## 4 Idaho ID West 21.1
## 5 Texas TX South 20.6
## 6 North Carolina NC South 18.5
## 7 Georgia GA South 18.3
## 8 Florida FL South 17.6
## 9 Colorado CO West 16.9
## 10 South Carolina SC South 15.3
# Map Region to the fill colour of the bars
ggplot(data = upc, mapping = aes(x = Abb, y = Change, fill = Region)) +
  geom_col()
A variable mapped to fill
# Order the bars by Change, outline them in black, use two manually chosen
# fill colours, and relabel the x axis
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_col(colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")
Graph with different colors, black outlines, and sorted by percentage change
library(gcookbook) # Load gcookbook for the climate data set
library(dplyr)
# Keep the Berkeley rows from 1900 onward and add a logical column, pos,
# that is TRUE when the 10-year anomaly is non-negative
climate_sub <- climate %>%
  filter(Source == "Berkeley", Year >= 1900) %>%
  mutate(pos = Anomaly10y >= 0)
climate_sub
## Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y pos
## 1 Berkeley 1900 NA NA -0.171 0.108 FALSE
## 2 Berkeley 1901 NA NA -0.162 0.109 FALSE
## 3 Berkeley 1902 NA NA -0.177 0.108 FALSE
## 4 Berkeley 1903 NA NA -0.199 0.104 FALSE
## 5 Berkeley 1904 NA NA -0.223 0.105 FALSE
## 6 Berkeley 1905 NA NA -0.241 0.107 FALSE
## 7 Berkeley 1906 NA NA -0.294 0.106 FALSE
## 8 Berkeley 1907 NA NA -0.312 0.105 FALSE
## 9 Berkeley 1908 NA NA -0.328 0.103 FALSE
## 10 Berkeley 1909 NA NA -0.281 0.101 FALSE
## 11 Berkeley 1910 NA NA -0.247 0.099 FALSE
## 12 Berkeley 1911 NA NA -0.243 0.097 FALSE
## 13 Berkeley 1912 NA NA -0.257 0.100 FALSE
## 14 Berkeley 1913 NA NA -0.268 0.100 FALSE
## 15 Berkeley 1914 NA NA -0.257 0.097 FALSE
## 16 Berkeley 1915 NA NA -0.249 0.095 FALSE
## 17 Berkeley 1916 NA NA -0.214 0.096 FALSE
## 18 Berkeley 1917 NA NA -0.201 0.096 FALSE
## 19 Berkeley 1918 NA NA -0.176 0.096 FALSE
## 20 Berkeley 1919 NA NA -0.182 0.097 FALSE
## 21 Berkeley 1920 NA NA -0.193 0.097 FALSE
## 22 Berkeley 1921 NA NA -0.167 0.098 FALSE
## 23 Berkeley 1922 NA NA -0.128 0.096 FALSE
## 24 Berkeley 1923 NA NA -0.075 0.097 FALSE
## 25 Berkeley 1924 NA NA -0.064 0.098 FALSE
## 26 Berkeley 1925 NA NA -0.065 0.100 FALSE
## 27 Berkeley 1926 NA NA -0.050 0.100 FALSE
## 28 Berkeley 1927 NA NA -0.020 0.099 FALSE
## 29 Berkeley 1928 NA NA -0.018 0.099 FALSE
## 30 Berkeley 1929 NA NA -0.026 0.100 FALSE
## 31 Berkeley 1930 NA NA -0.014 0.101 FALSE
## 32 Berkeley 1931 NA NA -0.047 0.098 FALSE
## 33 Berkeley 1932 NA NA -0.035 0.096 FALSE
## 34 Berkeley 1933 NA NA -0.017 0.093 FALSE
## 35 Berkeley 1934 NA NA 0.020 0.092 TRUE
## 36 Berkeley 1935 NA NA 0.053 0.089 TRUE
## 37 Berkeley 1936 NA NA 0.063 0.085 TRUE
## 38 Berkeley 1937 NA NA 0.048 0.081 TRUE
## 39 Berkeley 1938 NA NA 0.073 0.079 TRUE
## 40 Berkeley 1939 NA NA 0.113 0.076 TRUE
## 41 Berkeley 1940 NA NA 0.113 0.072 TRUE
## 42 Berkeley 1941 NA NA 0.134 0.071 TRUE
## 43 Berkeley 1942 NA NA 0.134 0.069 TRUE
## 44 Berkeley 1943 NA NA 0.127 0.070 TRUE
## 45 Berkeley 1944 NA NA 0.111 0.068 TRUE
## 46 Berkeley 1945 NA NA 0.072 0.066 TRUE
## 47 Berkeley 1946 NA NA 0.035 0.066 TRUE
## 48 Berkeley 1947 NA NA 0.042 0.064 TRUE
## 49 Berkeley 1948 NA NA 0.045 0.063 TRUE
## 50 Berkeley 1949 NA NA 0.013 0.062 TRUE
## 51 Berkeley 1950 NA NA 0.010 0.058 TRUE
## 52 Berkeley 1951 NA NA -0.017 0.054 FALSE
## 53 Berkeley 1952 NA NA -0.040 0.047 FALSE
## 54 Berkeley 1953 NA NA -0.040 0.043 FALSE
## 55 Berkeley 1954 NA NA -0.032 0.038 FALSE
## 56 Berkeley 1955 NA NA -0.022 0.035 FALSE
## 57 Berkeley 1956 NA NA 0.012 0.031 TRUE
## 58 Berkeley 1957 NA NA 0.007 0.028 TRUE
## 59 Berkeley 1958 NA NA 0.002 0.027 TRUE
## 60 Berkeley 1959 NA NA 0.002 0.026 TRUE
## 61 Berkeley 1960 NA NA -0.019 0.026 FALSE
## 62 Berkeley 1961 NA NA -0.001 0.021 FALSE
## 63 Berkeley 1962 NA NA 0.017 0.018 TRUE
## 64 Berkeley 1963 NA NA 0.004 0.016 TRUE
## 65 Berkeley 1964 NA NA -0.028 0.018 FALSE
## 66 Berkeley 1965 NA NA -0.006 0.017 FALSE
## 67 Berkeley 1966 NA NA -0.024 0.017 FALSE
## 68 Berkeley 1967 NA NA -0.041 0.019 FALSE
## 69 Berkeley 1968 NA NA -0.025 0.020 FALSE
## 70 Berkeley 1969 NA NA -0.019 0.024 FALSE
## 71 Berkeley 1970 NA NA 0.010 0.026 TRUE
## 72 Berkeley 1971 NA NA 0.007 0.022 TRUE
## 73 Berkeley 1972 NA NA 0.015 0.015 TRUE
## 74 Berkeley 1973 NA NA 0.028 0.012 TRUE
## 75 Berkeley 1974 NA NA 0.049 0.014 TRUE
## 76 Berkeley 1975 NA NA 0.068 0.012 TRUE
## 77 Berkeley 1976 NA NA 0.128 0.011 TRUE
## 78 Berkeley 1977 NA NA 0.158 0.012 TRUE
## 79 Berkeley 1978 NA NA 0.167 0.013 TRUE
## 80 Berkeley 1979 NA NA 0.193 0.012 TRUE
## 81 Berkeley 1980 NA NA 0.186 0.016 TRUE
## 82 Berkeley 1981 NA NA 0.217 0.016 TRUE
## 83 Berkeley 1982 NA NA 0.235 0.014 TRUE
## 84 Berkeley 1983 NA NA 0.270 0.014 TRUE
## 85 Berkeley 1984 NA NA 0.318 0.014 TRUE
## 86 Berkeley 1985 NA NA 0.344 0.013 TRUE
## 87 Berkeley 1986 NA NA 0.352 0.012 TRUE
## 88 Berkeley 1987 NA NA 0.380 0.011 TRUE
## 89 Berkeley 1988 NA NA 0.370 0.013 TRUE
## 90 Berkeley 1989 NA NA 0.366 0.017 TRUE
## 91 Berkeley 1990 NA NA 0.433 0.019 TRUE
## 92 Berkeley 1991 NA NA 0.467 0.018 TRUE
## 93 Berkeley 1992 NA NA 0.496 0.017 TRUE
## 94 Berkeley 1993 NA NA 0.526 0.019 TRUE
## 95 Berkeley 1994 NA NA 0.554 0.020 TRUE
## 96 Berkeley 1995 NA NA 0.563 0.019 TRUE
## 97 Berkeley 1996 NA NA 0.565 0.022 TRUE
## 98 Berkeley 1997 NA NA 0.618 0.022 TRUE
## 99 Berkeley 1998 NA NA 0.680 0.023 TRUE
## 100 Berkeley 1999 NA NA 0.734 0.025 TRUE
## 101 Berkeley 2000 NA NA 0.748 0.026 TRUE
## 102 Berkeley 2001 NA NA 0.793 0.027 TRUE
## 103 Berkeley 2002 NA NA 0.856 0.028 TRUE
## 104 Berkeley 2003 NA NA 0.869 0.028 TRUE
## 105 Berkeley 2004 NA NA 0.884 0.029 TRUE
# Fill each bar according to pos, i.e. whether the anomaly is >= 0
ggplot(
  data = climate_sub,
  mapping = aes(x = Year, y = Anomaly10y, fill = pos)
) +
  geom_col(position = "identity")
Different colors for positive and negative values
# Same plot with custom fill colours, a thin black outline, and no legend.
# `linewidth` (rather than `size`) sets the outline width, and
# `guide = "none"` (rather than `guide = FALSE`) suppresses the legend; the
# older spellings were deprecated in ggplot2 3.4.0 and 3.3.4 respectively.
ggplot(climate_sub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_col(position = "identity", colour = "black", linewidth = 0.25) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")
Graph with customized colors and no legend
library(gcookbook) # Load gcookbook for the pg_mean data set
# Bars drawn with the default width
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col()
For narrower bars:
# Bars drawn with width = 0.5
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col(width = 0.5)
And for wider bars (these have the maximum width of 1):
# Bars drawn with width = 1
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col(width = 1)
Different bar widths
# Grouped bars with a bar width of 0.5 and the default dodge position
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "dodge", width = 0.5)
And with some space between the bars:
# Grouped bars with a bar width of 0.5 and an explicit dodge width of 0.7
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = position_dodge(0.7), width = 0.5)
Bar graph with narrow grouped bars (left); With space between the bars (right)
The first graph used `position = "dodge"`, and the second graph used `position = position_dodge(0.7)`. This is because `position = "dodge"` is simply shorthand for `position = position_dodge()` with the default value of 0.9, but when we want to set a specific value, we need to use the more verbose form.
# Four spellings of a dodged bar layer, from shortest to most explicit
geom_bar(position = "dodge")
geom_bar(width = 0.9, position = position_dodge())
geom_bar(position = position_dodge(0.9))
geom_bar(width = 0.9, position = position_dodge(width = 0.9))
Same dodge width of 0.9, but different bar widths of 0.9 (left) and 0.2 (right)
library(gcookbook) # Load gcookbook for the cabbage_exp data set
# With no position given, geom_col() is called with its default settings
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col()
Stacked bar graph
cabbage_exp
## Cultivar Date Weight sd n se
## 1 c39 d16 3.18 0.9566144 10 0.30250803
## 2 c39 d20 2.80 0.2788867 10 0.08819171
## 3 c39 d21 2.74 0.9834181 10 0.31098410
## 4 c52 d16 2.26 0.4452215 10 0.14079141
## 5 c52 d20 3.11 0.7908505 10 0.25008887
## 6 c52 d21 1.47 0.2110819 10 0.06674995
# Reverse the order of the entries in the fill legend
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col() +
  guides(fill = guide_legend(reverse = TRUE))
Stacked bar graph with reversed legend order
# Reverse the stacking order of the bars as well as the legend order
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = position_stack(reverse = TRUE)) +
  guides(fill = guide_legend(reverse = TRUE))
Stacked bar graph with reversed stacking order
# Stacked bars with a black outline and the "Pastel1" Brewer fill palette
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(colour = "black") +
  scale_fill_brewer(palette = "Pastel1")
Stacked bar graph with new palette and black outline
library(gcookbook) # Load gcookbook for the cabbage_exp data set
# position = "fill" is used here instead of the default position
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "fill")
Proportional stacked bar graph
# Format the y-axis labels with scales::percent
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent)
# Same plot with a black outline and the "Pastel1" Brewer fill palette
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "fill", colour = "black") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Pastel1")
Proportional stacked bar graph with new palette and black outline.
library(gcookbook)
library(dplyr)
cabbage_exp
## Cultivar Date Weight sd n se
## 1 c39 d16 3.18 0.9566144 10 0.30250803
## 2 c39 d20 2.80 0.2788867 10 0.08819171
## 3 c39 d21 2.74 0.9834181 10 0.31098410
## 4 c52 d16 2.26 0.4452215 10 0.14079141
## 5 c52 d20 3.11 0.7908505 10 0.25008887
## 6 c52 d21 1.47 0.2110819 10 0.06674995
# Within each Date, express every Weight as a percentage of that Date's
# total Weight
ce <- cabbage_exp %>%
  group_by(Date) %>%
  mutate(percent_weight = Weight / sum(Weight) * 100)
ce
## # A tibble: 6 × 7
## # Groups: Date [3]
## Cultivar Date Weight sd n se percent_weight
## <fct> <fct> <dbl> <dbl> <int> <dbl> <dbl>
## 1 c39 d16 3.18 0.957 10 0.303 58.5
## 2 c39 d20 2.8 0.279 10 0.0882 47.4
## 3 c39 d21 2.74 0.983 10 0.311 65.1
## 4 c52 d16 2.26 0.445 10 0.141 41.5
## 5 c52 d20 3.11 0.791 10 0.250 52.6
## 6 c52 d21 1.47 0.211 10 0.0667 34.9
# Stack the precomputed percentages for each Date
ggplot(
  data = ce,
  mapping = aes(x = Date, y = percent_weight, fill = Cultivar)
) +
  geom_col()
library(gcookbook) # Load gcookbook for the cabbage_exp data set
# Labels below the top of each bar: white text, vjust = 1.5
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), colour = "white", vjust = 1.5)

# Labels above the top of each bar: vjust = -0.2
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2)
Labels under the tops of bars (left); Labels above bars (right)
# Label each bar with its count. after_stat(count) refers to the count
# computed by stat = "count"; it replaces the `..count..` notation, which
# was deprecated in ggplot2 3.4.0.
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar() +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", vjust = 1.5, colour = "white"
  )
Bar graph of counts with labels under the tops of bars
# Option 1: set the upper y limit to 1.05 times the largest Weight
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

# Option 2: place each label at Weight + 0.1 instead of using vjust, and
# leave the y limits unset
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(y = Weight + 0.1, label = Weight))
# Grouped bars with labels; the text layer is given position_dodge(.9)
# explicitly
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "dodge") +
  geom_text(
    mapping = aes(label = Weight),
    position = position_dodge(.9),
    vjust = 1.5,
    size = 3,
    colour = "white"
  )
Labels on grouped bars
library(dplyr)
# Sort by Date, then by Cultivar in descending order. desc() sorts on the
# values themselves; rev(Cultivar) merely reverses the row positions of the
# column and gives the intended order only when the rows happen to be laid
# out symmetrically, as they are in cabbage_exp.
ce <- cabbage_exp %>%
  arrange(Date, desc(Cultivar))

# Get the cumulative sum of Weight within each Date
ce <- ce %>%
  group_by(Date) %>%
  mutate(label_y = cumsum(Weight))
ce
## # A tibble: 6 × 7
## # Groups: Date [3]
## Cultivar Date Weight sd n se label_y
## <fct> <fct> <dbl> <dbl> <int> <dbl> <dbl>
## 1 c52 d16 2.26 0.445 10 0.141 2.26
## 2 c39 d16 3.18 0.957 10 0.303 5.44
## 3 c52 d20 3.11 0.791 10 0.250 3.11
## 4 c39 d20 2.8 0.279 10 0.0882 5.91
## 5 c52 d21 1.47 0.211 10 0.0667 1.47
## 6 c39 d21 2.74 0.983 10 0.311 4.21
# Place each label at its precomputed label_y, nudged down with vjust = 1.5
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col() +
  geom_text(
    mapping = aes(y = label_y, label = Weight),
    colour = "white",
    vjust = 1.5
  )
Labels on stacked bars
# Sort by Date, then by Cultivar in descending order. desc() sorts on the
# values themselves, whereas rev(Cultivar) only reverses row positions and
# does not sort in general.
ce <- cabbage_exp %>%
  arrange(Date, desc(Cultivar))

# Calculate y position, placing it in the middle: the cumulative Weight
# within each Date minus half of the row's own Weight
ce <- ce %>%
  group_by(Date) %>%
  mutate(label_y = cumsum(Weight) - 0.5 * Weight)

ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col() +
  geom_text(aes(y = label_y, label = Weight), colour = "white")
Labels in the middle of stacked bars
# Black outlines, "Pastel1" fills, and labels that show Weight formatted
# with at least two decimal places followed by "kg"
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(colour = "black") +
  geom_text(
    mapping = aes(
      y = label_y,
      label = paste(format(Weight, nsmall = 2), "kg")
    ),
    size = 4
  ) +
  scale_fill_brewer(palette = "Pastel1")
Customized stacked bar graph with labels
library(gcookbook) # Load gcookbook for the tophitters2001 data set
# Take the first 25 rows of the tophitters2001 data set
tophit <- tophitters2001[seq_len(25), ]

# Basic dot plot: avg on the x axis, one row per name on the y axis
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()
Basic dot plot
tophit[, c("name", "lg", "avg")]
## name lg avg
## 1 Larry Walker NL 0.3501
## 2 Ichiro Suzuki AL 0.3497
## 3 Jason Giambi AL 0.3423
## 4 Roberto Alomar AL 0.3357
## 5 Todd Helton NL 0.3356
## 6 Moises Alou NL 0.3314
## 7 Lance Berkman NL 0.3310
## 8 Bret Boone AL 0.3307
## 9 Frank Catalanotto AL 0.3305
## 10 Chipper Jones NL 0.3304
## 11 Albert Pujols NL 0.3288
## 12 Barry Bonds NL 0.3277
## 13 Sammy Sosa NL 0.3276
## 14 Juan Pierre NL 0.3274
## 15 Juan Gonzalez AL 0.3252
## 16 Luis Gonzalez NL 0.3251
## 17 Rich Aurilia NL 0.3239
## 18 Paul Lo Duca NL 0.3196
## 19 Jose Vidro NL 0.3189
## 20 Alex Rodriguez AL 0.3180
## 21 Cliff Floyd NL 0.3171
## 22 Shannon Stewart AL 0.3156
## 23 Jeff Cirillo NL 0.3125
## 24 Jeff Conine AL 0.3111
## 25 Derek Jeter AL 0.3111
# Order the names by avg, enlarge the points, and keep only dashed grey
# grid lines along the y-axis breaks
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
Dot plot, ordered by batting average
# Same plot with the axes swapped: names on x, avg on y. The x-axis text
# is rotated 60 degrees with hjust = 1.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )
Dot plot with names on x-axis and values on y-axis
# Names ordered by lg first, then by avg within each lg
nameorder <- with(tophit, name[order(lg, avg)])

# Make name a factor whose levels follow nameorder
tophit[["name"]] <- factor(tophit[["name"]], levels = nameorder)

# A grey segment runs from each point to xend = 0; points are coloured by lg
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_segment(mapping = aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(mapping = aes(colour = lg), size = 3) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    # No horizontal grid lines
    panel.grid.major.y = element_blank(),
    # Put the legend inside the plot area
    legend.position = c(1, 0.55),
    legend.justification = c(1, 0.5)
  )
Grouped by league, with lines that stop at the point
# One facet row per lg, with free y scales and free facet heights.
# `guide = "none"` hides the colour legend; the older `guide = FALSE`
# spelling was deprecated in ggplot2 3.3.4.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")
Faceted by league