Convert Time to a discrete (categorical) variable with factor()

# BOD$Time is numeric; factor() turns it into a discrete variable so each
# distinct value gets its own bar
ggplot(BOD, aes(x = factor(Time), y = demand)) +
  geom_col()

library(gcookbook) # Load gcookbook for the pg_mean and cabbage_exp data sets

# Light blue bars with a black outline
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_col(fill = "lightblue", colour = "black")

cabbage_exp


We'll map `Date` to the *x* position and map `Cultivar` to the fill color (Figure \@ref(fig:FIG-BAR-GRAPH-GROUPED-BAR)):


```r
library(ggplot2)
# Grouped bars: Date on the x axis, one dodged bar per Cultivar
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_dodge())

```

Graph with grouped bars

# Keep only the first five rows of cabbage_exp, then print them
ce <- head(cabbage_exp, 5)
ce

##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887

# Dodged bars with a black outline and the Pastel1 fill palette
ggplot(data = ce, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(colour = "black", position = position_dodge()) +
  scale_fill_brewer(palette = "Pastel1")

Graph with a missing bar—the other bar fills the space

# With only x mapped, geom_bar() counts the rows in each category.
# Equivalent to using geom_bar(stat = "count"), which is the default stat
# (stat = "bin" is for continuous x and is what geom_histogram() uses).
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

Bar graph of counts

Bar graph of counts on a continuous axis (left); A histogram (right)

library(gcookbook) # Load gcookbook for the uspopchange data set
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Sort states by Change, largest first, and keep the first ten rows
upc <- uspopchange %>%
  arrange(desc(Change)) %>%
  head(10)

upc

##             State Abb Region Change
## 1          Nevada  NV   West   35.1
## 2         Arizona  AZ   West   24.6
## 3            Utah  UT   West   23.8
## 4           Idaho  ID   West   21.1
## 5           Texas  TX  South   20.6
## 6  North Carolina  NC  South   18.5
## 7         Georgia  GA  South   18.3
## 8         Florida  FL  South   17.6
## 9        Colorado  CO   West   16.9
## 10 South Carolina  SC  South   15.3

# Bar height shows Change; fill colour is mapped to Region
ggplot(data = upc, mapping = aes(Abb, Change, fill = Region)) +
  geom_col()

A variable mapped to fill

# Order the bars by Change, outline them in black, and set the two fill
# colours by hand
ggplot(
  data = upc,
  mapping = aes(reorder(Abb, Change), Change, fill = Region)
) +
  geom_col(color = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  labs(x = "State")

Graph with different colors, black outlines, and sorted by percentage change

library(gcookbook) # Load gcookbook for the climate data set
library(dplyr)

# Berkeley rows from 1900 onward; pos flags non-negative 10-year anomalies
climate_sub <- climate %>%
  filter(Source == "Berkeley", Year >= 1900) %>%
  mutate(pos = Anomaly10y >= 0)

climate_sub

##       Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
## 1   Berkeley 1900        NA        NA     -0.171  0.108 FALSE
## 2   Berkeley 1901        NA        NA     -0.162  0.109 FALSE
## 3   Berkeley 1902        NA        NA     -0.177  0.108 FALSE
## 4   Berkeley 1903        NA        NA     -0.199  0.104 FALSE
## 5   Berkeley 1904        NA        NA     -0.223  0.105 FALSE
## 6   Berkeley 1905        NA        NA     -0.241  0.107 FALSE
## 7   Berkeley 1906        NA        NA     -0.294  0.106 FALSE
## 8   Berkeley 1907        NA        NA     -0.312  0.105 FALSE
## 9   Berkeley 1908        NA        NA     -0.328  0.103 FALSE
## 10  Berkeley 1909        NA        NA     -0.281  0.101 FALSE
## 11  Berkeley 1910        NA        NA     -0.247  0.099 FALSE
## 12  Berkeley 1911        NA        NA     -0.243  0.097 FALSE
## 13  Berkeley 1912        NA        NA     -0.257  0.100 FALSE
## 14  Berkeley 1913        NA        NA     -0.268  0.100 FALSE
## 15  Berkeley 1914        NA        NA     -0.257  0.097 FALSE
## 16  Berkeley 1915        NA        NA     -0.249  0.095 FALSE
## 17  Berkeley 1916        NA        NA     -0.214  0.096 FALSE
## 18  Berkeley 1917        NA        NA     -0.201  0.096 FALSE
## 19  Berkeley 1918        NA        NA     -0.176  0.096 FALSE
## 20  Berkeley 1919        NA        NA     -0.182  0.097 FALSE
## 21  Berkeley 1920        NA        NA     -0.193  0.097 FALSE
## 22  Berkeley 1921        NA        NA     -0.167  0.098 FALSE
## 23  Berkeley 1922        NA        NA     -0.128  0.096 FALSE
## 24  Berkeley 1923        NA        NA     -0.075  0.097 FALSE
## 25  Berkeley 1924        NA        NA     -0.064  0.098 FALSE
## 26  Berkeley 1925        NA        NA     -0.065  0.100 FALSE
## 27  Berkeley 1926        NA        NA     -0.050  0.100 FALSE
## 28  Berkeley 1927        NA        NA     -0.020  0.099 FALSE
## 29  Berkeley 1928        NA        NA     -0.018  0.099 FALSE
## 30  Berkeley 1929        NA        NA     -0.026  0.100 FALSE
## 31  Berkeley 1930        NA        NA     -0.014  0.101 FALSE
## 32  Berkeley 1931        NA        NA     -0.047  0.098 FALSE
## 33  Berkeley 1932        NA        NA     -0.035  0.096 FALSE
## 34  Berkeley 1933        NA        NA     -0.017  0.093 FALSE
## 35  Berkeley 1934        NA        NA      0.020  0.092  TRUE
## 36  Berkeley 1935        NA        NA      0.053  0.089  TRUE
## 37  Berkeley 1936        NA        NA      0.063  0.085  TRUE
## 38  Berkeley 1937        NA        NA      0.048  0.081  TRUE
## 39  Berkeley 1938        NA        NA      0.073  0.079  TRUE
## 40  Berkeley 1939        NA        NA      0.113  0.076  TRUE
## 41  Berkeley 1940        NA        NA      0.113  0.072  TRUE
## 42  Berkeley 1941        NA        NA      0.134  0.071  TRUE
## 43  Berkeley 1942        NA        NA      0.134  0.069  TRUE
## 44  Berkeley 1943        NA        NA      0.127  0.070  TRUE
## 45  Berkeley 1944        NA        NA      0.111  0.068  TRUE
## 46  Berkeley 1945        NA        NA      0.072  0.066  TRUE
## 47  Berkeley 1946        NA        NA      0.035  0.066  TRUE
## 48  Berkeley 1947        NA        NA      0.042  0.064  TRUE
## 49  Berkeley 1948        NA        NA      0.045  0.063  TRUE
## 50  Berkeley 1949        NA        NA      0.013  0.062  TRUE
## 51  Berkeley 1950        NA        NA      0.010  0.058  TRUE
## 52  Berkeley 1951        NA        NA     -0.017  0.054 FALSE
## 53  Berkeley 1952        NA        NA     -0.040  0.047 FALSE
## 54  Berkeley 1953        NA        NA     -0.040  0.043 FALSE
## 55  Berkeley 1954        NA        NA     -0.032  0.038 FALSE
## 56  Berkeley 1955        NA        NA     -0.022  0.035 FALSE
## 57  Berkeley 1956        NA        NA      0.012  0.031  TRUE
## 58  Berkeley 1957        NA        NA      0.007  0.028  TRUE
## 59  Berkeley 1958        NA        NA      0.002  0.027  TRUE
## 60  Berkeley 1959        NA        NA      0.002  0.026  TRUE
## 61  Berkeley 1960        NA        NA     -0.019  0.026 FALSE
## 62  Berkeley 1961        NA        NA     -0.001  0.021 FALSE
## 63  Berkeley 1962        NA        NA      0.017  0.018  TRUE
## 64  Berkeley 1963        NA        NA      0.004  0.016  TRUE
## 65  Berkeley 1964        NA        NA     -0.028  0.018 FALSE
## 66  Berkeley 1965        NA        NA     -0.006  0.017 FALSE
## 67  Berkeley 1966        NA        NA     -0.024  0.017 FALSE
## 68  Berkeley 1967        NA        NA     -0.041  0.019 FALSE
## 69  Berkeley 1968        NA        NA     -0.025  0.020 FALSE
## 70  Berkeley 1969        NA        NA     -0.019  0.024 FALSE
## 71  Berkeley 1970        NA        NA      0.010  0.026  TRUE
## 72  Berkeley 1971        NA        NA      0.007  0.022  TRUE
## 73  Berkeley 1972        NA        NA      0.015  0.015  TRUE
## 74  Berkeley 1973        NA        NA      0.028  0.012  TRUE
## 75  Berkeley 1974        NA        NA      0.049  0.014  TRUE
## 76  Berkeley 1975        NA        NA      0.068  0.012  TRUE
## 77  Berkeley 1976        NA        NA      0.128  0.011  TRUE
## 78  Berkeley 1977        NA        NA      0.158  0.012  TRUE
## 79  Berkeley 1978        NA        NA      0.167  0.013  TRUE
## 80  Berkeley 1979        NA        NA      0.193  0.012  TRUE
## 81  Berkeley 1980        NA        NA      0.186  0.016  TRUE
## 82  Berkeley 1981        NA        NA      0.217  0.016  TRUE
## 83  Berkeley 1982        NA        NA      0.235  0.014  TRUE
## 84  Berkeley 1983        NA        NA      0.270  0.014  TRUE
## 85  Berkeley 1984        NA        NA      0.318  0.014  TRUE
## 86  Berkeley 1985        NA        NA      0.344  0.013  TRUE
## 87  Berkeley 1986        NA        NA      0.352  0.012  TRUE
## 88  Berkeley 1987        NA        NA      0.380  0.011  TRUE
## 89  Berkeley 1988        NA        NA      0.370  0.013  TRUE
## 90  Berkeley 1989        NA        NA      0.366  0.017  TRUE
## 91  Berkeley 1990        NA        NA      0.433  0.019  TRUE
## 92  Berkeley 1991        NA        NA      0.467  0.018  TRUE
## 93  Berkeley 1992        NA        NA      0.496  0.017  TRUE
## 94  Berkeley 1993        NA        NA      0.526  0.019  TRUE
## 95  Berkeley 1994        NA        NA      0.554  0.020  TRUE
## 96  Berkeley 1995        NA        NA      0.563  0.019  TRUE
## 97  Berkeley 1996        NA        NA      0.565  0.022  TRUE
## 98  Berkeley 1997        NA        NA      0.618  0.022  TRUE
## 99  Berkeley 1998        NA        NA      0.680  0.023  TRUE
## 100 Berkeley 1999        NA        NA      0.734  0.025  TRUE
## 101 Berkeley 2000        NA        NA      0.748  0.026  TRUE
## 102 Berkeley 2001        NA        NA      0.793  0.027  TRUE
## 103 Berkeley 2002        NA        NA      0.856  0.028  TRUE
## 104 Berkeley 2003        NA        NA      0.869  0.028  TRUE
## 105 Berkeley 2004        NA        NA      0.884  0.029  TRUE

# Fill each bar according to the logical pos column
ggplot(data = climate_sub, mapping = aes(Year, Anomaly10y, fill = pos)) +
  geom_col(position = position_identity())

Different colors for positive and negative values

# Thin black outlines, hand-picked fill colours, and no legend.
# `linewidth` replaces `size` for line widths (deprecated in ggplot2 3.4.0),
# and `guide = "none"` replaces `guide = FALSE` (deprecated in ggplot2 3.3.4).
ggplot(climate_sub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_col(position = "identity", colour = "black", linewidth = 0.25) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

Graph with customized colors and no legend

library(gcookbook) # Load gcookbook for the pg_mean data set

# Bars at the default width
ggplot(data = pg_mean, mapping = aes(group, weight)) +
  geom_col()

For narrower bars:

# Bars at width 0.5
ggplot(data = pg_mean, mapping = aes(group, weight)) +
  geom_col(width = 0.5)

And for wider bars (these have the maximum width of 1):

# Bars at width 1
ggplot(data = pg_mean, mapping = aes(group, weight)) +
  geom_col(width = 1)

Different bar widths

# Narrow grouped bars
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = "dodge", width = 0.5)

And with some space between the bars:

# Bar width 0.5 with a larger dodge width of 0.7
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.5)

Bar graph with narrow grouped bars (left); With space between the bars (right)

The first graph used `position = "dodge"`, and the second graph used `position = position_dodge()`. This is because `position = "dodge"` is simply shorthand for `position = position_dodge()` with the default value of 0.9, but when we want to set a specific value, we need to use the more verbose form.

# Four equivalent ways of writing the same dodged bar layer: each ends up with
# a bar width of 0.9 and a dodge width of 0.9, whether those values are left
# at their defaults or spelled out explicitly
geom_bar(position = "dodge")
geom_bar(width = 0.9, position = position_dodge())
geom_bar(position = position_dodge(0.9))
geom_bar(width = 0.9, position = position_dodge(width=0.9))

Same dodge width of 0.9, but different bar widths of 0.9 (left) and 0.2 (right)

library(gcookbook) # Load gcookbook for the cabbage_exp data set

# With fill mapped and no position given, geom_col() stacks the bars
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col()

Stacked bar graph

# Print the data set used for the stacked bar graphs
cabbage_exp

##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
## 6      c52  d21   1.47 0.2110819 10 0.06674995

# Same stacked bars, with the order of the fill legend entries reversed
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col() +
  guides(fill = guide_legend(reverse = TRUE))

Stacked bar graph with reversed legend order

# Reverse the stacking order and the legend order together
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  guides(fill = guide_legend(reverse = TRUE))

Stacked bar graph with reversed stacking order

# Stacked bars with a black outline and the Pastel1 fill palette
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(color = "black") +
  scale_fill_brewer(palette = "Pastel1")

Stacked bar graph with new palette and black outline

library(gcookbook) # Load gcookbook for the cabbage_exp data set

# position_fill() rescales every stack to the same total height
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_fill())

Proportional stacked bar graph

# Proportional stack with y-axis labels formatted by scales::percent
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_fill()) +
  scale_y_continuous(labels = scales::percent)

# Proportional stack with black outlines, percent labels, and Pastel1 fills
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_fill(), color = "black") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Pastel1")

Proportional stacked bar graph with new palette and black outline

library(gcookbook)
library(dplyr)

# Print the data set before computing the per-Date percentages
cabbage_exp

##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
## 6      c52  d21   1.47 0.2110819 10 0.06674995

# Group by Date and use mutate() to express each Weight as a percentage of
# that Date's total; the result stays grouped by Date
ce <- group_by(cabbage_exp, Date) %>%
  mutate(percent_weight = Weight / sum(Weight) * 100)

ce

## # A tibble: 6 × 7
## # Groups:   Date [3]
##   Cultivar Date  Weight    sd     n     se percent_weight
##   <fct>    <fct>  <dbl> <dbl> <int>  <dbl>          <dbl>
## 1 c39      d16     3.18 0.957    10 0.303            58.5
## 2 c39      d20     2.8  0.279    10 0.0882           47.4
## 3 c39      d21     2.74 0.983    10 0.311            65.1
## 4 c52      d16     2.26 0.445    10 0.141            41.5
## 5 c52      d20     3.11 0.791    10 0.250            52.6
## 6 c52      d21     1.47 0.211    10 0.0667           34.9

# Stack the precomputed percentages
ggplot(data = ce, mapping = aes(Date, percent_weight, fill = Cultivar)) +
  geom_col()

library(gcookbook) # Load gcookbook for the cabbage_exp data set

# White labels placed just under the top of each bar (vjust = 1.5)
ggplot(
  data = cabbage_exp,
  mapping = aes(interaction(Date, Cultivar), Weight)
) +
  geom_col() +
  geom_text(aes(label = Weight), colour = "white", vjust = 1.5)

# Labels placed just above the top of each bar (negative vjust)
ggplot(
  data = cabbage_exp,
  mapping = aes(interaction(Date, Cultivar), Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2)

Labels under the tops of bars (left); Labels above bars (right)

# Bar graph of counts per cylinder class, with each count printed in white
# just under the top of its bar. after_stat(count) replaces the `..count..`
# dot-dot notation, which was deprecated in ggplot2 3.4.0.
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar() +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", vjust = 1.5, colour = "white"
  )

Bar graph of counts with labels under the tops of bars

# Raise the upper y limit to 105% of the tallest bar so labels above it fit
ggplot(
  data = cabbage_exp,
  mapping = aes(interaction(Date, Cultivar), Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp[["Weight"]]) * 1.05)

# Give the labels their own y value, 0.1 above each bar top, so the y range
# of the plot adjusts automatically
ggplot(
  data = cabbage_exp,
  mapping = aes(interaction(Date, Cultivar), Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight, y = Weight + 0.1))

# Dodge the labels by the same 0.9 width as the bars so they line up
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(position = position_dodge()) +
  geom_text(
    mapping = aes(label = Weight),
    position = position_dodge(width = 0.9),
    vjust = 1.5, size = 3, colour = "white"
  )

Labels on grouped bars

library(dplyr)

# Sort by Date, then by Cultivar in descending order, so the cumulative sums
# below are accumulated in that row order. desc() states the intent directly;
# rev(Cultivar) only reverses the column vector and yields this ordering
# purely because the rows of cabbage_exp happen to be laid out symmetrically.
ce <- cabbage_exp %>%
  arrange(Date, desc(Cultivar))

# Running total of Weight within each Date, used as the label's y position
ce <- ce %>%
  group_by(Date) %>%
  mutate(label_y = cumsum(Weight))

ce

## # A tibble: 6 × 7
## # Groups:   Date [3]
##   Cultivar Date  Weight    sd     n     se label_y
##   <fct>    <fct>  <dbl> <dbl> <int>  <dbl>   <dbl>
## 1 c52      d16     2.26 0.445    10 0.141     2.26
## 2 c39      d16     3.18 0.957    10 0.303     5.44
## 3 c52      d20     3.11 0.791    10 0.250     3.11
## 4 c39      d20     2.8  0.279    10 0.0882    5.91
## 5 c52      d21     1.47 0.211    10 0.0667    1.47
## 6 c39      d21     2.74 0.983    10 0.311     4.21

# Place each label at its precomputed label_y, nudged down with vjust
ggplot(data = ce, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col() +
  geom_text(aes(label = Weight, y = label_y), colour = "white", vjust = 1.5)

Labels on stacked bars

# Sort by Date, then by Cultivar in descending order. desc() states the
# intent directly; rev(Cultivar) only reverses the column vector and gives
# this ordering purely because cabbage_exp happens to be laid out
# symmetrically.
ce <- cabbage_exp %>%
  arrange(Date, desc(Cultivar))

# Calculate y position, placing it in the middle: the running total minus
# half of the row's own Weight
ce <- ce %>%
  group_by(Date) %>%
  mutate(label_y = cumsum(Weight) - 0.5 * Weight)

# White labels drawn at the precomputed label_y positions
ggplot(data = ce, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col() +
  geom_text(mapping = aes(label = Weight, y = label_y), colour = "white")

Labels in the middle of stacked bars

# Outlined pastel bars; labels show Weight with at least two decimals and "kg"
ggplot(data = ce, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_col(color = "black") +
  geom_text(
    mapping = aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  scale_fill_brewer(palette = "Pastel1")

Customized stacked bar graph with labels

library(gcookbook) # Load gcookbook for the tophitters2001 data set
tophit <- head(tophitters2001, 25) # Keep the first 25 rows of the data set

# One point per player: avg on the x axis, name on the y axis
ggplot(data = tophit, mapping = aes(avg, name)) +
  geom_point()

Basic dot plot

# Show just the name, league, and average columns
tophit[c("name", "lg", "avg")]

##                 name lg    avg
## 1       Larry Walker NL 0.3501
## 2      Ichiro Suzuki AL 0.3497
## 3       Jason Giambi AL 0.3423
## 4     Roberto Alomar AL 0.3357
## 5        Todd Helton NL 0.3356
## 6        Moises Alou NL 0.3314
## 7      Lance Berkman NL 0.3310
## 8         Bret Boone AL 0.3307
## 9  Frank Catalanotto AL 0.3305
## 10     Chipper Jones NL 0.3304
## 11     Albert Pujols NL 0.3288
## 12       Barry Bonds NL 0.3277
## 13        Sammy Sosa NL 0.3276
## 14       Juan Pierre NL 0.3274
## 15     Juan Gonzalez AL 0.3252
## 16     Luis Gonzalez NL 0.3251
## 17      Rich Aurilia NL 0.3239
## 18      Paul Lo Duca NL 0.3196
## 19        Jose Vidro NL 0.3189
## 20    Alex Rodriguez AL 0.3180
## 21       Cliff Floyd NL 0.3171
## 22   Shannon Stewart AL 0.3156
## 23      Jeff Cirillo NL 0.3125
## 24       Jeff Conine AL 0.3111
## 25       Derek Jeter AL 0.3111

# Order the names by avg, enlarge the dots, and keep only dashed horizontal
# grid lines
ggplot(data = tophit, mapping = aes(avg, reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_line(linetype = "dashed", colour = "grey60"),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  )

Dot plot, ordered by batting average

# Same plot with the axes swapped: names on x (rotated 60 degrees), avg on y,
# and dashed vertical grid lines instead of horizontal ones
ggplot(data = tophit, mapping = aes(reorder(name, avg), avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 60),
    panel.grid.major.x = element_line(linetype = "dashed", colour = "grey60"),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank()
  )

Dot plot with names on x-axis and values on y-axis

# Names ordered by league first and batting average second
nameorder <- with(tophit, name[order(lg, avg)])

# Re-create name as a factor whose levels follow nameorder
tophit[["name"]] <- factor(tophit[["name"]], levels = nameorder)

# Grey segments run from x = 0 to each player's point; points are coloured by
# league, with NL listed before AL in the legend
ggplot(data = tophit, mapping = aes(avg, name)) +
  geom_segment(mapping = aes(yend = name), colour = "grey50", xend = 0) +
  geom_point(mapping = aes(colour = lg), size = 3) +
  scale_colour_brewer(limits = c("NL", "AL"), palette = "Set1") +
  theme_bw() +
  theme(
    legend.justification = c(1, 0.5),
    legend.position = c(1, 0.55),           # Legend sits inside the plot area
    panel.grid.major.y = element_blank()    # Drop the horizontal grid lines
  )

Grouped by league, with lines that stop at the point

# One facet row per league, with free y scales and free panel heights.
# The colour legend is suppressed with guide = "none"; the older
# guide = FALSE form was deprecated in ggplot2 3.3.4.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")

Faceted by league

Assignment 1

Areej yaseen

2022-11-22

Including Plots

Including Plots

Time is numeric (continuous)

Convert Time to a discrete (categorical) variable with factor()