Bar Graphs in R

Bar graphs are perhaps the most commonly used kind of data visualization. They’re typically used to display numeric values (on the y-axis), for different categories (on the x-axis). For example, a bar graph would be good for showing the prices of four different kinds of items. A bar graph generally wouldn’t be as good for showing prices over time, where time is a continuous variable—though it can be done, as we’ll see in this chapter.

Making a Basic Bar Graph

If you have a data frame where one column represents the x position of each bar and another column represents the vertical (y) height of each bar, map those columns to x and y and use geom_bar(stat="identity") to draw the bars.

Grouping bars together

library(ggplot2)
library(gcookbook) # Supplies cabbage_exp; it must be attached before first use

# Grouped bar graph: one bar per Cultivar, placed side by side within each Date.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

# Keep only the first five rows, i.e. drop the final row of the data
ce <- cabbage_exp[seq_len(5), ]
ce
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
# Dodged bars with a black outline and a pastel fill palette
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

Making a Bar Graph of Counts

# Only x is mapped, so geom_bar() draws one bar per value of cut
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()

# Same call with carat on the x-axis
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_bar()

Using Colors in a Bar Graph

library(gcookbook) # Supplies the uspopchange data set
# Keep the rows whose Change ranks above 40 (the largest Change values)
upc <- subset(uspopchange, subset = rank(Change) > 40)
upc
##             State Abb Region Change
## 3         Arizona  AZ   West   24.6
## 6        Colorado  CO   West   16.9
## 10        Florida  FL  South   17.6
## 11        Georgia  GA  South   18.3
## 13          Idaho  ID   West   21.1
## 29         Nevada  NV   West   35.1
## 34 North Carolina  NC  South   18.5
## 41 South Carolina  SC  South   15.3
## 44          Texas  TX  South   20.6
## 45           Utah  UT   West   23.8
# One bar per state abbreviation, filled by Region
ggplot(data = upc, mapping = aes(x = Abb, y = Change, fill = Region)) +
  geom_col()

# Bars sorted by Change, outlined in black, with hand-picked fill colours
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_col(colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

Coloring Negative and Positive Bars Differently

# Berkeley rows from 1900 onward
csub <- subset(climate, subset = Source == "Berkeley" & Year >= 1900)
# TRUE where Anomaly10y is non-negative; mapped to fill below
csub[["pos"]] <- csub[["Anomaly10y"]] >= 0

ggplot(data = csub, mapping = aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_col(position = "identity")

Adjusting Bar Width and Spacing

For standard-width bars

# No width given, so the bars use ggplot2's default width
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col()

For narrow bars

# Narrow bars: width = 0.5.
# String arguments need straight ASCII quotes; typographic quotes are a parse
# error in R.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)

For wider bars

# Bars drawn with width = 1
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col(width = 1)

For a grouped bar graph with narrow bars

# Dodged (side-by-side) bars, each drawn at half width
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(width = 0.5, position = "dodge")

Some space between the grouped bars

# Dodge width (0.7) larger than bar width (0.5) leaves a gap inside each group
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(width = 0.5, position = position_dodge(width = 0.7))

Making a Stacked Bar Graph

# No position given, so bars sharing a Date are stacked
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col()

For reverse stacking

library(plyr) # Provides ddply() and arrange(), used by the examples further down

# Reverse the stacking order. The `order` aesthetic is no longer honoured by
# current ggplot2, so the reversal is requested from the stacking position
# itself. The legend is reversed too so it still reads in the same order as
# the stack.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
  guides(fill = guide_legend(reverse = TRUE))

Making a Proportional Stacked Bar Graph

# Within each Date, express every Weight as a percentage of that Date's total
ce <- ddply(
  .data = cabbage_exp,
  .variables = "Date",
  .fun = transform,
  percent_weight = Weight / sum(Weight) * 100
)
ggplot(data = ce, mapping = aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_col()

Adding Labels to a Bar Graph

Below the top

# White labels nudged down (vjust = 1.5) so they sit inside the top of each bar
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), colour = "white", vjust = 1.5)

Above the Top

# Negative vjust lifts each label just above its bar
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2)

Adding labels on grouped bars

# The labels are dodged by 0.9 so that each one lines up with its own bar
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "dodge") +
  geom_text(
    mapping = aes(label = Weight),
    position = position_dodge(width = 0.9),
    vjust = 1.5,
    colour = "white",
    size = 3
  )

Adding labels on stacked bars

# Current ggplot2 stacks the first fill level on top, so the cumulative sum
# must run from the last Cultivar level up to the first. Sorting Cultivar in
# descending order within each Date makes label_y the top edge of the segment
# each label belongs to.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
# Running total of Weight within each Date = y position of each segment's top
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ce
##   Cultivar Date Weight        sd  n         se label_y
## 1      c52  d16   2.26 0.4452215 10 0.14079141    2.26
## 2      c39  d16   3.18 0.9566144 10 0.30250803    5.44
## 3      c52  d20   3.11 0.7908505 10 0.25008887    3.11
## 4      c39  d20   2.80 0.2788867 10 0.08819171    5.91
## 5      c52  d21   1.47 0.2110819 10 0.06674995    1.47
## 6      c39  d21   2.74 0.9834181 10 0.31098410    4.21
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")

Making a Cleveland Dot Plot

library(gcookbook) # Supplies the tophitters2001 data set
# Work with the first 25 rows of tophitters2001
tophit <- tophitters2001[seq_len(25), ]
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

Dot plot, ordered by batting average

# Names sorted by avg on the y-axis; only dashed horizontal grid lines are kept
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) + # Larger dots
  theme_bw() +
  theme(
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

Dot plot with names on x-axis and values on y-axis

# Axes swapped: sorted names along x (labels rotated 60 degrees), avg on y;
# only dashed vertical grid lines are kept
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) + # Larger dots
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )

Dot plot Grouped by league, with lines that stop at the point

# Dot plot faceted by league (lg): a grey segment runs from x = 0 to each
# point, and the points are coloured by league.
# guide = "none" suppresses the colour legend; the older guide = FALSE form is
# deprecated since ggplot2 3.3.4 and triggers a warning.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  # Free y scales and spacing so each panel lists only its own names
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")