Bar graphs are perhaps the most commonly used kind of data visualization. They’re typically used to display numeric values (on the y-axis), for different categories (on the x-axis). For example, a bar graph would be good for showing the prices of four different kinds of items. A bar graph generally wouldn’t be as good for showing prices over time, where time is a continuous variable—though it can be done, as we’ll see in this chapter.
If I have a data frame where one column represents the x position of each bar, and another column represents the vertical (y) height of each bar.
library(ggplot2)
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(position="dodge", stat="identity")
ce <- cabbage_exp[1:5, ] # Copy the data without last row
ce
## Cultivar Date Weight sd n se
## 1 c39 d16 3.18 0.9566144 10 0.30250803
## 2 c39 d20 2.80 0.2788867 10 0.08819171
## 3 c39 d21 2.74 0.9834181 10 0.31098410
## 4 c52 d16 2.26 0.4452215 10 0.14079141
## 5 c52 d20 3.11 0.7908505 10 0.25008887
ggplot(ce, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(position="dodge", colour="black", stat="identity") +
scale_fill_brewer(palette="Pastel1")
ggplot(diamonds, aes(x=cut)) + geom_bar()
ggplot(diamonds, aes(x=carat)) + geom_bar()
library(gcookbook) # For the data set
upc <- subset(uspopchange, rank(Change)>40)
upc
## State Abb Region Change
## 3 Arizona AZ West 24.6
## 6 Colorado CO West 16.9
## 10 Florida FL South 17.6
## 11 Georgia GA South 18.3
## 13 Idaho ID West 21.1
## 29 Nevada NV West 35.1
## 34 North Carolina NC South 18.5
## 41 South Carolina SC South 15.3
## 44 Texas TX South 20.6
## 45 Utah UT West 23.8
ggplot(upc, aes(x=Abb, y=Change, fill=Region)) + geom_bar(stat="identity")
ggplot(upc, aes(x=reorder(Abb, Change), y=Change, fill=Region)) +
geom_bar(stat="identity", colour="black") +
scale_fill_manual(values=c("#669933", "#FFCC66")) +
xlab("State")
csub <- subset(climate, Source=="Berkeley" & Year >= 1900)
csub$pos <- csub$Anomaly10y >= 0
ggplot(csub, aes(x=Year, y=Anomaly10y, fill=pos)) +
geom_bar(stat="identity", position="identity")
ggplot(pg_mean, aes(x=group, y=weight)) + geom_bar(stat="identity")
ggplot(pg_mean, aes(x=group, y=weight)) + geom_bar(stat=“identity”, width=0.5)
ggplot(pg_mean, aes(x=group, y=weight)) + geom_bar(stat="identity", width=0.5)
ggplot(pg_mean, aes(x=group, y=weight)) + geom_bar(stat="identity", width=1)
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity", width=0.5, position="dodge")
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity", width=0.5, position=position_dodge(0.7))
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity")
library(plyr) # Needed for desc()
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar, order=desc(Cultivar))) +
geom_bar(stat="identity")
ce <- ddply(cabbage_exp, "Date", transform,
percent_weight = Weight / sum(Weight) * 100)
ggplot(ce, aes(x=Date, y=percent_weight, fill=Cultivar)) +
geom_bar(stat="identity")
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(label=Weight), vjust=1.5, colour="white")
ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar), y=Weight)) +
geom_bar(stat="identity") +
geom_text(aes(label=Weight), vjust=-0.2)
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity", position="dodge") +
geom_text(aes(label=Weight), vjust=1.5, colour="white",
position=position_dodge(.9), size=3)
ce <- arrange(cabbage_exp, Date, Cultivar)
ce <- ddply(ce, "Date", transform, label_y=cumsum(Weight))
ce
## Cultivar Date Weight sd n se label_y
## 1 c39 d16 3.18 0.9566144 10 0.30250803 3.18
## 2 c52 d16 2.26 0.4452215 10 0.14079141 5.44
## 3 c39 d20 2.80 0.2788867 10 0.08819171 2.80
## 4 c52 d20 3.11 0.7908505 10 0.25008887 5.91
## 5 c39 d21 2.74 0.9834181 10 0.31098410 2.74
## 6 c52 d21 1.47 0.2110819 10 0.06674995 4.21
ggplot(ce, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(stat="identity") +
geom_text(aes(y=label_y, label=Weight), vjust=1.5, colour="white")
library(gcookbook) # For the data set
tophit <- tophitters2001[1:25, ] # Take the top 25 from the tophitters data set
ggplot(tophit, aes(x=avg, y=name)) + geom_point()
ggplot(tophit, aes(x=avg, y=reorder(name, avg))) +
geom_point(size=3) + # Use a larger dot
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(colour="grey60", linetype="dashed"))
ggplot(tophit, aes(x=reorder(name, avg), y=avg)) +
geom_point(size=3) + # Use a larger dot
theme_bw() +
theme(axis.text.x = element_text(angle=60, hjust=1),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_line(colour="grey60", linetype="dashed"))
ggplot(tophit, aes(x=avg, y=name)) +
geom_segment(aes(yend=name), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=lg)) +
scale_colour_brewer(palette="Set1", limits=c("NL","AL"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(lg ~ ., scales="free_y", space="free_y")
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.