Bar Graphs (Ch 3)

Bar Graphs

library(ggplot2)
library(gcookbook) # For the data set
# Basic bar graph: stat = "identity" uses the weight values as the bar heights
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

plot of chunk unnamed-chunk-1

When the x variable (Time) is continuous

# Time is mapped to x as-is (not wrapped in factor())
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

plot of chunk unnamed-chunk-2

For a proper graph, treat Time as a discrete variable

# Wrapping Time in factor() makes it a discrete (categorical) x variable
ggplot(data = BOD, mapping = aes(x = factor(Time), y = demand)) +
  geom_bar(stat = "identity")

plot of chunk unnamed-chunk-3

Changing the color

# fill sets the bar interior colour; colour sets the bar outline
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(
    stat = "identity",
    fill = "lightblue",
    colour = "black"
  )

plot of chunk unnamed-chunk-4

Grouping Bars

# Map Cultivar to fill and dodge the bars so they sit side by side per Date
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge")

plot of chunk unnamed-chunk-5

Grouping Bars

# Same dodged bars, with a black outline and the ColorBrewer "Pastel1" fills
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

plot of chunk unnamed-chunk-6

Bar Graph of Counts

# No y aesthetic: geom_bar() with its default stat counts the rows per cut
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()

plot of chunk unnamed-chunk-7

Changing Colors in Bar Graph

library(gcookbook)
# Keep only the rows whose rank of Change is above 40
upc <- subset(uspopchange, rank(Change) > 40)

# Bars ordered by Change via reorder(); Region selects one of two manual fills
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

plot of chunk unnamed-chunk-8

Changing Colors in Bar Graph

  • Different colors for negative and positive bars

library(gcookbook) # For the data set
# Keep the rows with Source "Berkeley" and Year from 1900 onward
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# Logical flag: TRUE where Anomaly10y is non-negative, FALSE where it is negative
csub$pos <- csub$Anomaly10y >= 0

Changing Colors in Bar Graph

  • Different colors for negative and positive bars

# Fill each bar by the logical pos column; the first manual colour goes to FALSE,
# the second to TRUE.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity",
    position = "identity",
    colour = "black",
    size = 0.25
  ) +
  # guide = "none" hides the legend; guide = FALSE is deprecated in recent
  # ggplot2 releases.
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

plot of chunk unnamed-chunk-10

Adjusting Bar Width

  • Use width parameter
library(gcookbook) # For the data set
# Narrower bars: width = 0.5
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)

plot of chunk unnamed-chunk-11

Adjusting Bar Width

  • Use width parameter
# Wider bars: width = 1
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 1)

plot of chunk unnamed-chunk-12

Adjusting Bar Spacing

  • Use position_dodge parameter
# Grouped bars of width 0.5 using the plain "dodge" position
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5)

plot of chunk unnamed-chunk-13

Adjusting Bar Spacing

  • Use position_dodge parameter
# Grouped bars of width 0.5 with an explicit dodge width of 0.7
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(
    stat = "identity",
    position = position_dodge(0.7),
    width = 0.5
  )

plot of chunk unnamed-chunk-14

Stacked Bar Graph

  • Do not use position="dodge"
library(gcookbook) # For the data set
# No position argument is given, so geom_bar() uses its default position
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity")

plot of chunk unnamed-chunk-15

Stacked Bar Graph

  • To change the legend order
library(gcookbook) # For the data set
# Stacked bars; reverse = TRUE flips the order of the fill legend entries
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

plot of chunk unnamed-chunk-16

Stacked Bar Graph

  • For more sophistication
# Stacked bars with a black outline, reversed legend and "Pastel1" fills
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

plot of chunk unnamed-chunk-17

Proportional Stacked Bar Graph

  • Need to transform the data
library(gcookbook) # For the data set
library(plyr)
# Group-wise transform(): split cabbage_exp on "Date" and, within each Date,
# express every Weight as a percentage of that Date's total Weight.
ce <- ddply(
  cabbage_exp, "Date", transform,
  percent_weight = Weight / sum(Weight) * 100
)

Proportional Stacked Bar Graph

  • Need to transform the data
# Stack the per-Date percentages computed in ce
ggplot(
  data = ce,
  mapping = aes(x = Date, y = percent_weight, fill = Cultivar)
) +
  geom_bar(stat = "identity")

plot of chunk unnamed-chunk-19

Adding Labels

library(gcookbook) # For the data set
# Labels just below the bar top: vjust = 1.5, drawn in white
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = Weight), vjust = 1.5, colour = "white")

plot of chunk unnamed-chunk-20

Adding Labels

# Labels just above the bar top: a negative vjust
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = Weight), vjust = -0.2)

plot of chunk unnamed-chunk-21

Adjusting y limits

# Extend the y axis to 5% above the largest Weight, leaving room for the labels
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

plot of chunk unnamed-chunk-22

Further adjustments

# Place each label at Weight + 0.1 instead of using vjust; the y range of the
# plot will auto-adjust to include the labels
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(y = Weight + 0.1, label = Weight))

plot of chunk unnamed-chunk-23

Further adjustments

# Dodged bars with labels; the text layer is dodged by 0.9 as well
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    mapping = aes(label = Weight),
    position = position_dodge(.9),
    vjust = 1.5,
    colour = "white",
    size = 3
  )

plot of chunk unnamed-chunk-24

Cleveland Dot Plot

library(gcookbook) # For the data set
# Take the first 25 rows of the tophitters2001 data set. head() returns at most
# 25 rows, so it never pads with all-NA rows the way [1:25, ] does when the
# data has fewer than 25 rows.
tophit <- head(tophitters2001, 25)

# Plain dot plot: avg on x, one row per name on y
ggplot(tophit, aes(x = avg, y = name)) +
  geom_point()

plot of chunk unnamed-chunk-25

Cleveland Dot Plot (Ordered)

# Order the names by avg, enlarge the dots, and keep only dashed horizontal
# grid lines (vertical grid lines are blanked out)
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

plot of chunk unnamed-chunk-26

Cleveland Dot Plot (Axes Changed)

# Names on x (ordered by avg) with the labels rotated 60 degrees; horizontal
# grid lines are blanked out and the vertical ones are dashed
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )

plot of chunk unnamed-chunk-27

Cleveland Dot Plot (Grouping)

# Names arranged by lg first, with ties broken by avg
nameorder <- with(tophit, name[order(lg, avg)])

# Re-encode name as a factor whose levels follow nameorder
tophit$name <- factor(tophit$name, levels = nameorder)

Cleveland Dot Plot (Grouping)

# Dot plot faceted by lg: a grey segment runs from x = 0 to each point, and the
# points are coloured by lg.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  # guide = "none" hides the colour legend; guide = FALSE is deprecated in
  # recent ggplot2 releases.
  scale_colour_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  # Free y scales and free y space: each facet shows only its own names and is
  # sized accordingly.
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")

plot of chunk unnamed-chunk-29