Day 5

barplot (continued)

data: uspopchange from gcookbook

# Preview the first six rows of uspopchange.
head(uspopchange)
##        State Abb Region Change
## 1    Alabama  AL  South    7.5
## 2     Alaska  AK   West   13.3
## 3    Arizona  AZ   West   24.6
## 4   Arkansas  AR  South    9.1
## 5 California  CA   West   10.0
## 6   Colorado  CO   West   16.9
# Show the dimensions and the type of each column.
str(uspopchange)
## 'data.frame':    50 obs. of  4 variables:
##  $ State : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ Abb   : chr  "AL" "AK" "AZ" "AR" ...
##  $ Region: Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ Change: num  7.5 13.3 24.6 9.1 10 16.9 4.9 14.6 17.6 18.3 ...
# Per-column summary: level counts for the factor, quantiles for the numeric.
summary(uspopchange)
##     State               Abb                      Region       Change      
##  Length:50          Length:50          Northeast    : 9   Min.   :-0.600  
##  Class :character   Class :character   South        :16   1st Qu.: 4.350  
##  Mode  :character   Mode  :character   North Central:12   Median : 7.850  
##                                        West         :13   Mean   : 9.852  
##                                                           3rd Qu.:13.900  
##                                                           Max.   :35.100

select the 10 states with the largest Change

# Keep the 10 states with the largest population change. The cutoff is
# derived from the number of rows instead of hard-coding 40, so the filter
# still means "top 10" if the data set does not have exactly 50 rows.
# rank() returns the rank of each element, in the original row order;
# order() returns the row indices that would sort the vector.
upc <- subset(uspopchange, rank(Change) > nrow(uspopchange) - 10)
upc
##             State Abb Region Change
## 3         Arizona  AZ   West   24.6
## 6        Colorado  CO   West   16.9
## 10        Florida  FL  South   17.6
## 11        Georgia  GA  South   18.3
## 13          Idaho  ID   West   21.1
## 29         Nevada  NV   West   35.1
## 34 North Carolina  NC  South   18.5
## 41 South Carolina  SC  South   15.3
## 44          Texas  TX  South   20.6
## 45           Utah  UT   West   23.8

display in barplot

# One bar per state abbreviation; bar height is Change, fill colour is Region.
ggplot(
  data = upc,
  mapping = aes(x = Abb, y = Change, fill = Region)
) +
  geom_bar(stat = "identity")

display the bars in increasing order of y (Change)

# reorder() orders the Abb categories by their Change value, so the bars
# appear from smallest to largest change.
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_bar(stat = "identity")

add a black outline and change the fill colors of the bars

# Same reordered chart, with a black bar outline, hand-picked fill colours
# for the regions present in upc, and a readable x-axis title.
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  labs(x = "State")

barplot with different color for positive y and negative y

data: climate from gcookbook

# Preview the first six rows of climate.
head(climate)
##     Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y
## 1 Berkeley 1800        NA        NA     -0.435  0.505
## 2 Berkeley 1801        NA        NA     -0.453  0.493
## 3 Berkeley 1802        NA        NA     -0.460  0.486
## 4 Berkeley 1803        NA        NA     -0.493  0.489
## 5 Berkeley 1804        NA        NA     -0.536  0.483
## 6 Berkeley 1805        NA        NA     -0.541  0.475
# Preview the last six rows of climate.
tail(climate)
##      Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y
## 494 CRUTEM3 2006    0.7613        NA     0.7345     NA
## 495 CRUTEM3 2007    0.7703        NA         NA     NA
## 496 CRUTEM3 2008    0.6203        NA         NA     NA
## 497 CRUTEM3 2009    0.7343        NA         NA     NA
## 498 CRUTEM3 2010    0.8023        NA         NA     NA
## 499 CRUTEM3 2011    0.6193        NA         NA     NA
# Show the dimensions and the type of each column.
str(climate)
## 'data.frame':    499 obs. of  6 variables:
##  $ Source    : chr  "Berkeley" "Berkeley" "Berkeley" "Berkeley" ...
##  $ Year      : num  1800 1801 1802 1803 1804 ...
##  $ Anomaly1y : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Anomaly5y : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Anomaly10y: num  -0.435 -0.453 -0.46 -0.493 -0.536 -0.541 -0.59 -0.695 -0.763 -0.818 ...
##  $ Unc10y    : num  0.505 0.493 0.486 0.489 0.483 0.475 0.468 0.461 0.453 0.451 ...
# Per-column summary, including the number of NA values in each anomaly column.
summary(climate)
##     Source               Year        Anomaly1y          Anomaly5y      
##  Length:499         Min.   :1800   Min.   :-0.60070   Min.   :-0.4995  
##  Class :character   1st Qu.:1884   1st Qu.:-0.21629   1st Qu.:-0.1053  
##  Mode  :character   Median :1926   Median :-0.02797   Median :-0.0042  
##                     Mean   :1923   Mean   : 0.01277   Mean   : 0.0555  
##                     3rd Qu.:1968   3rd Qu.: 0.15155   3rd Qu.: 0.1620  
##                     Max.   :2011   Max.   : 0.96354   Max.   : 0.8953  
##                                    NA's   :207        NA's   :373      
##    Anomaly10y           Unc10y      
##  Min.   :-1.01500   Min.   :0.0110  
##  1st Qu.:-0.28350   1st Qu.:0.0430  
##  Median :-0.07328   Median :0.1040  
##  Mean   :-0.07869   Mean   :0.1452  
##  3rd Qu.: 0.05065   3rd Qu.:0.2220  
##  Max.   : 0.88400   Max.   :0.5050  
##  NA's   :20         NA's   :294

slice data

# Keep only the Berkeley rows from 1900 onward.
# which() discards NA comparisons, matching how subset() treats them.
keep_rows <- which(climate$Source == "Berkeley" & climate$Year >= 1900)
csub <- climate[keep_rows, ]
head(csub)
##       Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y
## 101 Berkeley 1900        NA        NA     -0.171  0.108
## 102 Berkeley 1901        NA        NA     -0.162  0.109
## 103 Berkeley 1902        NA        NA     -0.177  0.108
## 104 Berkeley 1903        NA        NA     -0.199  0.104
## 105 Berkeley 1904        NA        NA     -0.223  0.105
## 106 Berkeley 1905        NA        NA     -0.241  0.107
# Add a logical column: TRUE where the 10-year anomaly is non-negative,
# FALSE where it is negative.
csub[["pos"]] <- csub[["Anomaly10y"]] >= 0
head(csub)
##       Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
## 101 Berkeley 1900        NA        NA     -0.171  0.108 FALSE
## 102 Berkeley 1901        NA        NA     -0.162  0.109 FALSE
## 103 Berkeley 1902        NA        NA     -0.177  0.108 FALSE
## 104 Berkeley 1903        NA        NA     -0.199  0.104 FALSE
## 105 Berkeley 1904        NA        NA     -0.223  0.105 FALSE
## 106 Berkeley 1905        NA        NA     -0.241  0.107 FALSE

barplot with x = Year, y = Anomaly10y

# Bars for the 10-year anomaly per year, filled by sign (pos).
# position = "identity" draws every bar from zero instead of stacking;
# the default stacking of negative values triggered the warning
# "Stacking not well defined when ymin != 0".
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
        geom_bar(stat = "identity", position = "identity")

change color, bar outline width, and remove legend

# Same chart with custom fill colours, a thin black outline and no legend.
# position = "identity" avoids the "Stacking not well defined when ymin != 0"
# warning that default stacking produced for the negative bars.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for the
# outline width; `size` is kept so the code also runs on older versions.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
        geom_bar(stat = "identity", position = "identity",
                 colour = "black", size = 0.25) +
        # remove legend by setting guide = "none" (guide = FALSE is deprecated)
        scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

adjust bar width

data: pg_mean from gcookbook

# Print the whole pg_mean data frame (one row per group).
pg_mean
##   group weight
## 1  ctrl  5.032
## 2  trt1  4.661
## 3  trt2  5.526
# Show the dimensions and the type of each column.
str(pg_mean)
## 'data.frame':    3 obs. of  2 variables:
##  $ group : Factor w/ 3 levels "ctrl","trt1",..: 1 2 3
##  $ weight: num  5.03 4.66 5.53
# Per-column summary: level counts for group, quantiles for weight.
summary(pg_mean)
##   group       weight     
##  ctrl:1   Min.   :4.661  
##  trt1:1   1st Qu.:4.846  
##  trt2:1   Median :5.032  
##           Mean   :5.073  
##           3rd Qu.:5.279  
##           Max.   :5.526

standard-width bars:

# Bar chart of weight per group, using the default bar width.
ggplot(
  data = pg_mean,
  mapping = aes(x = group, y = weight)
) +
  geom_bar(stat = "identity")

different width bars

# Narrower bars: width = 0.5.
ggplot(
  data = pg_mean,
  mapping = aes(x = group, y = weight)
) +
  geom_bar(stat = "identity", width = 0.5)

# Wider bars: width = 1.
ggplot(
  data = pg_mean,
  mapping = aes(x = group, y = weight)
) +
  geom_bar(stat = "identity", width = 1)

different width for grouped bars:

data: cabbage_exp

# Preview the first six rows of cabbage_exp (the data set has six rows in total).
head(cabbage_exp)
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
## 6      c52  d21   1.47 0.2110819 10 0.06674995
# Show the dimensions and the type of each column.
str(cabbage_exp)
## 'data.frame':    6 obs. of  6 variables:
##  $ Cultivar: Factor w/ 2 levels "c39","c52": 1 1 1 2 2 2
##  $ Date    : Factor w/ 3 levels "d16","d20","d21": 1 2 3 1 2 3
##  $ Weight  : num  3.18 2.8 2.74 2.26 3.11 1.47
##  $ sd      : num  0.957 0.279 0.983 0.445 0.791 ...
##  $ n       : int  10 10 10 10 10 10
##  $ se      : num  0.3025 0.0882 0.311 0.1408 0.2501 ...
# Per-column summary: level counts for the factors, quantiles for the numerics.
summary(cabbage_exp)
##  Cultivar  Date       Weight            sd               n     
##  c39:3    d16:2   Min.   :1.470   Min.   :0.2111   Min.   :10  
##  c52:3    d20:2   1st Qu.:2.380   1st Qu.:0.3205   1st Qu.:10  
##           d21:2   Median :2.770   Median :0.6180   Median :10  
##                   Mean   :2.593   Mean   :0.6110   Mean   :10  
##                   3rd Qu.:3.033   3rd Qu.:0.9152   3rd Qu.:10  
##                   Max.   :3.180   Max.   :0.9834   Max.   :10  
##        se         
##  Min.   :0.06675  
##  1st Qu.:0.10134  
##  Median :0.19544  
##  Mean   :0.19322  
##  3rd Qu.:0.28940  
##  Max.   :0.31098

display barplot

# Grouped bars: one bar per Cultivar, placed side by side within each Date.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = position_dodge())

# Same grouped chart with narrower bars (width = 0.5).
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge())

separate the grouped bars a little:

# Grouped bars with an explicit dodge width of 0.7 and the default bar width.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7))

In the plot above the bars are too wide; use a narrower bar width:

# Bar width (0.5) smaller than the dodge width (0.7).
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    position = position_dodge(width = 0.7)
  )