Table of contents


Importing the ggplot2 package -

> library(ggplot2)

Let’s use the built-in HairEyeColor data set -

> HairEyeColor
, , Sex = Male

       Eye
Hair    Brown Blue Hazel Green
  Black    32   11    10     3
  Brown    53   50    25    15
  Red      10   10     7     7
  Blond     3   30     5     8

, , Sex = Female

       Eye
Hair    Brown Blue Hazel Green
  Black    36    9     5     2
  Brown    66   34    29    14
  Red      16    7     7     7
  Blond     4   64     5     8

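This data set is a three-dimensional contingency table of counts (Hair x Eye x Sex), which is not a convenient shape for plotting with ggplot2. We need a data frame with one row per observation, so some manipulation is required before moving on.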

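Let’s load dplyr for the data manipulation (tidyr is called with the tidyr:: prefix below, so it must be installed but does not need to be attached) -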

> library(dplyr)

The table is then converted into a tibble with one row per student, with every column stored as a factor, so that we can use it for plotting -

> df <- HairEyeColor %>%        
+   as_tibble() %>%             
+   tidyr::uncount(n) %>%              
+   mutate_all(as.factor)

A quick illustration of how uncount() works -

> tibble(a=c(2,1,4),
+        b=c('one','two','three')) %>% tidyr::uncount(a)
# A tibble: 7 x 1
  b    
  <chr>
1 one  
2 one  
3 two  
4 three
5 three
6 three
7 three

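uncount() does the opposite of count(): it repeats each row as many times as the value in the given column (here a) and then drops that column.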

Let’s look at the new data frame now -

> glimpse(df)
Rows: 592
Columns: 3
$ Hair <fct> Black, Black, Black, Black, Black, Black, Black, Black, Black, Bl~
$ Eye  <fct> Brown, Brown, Brown, Brown, Brown, Brown, Brown, Brown, Brown, Br~
$ Sex  <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Male,~

Now it can be used to create bar charts.


A Simple Barplot

> ggplot(data = df) +
+   geom_bar(mapping = aes(x = Hair))

The mapping can also be specified inside the ggplot() function, where it is inherited by every layer -

> ggplot(data = df, mapping = aes(x=Hair))+
+   geom_bar(fill = "black") +  
+   labs(title = "Hair Color", 
+        subtitle = "592 Statistics Students",
+        caption = "(From R's built in HairEyeColor sample dataset)",
+        y = "Number of Students", x = NULL)

Horizontal Bar Chart

Using coord_flip() -

> ggplot(data = df, mapping = aes(x=Hair))+
+   geom_bar(fill = "black") +  
+   labs(title = "Hair Color", 
+        subtitle = "592 Statistics Students",
+        caption = "(From R's built in HairEyeColor sample dataset)",
+        y = "Number of Students", x = NULL) +
+   coord_flip()

Alternatively, mapping the variable to the y axis instead of using coord_flip() -

> ggplot(data = df, mapping = aes(y = Hair))+
+   geom_bar(fill = "black") +  
+   labs(title = "Hair Color", 
+        subtitle = "592 Statistics Students",
+        caption = "(From R's built in HairEyeColor sample dataset)",
+        y = "Number of Students", x = NULL)

Using Colors

Set fill = {the same variable as the x axis} so that each category is shown in a different color -

> ggplot(data = df)+
+   geom_bar(mapping = aes(x = Hair, fill = Hair))+
+   theme(legend.position = "none")  # Don't show the legend

Adjusting the color intensity with scale_fill_hue() -

> ggplot(data = df)+
+   geom_bar(mapping = aes(x = Hair, fill = Hair))+
+   theme(legend.position = "none") +  # Don't show the legend
+   scale_fill_hue(c = 20) # Different values c gives different intensity of colors

Manually selecting colors

How can colors be set manually in a bar chart? One way is to pass a vector of colors to the fill argument of geom_bar() -

> ggplot(data = df)+
+   geom_bar(mapping = aes(x = Hair, fill = Hair), 
+            col = "black",
+            fill = c("Black","beige","bisque3","red"))+
+   theme(legend.position = "none")

Another way to do that is with scale_fill_manual() -

> ggplot(data = df)+
+   geom_bar(mapping = aes(x = Hair, fill = Hair), col = "black")+
+   theme(legend.position = "none") +
+   scale_fill_manual(values = c("Black","beige","bisque3","red"))

Modifying Axis Tickmarks

> ggplot(df, aes(x = Hair)) +
+   geom_bar() +
+   scale_y_continuous(breaks = seq(0, 300, by=50)) +
+   labs(x = "Colors", y = "Frequency",
+        title = "Bar Chart of Colors",
+        subtitle = "An observational study") +
+   theme(plot.title = element_text(hjust = 0.5),
+         plot.subtitle = element_text(hjust = 0.5)) # center the title and subtitle

Stacked Bar Chart

A stacked bar chart can be made by mapping a second variable to the fill aesthetic -

> ggplot(data = df) + 
+   geom_bar(mapping = aes(Hair, fill = Sex))

100% Stacked Bar Chart

Using position = “fill” inside geom_bar -

> ggplot(df, aes(Hair, fill = Sex)) + 
+   geom_bar(position = "fill") +
+   labs(x="Hair Color", y=NULL) +
+   coord_flip()

Changing Order of Bars

> df$Hair <- factor(df$Hair, levels = c("Red", "Black", "Blond", "Brown"))
> ggplot(df, aes(y=Hair, fill = Sex)) + 
+   geom_bar(position = "fill") +
+   labs(x=NULL, y="Hair Color") 

Another way to do this is with scale_y_discrete() -

> ggplot(df, aes(y = Hair, fill = Sex)) + 
+   geom_bar(position = "fill") +
+   labs(x=NULL, y="Hair Color") +
+   scale_y_discrete(limits = c("Black","Red","Brown","Blond"))

Changing Order in Legend’s Labels

Using scale_fill_discrete() -

> ggplot(df, aes(y = Hair, fill = Sex)) + 
+   geom_bar(position = "fill") +
+   labs(x=NULL, y="Hair Color") +
+   scale_y_discrete(limits = c("Black","Red","Brown","Blond")) +
+   scale_fill_discrete(breaks = c("Male","Female"))

Changing Order of Stacks

In the following side-by-side (dodged) barplot, the left bar in each group denotes female and the right bar denotes male -

> ggplot(df, aes(x = Hair, fill = Sex)) + 
+   geom_bar(position = "dodge") +
+   labs(x=NULL, y="Hair Color") +
+   scale_x_discrete(limits = c("Black","Red","Brown","Blond"))

If we check the order of levels of Sex we’ll see -

> levels(df$Sex)
[1] "Female" "Male"  

Now if the order of the levels is changed, the bars change their order as well -

> df %>% 
+   mutate(Sex = factor(Sex, levels = c("Male","Female"))) %>% 
+   ggplot(aes(x = Hair, fill = Sex)) + 
+   geom_bar(position = "dodge") +
+   labs(x=NULL, y="Hair Color") +
+   scale_x_discrete(limits = c("Black","Red","Brown","Blond"))

This is particularly useful when showing a 100% stacked barplot -

> df %>%
+   mutate(Hair = factor(Hair,
+                        levels = rev(c("Black","Brown","Red","Blond")))) %>%
+   ggplot(aes(y = Sex, fill = Hair)) + 
+   geom_bar(position = "fill") +
+   labs(x=NULL, y=NULL, fill = "Hair Colors") +
+   scale_fill_manual(values = c("black","#8B4513","#FF0000","#faf0be"),
+                       limits = c("Black","Brown","Red","Blond")) +
+   theme_bw() + theme(legend.position = "bottom")

Changing width of the bars

The width of the bars can be changed using the width argument of geom_bar(). It takes values from 0 to 1 -

> ggplot(df, aes(Hair, fill = Sex)) + 
+   geom_bar(position = "fill", 
+            width = 0.5) +
+   labs(x="Hair Color", y=NULL) +
+   coord_flip()

Side by Side Bar Chart

Using dodge -

> ggplot(df, aes(Hair, fill = Sex)) + 
+   geom_bar(position = "dodge") +
+   labs(x="Hair Color", y=NULL)

Using dodge2 -

> ggplot(df, aes(Hair, fill = Sex)) + 
+   geom_bar(position = "dodge2") +
+   labs(x="Hair Color", y=NULL)

In the following case the males with red hair have been filtered out, so there is no Male bar for the hair color Red. The Female bar then expands to fill the whole width of that group -

> df %>% 
+   filter(!(Sex=="Male" & Hair=="Red")) %>% 
+   ggplot(aes(Hair, fill = Sex)) + 
+   geom_bar(position = "dodge2") +
+   labs(x="Hair Color", y=NULL)

To prevent this from happening, use position_dodge2(preserve = "single") in the position argument -

> df %>% 
+   filter(!(Sex=="Male" & Hair=="Red")) %>% 
+   ggplot(aes(Hair, fill = Sex)) + 
+   geom_bar(position = position_dodge2(preserve = "single")) +
+   labs(x="Hair Color", y=NULL)

preserve = "total" (the default) keeps the total width of each group fixed, so the remaining bar fills the whole width again -

> df %>% 
+   filter(!(Sex=="Male" & Hair=="Red")) %>% 
+   ggplot(aes(Hair, fill = Sex)) + 
+   geom_bar(position = position_dodge2(preserve = "total")) +
+   labs(x="Hair Color", y=NULL)

Column Chart

The data for a column chart is already summarized, with one row per category and its frequency. After manipulation it looks like this -

> hairdf <- df %>% 
+   filter(Sex == "Male") %>% 
+   group_by(Hair) %>% 
+   summarize(frequency = n()) 
> hairdf
# A tibble: 4 x 2
  Hair  frequency
  <fct>     <int>
1 Red          34
2 Black        56
3 Blond        46
4 Brown       143

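This type of data frame can be graphed as a column chart using geom_col() rather than geom_bar(). The difference is that geom_bar() counts the rows in each category itself, whereas geom_col() uses the values supplied in y as the bar heights -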

> hairdf %>% 
+   ggplot()+ 
+   geom_col(mapping = aes(x=Hair, y=frequency),
+            fill = c("Black","beige","bisque3","coral2")) +
+   labs(title="Hair Color in Column Chart")

This kind of data can also be graphed by defining stat = "identity" in the geom_bar() function -

> hairdf %>% 
+   ggplot() +
+   geom_bar(aes(x = Hair, y = frequency), 
+            stat = "identity")

Putting frequencies on each bar

> hairdf %>% 
+   ggplot(aes(x = Hair, y = frequency)) +
+   geom_col() +
+   scale_y_continuous(breaks = seq(0, 150, by=30)) +
+   labs(x = "Colors", y = "Frequency",
+        title = "Bar Chart of Colors",
+        subtitle = "An observational study") +
+   geom_text(aes(label= frequency), 
+             vjust=1.2, size=3,
+             col = "white")

To know more about ggplot2 visit here

To know more about colors visit here

Check out https://www.homeworkhelponline.net for R Studio Programming assignment help.

