1 Goal


The goal of this tutorial is to learn the different configurations of column plots in ggplot2.


2 Data preparation


# First we load the libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# In this tutorial we are going to use the Titanic dataset.
# data() loads it as a contingency table; as.data.frame() flattens it into
# long format: one row per Class/Sex/Age/Survived combination plus a Freq
# column holding the count for that combination.
# The result is stored under the same name, so from here on `Titanic` refers
# to the data frame rather than the original table.
data("Titanic")
Titanic <- as.data.frame(Titanic)
head(Titanic)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0

3 Geom col plotting

3.1 Simple geom_col


# The simplest geom_col call maps one variable to x and one variable to y.
# Here we plot how many people from each class were on the boat.

ggplot(data = Titanic, mapping = aes(x = Class, y = Freq)) +
  geom_col()

# The horizontal lines inside each column appear because several rows of the
# data share the same class, and geom_col stacks one rectangle per row.

3.2 Stacked bars


# Mapping a further variable to the fill aesthetic colours the stacked
# segments of each column by that variable (here: Survived).
ggplot(
  data = Titanic,
  mapping = aes(x = Class, y = Freq, fill = Survived)
) +
  geom_col()


3.3 Equal size columns


# To compare percentages instead of raw counts it helps to give every column
# the same height. This is what position = "fill" does.
ggplot(
  data = Titanic,
  mapping = aes(x = Class, y = Freq, fill = Survived)
) +
  geom_col(position = "fill")