• Loading data set
# Preview the first six rows of the built-in iris data
head(iris, n = 6)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3.0 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5.0 3.6 1.4 0.2 setosa
5.4 3.9 1.7 0.4 setosa

Scatter plot

Create an empty canvas

  • then create aesthetic mapping
  • tell the function which dataset and variables to use
# Canvas only: ggplot() receives the data set and the aesthetic mapping,
# but no geom layer has been added yet.
ggplot(
  data = iris,                                         # which data set to use
  mapping = aes(x = Sepal.Length, y = Petal.Length)    # x and y are mapped to columns of the data
)
# Different geoms can have different aesthetics (different variables).

Add a layer/geom of points to the canvas

# geom_point() adds the geometrical representation (points) on top of the canvas
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point()

# Same plot as above, with the mapping handed to the layer instead of ggplot()
ggplot(iris) +
  geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length))

Add another layer/geom

  • add a curve/straight line to fit these points
  • geom provides the aesthetic to ggplot
# Points plus a smoothed curve; with no method given, loess is picked here
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points plus a straight linear-regression fit
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

Add other aesthetics

  • set other aesthetics colour, alpha (transparency), and size of points
# Point size follows Sepal.Width; alpha and colour are constants set outside aes()
ggplot(iris) +
  geom_point(
    mapping = aes(x = Sepal.Length, y = Petal.Length, size = Sepal.Width),
    colour = "red",
    alpha = 0.5
  )

# Here colour sits inside aes(), so it is mapped to a variable (Species)
ggplot(iris) +
  geom_point(
    mapping = aes(x = Sepal.Length, y = Petal.Length, size = Sepal.Width, colour = Species),
    alpha = 0.9
  )

  • categorize Petal.Width then map colour to this new variable
# Two-level category: "Wide" when Petal.Width exceeds 1.5, otherwise "Normal"
iris <- mutate(
  iris,
  growth = ifelse(Petal.Width > 1.5, "Wide", "Normal")
)

# Colour the points by the new growth variable
ggplot(iris) +
  geom_point(
    mapping = aes(x = Sepal.Length, y = Petal.Length, size = Sepal.Width, colour = growth),
    alpha = 0.9
  )

Bar chart

# Only x is mapped: one bar per growth category
ggplot(iris) +
  geom_bar(mapping = aes(x = growth))

  • bar chart after group_by
  • then use stat='identity'
# Mean sepal length for every Species x growth combination
results <- iris %>%
  group_by(Species, growth) %>%
  summarise(Sepal.Length.mean = mean(Sepal.Length))
## `summarise()` has grouped output by 'Species'. You can override using the
## `.groups` argument.

# None of the species shown in the output below is called "setosa_null",
# so this filter keeps every row of `results`.
gop <- results %>%
  filter(Species != "setosa_null")
gop
## # A tibble: 5 × 3
## # Groups:   Species [3]
##   Species    growth Sepal.Length.mean
##   <fct>      <chr>              <dbl>
## 1 setosa     Normal              5.01
## 2 versicolor Normal              5.91
## 3 versicolor Wide                6.18
## 4 virginica  Normal              6.13
## 5 virginica  Wide                6.62
  • the bar charts from here until the line-chart section are not meaningful in themselves: the group means are treated as if they were sums, purely for demonstration
# A plot (or part of one) can be kept in an object and printed later.
# stat = "identity" makes the bar heights come from the y values themselves.
plot1 <- ggplot(gop) +
  geom_bar(aes(x = growth, y = Sepal.Length.mean), stat = "identity")
plot1

### Add some options for the whole ggplot rather than layers - switch the x and y axes

# coord_flip() applies to the whole stored plot and swaps its x and y axes
plot1 + coord_flip()

  • reorder x categories (a leading `-` means descending order)
ggplot(gop) +
  geom_bar(
    mapping = aes(
      x = reorder(growth, -Sepal.Length.mean),  # ordered by the negated mean
      y = Sepal.Length.mean,
      fill = growth
    ),
    stat = "identity"
  ) +
  coord_flip()

  • add x axis label and a theme
# Reordered, flipped bar chart with an axis label, no fill legend, and a theme
ggplot(gop) +
  geom_bar(
    mapping = aes(
      x = reorder(growth, -Sepal.Length.mean),
      y = Sepal.Length.mean,
      fill = growth
    ),
    stat = "identity"
  ) +
  coord_flip() +
  xlab("Growth categories") +
  # Hide the fill legend. guides(fill = FALSE) is deprecated, and `F` is an
  # ordinary variable that can be reassigned, so use the "none" spelling.
  guides(fill = "none") +
  theme_minimal()

  • set theme
library(ggthemes)  # loaded for theme_economist()

# Same count bar chart as before, drawn with the ggthemes economist theme
ggplot(iris) +
  geom_bar(mapping = aes(x = growth)) +
  theme_economist()

Grouped bar chart

- bar chart with different panels

# One panel per year of the mpg data
ggplot(mpg, mapping = aes(x = class)) +
  geom_bar() +
  facet_wrap(vars(year))

  • actual number (groups are stacked by default)
# No position given: the Species groups are stacked on top of each other
ggplot(gop) +
  geom_bar(
    mapping = aes(x = growth, y = Sepal.Length.mean, fill = Species),
    stat = "identity"
  )

# Stacking requested explicitly; bars are grouped and filled by year
ggplot(mpg, mapping = aes(x = class)) +
  geom_bar(mapping = aes(group = year, fill = year), position = "stack")

  • percentage
# position = "fill" stretches every stack to the same height, so bars show shares
ggplot(gop) +
  geom_bar(
    mapping = aes(x = growth, y = Sepal.Length.mean, fill = Species),
    position = "fill",
    stat = "identity"
  )

  • groups are dodged (placed side by side), showing actual numbers
# position = "dodge" puts the Species bars next to each other instead of stacking
ggplot(gop) +
  geom_bar(
    mapping = aes(x = growth, y = Sepal.Length.mean, fill = Species),
    position = "dodge",
    stat = "identity"
  )

- groups are dodged (placed side by side), showing percentages

# Turn each Species mean into its share of the total within its growth
# category, so the shares inside one category add up to 1.
gop2 <- gop %>%
  group_by(growth) %>%
  mutate(Sepal.Length.prop = Sepal.Length.mean / sum(Sepal.Length.mean)) %>%
  ungroup()  # drop the grouping so later verbs are not silently grouped

ggplot(gop2) +
  geom_bar(
    mapping = aes(x = growth, y = Sepal.Length.prop, fill = Species),
    stat = "identity",
    position = "dodge"
  ) +
  # Sepal.Length.prop is a 0-1 proportion; print the ticks as percentages
  scale_y_continuous(labels = scales::label_percent()) +
  # the label describes the plotted variable (the old "Votes (%)" did not)
  ylab("Share of mean sepal length (%)")

Line charts

# Basic line chart of Petal.Length against Sepal.Length
ggplot(data = iris) +
  geom_line(mapping = aes(x = Sepal.Length, y = Petal.Length))

Grouped by colour variable

# Mapping colour to Species draws one separately coloured line per species
ggplot(data = iris) +
  geom_line(
    mapping = aes(x = Sepal.Length, y = Petal.Length, colour = Species)
  )

  • one panel per Species (facets), then set how many rows or columns
ggplot(data = iris) +
  geom_line(mapping = aes(x = Sepal.Length, y = Petal.Length)) +
  facet_wrap(~ Species, nrow = 1) +  # lay the panels out in a single row
  coord_flip()

Multiple aesthetics

# Same growth categorisation as earlier in the file:
# "Wide" when Petal.Width exceeds 1.5, otherwise "Normal".
iris <- iris %>%
  mutate(growth = ifelse(Petal.Width > 1.5, "Wide", "Normal"))

ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  # NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines; `size`
  # is kept because the rest of this file targets the older API.
  geom_line(size = 2, color = "purple") +
  # log-scaled x axis with plain-number tick labels
  scale_x_log10(labels = scales::label_number()) +
  # size and colour are mapped for this layer only, and its legends are hidden;
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned
  geom_point(
    mapping = aes(size = Sepal.Length, colour = as.factor(growth)),
    show.legend = FALSE
  ) +
  facet_wrap(~ Species)