Data visualisation with R ggplot2

1. Overview

ggplot2 book: https://ggplot2-book.org/index.html

All plots are composed of the data, the information you want to visualise, and a mapping, the description of how the data’s variables are mapped to aesthetic attributes. There are five mapping components:

A layer is a collection of geometric elements and statistical transformations. Geometric elements, geoms for short, represent what you actually see in the plot: points, lines, polygons, etc. Statistical transformations, stats for short, summarise the data: for example, binning and counting observations to create a histogram, or fitting a linear model.
Scales map values in the data space to values in the aesthetic space. This includes the use of colour, shape or size. Scales also draw the legend and axes, which make it possible to read the original data values from the plot (an inverse mapping).
A coord, or coordinate system, describes how data coordinates are mapped to the plane of the graphic. It also provides axes and gridlines to help read the graph. We normally use the Cartesian coordinate system, but a number of others are available, including polar coordinates and map projections.
A facet specifies how to break up and display subsets of data as small multiples. This is also known as conditioning or latticing/trellising.
A theme controls the finer points of display, like the font size and background colour. While the defaults in ggplot2 have been chosen with care, you may need to consult other references to create an attractive plot. A good starting place is Tufte’s early works.3
The template used to create a ggplot2 chart.

ggplot(data = <DATA>) + 
  <GEOM_FUNCTION>(
     mapping = aes(<MAPPINGS>),
     stat = <STAT>, 
     position = <POSITION>
  ) +
  <COORDINATE_FUNCTION> +
  <FACET_FUNCTION>

2. Comparison

2.1 Density (Numeric Distribution)

A density plot shows the distribution of a numeric variable.

# Make the histogram
 p1 <- diamonds %>%
  #filter( price<300 ) %>%
  ggplot() +
    geom_density(aes(x=price),
                 fill="#69b3a2", 
                 color="#e9ecef", 
                 alpha=0.8) +
    scale_x_continuous(limits = quantile(diamonds$price,c(0.01,0.99)),
                       labels = scales::dollar_format()) +
    geom_vline(aes(xintercept = mean(price)), 
             linetype = "dashed", size = 0.6,
             color = "#FC4E07") +
    ggtitle("Diamonds price distribution")

 p11 <- diamonds %>%
  #filter( price<300 ) %>%
  ggplot() +
    geom_density(aes(x=price,y=..count..),
                 fill="#69b3a2", 
                 color="#e9ecef", 
                 alpha=0.8) +
  #  scale_x_continuous(limits = quantile(diamonds$price,c(0.01,0.99))) +
   scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
              labels = trans_format("log10", math_format(10^.x))) +
    geom_vline(aes(xintercept = mean(price)), 
             linetype = "dashed", size = 0.6,
             color = "#FC4E07") +
    ggtitle("Diamonds price distribution")


 # Dummy data
data <- data.frame(
  var1 = rnorm(1000),
  var2 = rnorm(1000, mean=2)
)

# Chart
p2 <- ggplot(data, aes(x=x) ) +
  # Top
  geom_density( aes(x = var1, y = ..density..), fill="#69b3a2" ) +
  geom_label( aes(x=4.5, y=0.25, label="variable1"), color="#69b3a2") +
  # Bottom
  geom_density( aes(x = var2, y = -..density..), fill= "#404080") +
  geom_label( aes(x=4.5, y=-0.25, label="variable2"), color="#404080") +
  xlab("value of x")

# Chart
p3 <- ggplot(data, aes(x=x) ) +
  geom_histogram( aes(x = var1, y = ..density..), fill="#69b3a2" ) +
  geom_label( aes(x=4.5, y=0.25, label="variable1"), color="#69b3a2") +
  geom_histogram( aes(x = var2, y = ..density..), fill= "#404080") +
  geom_label( aes(x=4.5, y=-0.25, label="variable2"), color="#404080") +
  xlab("value of x")

grid.arrange(p1,p11,p2,p3,nrow=4)

diamonds %>%
  #filter( price<300 ) %>%
  ggplot() +
    geom_density(aes(x=price,y=..count..),
                 fill="#69b3a2", 
                 color="#e9ecef", 
                 alpha=0.8) +
  #  scale_x_continuous(limits = quantile(diamonds$price,c(0.01,0.99))) +
   # scale_x_continuous(trans=log_trans(),breaks = c(500,1000,5000,10000,20000)) +
  scale_x_continuous(trans="log10",labels = scales::dollar_format()) +
    geom_vline(aes(xintercept = mean(price)), 
             linetype = "dashed", size = 0.6,
             color = "#FC4E07") +
    ggtitle("Diamonds **price** distribution") +
  theme(plot.title = ggtext::element_markdown())

Compare desity by groups

p1 <- ggplot(data=diamonds, aes(x=price, group=cut, fill=cut)) +
    geom_density(adjust=1.5, alpha=.4) 

p2 <- ggplot(data=diamonds, aes(x=price, group=cut, fill=cut)) +
    geom_density(adjust=1.5) +
    facet_wrap(~cut) +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      axis.ticks.x=element_blank()
    )

# basic example
library(ggridges)
p3 <- ggplot(diamonds, aes(x = price, y = cut, fill = cut)) +
  geom_density_ridges() +
  theme_ridges() + 
  theme(legend.position = "none")

grid.arrange(p1, # First row with one plot spaning over 2 columns
             arrangeGrob(p2, p3, ncol = 2), # Second row with 2 plots in 2 different columns
             nrow = 3)

p4 <- ggplot(diamonds, aes(x = price, y = cut, fill = factor(stat(quantile)))) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = 5
  ) +
  scale_fill_viridis_d(name = "Quintiles") +
  theme_ridges()

p4

2.2 Boxplot (Numeric Distribution)

ggplot(mtcars, aes(x=as.factor(cyl), y=mpg)) + 
    geom_boxplot(fill="slateblue", alpha=0.2,outlier.shape = NA) + 
    scale_y_continuous(limits = quantile(mtcars$mpg,c(0.01,0.99))) +
    xlab("cyl")

# create a data frame
variety=rep(LETTERS[1:7], each=40)
treatment=rep(c("high","low"),each=20)
note=seq(1:280)+sample(1:150, 280, replace=T)
data=data.frame(variety, treatment ,  note)
 
# grouped boxplot
p1 <- ggplot(data, aes(x=variety, y=note, fill=treatment)) + 
    geom_boxplot()
ggplotly(p1) %>%layout(boxmode = "group")

p2 <- ggplot(data, aes(x=variety, y=note, fill=treatment)) + 
    geom_violin()


dodge <- position_dodge(width = 0.6)

p3 <- ggplot(data = data, 
             aes(x = forcats::fct_reorder(variety,note,.fun=median,.desc = TRUE), 
                 y = note, 
                 fill = treatment)
             ) +
  geom_violin(alpha =.1, position = dodge)+
  geom_boxplot(width=.1, outlier.colour=NA, position = dodge) 

gridExtra::grid.arrange(p1,p2,p3,nrow=3,top="2 boxplot charts")

2.3 Histogram

# plot
bin <- 20
p <- ggplot(data=diamonds) +
     geom_histogram( aes(x=price),
                    #binwidth=1000, #function(x) 2 * IQR(x) / (length(x)^(1/3)), 
                    bins = bin,
                    fill="#69b3a2"
                    , color="#e9ecef"
                    , alpha=0.9) +
    ggtitle(paste0("Bin size = ",bin) ) 

 plotly::ggplotly(p)

2.4 Barchart

Barchart with Error Bars

# create dummy data
data <- data.frame(
  name=letters[1:5],
  value=sample(seq(4,15),5),
  sd=c(1,0.2,3,2,4)
)
 
# Most basic error bar
ggplot(data) +
    geom_bar( aes(x=name, y=value), stat="identity", fill="skyblue", alpha=0.7) +
    geom_errorbar( aes(x=name, ymin=value-sd, ymax=value+sd), 
                   width=0.4, colour="orange", alpha=0.9, size=1.3)

2 groups

#Let's build a dataset : height of 10 sorgho and poacee sample in 3 environmental conditions (A, B, C)
data <- data.frame(
  specie=c(rep("sorgho" , 10) , rep("poacee" , 10) ),
  cond_A=rnorm(20,10,4),
  cond_B=rnorm(20,8,3),
  cond_C=rnorm(20,5,4)
)

#Let's calculate the average value for each condition and each specie with the *aggregate* function
bilan <- aggregate(cbind(cond_A,cond_B,cond_C)~specie , data=data , mean)
rownames(bilan) <- bilan[,1]
bilan <- as.matrix(bilan[,-1])
 
#Plot boundaries
lim <- 1.2*max(bilan)

#A function to add arrows on the chart
error.bar <- function(x, y, upper, lower=upper, length=0.1,...){
  arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
}
 
#Then I calculate the standard deviation for each specie and condition :
stdev <- aggregate(cbind(cond_A,cond_B,cond_C)~specie , data=data , sd)
rownames(stdev) <- stdev[,1]
stdev <- as.matrix(stdev[,-1]) * 1.96 / 10
 
#I am ready to add the error bar on the plot using my "error bar" function !
ze_barplot <- barplot(bilan , beside=T , legend.text=T,col=c("blue" , "skyblue") 
                      , ylim=c(0,lim) , ylab="height")
error.bar(ze_barplot,bilan, stdev)

diamonds %>% filter(cut %in% c('Fair','Ideal')) %>%
  mutate(price_grp=cut(price,breaks = c(-Inf,1000,2000,3000,4000,5000,Inf))) %>%
  ggplot(aes(x=price_grp,fill=cut)) +
    geom_bar(color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_x_discrete(guide = guide_axis(n.dodge=2)) + # avoid x axis label overlap
    scale_fill_manual(values=c("#69b3a2", "#404080")) +
    ggtitle("Diamond price distribution")

* Superimpose bar plots

data.1 <- sample(1000:2000, 10)
data.2 <- sample(500:1000, 10)

ggplot(mapping = aes(x, y)) +
  geom_bar(data = data.frame(x = 1:10, y = data.1), width = 0.8, stat = 'identity', fill='lightgrey') +
  geom_bar(data = data.frame(x = 1:10, y = data.2), width = 0.4, stat = 'identity', fill = 'black') +
  theme_classic() + scale_y_continuous(expand = c(0, 0))

Stacked barchart with label and total counts, and order by total

dfp <- mtcars %>% mutate(cyl=as.factor(cyl),gear=as.factor(gear)) %>% 
  group_by(cyl,gear) %>%
  count() 
dfp %>%  
  ggplot(aes(x=forcats::fct_reorder(cyl, n, sum,.desc=T), 
             y=n, fill = gear,
             label=n,width=.5)) + # x=reorder(cyl, n, sum) replaced by forcats::fct_reorder
    geom_bar(stat="identity") +
    geom_text(data=(dfp %>% filter(gear %in% c(4,5))), # filter out some labels
              size = 3, position = position_stack(vjust = 0.5)) + 
    stat_summary(fun = sum, aes(label = ..y.., group = cyl), geom = "text",vjust = -.2) +
    xlab('Cyl') +
    labs(title='Stacked barchart with label and total counts, and order by total') +
    theme(legend.position = "bottom",
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.title.y = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks.y = element_blank())

mtcars %>%
  mutate(gear = factor(gear)) %>%
  group_by(gear, cyl) %>%
  count() %>%
  group_by(cyl) %>%
  mutate(percentage = n/sum(n)) %>%
  ggplot(aes(x=as.factor(cyl), y=percentage,fill=as.factor(gear)))+
  geom_bar(stat='identity', position="dodge" ) +
  geom_text(aes(y=percentage, label=scales::percent(percentage)),
            stat="identity", position=position_dodge(0.9), vjust=-0.5)+
  scale_y_continuous(labels = scales::percent) +
  ylab('Percent of Cylinder Group, %') +
  xlab('Cyl') +
  labs(title='Barchart with percentage',
       caption = "Source: mtcars") +
  theme(legend.position = "bottom",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        plot.caption = element_text(hjust = 0))

2.5 Stack Columns

Group barchart and Percent stacked barchar

# create a dataset
specie <- c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition <- rep(c("normal" , "stress" , "Nitrogen") , 4)
value <- abs(rnorm(12 , 0 , 15))
data <- data.frame(specie,condition,value)
 
# Grouped
p1 <- ggplot(data, aes(fill=condition, y=value, x=specie)) + 
    geom_bar(position="dodge", stat="identity") +
    ggtitle("Studying 4 species..")

 
# Stacked + percent
p2 <- ggplot(data, aes(fill=condition, y=value, x=specie)) + 
    geom_bar(position="fill", stat="identity") +
    ggtitle("Studying 4 species..")
 
# Graph
p3 <- ggplot(data, aes(fill=condition, y=value, x=condition)) + 
    geom_bar(position="dodge", stat="identity") +
    ggtitle("Studying 4 species..") +
    facet_wrap(~specie) +
    theme(legend.position="none") +
    xlab("")


grid.arrange(p1, # First row with one plot spaning over 2 columns
             arrangeGrob(p2, p3, ncol = 2), # Second row with 2 plots in 2 different columns
             nrow = 2)

2.6 Rank

Barchart rank

p1 <- diamonds %>% dplyr::group_by(cut)  %>% tally() %>%
    ggplot( aes(x=cut, y=n)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
    #coord_flip() +
    theme_bw() +
    xlab("") 

p2 <- diamonds %>% dplyr::group_by(cut)  %>% tally() %>% mutate(cut2=fct_reorder(cut,desc(n))) %>%
    ggplot( aes(x=cut2, y=n)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
    coord_flip() +
    theme_bw() +
    xlab("") 

p3 <- diamonds %>% dplyr::group_by(cut)  %>% tally() %>%
  ggplot( aes(x=cut, y=n)) +
    geom_segment( aes(xend=cut, yend=0)) +
    geom_point( size=4, color="orange") +
    coord_flip() +
    theme_bw() +
    xlab("")
gridExtra::grid.arrange(p1,p2,p3,nrow=3)

grid.arrange(p1, # First row with one plot spaning over 2 columns
             arrangeGrob(p2, p3, ncol = 2), # Second row with 2 plots in 2 different columns
             nrow = 2)

2.7 Heatmap

# Dummy data
x <- LETTERS[1:20]
y <- paste0("var", seq(1,20))
data <- expand.grid(X=x, Y=y)
data$Z <- runif(400, 0, 5)
 
# Heatmap 
ggplot(data, aes(X, Y, fill= Z)) + 
  geom_tile()

2.8 Bullet

fig <- plot_ly() 
fig <- fig %>%
  add_trace(
    type = "indicator",
    mode = "number+gauge+delta",
    value = 180,
    delta = list(reference = 200),
    domain = list(x = c(0.25, 1), y = c(0.08, 0.25)),
    title =list(text = "Revenue"),
    gauge = list(
      shape = "bullet",
      axis = list(range = c(NULL, 300)),
      threshold = list(
        line= list(color = "black", width = 2),
        thickness = 0.75,
        value = 170),
      steps = list(
        list(range = c(0, 150), color = "gray"),
        list(range = c(150, 250), color = "lightgray")),
      bar = list(color = "black"))) 
fig <- fig %>%
  add_trace(
    type = "indicator",
    mode = "number+gauge+delta",
    value = 35,
    delta = list(reference = 200),
    domain = list(x = c(0.25, 1), y = c(0.4, 0.6)),
    title = list(text = "Profit"),
    gauge = list(
      shape = "bullet",
      axis = list(range = list(NULL, 100)),
      threshold = list(
        line = list(color = "black", width= 2),
        thickness = 0.75,
        value = 50),
      steps = list(
        list(range = c(0, 25), color = "gray"),
        list(range = c(25, 75), color = "lightgray")),
      bar = list(color = "black"))) 
fig <- fig %>%
  add_trace(
    type =  "indicator",
    mode = "number+gauge+delta",
    value = 220,
    delta = list(reference = 300 ),
    domain = list(x = c(0.25, 1), y = c(0.7, 0.9)),
    title = list(text = "Satisfaction"),
    gauge = list(
      shape = "bullet",
      axis = list(range = list(NULL, 300)),
      threshold = list(
        line = list(color = "black", width = 2),
        thickness = 0.75,
        value = 210),
      steps = list(
        list(range = c(0, 100), color = "gray"),
        list(range = c(100, 250), color = "lightgray")),
      bar = list(color = "black")))

fig

2.9 WaterFall (Part of Whole)

df_wf <- diamonds %>% count(cut) %>%
  mutate(prop=round(prop.table(n),digits = 2)*100) %>%
  rbind(cbind(cut='Total',as.data.frame.list(colSums(.[,-1]))))

df_wf$cut <- as.factor(df_wf$cut)

df_wf$cut <- fct_relevel(df_wf$cut,c('Total'
                        ,'Fair'
                        ,'Good'
                        ,'Ideal'
                        ,'Premium'
                        ,'Very Good'
                        ))

df_wf_plt <- df_wf %>% 
  arrange(prop) %>%
  mutate(csum=cumsum(prop),
         cut = fct_reorder(cut,prop,.desc=TRUE),
         id = as.integer(cut),
         labl = paste0(scales::comma(n),' (',prop,'%)'),
         desc = case_when(id == 2 ~ "Volume Rank 2",
                          id == 3  ~ "Volume Rank 3",
                          id == 4  ~ "Volume Rank 4",
                          id == 5  ~ "Volume Rank 5",
                          TRUE ~ "cut")) %>%
  arrange(id) %>%
  mutate(end = csum - prop,
         strt = lag(end,default = 0))
  


 df_wf_plt %>%
  ggplot(aes(x=cut
             , xmin = id - 0.45
             , xmax = id + 0.45
             , ymin = end
             , ymax = strt
             )) +
  geom_rect(colour = "black"
            ,fill = "#FFFF66"
            ,alpha = 0.6
            , show.legend = FALSE) +
   geom_text(aes(id,end, 
                   label = labl), 
               vjust = 1.5, 
               size = 3) +
   geom_text(aes(id,strt, 
                   label = desc), 
               vjust = -0.5, 
               size = 3) +
   labs(title = "Waterfall Chart",
        subtitle = "By Diamonds Cut",
        caption = "(Based on data from ...)") +
   xlab("Cut Type") + 
   ylab("Percentage") +
   theme_minimal()

# create company income statement
category <- c("Sales", "Services", "Fixed Costs", 
              "Variable Costs", "Taxes")
amount <- c(101000, 52000, -23000, -15000, -10000)
income <- data.frame(category, amount)

waterfalls::waterfall(income, 
          calc_total=TRUE, 
          total_axis_text = "Net",
          total_rect_text_color="black",
          total_rect_color="goldenrod1") +
  scale_y_continuous(label=scales::dollar) +
  labs(title = "West Coast Profit and Loss", 
       subtitle = "Year 2017",
       y="", 
       x="") +
  theme_minimal()

balance <- data.frame(desc = c("Starting Cash",
     "Sales", "Refunds", "Payouts", "Court Losses",
     "Court Wins", "Contracts", "End Cash"), 
     amount = c(2000,
     3400, -1100, -100, -6600, 3800, 1400, 2800))
balance$desc <- factor(balance$desc, levels = balance$desc)
balance$id <- seq_along(balance$amount)
balance$type <- ifelse(balance$amount > 0, "in","out")
balance[balance$desc %in% c("Starting Cash", "End Cash"),"type"] <- "net"

balance$end <- cumsum(balance$amount)
balance$end <- c(head(balance$end, -1), 0)
balance$start <- c(0, head(balance$end, -1))
balance <- balance[, c(3, 1, 4, 6, 5, 2)]

# id          desc type start   end amount
# 1  1 Starting Cash  net     0  2000   2000
# 2  2         Sales   in  2000  5400   3400
# 3  3       Refunds  out  5400  4300  -1100
# 4  4       Payouts  out  4300  4200   -100
# 5  5  Court Losses  out  4200 -2400  -6600
# 6  6    Court Wins   in -2400  1400   3800
# 7  7     Contracts   in  1400  2800   1400
# 8  8      End Cash  net  2800     0   2800


# ggplot(balance, aes(desc, fill = type)) + 
#   geom_rect(aes(x = desc,xmin = id - 0.45, xmax = id + 0.45, ymin = end,
#      ymax = start))
# balance$type <- factor(balance$type, levels = c("out","in", "net"))

strwr <- function(str) gsub(" ", "\n", str)

p1 <- ggplot(balance, aes(fill = type)) + 
  geom_rect(aes(x = desc,
                xmin = id - 0.45, 
                xmax = id + 0.45, 
                ymin = end,
                ymax = start)) + 
  scale_y_continuous("", labels = scales::comma) +
     scale_x_discrete("", breaks = levels(balance$desc),
         labels = strwr(levels(balance$desc))) +
     theme(legend.position = "none")

p1 + geom_text(data = balance[balance$type == "in",], 
               aes(id,end, 
                   label = scales::comma(amount)), 
               vjust = 1, 
               size = 3) +
     geom_text(data = balance[balance$type == "out",], aes(id,
         end, label = scales::comma(amount)), vjust = -0.3,
         size = 3) + 
  geom_text(data = subset(balance,
     type == "net" & id == min(id)), aes(id, end,
     colour = type, label = scales::comma(end), vjust = ifelse(end <
         start, 1, -0.3)), size = 3.5) + 
  geom_text(data = subset(balance,
     type == "net" & id == max(id)), aes(id, start,
     colour = type, label = scales::comma(start), vjust = ifelse(end <
         start, -0.3, 1)), size = 3.5)

3. Relationship

3.1 Scatter

# A basic scatterplot with color depending on Species
p1 <- ggplot(iris, aes(x=Sepal.Length, y=Sepal.Width, color=Species)) + 
    geom_point(size=3)

p2 <- ggplot(iris, aes(x=Sepal.Length, y=Sepal.Width)) +
  geom_point() +
  geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) 

gridExtra::grid.arrange(p1,p2,nrow=2)

Add the text to plot

# Keep 30 first rows in the mtcars natively available dataset
data=head(mtcars, 30)
 
# 1/ add text with geom_text, use nudge to nudge the text
ggplot(data, aes(x=wt, y=mpg)) +
  geom_point() + # Show dots
  geom_text_repel(
    label=rownames(data)
  )

# geom_text(
#     label=rownames(data), 
#     nudge_x = 0.25, nudge_y = 0.25, 
#     check_overlap = T
#   )

Add the label with conditions

# Keep 30 first rows in the mtcars natively available dataset
data=head(mtcars, 30)

# Change data rownames as a real column called 'carName'
data <- data %>%
  rownames_to_column(var="carName")
  
# Plot
ggplot(data, aes(x=wt, y=mpg)) +
  geom_point() + 
  geom_label( 
    data=data %>% filter(mpg>20 & wt>3), # Filter data first
    aes(label=carName)
  )

Add the jittering to viz the overlap dots

linecolors <- c("#714C02", "#01587A", "#024E37")
fillcolors <- c("#9D6C06", "#077DAA", "#026D4E")

# partially transparent points by setting `alpha = 0.5`
ggplot(mpg, aes(displ, cty, colour = drv, fill = drv)) +
  geom_point(position=position_jitter(h=0.1, w=0.1),
             shape = 21, alpha = 0.5, size = 3) +
  scale_color_manual(values=linecolors) +
  scale_fill_manual(values=fillcolors) +
  theme_bw()

3.2 Correlation

# Quick display of two cabapilities of GGally, to assess the distribution and correlation of variables 
library(GGally)
 
# Create data 
data(flea)
ggpairs(flea, columns = 2:4, ggplot2::aes(colour=species))

3.3 Combined Bar and line

# Build dummy data
data <- data.frame(
  day = as.Date("2019-01-01") + 0:99,
  temperature = runif(100) + seq(1,100)^2.5 / 10000,
  price = runif(100) + seq(100,1)^1.5 / 10
)

# Value used to transform the data
coeff <- 10

# A few constants
temperatureColor <- "#69b3a2"
priceColor <- rgb(0.2, 0.6, 0.9, 1)

ggplot(head(data, 80), aes(x=day)) +
  
  geom_bar( aes(y=temperature), stat="identity", size=.1, fill=temperatureColor, color="black", alpha=.4) + 
  geom_line( aes(y=price / coeff), size=2, color=priceColor) +
  
  scale_y_continuous(
    
    # Features of the first axis
    name = "Temperature (Celsius °)",
    
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*coeff, name="Price ($)")
  ) + 
  
  theme(
    axis.title.y = element_text(color = temperatureColor, size=13),
    axis.title.y.right = element_text(color = priceColor, size=13)
  ) +

  ggtitle("Temperature down, price up")

3.6 Evolution (Change over time)

# Load dataset from github
# data <- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/3_TwoNumOrdered.csv", header=T)

# saveRDS(data,file = "./data.rds")

data<-readRDS("./data.rds")
data$date <- lubridate::ymd(data$date)

# plot
data %>% 
  ggplot( aes(x=date, y=value)) +
    geom_line(color="#69b3a2") +
    ylim(0,22000) +
    annotate(geom="text", x=as.Date("2017-01-01"), y=20089, 
             label="Bitcoin price reached 20k $\nat the end of 2017") +
    annotate(geom="point", x=as.Date("2017-12-17"), y=20089, 
             size=10, shape=21, fill="transparent") +
    geom_hline(yintercept=5000, color="orange", size=.5)

library(dygraphs)
library(xts)          # To make the convertion data-frame / xts format
 
# Create data 
data <- data.frame(
  time=seq(from=Sys.Date()-40, to=Sys.Date(), by=1 ), 
  value=runif(41)
)

# Double check time is at the date format
str(data$time)

##  Date[1:41], format: "2021-07-25" "2021-07-26" "2021-07-27" "2021-07-28" "2021-07-29" ...

# Switch to XTS format
data <- xts(x = data$value, order.by = data$time)
 
# Default = line plot --> See chart #316
 
# Add points
p1 <- dygraph(data) %>%
  dyOptions( drawPoints = TRUE, pointSize = 4 )
p1

p2 <- dygraph(data) %>%
  dyOptions( fillGraph=TRUE )

trend <- sin(seq(1,41))+runif(41)
data <- data.frame(
  time=seq(from=Sys.Date()-40, to=Sys.Date(), by=1 ), 
  trend=trend, 
  max=trend+abs(rnorm(41)), 
  min=trend-abs(rnorm(41, sd=1))
)

# switch to xts format
data <- xts(x = data[,-1], order.by = data$time)

# Plot
p3 <- dygraph(data) %>%
  dySeries(c("min", "trend", "max"))


p3

df <- economics %>%
  select(date, psavert, uempmed) %>%
  gather(key = "variable", value = "value", -date)

# Multiple line plot + label
p1 <- ggplot(df, aes(x = date, y = value,group=variable)) + 
  geom_line(aes(color = variable), size = 1) +
  geom_text(data = df %>% filter(date == last(df$date)), 
            aes(label = variable,color = variable), 
            hjust = -0.1, nudge_x = 0.1) +
  scale_color_manual(values = c("#00AFBB", "#E7B800")) +
  # Allow labels to bleed past the canvas boundaries
    coord_cartesian(clip = 'off') +
 theme_minimal() +
  # Remove legend & adjust margins to give more space for labels
  # Remember, the margins are t-r-b-l
 theme(legend.position = "bottom",
       plot.margin = margin(0.1, 2.6, 0.1, 0.1, "cm")) 
# Area plot
p2 <- ggplot(df, aes(x = date, y = value)) + 
  geom_area(aes(color = variable, fill = variable), 
            alpha = 0.5, position = position_dodge(0.8)) +
  scale_color_manual(values = c("#00AFBB", "#E7B800")) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  theme_minimal()

gridExtra::grid.arrange(p1,p2,nrow=2)

library(babynames)
# Load dataset from github
data <- babynames::babynames %>% 
  filter(name %in% c("Mary","Emma", "Ida", "Ashley", "Amanda", "Jessica",    "Patricia", "Linda", "Deborah",   "Dorothy", "Betty", "Helen")) %>%
  filter(sex=="F")

# Plot
data %>%
  ggplot( aes(x=year, y=n, group=name, color=name)) +
    geom_line() +
    theme(
      legend.position="none",
      plot.title = element_text(size=14)
    ) +
    ggtitle("A spaghetti chart of baby names popularity") +
    theme_bw()

data %>%
  mutate( highlight=ifelse(name=="Amanda", "Amanda", "Other")) %>%
  ggplot( aes(x=year, y=n, group=name, color=highlight, size=highlight)) +
    geom_line() +
    scale_color_manual(values = c("#69b3a2", "lightgrey")) +
    scale_size_manual(values=c(1.5,0.2)) +
    theme(legend.position="none") +
    ggtitle("Popularity of American names in the previous 30 years") +
    theme_bw() +
    geom_label( x=1990, y=55000, label="Amanda reached 3550\nbabies in 1970", size=4, color="#69b3a2") +
    theme(
      legend.position="none",
      plot.title = element_text(size=14)
)

tmp <- data %>%
  mutate(name2=name)

tmp %>%
  ggplot( aes(x=year, y=n)) +
    geom_line( data=tmp %>% dplyr::select(-name), aes(group=name2), color="grey", size=0.5, alpha=0.5) +
    geom_line( aes(color=name), color="#69b3a2", size=1.2 )+
    theme_bw() +
    theme(
      legend.position="none",
      plot.title = element_text(size=14),
      panel.grid = element_blank()
    ) +
    ggtitle("A spaghetti chart of baby names popularity") +
    facet_wrap(~name)

3.7 Calendar-Heatmap

library(lubridate) # for easy date manipulation

amznStock = as.data.frame(tidyquant::tq_get(c("AMZN"),get="stock.prices")) # get data using tidyquant
amznStock = amznStock[year(amznStock$date) > 2017, ] # Using data only after 2012 

amznStock$weekday = as.POSIXlt(amznStock$date)$wday #finding the day no. of the week
amznStock$weekdayf<-factor(amznStock$weekday,levels=rev(1:7),labels=rev(c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")),ordered=TRUE) # converting the day no. to factor

amznStock$monthf<-factor(month(amznStock$date),levels=as.character(1:12),labels=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),ordered=TRUE) # finding the month

amznStock$week <- as.numeric(format(amznStock$date,"%W")) # finding the week of the year for each date

amznStock$day <- lubridate::day(amznStock$date)

p <- ggplot(amznStock, aes(monthf, day, fill = amznStock$adjusted)) + 
    geom_tile(colour = "white") + facet_grid(year(amznStock$date)~ .) + scale_fill_gradient(low="red", high="green") +  xlab("Month") + ylab("") + ggtitle("Time-Series Calendar Heatmap: AMZN Stock Prices") + labs(fill = "Price")

p

stock.data <- transform(amznStock,
  week = as.POSIXlt(amznStock$date)$yday %/% 7 + 1,
  wday = as.POSIXlt(amznStock$date)$wday,
  year = as.POSIXlt(amznStock$date)$year + 1900)

library(ggplot2)

ggplot(stock.data, aes(week, wday, fill = adjusted)) + 

  geom_tile(colour = "white") + 

  scale_fill_gradientn(colours = c("#D61818","#FFAE63","#FFFFBD","#B5E384")) + 

  facet_wrap(~ year, ncol = 1)

3.8 Sankey chart

library(networkD3)
 
# Load energy projection data
URL <- "https://cdn.rawgit.com/christophergandrud/networkD3/master/JSONdata/energy.json"
Energy <- jsonlite::fromJSON(URL)

 
# Now we have 2 data frames: a 'links' data frame with 3 columns (from, to, value), and a 'nodes' data frame that gives the name of each node.
head(Energy$links)

head(Energy$nodes)

# Thus we can plot it
p <- sankeyNetwork(Links = Energy$links, Nodes = Energy$nodes, Source = "source",
              Target = "target", Value = "value", NodeID = "name",
              units = "TWh", fontSize = 12, nodeWidth = 30)
p

df <- diamonds %>% count(cut,color)

s <- df %>% select(cut) %>%  distinct() %>% rename(name=cut)
t <- df %>% select(color) %>% distinct() %>% rename(name=color)

all_nodes <- dplyr::union(as.character(s$name) ,as.character(t$name))
nodes <- data.frame(node=c(0:(length(all_nodes)-1)),
                    name =c(all_nodes))

links <- merge(df,nodes,by.x='cut',by.y='name')
links <- merge(links,nodes,by.x='color',by.y='name')


p <- sankeyNetwork(Links = links, Nodes = nodes, Source = "node.x",
              Target = "node.y", Value = "n", NodeID = "name",
              units = "counts", fontSize = 12, nodeWidth = 30)
p

4 Spatial

4.1 Map

# Load the library

# Note: if you do not already installed it, install it with:
# install.packages("leaflet")

# Background 1: NASA
# m <- leaflet() %>% 
#    addTiles() %>% 
#    setView( lng = 2.34, lat = 48.85, zoom = 5 ) %>% 
#    addProviderTiles("NASAGIBS.ViirsEarthAtNight2012")
# m
 
# Background 2: World Imagery
m <- leaflet() %>% 
   addTiles() %>% 
   setView( lng = 2.34, lat = 48.85, zoom = 3 ) %>% 
   addProviderTiles("Esri.WorldImagery")
m

data("world.cities")

df <- world.cities %>% filter(country.etc=="Australia")
## define a palette for hte colour
pal <- colorNumeric(palette = "YlOrRd",
                    domain = df$pop)

leaflet(data = df) %>%
    addTiles() %>%
    addCircleMarkers(lat = ~lat, lng = ~long, popup = ~name, 
                     color = ~pal(pop), stroke = FALSE, fillOpacity = 0.6) %>%
    addLegend(position = "bottomleft", pal = pal, values = ~pop)

leaflet(data = df) %>%
    addTiles() %>%
    addMarkers(lat = ~lat, lng = ~long, popup = ~name, 
                     label=~ as.character(pop),clusterOptions = 
                 markerOptions()) %>%
    addLegend(position = "bottomleft", pal = pal, values = ~pop)

To be continued

5 Others

5.1 patchwork

library(patchwork)
library(ggplot2)
p1 <- ggplot(mtcars) + 
  geom_point(aes(mpg, disp)) + 
  ggtitle('Plot 1')

p2 <- ggplot(mtcars) + 
  geom_boxplot(aes(gear, disp, group = gear)) + 
  ggtitle('Plot 2')

p3 <- ggplot(mtcars) + 
  geom_point(aes(hp, wt, colour = mpg)) + 
  ggtitle('Plot 3')

p4 <- ggplot(mtcars) + 
  geom_bar(aes(gear)) + 
  facet_wrap(~cyl) + 
  ggtitle('Plot 4')

p1 + p2

p1 / (p2 | p3)

p1 + gridExtra::tableGrob(mtcars[1:10, c('mpg', 'disp')])