Install necessary packages

install.packages(c("ggdendro", "gplots", "heatmaply", "Rtsne"))
library(tidyverse)
library(ggdendro)
library(grid)

Using base R

df <- scale(iris[,1:4])

heatmap(df)

Activity 1


# Solution
heatmap(scale(mtcars))

Using ggplot2/tidyverse

  • ggplot2 likes to set axes using columns (see tidy data)
  • we will use pivot_longer to set that up

Heatmap

df <- iris %>%
  mutate(id = as.character(row_number())) %>%
  pivot_longer(
    1:4,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable) %>%
  mutate(
    scaled = (value - min(value)) / (max(value) - min(value))
  )

hm <- ggplot(df, aes(x = variable, y = id)) +
  geom_tile(aes(fill = scaled)) +
  scale_fill_viridis_c() +
  scale_x_discrete(expand = c(0,0)) +
  scale_y_discrete(expand = c(0,0)) +
  theme(
    axis.text.y = element_text(size = 4),
    legend.position = "left"
    )

hm

Activity 2


df <- mtcars %>%
  mutate(id = as.character(row_number())) %>%
  pivot_longer(
    -id,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable) %>%
  mutate(
    scaled = (value - min(value)) / (max(value) - min(value))
  )

hm <- ggplot(df, aes(x = variable, y = id)) +
  geom_tile(aes(fill = scaled)) +
  scale_fill_viridis_c() +
  scale_x_discrete(expand = c(0,0)) +
  scale_y_discrete(expand = c(0,0)) +
  theme(
    axis.text.y = element_text(size = 4),
    legend.position = "left"
    )

hm

Dendrogram

m <- iris[,1:4]
dd <- dist(m)
hc <- hclust(dd)

dend <- ggdendrogram(hc, rotate = TRUE) +
  theme(
    axis.text.y = element_text(size = 4),
    axis.text.x = element_blank()
  )

dend

Together

grid.newpage()
print(hm, vp = viewport(x = 0.4, y = 0.5, width = 0.8, height = 1))
print(dend, vp = viewport(x = 0.9, y = 0.5, width = 0.2, height = 1))

Reordering heatmap to clusters

df <- iris %>%
  mutate(id = as.character(row_number())) %>%
  pivot_longer(
    1:4,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable) %>%
  mutate(
    scaled = (value - min(value)) / (max(value) - min(value))
  )

hc$order

df$id <- factor(df$id, levels = hc$order)

hm <- ggplot(df, aes(x = variable, y = id)) +
  geom_tile(aes(fill = scaled)) +
  scale_fill_viridis_c() +
  scale_x_discrete(expand = c(0,0)) +
  scale_y_discrete(expand = c(0,0)) +
  theme(
    axis.text.y = element_text(size = 4),
    legend.position = "left"
    )

grid.newpage()
print(hm, vp = viewport(x = 0.4, y = 0.5, width = 0.8, height = 1))
print(dend, vp = viewport(x = 0.9, y = 0.51, width = 0.2, height = 1.05))

Using gplots::heatmap.2

library(gplots)

heatmap.2(as.matrix(iris[,1:4]))

heatmap.2(as.matrix(iris[,1:4]),
          scale = "column")

heatmap.2(as.matrix(iris[,1:4]),
          scale = "column",
          trace = "none",
          main = "Title Here")

heatmap.2(as.matrix(iris[,1:4]),
          scale = "column",
          trace = "none",
          main = "Title Here",
          col = "topo.colors")

library(viridisLite)
heatmap.2(as.matrix(iris[,1:4]),
          scale = "column",
          trace = "none",
          main = "Title Here",
          col = "viridis",
          cexRow = .5)

Activity 3


heatmap.2(as.matrix(mtcars),
          scale = "column",
          trace = "none",
          col = "viridis", 
          cexRow = 0.5)

Heatmaply

library(heatmaply)

heatmaply(iris[,1:4], scale = "column")
heatmaply(mtcars, scale = "column")

Prepping for Activity 4

https://www.data-to-viz.com/graph/heatmap.html

nat_stats <- read_delim("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/multivariate.csv", delim = ";") %>%
  column_to_rownames("Country") %>%
  slice_max(Pop, n = 25)

Activity 4


heatmap(as.matrix(nat_stats[,1:8]), scale = "column")
heatmap.2(as.matrix(nat_stats[,1:8]), scale = "column")
heatmaply(nat_stats[,1:8], scale = "column")

tmp <- nat_stats %>%
  rownames_to_column("country") %>%
  select(c(1:8, "country")) %>%
  pivot_longer(
    -country,
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable) %>%
  mutate(
    scaled = (value - min(value)) / (max(value) - min(value))
  )

ggplot(tmp, aes(x = variable, y = country)) + 
  geom_tile(aes(fill = scaled)) +
  scale_fill_viridis_c()

TSNE Plots

library(Rtsne)

iris_vars <- iris[,1:4]
iris_sp <- iris$Species

iris_tsne <- Rtsne(iris_vars, check_duplicates = FALSE)

Rtsne reduces the dimensionality of the data, allowing us to plot it on a plane even if it is beyond two dimensions

iris_df <- tibble(
  x = iris_tsne$Y[,1],
  y = iris_tsne$Y[,2],
  species = iris_sp
)
ggplot(iris_df, aes(x = x, y = y, color = species)) +
  geom_point()

Activity 5


gapminder <- gapminder::gapminder
gap_vars <- gapminder[,4:6]
gap_cont <- gapminder$continent
gap_tsne <- Rtsne(gap_vars)

gap_df <- tibble(
  x = gap_tsne$Y[,1],
  y = gap_tsne$Y[,2],
  continent = gap_cont
)

ggplot(gap_df, aes(x = x, y = y, color = continent)) + 
  geom_point()

This weirdness is likely because this is a time series