4. K-Means Clustering

4.3 Computing k-means clustering in R

4.3.1 Data

# Load the built-in arrests data and standardise every column (centre and
# scale) so that all four variables are on a comparable scale.
data(list = "USArrests")
df <- scale(USArrests, center = TRUE, scale = TRUE)

# Preview the first three standardised rows.
head(df, n = 3)
##             Murder   Assault   UrbanPop         Rape
## Alabama 1.24256408 0.7828393 -0.5209066 -0.003416473
## Alaska  0.50786248 1.1068225 -1.2117642  2.484202941
## Arizona 0.07163341 1.4788032  0.9989801  1.042878388

4.3.2 Required R packages and functions

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

4.3.3 Estimating the optimal number of clusters

library(factoextra)
# Elbow method: plot the total within-cluster sum of squares against k and
# mark the chosen k = 4 with a dashed vertical line.
# The argument is `linetype`; the former misspelling `lynetype` was ignored
# by geom_vline() with a warning, so the line was drawn solid.
fviz_nbclust(df, kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2)

4.3.4 Computing k-means clustering

# Seed the RNG so the random starting centres are reproducible.
set.seed(123)

# Partition the standardised data into 4 clusters, keeping the best result
# out of 25 random starts.
km.res <- kmeans(x = df, centers = 4, nstart = 25)
print(km.res)
## K-means clustering with 4 clusters of sizes 8, 13, 16, 13
## 
## Cluster means:
##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 4  0.6950701  1.0394414  0.7226370  1.27693964
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              3              3              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              2              4              3              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              2              1              2              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              4              2              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              4              2              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              2              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              4              3              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              2              2              3 
## 
## Within cluster sum of squares by cluster:
## [1]  8.316061 11.952463 16.212213 19.922437
##  (between_SS / total_SS =  71.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Mean of each original (unscaled) variable within every k-means cluster.
aggregate(USArrests, by = list(cluster = km.res$cluster), FUN = mean)
##   cluster   Murder   Assault UrbanPop     Rape
## 1       1 13.93750 243.62500 53.75000 21.41250
## 2       2  3.60000  78.53846 52.07692 12.17692
## 3       3  5.65625 138.87500 73.87500 18.78125
## 4       4 10.81538 257.38462 76.00000 33.19231

# Attach the cluster label to the original data as an extra column.
dd <- cbind(USArrests, cluster = km.res$cluster)
head(dd)
##            Murder Assault UrbanPop Rape cluster
## Alabama      13.2     236       58 21.2       1
## Alaska       10.0     263       48 44.5       4
## Arizona       8.1     294       80 31.0       4
## Arkansas      8.8     190       50 19.5       1
## California    9.0     276       91 40.6       4
## Colorado      7.9     204       78 38.7       4

4.3.5 Accessing the results of the kmeans() function

# Cluster number assigned to every observation.
km.res[["cluster"]]
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              3              3              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              2              4              3              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              2              1              2              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              4              2              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              4              2              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              2              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              4              3              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              2              2              3
# Cluster labels of the first four observations only.
head(km.res[["cluster"]], n = 4)
##  Alabama   Alaska  Arizona Arkansas 
##        1        4        4        1
# Number of observations per cluster.
km.res[["size"]]
## [1]  8 13 16 13
# Cluster centres, expressed on the standardised variables.
km.res[["centers"]]
##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 3 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 4  0.6950701  1.0394414  0.7226370  1.27693964

4.3.6 Visualizing k-means clusters

# Plot the k-means partition with one colour per cluster.
fviz_cluster(
  km.res,
  data = df,
  palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme_minimal()
)

5. K-Medoids

5.3 Computing PAM in R

5.3.1 Data

# Reload the arrests data and standardise it before running PAM.
data(list = "USArrests")
df <- scale(USArrests, center = TRUE, scale = TRUE)
# Show the first three standardised rows.
head(df, n = 3)
##             Murder   Assault   UrbanPop         Rape
## Alabama 1.24256408 0.7828393 -0.5209066 -0.003416473
## Alaska  0.50786248 1.1068225 -1.2117642  2.484202941
## Arizona 0.07163341 1.4788032  0.9989801  1.042878388

5.3.2 Required R packages and functions

library(cluster)
library(factoextra)

5.3.3 Estimating the optimal number of clusters

library(cluster)
library(factoextra)

# Average silhouette width across candidate numbers of clusters, using PAM
# as the clustering function.
fviz_nbclust(df, pam, method = "silhouette") +
  theme_classic()

5.3.4 Computing PAM clustering

# Partition around medoids with two clusters, then print the fitted object.
pam.res <- pam(x = df, k = 2)
print(pam.res)
## Medoids:
##            ID     Murder    Assault   UrbanPop       Rape
## New Mexico 31  0.8292944  1.3708088  0.3081225  1.1603196
## Nebraska   27 -0.8008247 -0.8250772 -0.2445636 -0.5052109
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              1              1              2              1 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              1              2              2              1              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              2              2              1              2              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              2              2              1              2              1 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              2              1              2              1              1 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              1              2              2 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              1              1              1              2              2 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              2              2              2              2              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              1              2              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              2              2              2              2              2 
## Objective function:
##    build     swap 
## 1.441358 1.368969 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

5.3.5 Accessing the results of the pam() function

# Coordinates of the medoid observations.
pam.res[["medoids"]]
##                Murder    Assault   UrbanPop       Rape
## New Mexico  0.8292944  1.3708088  0.3081225  1.1603196
## Nebraska   -0.8008247 -0.8250772 -0.2445636 -0.5052109
# Cluster labels of the first observations.
head(pam.res[["clustering"]])
##    Alabama     Alaska    Arizona   Arkansas California   Colorado 
##          1          1          1          2          1          1

5.3.6 Visualizing PAM clusters

# Plot the two PAM clusters with one colour each.
fviz_cluster(
  pam.res,
  palette = c("#00AFBB", "#FC4E07"),
  ellipse.type = "t",
  repel = TRUE,
  ggtheme = theme_classic()
)

6. CLARA - Clustering Large Applications

6.3 Computing CLARA in R

6.3.1 Data format and preparation

# Simulate a reproducible two-group data set:
#   - 200 points drawn around (0, 0)
#   - 300 points drawn around (50, 50)
# both with standard deviation 8 on each axis.
set.seed(1234)
df <- rbind(
  cbind(rnorm(200, 0, 8), rnorm(200, 0, 8)),
  cbind(rnorm(300, 50, 8), rnorm(300, 50, 8))
)
colnames(df) <- c("x", "y")
# seq_len() is safe even for a zero-row matrix, unlike 1:nrow(df).
rownames(df) <- paste0("S", seq_len(nrow(df)))
# head() takes `n`; the former `nrow = 6` was silently swallowed by `...`.
head(df, n = 6)
##             x        y
## S1  -9.656526 3.881815
## S2   2.219434 5.574150
## S3   8.675529 1.484111
## S4 -18.765582 5.605868
## S5   3.432998 2.493448
## S6   4.048447 6.083699

6.3.3 Estimating the optimal number of clusters

library(cluster)
library(factoextra)

# Average silhouette width across candidate numbers of clusters, using CLARA
# as the clustering function.
fviz_nbclust(df, clara, method = "silhouette") +
  theme_classic()

6.3.4 Computing CLARA

# CLARA with two clusters, drawing 50 sub-samples and using the PAM-like
# swap phase; then print the fitted object.
clara.res <- clara(x = df, k = 2, samples = 50, pamLike = TRUE)
print(clara.res)
## Call:     clara(x = df, k = 2, samples = 50, pamLike = TRUE) 
## Medoids:
##              x         y
## S121 -1.531137  1.145057
## S455 48.357304 50.233499
## Objective function:   9.87862
## Clustering vector:    Named int [1:500] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "names")= chr [1:500] "S1" "S2" "S3" "S4" "S5" "S6" "S7" ...
## Cluster sizes:            200 300 
## Best sample:
##  [1] S37  S49  S54  S63  S68  S71  S76  S80  S82  S101 S103 S108 S109 S118 S121
## [16] S128 S132 S138 S144 S162 S203 S210 S216 S231 S234 S249 S260 S261 S286 S299
## [31] S304 S305 S312 S315 S322 S350 S403 S450 S454 S455 S456 S465 S488 S497
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"
# Attach the cluster label to the simulated points. The component is named
# `clustering` (see "Available components" of the clara object); the former
# `$cluster` only worked through `$` partial matching.
dd <- cbind(df, cluster = clara.res$clustering)
head(dd, n = 4)
##             x        y cluster
## S1  -9.656526 3.881815       1
## S2   2.219434 5.574150       1
## S3   8.675529 1.484111       1
## S4 -18.765582 5.605868       1
# Coordinates of the medoid observations.
clara.res$medoids
##              x         y
## S121 -1.531137  1.145057
## S455 48.357304 50.233499
# Cluster labels of the first observations.
head(clara.res$clustering)
## S1 S2 S3 S4 S5 S6 
##  1  1  1  1  1  1

6.3.5 Visualizing CLARA clusters

# Plot the CLARA clusters as small points only (no labels), one colour each.
fviz_cluster(
  clara.res,
  palette = c("#00AFBB", "#FC4E07"),
  ellipse.type = "t",
  geom = "point",
  pointsize = 1,
  ggtheme = theme_classic()
)

7. Agglomerative Clustering

7.2 Steps to agglomerative hierarchical clustering

7.2.1 Data structure and preparation

# Load the arrests data and standardise it before computing distances.
data("USArrests")
df <- scale(USArrests)
# head() takes `n`; the former `nrow = 6` was silently swallowed by `...`.
head(df, n = 6)
##                Murder   Assault   UrbanPop         Rape
## Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
## Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
## Arizona    0.07163341 1.4788032  0.9989801  1.042878388
## Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
## California 0.27826823 1.2628144  1.7589234  2.067820292
## Colorado   0.02571456 0.3988593  0.8608085  1.864967207

7.2.2 Similarity measures

# Pairwise Euclidean distances between the standardised observations.
res.dist <- dist(x = df, method = "euclidean")
# Show the top-left 6 x 6 corner of the full distance matrix.
as.matrix(res.dist)[seq_len(6), seq_len(6)]
##             Alabama   Alaska  Arizona Arkansas California Colorado
## Alabama    0.000000 2.703754 2.293520 1.289810   3.263110 2.651067
## Alaska     2.703754 0.000000 2.700643 2.826039   3.012541 2.326519
## Arizona    2.293520 2.700643 0.000000 2.717758   1.310484 1.365031
## Arkansas   1.289810 2.826039 2.717758 0.000000   3.763641 2.831051
## California 3.263110 3.012541 1.310484 3.763641   0.000000 1.287619
## Colorado   2.651067 2.326519 1.365031 2.831051   1.287619 0.000000

7.2.3 Linkage

# Agglomerative clustering on the distance matrix with Ward's (D2) linkage.
res.hc <- hclust(res.dist, method = "ward.D2")

7.2.4 Dendrogram

library("factoextra")
# Draw the dendrogram with reduced label size.
fviz_dend(x = res.hc, cex = 0.5)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

7.3 Verify the cluster tree

# Cophenetic distances implied by the Ward tree, and their correlation with
# the original distances.
res.coph <- cophenetic(res.hc)
cor(x = res.dist, y = res.coph)
## [1] 0.6975266
# Same check for a tree built with average linkage.
res.hc2 <- hclust(d = res.dist, method = "average")
cor(x = res.dist, y = cophenetic(res.hc2))
## [1] 0.7180382

7.4 Cut the dendrogram into different groups

# Cut the Ward tree into four groups.
grp <- cutree(tree = res.hc, k = 4)
head(grp, n = 4)
##  Alabama   Alaska  Arizona Arkansas 
##        1        2        2        3
# Group sizes.
table(grp)
## grp
##  1  2  3  4 
##  7 12 19 12
# Names of the observations that fall in group 1.
rownames(df)[grp == 1]
## [1] "Alabama"        "Georgia"        "Louisiana"      "Mississippi"   
## [5] "North Carolina" "South Carolina" "Tennessee"
# Dendrogram cut into four groups: coloured branches and labels, with a
# rectangle drawn around each group.
fviz_dend(
  res.hc,
  k = 4,
  cex = 0.5,
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  color_labels_by_k = TRUE,
  rect = TRUE
)

# Scatter plot of the same four groups, outlined by convex hulls and without
# cluster-centre markers.
fviz_cluster(
  list(data = df, cluster = grp),
  palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  ellipse.type = "convex",
  repel = TRUE,
  show.clust.cent = FALSE,
  ggtheme = theme_minimal()
)

7.5 Cluster R package

library("cluster")

# Agglomerative nesting on the raw data; `stand = TRUE` asks agnes() to
# standardise the variables itself.
res.agnes <- agnes(
  x = USArrests,
  stand = TRUE,
  metric = "euclidean",
  method = "ward"
)

# Divisive analysis with the same standardisation and metric.
res.diana <- diana(
  x = USArrests,
  stand = TRUE,
  metric = "euclidean"
)

# Dendrogram of the agnes result, cut into four groups.
fviz_dend(res.agnes, cex = 0.6, k = 4)

8. Comparing Dendrograms

8.1 Data preparation

# Standardise the data, then keep a reproducible random subset of 10 states.
df <- scale(USArrests)
set.seed(123)
ss <- sample(seq_len(50), size = 10)
df <- df[ss, ]

8.2 Comparing dendrograms

library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
# Euclidean distances between the sampled observations.
res.dist <- dist(x = df, method = "euclidean")

# Two hierarchical clusterings of the same distances, converted to
# dendrogram objects.
hc1 <- hclust(d = res.dist, method = "average")
dend1 <- as.dendrogram(hc1)
hc2 <- hclust(d = res.dist, method = "ward.D2")
dend2 <- as.dendrogram(hc2)

# Bundle both trees for the comparison functions.
dend_list <- dendlist(dend1, dend2)

8.2.1 Visual comparison of two dendrograms

# Default tanglegram of the two trees.
tanglegram(dend1, dend2)

# Customised tanglegram. The title reports the entanglement computed from
# the trees themselves. Previously a misplaced parenthesis hard-coded
# "entanglement = 0.9" and passed the rounded value as a stray positional
# argument to tanglegram().
tanglegram(dend1, dend2,
           highlight_distinct_edges = FALSE,
           common_subtrees_color_lines = FALSE,
           common_subtrees_color_branches = TRUE,
           main = paste("entanglement =", round(entanglement(dend_list), 2)))

8.2.2 Correlation matrix between a list of dendrograms

# Correlation matrix between the trees, based on cophenetic distances.
cor.dendlist(dend_list,
             method = "cophenetic")
##           [,1]      [,2]
## [1,] 1.0000000 0.9925544
## [2,] 0.9925544 1.0000000
# Correlation matrix between the trees, based on Baker's gamma.
cor.dendlist(dend_list,
             method = "baker")
##           [,1]      [,2]
## [1,] 1.0000000 0.9895528
## [2,] 0.9895528 1.0000000
# The same two coefficients computed directly for the pair of trees.
cor_cophenetic(dend1, dend2)
## [1] 0.9925544
cor_bakers_gamma(dend1, dend2)
## [1] 0.9895528
# One dendrogram per linkage method, all built from the same data.
dend1 <- as.dendrogram(hclust(dist(df), method = "complete"))
dend2 <- as.dendrogram(hclust(dist(df), method = "single"))
dend3 <- as.dendrogram(hclust(dist(df), method = "average"))
dend4 <- as.dendrogram(hclust(dist(df), method = "centroid"))

# Named list of trees and their pairwise correlation matrix.
dend_list <- dendlist(
  "Complete" = dend1,
  "Single" = dend2,
  "Average" = dend3,
  "Centroid" = dend4
)
cors <- cor.dendlist(dend_list)

round(cors, digits = 2)
##          Complete Single Average Centroid
## Complete     1.00   0.46    0.45     0.30
## Single       0.46   1.00    0.23     0.17
## Average      0.45   0.23    1.00     0.31
## Centroid     0.30   0.17    0.31     1.00
library(corrplot)
## corrplot 0.92 loaded
# Pie-glyph correlation plot showing the lower triangle only.
corrplot(cors, method = "pie", type = "lower")

9. Visualizing Dendrograms

# Load the arrests data.
data(list = "USArrests")

# Euclidean distances on the standardised data, then Ward (D2) clustering.
dd <- dist(x = scale(USArrests), method = "euclidean")
hc <- hclust(d = dd, method = "ward.D2")

9.1 Visualizing dendrograms

library("factoextra")

# Plain dendrogram with reduced label size.
fviz_dend(x = hc, cex = 0.5)

# Custom title and axis labels, with the subtitle blanked.
fviz_dend(
  hc,
  cex = 0.5,
  main = "Dendrogram - ward.D2",
  xlab = "Objects",
  ylab = "Distance",
  sub = ""
)

# Horizontal layout.
fviz_dend(x = hc, cex = 0.5, horiz = TRUE)

# Four groups with a manual colour palette, coloured labels and a grey theme.
fviz_dend(
  hc,
  k = 4,
  cex = 0.5,
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  color_labels_by_k = TRUE,
  ggtheme = theme_gray()
)

# Four groups with the "jco" palette.
fviz_dend(hc, cex = 0.5, k = 4, k_colors = "jco")

# Horizontal, with filled rectangles around the four groups.
fviz_dend(
  hc,
  k = 4,
  cex = 0.4,
  horiz = TRUE,
  k_colors = "jco",
  rect = TRUE,
  rect_border = "jco",
  rect_fill = TRUE
)

# Circular layout.
fviz_dend(hc, cex = 0.5, k = 4, k_colors = "jco", type = "circular")

# igraph is needed for the phylogenic layout. library() stops with an error
# if the package is missing, whereas require() only returns FALSE and lets
# the script continue to a less obvious failure.
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Phylogenic-tree rendering of the four groups.
fviz_dend(hc, k = 4, k_colors = "jco",
          type = "phylogenic", repel = TRUE)

# Load igraph with library() so a missing package is reported immediately
# (require() would only return FALSE).
library(igraph)
# Phylogenic-tree rendering with an explicit layout name passed through
# `phylo_layout`.
fviz_dend(hc, k = 4,
          k_colors = "jco",
          type = "phylogenic", repel = TRUE,
          phylo_layout = "layout.gem")

9.2 Case of dendrogram with large data sets

9.2.1 Zooming in the dendrogram

# Zoom in by restricting both axes.
fviz_dend(
  hc,
  xlim = c(1, 20),
  ylim = c(1, 8)
)

9.2.2 Plotting a sub-tree of dendrograms

# Build the plot for four groups without printing it.
dend_plot <- fviz_dend(hc, k = 4, cex = 0.5, k_colors = "jco")
# The dendrogram object is carried on the plot as an attribute.
dend_data <- attr(dend_plot, "dendrogram")

# Split the tree at height 10 into an upper part and a list of lower
# sub-trees.
dend_cuts <- cut(dend_data, h = 10)

# Plot the part above the cut.
fviz_dend(dend_cuts[["upper"]])
## Warning in min(-diff(our_dend_heights)): ningún argumento finito para min;
## retornando Inf

# Full dendrogram for reference.
print(dend_plot)

# First and second sub-trees below the cut.
fviz_dend(dend_cuts[["lower"]][[1]], main = "Subtree 1")

fviz_dend(dend_cuts[["lower"]][[2]], main = "Subtree 2")

# Second sub-tree again, in circular layout.
fviz_dend(dend_cuts[["lower"]][[2]], type = "circular")

9.2.3 Saving dendrogram into a large PDF page

# Open a wide PDF device, draw the four-group dendrogram on it, and close
# the device so the file is written.
pdf(file = "dendrogram.pdf", width = 30, height = 15)
p <- fviz_dend(hc, k = 4, cex = 1, k_colors = "jco")
print(p)
dev.off()
## png 
##   2

9.3 Manipulating dendrograms using dendextend

# Standardise, compute distances, cluster with Ward (D2) linkage, and
# convert the result to a dendrogram object for base plotting.
data <- scale(USArrests)
dist.res <- dist(x = data)
hc <- hclust(d = dist.res, method = "ward.D2")
dend <- as.dendrogram(hc)
plot(dend)

library(dendextend)
# Same chain of steps, restricted to the first five states.
dend <- as.dendrogram(
  hclust(dist(scale(USArrests[1:5, ])), method = "ward.D2")
)
plot(dend)

library(dendextend)

# Palette shared by branches and labels.
mycols <- c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07")

# Set branch width, branch and label colours for four groups, and label
# size on the dendrogram before plotting it.
dend <- as.dendrogram(hc) %>%
  set("branches_lwd", 1) %>%
  set("branches_k_color", value = mycols, k = 4) %>%
  set("labels_colors", value = mycols, k = 4) %>%
  set("labels_cex", 0.5)

fviz_dend(dend)

10. Heatmap: Static and Interactive

10.2 Data preparation

# Standardise every mtcars column so the heatmap colours are comparable
# across variables.
df <- scale(mtcars, center = TRUE, scale = TRUE)

10.3 R base heatmap: heatmap()

# Base heatmap of the already-standardised data (no further scaling).
heatmap(df, scale = "none")

# Colour palette for the second heatmap. A hand-made alternative would be
#   colorRampPalette(c("red", "white", "blue"))(256)
# The previous version assigned that alternative and then immediately
# overwrote it, and repeated the RColorBrewer lines twice.
library("RColorBrewer")
col <- colorRampPalette(brewer.pal(10, "RdYlBu"))(256)

# Side colour bars: two blocks of 16 rows, and 5 + 6 columns.
heatmap(df, scale = "none", col = col,
        RowSideColors = rep(c("blue", "pink"), each = 16),
        ColSideColors = c(rep("purple", 5), rep("orange", 6)))

10.4 Enhanced heat maps: heatmap.2()

library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Enhanced heatmap: blue-to-red palette, with the trace lines and the
# density info in the colour key switched off.
heatmap.2(
  df,
  scale = "none",
  col = bluered(100),
  trace = "none",
  density.info = "none"
)

10.5 Pretty heat maps: pheatmap()

library("pheatmap")
# Pretty heatmap with the row dendrogram cut into four groups.
pheatmap(df, cutree_rows = 4)

10.7 Enhancing heatmaps using dendextend

library(dendextend)

# Row dendrogram: cluster the standardised cars, colour three branch groups,
# thicken the branches and ladderize the tree.
Rowv <- mtcars %>%
  scale() %>%
  dist() %>%
  hclust() %>%
  as.dendrogram() %>%
  set("branches_k_color", k = 3) %>%
  set("branches_lwd", 1.2) %>%
  ladderize()

# Column dendrogram: same steps on the transposed matrix, with two branch
# groups in explicit colours.
Colv <- mtcars %>%
  scale() %>%
  t() %>%
  dist() %>%
  hclust() %>%
  as.dendrogram() %>%
  set("branches_k_color", k = 2, value = c("orange", "blue")) %>%
  set("branches_lwd", 1.2) %>%
  ladderize()

# Base heatmap using the customised dendrograms.
heatmap(scale(mtcars), Rowv = Rowv, Colv = Colv, scale = "none")

library(gplots)
# The same dendrograms with heatmap.2().
heatmap.2(
  scale(mtcars),
  scale = "none",
  col = bluered(100),
  Rowv = Rowv,
  Colv = Colv,
  trace = "none",
  density.info = "none"
)

10.8 Complex heatmap

library(devtools)
## Loading required package: usethis
# Install ComplexHeatmap from GitHub only when it is not already available,
# so that re-running the script does not contact GitHub or require build
# tools every time.
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  install_github("jokergoo/ComplexHeatmap")
}

10.8.1 Simple heatmap

library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.15.4
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
##    in the original pheatmap() are identically supported in the new function. You 
##    can still use the original function by explicitly calling pheatmap::pheatmap().
## 
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
## 
##     pheatmap
# ComplexHeatmap's function is Heatmap() (capital H). The base heatmap()
# used before does not know these arguments and only emitted "is not a
# graphical parameter" warnings. The row title argument is `row_title`
# (lower case).
Heatmap(df,
        name = "mtcars",
        column_title = "Variables", row_title = "Samples",
        row_names_gp = gpar(fontsize = 7)
        )

library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
## 
## Attaching package: 'circlize'
## The following object is masked from 'package:igraph':
## 
##     degree
# Colour mapping function: -2 -> green, 0 -> white, 2 -> red.
mycols <- colorRamp2(breaks = c(-2, 0, 2),
                     colors = c("green", "white", "red"))

# A colorRamp2() function is only understood by ComplexHeatmap::Heatmap();
# base heatmap() rejected it ("supplied color is neither numeric nor
# character").
Heatmap(df, name = "mtcars", col = mycols)
library("circlize")
library("RColorBrewer")
# Same heatmap with an RColorBrewer palette mapped onto -2 / 0 / 2.
# Uses ComplexHeatmap::Heatmap(); base heatmap() cannot take `name` or a
# colorRamp2() colour function.
Heatmap(df, name = "mtcars",
        col = colorRamp2(c(-2, 0, 2), brewer.pal(n = 3, name = "RdBu")))

library(dendextend)
# Cluster rows and columns up front so the branches can be coloured.
row_dend <- hclust(dist(df))
col_dend <- hclust(dist(t(df)))
# Pass the coloured dendrograms to ComplexHeatmap::Heatmap(); base heatmap()
# ignored `cluster_rows` / `cluster_columns` with warnings.
Heatmap(df, name = "mtcars",
        row_names_gp = gpar(fontsize = 6.5),
        cluster_rows = color_branches(row_dend, k = 4),
        cluster_columns = color_branches(col_dend, k = 2))

10.8.2 Splitting heatmap by rows

# k-means splitting of the rows is random, so fix the seed first.
set.seed(2)
# Split the rows into two k-means groups. This needs Heatmap() and its `km`
# argument; the previous call used base heatmap() with a bare `k = 2`.
Heatmap(df, name = "mtcars", km = 2)

library(grid)
# Split the rows by number of cylinders. Uses ComplexHeatmap::Heatmap();
# base heatmap() ignored `split` and `row_names_gp` with warnings.
Heatmap(df, name = "mtcars", split = mtcars$cyl,
        row_names_gp = gpar(fontsize = 7))

# Split the rows by the combination of cylinders and transmission type.
# Uses ComplexHeatmap::Heatmap(); base heatmap() ignored `split`.
Heatmap(df, name = "mtcars",
        split = data.frame(cyl = mtcars$cyl, am = mtcars$am))

10.8.3 Heatmap annotation

# Transpose so that variables become rows and observations become columns.
df <- t(x = df)
10.8.3.3
# First heatmap object: rows split into two k-means groups, small column
# labels. Built with ComplexHeatmap::Heatmap() so that `ht1` holds a
# Heatmap object rather than base heatmap()'s return value.
ht1 <- Heatmap(df, name = "ht1", km = 2,
               column_names_gp = gpar(fontsize = 9))

# Second heatmap object with a green-white-red colour mapping. Built with
# ComplexHeatmap::Heatmap(); base heatmap() cannot use a colorRamp2()
# function as `col`.
ht2 <- Heatmap(df, name = "ht2",
               col = circlize::colorRamp2(c(-2, 0, 2),
                                          c("green", "white", "red")),
               column_names_gp = gpar(fontsize = 9))