Source: https://towardsdatascience.com/beautiful-correlation-plots-in-r-a-new-approach-d3b93d9c77be
Heatmaps and coloured corrplots live somewhere between science and art.
Learn to adjust plotly objects according to your needs by exploring this script (see the source).
Note how much the steps needed to adjust the look of this plot differ from the usual ggplot2 routine.
library(plotly)
library(data.table)
# Work on the built-in USArrests data set and compute the pairwise
# correlation matrix of its columns (cor() defaults, i.e. Pearson).
data <- datasets::USArrests
corrdata <- stats::cor(x = data)
# Print a per-column overview of the raw data (type, basic statistics,
# distinct-value counts, text histogram, valid/missing counts).
# The call is left exactly as written: the printed header shows the name
# of the data frame as it appears in the call.
library(summarytools)
dfSummary(USArrests)
## Data Frame Summary
## USArrests
## Dimensions: 50 x 4
## Duplicates: 0
##
## -----------------------------------------------------------------------------------------------------------
## No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
## ---- ----------- --------------------------- -------------------- -------------------- ---------- ---------
## 1 Murder Mean (sd) : 7.8 (4.4) 43 distinct values : 50 0
## [numeric] min < med < max: : . (100.0%) (0.0%)
## 0.8 < 7.2 < 17.4 : : : : .
## IQR (CV) : 7.2 (0.6) : : : : . :
## . : : : : : : : :
##
## 2 Assault Mean (sd) : 170.8 (83.3) 45 distinct values : 50 0
## [integer] min < med < max: : : : (100.0%) (0.0%)
## 45 < 159 < 337 : : : : :
## IQR (CV) : 140 (0.5) : : : : :
## : : : : : : :
##
## 3 UrbanPop Mean (sd) : 65.5 (14.5) 36 distinct values : . 50 0
## [integer] min < med < max: : : : (100.0%) (0.0%)
## 32 < 66 < 91 : : : : :
## IQR (CV) : 23.2 (0.2) : : : : :
## : : : : : : .
##
## 4 Rape Mean (sd) : 21.2 (9.4) 48 distinct values : 50 0
## [numeric] min < med < max: . : : . (100.0%) (0.0%)
## 7.3 < 20.1 < 46 : : : :
## IQR (CV) : 11.1 (0.4) : : : : :
## : : : : : : : : .
## -----------------------------------------------------------------------------------------------------------
# Draw a pairs plot of all columns as a quick visual check of the
# relationships before building the custom correlation plot.
library(GGally)
ggpairs(data = USArrests)
Get rid of the upper triangle (and the diagonal) of the correlation matrix
# Blank out the upper triangle together with the diagonal, so that every
# pair of variables is kept exactly once.
corrdata[upper.tri(corrdata, diag = TRUE)] <- NA

# The first row and the last column are now entirely NA, so drop them.
# drop = FALSE keeps the result a matrix even when a single row/column
# remains (data with only two variables); without it the subset would
# collapse to a bare vector and lose its row/column names.
corrdata <- corrdata[-1, -ncol(corrdata), drop = FALSE]
Store variable names for later use
# Remember the variable names before they are overwritten with numeric
# grid positions; they are used later as tick labels.
# dimnames()[[1]] holds the row names, dimnames()[[2]] the column names.
y_labels <- dimnames(corrdata)[[1L]]
x_labels <- dimnames(corrdata)[[2L]]
Change the variable names to numeric for the grid
# Replace the variable names with grid positions.
# Columns count upwards (1, 2, ...); rows count downwards (n, ..., 1) so
# that the first row of the matrix receives the largest y position.
# seq_len() is used instead of 1:n so a zero-length dimension yields an
# empty sequence rather than c(1, 0).
colnames(corrdata) <- seq_len(ncol(corrdata))
rownames(corrdata) <- rev(seq_len(nrow(corrdata)))
head(corrdata)
## 1 2 3
## 3 0.80187331 NA NA
## 2 0.06957262 0.2588717 NA
## 1 0.56357883 0.6652412 0.4113412
Melt the data into the desired, tidy format
# Reshape the matrix into long format: one row per matrix cell with the
# row position in Var1, the column position in Var2 and the correlation
# in value (NA for the cells that were blanked out).
plotdata <- reshape2::melt(data = corrdata)
head(plotdata, n = 6L)
## Var1 Var2 value
## 1 3 1 0.80187331
## 2 2 1 0.06957262
## 3 1 1 0.56357883
## 4 3 2 NA
## 5 2 2 0.25887170
## 6 1 2 0.66524123
Add a size variable and scale it
# Marker size is proportional to the absolute correlation.
# The factor is half of one grid cell, where a cell is 500 (the figure
# width passed to plot_ly) divided by the number of columns.
scaling <- 500 / ncol(corrdata) / 2
plotdata[["size"]] <- abs(plotdata[["value"]]) * scaling
Setting x and y ranges for the chart
# Axis limits: the cells sit on integer positions 1..n, so extend the
# range by half a cell on both sides (0.5 .. n + 0.5).
xrange <- c(0, length(x_labels)) + 0.5
yrange <- c(0, length(y_labels)) + 0.5
Set the gridlines
# Grid lines go halfway between neighbouring cells: 1.5, 2.5, ..., n - 0.5.
# Built from seq_len() so that a single label (n = 1) gives an empty
# vector; seq(1.5, n - 0.5, 1) would stop with "wrong sign in 'by'
# argument" in that case. max(..., 0L) guards against zero labels.
x_grid <- seq_len(max(length(x_labels) - 1L, 0L)) + 0.5
y_grid <- seq_len(max(length(y_labels) - 1L, 0L)) + 0.5
Define axes for plotly
# Axis definitions passed to plotly::layout().
# Each direction gets two axes:
#   *Ax1 - the visible axis: tick labels show the variable names, no grid.
#   *Ax2 - an overlaying axis with hidden tick labels whose ticks sit
#          between the cells, used only for its grid lines.

# Primary x axis: one tick per column position, labelled with the
# original column names.
xAx1 <- list(
  showgrid = FALSE,
  showline = FALSE,
  zeroline = FALSE,
  tickvals = colnames(corrdata),
  ticktext = x_labels,
  title = "",
  range = xrange,
  rangemode = "tozero"
)

# Secondary x axis: laid over "x", grid lines at the x_grid positions.
xAx2 <- list(
  showgrid = TRUE,
  showline = FALSE,
  zeroline = FALSE,
  overlaying = "x",
  showticklabels = FALSE,
  range = xrange,
  tickvals = x_grid
)

# Primary y axis: one tick per row position, labelled with the original
# row names.
# `autorange` replaces the former `autoaxis`, which is not a plotly axis
# attribute; the range is fixed explicitly below.
# The empty title is given as "" to match the x axis.
yAx1 <- list(
  autorange = FALSE,
  showgrid = FALSE,
  showline = FALSE,
  zeroline = FALSE,
  tickvals = rownames(corrdata),
  ticktext = y_labels,
  title = "",
  rangemode = "tozero",
  range = yrange
)

# Secondary y axis: laid over "y", grid lines at the y_grid positions.
yAx2 <- list(
  showgrid = TRUE,
  showline = FALSE,
  zeroline = FALSE,
  overlaying = "y",
  showticklabels = FALSE,
  range = yrange,
  tickvals = y_grid
)
Start drawing
# Build the figure in a single pipeline.
fig <- plot_ly(data = plotdata, width = 500, height = 500) %>%
  # Visible layer: one square per matrix cell, coloured by the correlation
  # and sized by its scaled absolute value; hovering shows the value
  # rounded to two decimals.
  add_trace(
    x = ~Var2,
    y = ~Var1,
    type = "scatter",
    mode = "markers",
    color = ~value,
    marker = list(size = ~size, opacity = 1),
    symbol = I("square"),
    text = ~value,
    hovertemplate = "%{text:.2f} <extra></extra>",
    xaxis = "x1",
    yaxis = "y1"
  ) %>%
  # Invisible layer: the same points, fully transparent and without hover
  # or legend entry, attached to the secondary axes.
  # NOTE(review): presumably needed so that x2/y2 (and their grid lines)
  # get drawn at all - confirm.
  add_trace(
    x = ~Var2,
    y = ~Var1,
    type = "scatter",
    mode = "markers",
    opacity = 0,
    showlegend = FALSE,
    xaxis = "x2",
    yaxis = "y2",
    hoverinfo = "none"
  ) %>%
  # Attach the four axis definitions and set the background colours.
  layout(
    xaxis = xAx1,
    yaxis = yAx1,
    xaxis2 = xAx2,
    yaxis2 = yAx2,
    plot_bgcolor = "rgba(0,0,0,0)",
    paper_bgcolor = "rgba(0, 0, 0, 0.03)"
  ) %>%
  # Fix the colour scale to the full correlation range and position the
  # untitled colour bar.
  colorbar(title = "", limits = c(-1, 1), x = 1.1, y = 0.75)

fig
See how the colour of each square tells you the sign and value of the correlation, while its size tells you the magnitude (a printed number would be more precise, though).
Define fonts and axis labels
# Font shared by both axis titles.
f <- list(
  family = "Helvetica, monospace",
  size = 18,
  color = "#7f7f7f"
)

# Axis title settings for layout().
# The title is given in the nested form title = list(text =, font =);
# the flat `titlefont` attribute used previously is deprecated in plotly.
x <- list(
  title = list(text = "x Axis", font = f)
)
y <- list(
  title = list(text = "y Axis", font = f)
)
Add a title
# Add the main title and the axis title settings to the existing figure,
# then display it.
fig <- layout(
  fig,
  title = "A Whistles-and-Blows Artsy Corrplot",
  xaxis = x,
  yaxis = y
)
fig
Profit!