I am using the mtcars data set to illustrate this.
# Turn the row names into a regular "car" column, then print a per-column
# summary of the resulting data frame.
mtcars <- tibble::rownames_to_column(mtcars, var = "car")
summary(object = mtcars)
car mpg cyl disp
Length:32 Min. :10.40 Min. :4.000 Min. : 71.1
Class :character 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8
Mode :character Median :19.20 Median :6.000 Median :196.3
Mean :20.09 Mean :6.188 Mean :230.7
3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0
Max. :33.90 Max. :8.000 Max. :472.0
hp drat wt qsec
Min. : 52.0 Min. :2.760 Min. :1.513 Min. :14.50
1st Qu.: 96.5 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89
Median :123.0 Median :3.695 Median :3.325 Median :17.71
Mean :146.7 Mean :3.597 Mean :3.217 Mean :17.85
3rd Qu.:180.0 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90
Max. :335.0 Max. :4.930 Max. :5.424 Max. :22.90
vs am gear carb
Min. :0.0000 Min. :0.0000 Min. :3.000 Min. :1.000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
Median :0.0000 Median :0.0000 Median :4.000 Median :2.000
Mean :0.4375 Mean :0.4062 Mean :3.688 Mean :2.812
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :1.0000 Max. :1.0000 Max. :5.000 Max. :8.000
library(lares)
Ranked cross correlation:
# Ranked cross-correlation plot (type = 1), filtered by max_pvalue and limited
# to the top 20 results.
corr_cross(
  df = mtcars,
  max_pvalue = 0.05,
  type = 1,
  top = 20,
  grid = FALSE # `FALSE` spelled out: `F` is an ordinary, reassignable variable
)
Local cross correlation:
# Local cross-correlation plot (type = 2), filtered by the same p-value cut-off.
corr_cross(df = mtcars, type = 2, local = 3, max_pvalue = 0.05)
Correlation with a specific variable:
# Correlations against a single target variable: first cyl, then qsec.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
corr_var(df = mtcars, var = cyl, ranks = FALSE)
corr_var(df = mtcars, var = qsec, ranks = FALSE)
library(ggcorrplot)
Stores a matrix containing the p-values of correlations:
# Keep only the numeric columns. vapply() always yields a logical mask,
# whereas the return type of sapply() depends on its input.
p_mat <- ggcorrplot::cor_pmat(
  mtcars[, vapply(mtcars, is.numeric, logical(1))]
)
The cor_pmat function is also available in
the rstatix package.
Stores a correlation matrix:
# Correlation matrix of the numeric columns only. vapply() always yields a
# logical mask, whereas the return type of sapply() depends on its input.
corr_mat <- cor(mtcars[, vapply(mtcars, is.numeric, logical(1))])
A simple correlogram:
# Default correlogram of the correlation matrix.
ggcorrplot(corr = corr_mat)
Only lower part:
# Draw only the lower triangle of the matrix.
ggcorrplot(type = "lower", corr = corr_mat)
Order by hierarchical clustering (with the p-value matrix supplied):
# Lower triangle, reordered via hc.order, with the p-value matrix supplied.
ggcorrplot(
  corr = corr_mat,
  p.mat = p_mat,
  hc.order = TRUE,
  type = "lower"
)
Polished further:
# Customised lower-triangle correlogram: own colours, title, legend title,
# theme, and lab = TRUE.
ggcorrplot(
  corr = corr_mat,
  method = "square",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  colors = c("#91412d", "white", "#45839a"),
  outline.color = "white",
  legend.title = "Correlation",
  title = "Correlogram of mtcars dataset",
  ggtheme = ggplot2::theme_minimal
)
Cross out insignificant correlation values:
# Supplying p.mat lets ggcorrplot mark the insignificant correlations.
ggcorrplot(
  corr = corr_mat,
  p.mat = p_mat,
  method = "square",
  type = "lower",
  hc.order = TRUE
)
Using circle type:
# Same customised correlogram, drawn with circles instead of squares.
ggcorrplot(
  corr = corr_mat,
  method = "circle",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  colors = c("#91412d", "white", "#45839a"),
  outline.color = "white",
  ggtheme = ggplot2::theme_minimal
)
Learn more at: https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
library(corrplot)
Stores a correlation matrix:
# Correlation matrix of the numeric columns, drawn with corrplot defaults.
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
corr_mat <- cor(mtcars[, vapply(mtcars, is.numeric, logical(1))])
corrplot(corr_mat)
Mixed correlogram:
# Mixed correlogram: numbers in the lower triangle, squares in the upper one.
corrplot.mixed(
  corr = corr_mat,
  lower = "number",
  upper = "square",
  tl.col = "black", # colour of the text labels
  addgrid.col = "black" # colour of the grid
)
Ordered correlogram:
# Compute the 'FPC' ordering, then draw the mixed correlogram with rows and
# columns rearranged in that order.
corr_mat_ord <- corrMatOrder(corr = corr_mat, order = "FPC")
corrplot.mixed(
  corr = corr_mat[corr_mat_ord, corr_mat_ord],
  lower = "number",
  upper = "square",
  tl.col = "black", # colour of the text labels
  addgrid.col = "black" # colour of the grid
)
library(ggstatsplot)
# Lower-triangle correlation matrix drawn with ggstatsplot.
ggcorrmat(
  data = mtcars,
  type = "parametric",
  matrix.type = "lower",
  colors = c("#B2182B", "white", "#4D4D4D"),
  title = "Correlogram for mtcars", # fixed typo ("Correlalogram")
  pch = "" # empty symbol, so that no cell is crossed out
)
This function has a sig.level argument that does not
work as expected, so I have added pch = "" so that no cell is
crossed out.
# Scatter-plot matrix of the first four iris columns, coloured by species.
pairs(
  x = iris[, 1:4],
  labels = gsub("[[:punct:]]", " ", colnames(iris[, 1:4])), # replace the full stops in the feature names with spaces
  upper.panel = NULL, # do not display the upper panel
  pch = 23, # shape of the points
  cex = 1.5, # size of the points
  col = as.numeric(iris$Species) + 1, # outline colour of the points
  bg = scales::alpha(as.numeric(iris$Species) + 1, 0.3) # fill colour of the points, at reduced intensity
)
Suitable for a small number of variables only.
library(GGally)
# Labelled correlation matrix from GGally.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
ggcorr(
  mtcars,
  label = TRUE,
  hjust = 1,
  layout.exp = 2,
  size = 3,
  label_size = 2
)
# Pairs plot of columns 2 to 6 with plain scatter plots in the lower panels.
ggpairs(
  data = mtcars[, 2:6],
  lower = list(combo = "dot_no_facet", continuous = "points"),
  title = "Correlogram of mtcars dataset"
)
# Same pairs plot with smaller correlation text, axis text and strip text.
ggpairs(
  data = mtcars[, 2:6],
  upper = list(continuous = wrap("cor", size = 2)),
  title = "Correlogram of mtcars dataset"
) +
  theme(
    strip.text = element_text(size = 5),
    axis.text = element_text(colour = "black", size = 4)
  )
Custom functions to be used in the diagonal, upper and lower portions of the correlogram.
# Function for the diagonal: a density-scaled histogram with a density curve
# drawn on top.
#   data     - data for the panel
#   mapping  - aesthetic mapping for the panel
#   binwidth - bin width handed to geom_histogram()
#   ...      - accepted but not used
my_bin <- function(data, mapping, binwidth, ...) {
  ggplot(data = data, mapping = mapping) +
    # after_stat(density) replaces the deprecated `..density..` notation
    geom_histogram(
      aes(y = after_stat(density)),
      binwidth = binwidth, fill = "#298dff", alpha = 0.2
    ) +
    geom_density(color = "red") +
    theme_classic()
}
# Function for the upper portion: prints the correlation coefficient as text.
#   data, mapping - panel data and mapping; mapping$x and mapping$y select
#                   the two columns to correlate
#   method        - correlation method handed to cor()
#   symbol        - prefix pasted in front of the rounded coefficient
#   ...           - forwarded to ggally_text()
upperfun <- function(data, mapping, method, symbol, ...) {
  x_values <- eval_data_col(data, mapping$x)
  y_values <- eval_data_col(data, mapping$y)

  # Coefficient rounded to two decimals; incomplete observations are dropped
  coefficient <- round(
    cor(x_values, y_values, method = method, use = 'complete.obs'),
    2
  )
  panel_label <- paste(symbol, coefficient)

  ggally_text(
    label = panel_label,
    mapping = aes(),
    xP = 0.5,
    yP = 0.5,
    color = 'black',
    ...
  ) +
    theme_classic()
}
# Function for the lower portion: scatter plot with a fitted linear trend.
#   data    - data for the panel
#   mapping - aesthetic mapping for the panel
lowerfun <- function(data, mapping) {
  ggplot(data = data, mapping = mapping) +
    geom_point(alpha = 1, color = "#298dff", pch = 20) +
    # `linewidth` replaces `size` for lines (`size` is deprecated for line
    # geoms since ggplot2 3.4.0)
    geom_smooth(
      method = "lm", formula = y ~ x,
      color = "red", linewidth = 0.5
    ) +
    theme_classic()
}
# Pairs plot assembled from the three custom panel functions, with the title
# shown as a centred, bold caption.
ggpairs(
  data = mtcars[, 2:6],
  lower = list(continuous = wrap(lowerfun)),
  upper = list(
    continuous = wrap(upperfun, method = 'pearson', symbol = expression('r ='))
  ),
  diag = list(continuous = wrap(my_bin, binwidth = 10))
) +
  theme(plot.caption = element_text(face = "bold", hjust = 0.5)) +
  labs(caption = "Correlogram of mtcars dataset")
library(PerformanceAnalytics)
# Correlation chart of the numeric columns. vapply() always yields a logical
# mask, whereas the return type of sapply() depends on its input.
chart.Correlation(
  mtcars[, vapply(mtcars, is.numeric, logical(1))],
  histogram = TRUE,
  pch = 19
)
library(rstatix)
Correlation Matrix:
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
rstatix::cor_mat(mtcars[, vapply(mtcars, is.numeric, logical(1))])
# A tibble: 11 × 12
rowname mpg cyl disp hp drat wt qsec vs am gear carb
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 mpg 1 -0.85 -0.85 -0.78 0.68 -0.87 0.42 0.66 0.6 0.48 -0.55
2 cyl -0.85 1 0.9 0.83 -0.7 0.78 -0.59 -0.81 -0.52 -0.49 0.53
3 disp -0.85 0.9 1 0.79 -0.71 0.89 -0.43 -0.71 -0.59 -0.56 0.39
4 hp -0.78 0.83 0.79 1 -0.45 0.66 -0.71 -0.72 -0.24 -0.13 0.75
5 drat 0.68 -0.7 -0.71 -0.45 1 -0.71 0.091 0.44 0.71 0.7 -0.091
6 wt -0.87 0.78 0.89 0.66 -0.71 1 -0.17 -0.55 -0.69 -0.58 0.43
7 qsec 0.42 -0.59 -0.43 -0.71 0.091 -0.17 1 0.74 -0.23 -0.21 -0.66
8 vs 0.66 -0.81 -0.71 -0.72 0.44 -0.55 0.74 1 0.17 0.21 -0.57
9 am 0.6 -0.52 -0.59 -0.24 0.71 -0.69 -0.23 0.17 1 0.79 0.058
10 gear 0.48 -0.49 -0.56 -0.13 0.7 -0.58 -0.21 0.21 0.79 1 0.27
11 carb -0.55 0.53 0.39 0.75 -0.091 0.43 -0.66 -0.57 0.058 0.27 1
P-value matrix of correlations:
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
rstatix::cor_pmat(mtcars[, vapply(mtcars, is.numeric, logical(1))])
# A tibble: 11 × 12
rowname mpg cyl disp hp drat wt qsec
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 mpg 0 6.11e-10 9.38e-10 0.000000179 0.0000178 1.29e- 10 1.71e-2
2 cyl 6.11e-10 0 1.80e-12 0.00000000348 0.00000824 1.22e- 7 3.66e-4
3 disp 9.38e-10 1.80e-12 0 0.0000000714 0.00000528 1.22e- 11 1.31e-2
4 hp 1.79e- 7 3.48e- 9 7.14e- 8 0 0.00999 4.15e- 5 5.77e-6
5 drat 1.78e- 5 8.24e- 6 5.28e- 6 0.00999 0 4.78e- 6 6.2 e-1
6 wt 1.29e-10 1.22e- 7 1.22e-11 0.0000415 0.00000478 2.27e-236 3.39e-1
7 qsec 1.71e- 2 3.66e- 4 1.31e- 2 0.00000577 0.62 3.39e- 1 0
8 vs 3.42e- 5 1.84e- 8 5.24e- 6 0.00000294 0.0117 9.8 e- 4 1.03e-6
9 am 2.85e- 4 2.15e- 3 3.66e- 4 0.18 0.00000473 1.13e- 5 2.06e-1
10 gear 5.4 e- 3 4.17e- 3 9.64e- 4 0.493 0.00000836 4.59e- 4 2.43e-1
11 carb 1.08e- 3 1.94e- 3 2.53e- 2 0.000000783 0.621 1.46e- 2 4.54e-5
# ℹ 4 more variables: vs <dbl>, am <dbl>, gear <dbl>, carb <dbl>
# Correlation matrix as a tibble, reshaped to long form: one row per pair of
# variables, with the coefficient in column `r`.
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
corr_mat <- rstatix::cor_mat(mtcars[, vapply(mtcars, is.numeric, logical(1))])
corr_mat_df <- tidyr::pivot_longer(
  corr_mat,
  cols = -rowname,
  names_to = "variables",
  values_to = "r"
)
head(corr_mat_df)
# A tibble: 6 × 3
rowname variables r
<chr> <chr> <dbl>
1 mpg mpg 1
2 mpg cyl -0.85
3 mpg disp -0.85
4 mpg hp -0.78
5 mpg drat 0.68
6 mpg wt -0.87
# Tile heat map of the long-form correlation table, labelled with r.
ggplot(corr_mat_df, aes(rowname, variables, fill = r)) +
  geom_tile() +
  labs(x = "variables", y = "variables") +
  # Correlations are signed, so use a diverging scale centred on 0 and fixed
  # to [-1, 1]; a plain two-colour gradient has no neutral midpoint.
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limits = c(-1, 1)
  ) +
  geom_text(aes(label = r), size = 3)
# Base-graphics heat map of the correlation matrix with a 20-step
# blue-white-red palette.
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
corr_mat <- cor(mtcars[, vapply(mtcars, is.numeric, logical(1))])
col <- colorRampPalette(c("blue", "white", "red"))(20)
heatmap(corr_mat, col = col, symm = TRUE)
# Level plot of the correlations among a hand-picked subset of columns.
lattice::levelplot(
  x = cor(mtcars[c('cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs')]),
  pretty = TRUE
)
library(sjPlot)
# Lower-triangle correlation table of the numeric iris columns with numeric
# p-values. `TRUE` is spelled out (`T` is reassignable), and vapply() always
# yields a logical mask, unlike sapply().
tab_corr(
  iris[, vapply(iris, is.numeric, logical(1))],
  show.p = TRUE,
  p.numeric = TRUE,
  title = "Table 1: Correlation Matrix",
  var.labels = c("Sepal Length", "Sepal Width", "Petal Length", "Petal Width"),
  triangle = "lower"
)
| | Sepal Length | Sepal Width | Petal Length | Petal Width |
|---|---|---|---|---|
| Sepal Length | | | | |
| Sepal Width | -0.118 (.152) | | | |
| Petal Length | 0.872 (<.001) | -0.428 (<.001) | | |
| Petal Width | 0.818 (<.001) | -0.366 (<.001) | 0.963 (<.001) | |
| Computed correlation used pearson-method with listwise-deletion. | | | | |
library(correlation)
# Pairwise correlations of the numeric iris columns, rendered as a table.
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
correlation(iris[, vapply(iris, is.numeric, logical(1))]) |>
  kableExtra::kable()
| Parameter1 | Parameter2 | r | CI | CI_low | CI_high | t | df_error | p | Method | n_Obs |
|---|---|---|---|---|---|---|---|---|---|---|
| Sepal.Length | Sepal.Width | -0.1175698 | 0.95 | -0.2726932 | 0.0435116 | -1.440287 | 148 | 0.1518983 | Pearson correlation | 150 |
| Sepal.Length | Petal.Length | 0.8717538 | 0.95 | 0.8270363 | 0.9055080 | 21.646019 | 148 | 0.0000000 | Pearson correlation | 150 |
| Sepal.Length | Petal.Width | 0.8179411 | 0.95 | 0.7568971 | 0.8648361 | 17.296454 | 148 | 0.0000000 | Pearson correlation | 150 |
| Sepal.Width | Petal.Length | -0.4284401 | 0.95 | -0.5508771 | -0.2879499 | -5.768449 | 148 | 0.0000001 | Pearson correlation | 150 |
| Sepal.Width | Petal.Width | -0.3661259 | 0.95 | -0.4972130 | -0.2186966 | -4.786461 | 148 | 0.0000081 | Pearson correlation | 150 |
| Petal.Length | Petal.Width | 0.9628654 | 0.95 | 0.9490525 | 0.9729853 | 43.387237 | 148 | 0.0000000 | Pearson correlation | 150 |
# Compute the pairwise correlations once and reuse them for all three plots.
# vapply() always yields a logical mask, whereas the return type of sapply()
# depends on its input.
iris_corr <- correlation(iris[, vapply(iris, is.numeric, logical(1))])

# Plot of the summary requested with redundant = TRUE, at 3 digits
iris_corr |>
  summary(redundant = TRUE, digits = 3) |>
  visualisation_recipe() |>
  plot()

# Plot of the matrix form after cor_sort()
iris_corr |>
  as.matrix() |>
  cor_sort() |>
  visualisation_recipe() |>
  plot()

# Plot of the correlation result itself
iris_corr |>
  visualisation_recipe() |>
  plot()