Correlation Plot
## Loading required package: ggplot2
## corrplot 0.95 loaded
Data
This case uses the mtcars dataset, one of the classic built-in datasets in R. It is widely used for teaching statistics, regression analysis, and data visualization. Historically, the data comes from a 1974 issue of Motor Trend, which tested several car models. The dataset contains fuel consumption and design characteristics for 32 automobiles produced in the early 1970s. In simple terms, each row represents one car, and each column represents a variable describing that car.
## function (..., list = character(), package = NULL, lib.loc = NULL,
## verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
## {
## fileExt <- function(x) {
## db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
## ans <- sub(".*\\.", "", x)
## ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2",
## x[db])
## ans
## }
## my_read_table <- function(...) {
## lcc <- Sys.getlocale("LC_COLLATE")
## on.exit(Sys.setlocale("LC_COLLATE", lcc))
## Sys.setlocale("LC_COLLATE", "C")
## read.table(...)
## }
## stopifnot(is.character(list))
## names <- c(as.character(substitute(list(...))[-1L]), list)
## if (!is.null(package)) {
## if (!is.character(package))
## stop("'package' must be a character vector or NULL")
## }
## paths <- find.package(package, lib.loc, verbose = verbose)
## if (is.null(lib.loc))
## paths <- c(path.package(package, TRUE), if (!length(package)) getwd(),
## paths)
## paths <- unique(normalizePath(paths[file.exists(paths)]))
## paths <- paths[dir.exists(file.path(paths, "data"))]
## dataExts <- tools:::.make_file_exts("data")
## if (length(names) == 0L) {
## db <- matrix(character(), nrow = 0L, ncol = 4L)
## for (path in paths) {
## entries <- NULL
## packageName <- if (file_test("-f", file.path(path,
## "DESCRIPTION")))
## basename(path)
## else "."
## if (file_test("-f", INDEX <- file.path(path, "Meta",
## "data.rds"))) {
## entries <- readRDS(INDEX)
## }
## else {
## dataDir <- file.path(path, "data")
## entries <- tools::list_files_with_type(dataDir,
## "data")
## if (length(entries)) {
## entries <- unique(tools::file_path_sans_ext(basename(entries)))
## entries <- cbind(entries, "")
## }
## }
## if (NROW(entries)) {
## if (is.matrix(entries) && ncol(entries) == 2L)
## db <- rbind(db, cbind(packageName, dirname(path),
## entries))
## else warning(gettextf("data index for package %s is invalid and will be ignored",
## sQuote(packageName)), domain = NA, call. = FALSE)
## }
## }
## colnames(db) <- c("Package", "LibPath", "Item", "Title")
## footer <- if (missing(package))
## paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")),
## "\n", "to list the data sets in all *available* packages.")
## else NULL
## y <- list(title = "Data sets", header = NULL, results = db,
## footer = footer)
## class(y) <- "packageIQR"
## return(y)
## }
## paths <- file.path(paths, "data")
## for (name in names) {
## found <- FALSE
## for (p in paths) {
## tmp_env <- if (overwrite)
## envir
## else new.env()
## if (file_test("-f", file.path(p, "Rdata.rds"))) {
## rds <- readRDS(file.path(p, "Rdata.rds"))
## if (name %in% names(rds)) {
## found <- TRUE
## if (verbose)
## message(sprintf("name=%s:\t found in Rdata.rds",
## name), domain = NA)
## objs <- rds[[name]]
## lazyLoad(file.path(p, "Rdata"), envir = tmp_env,
## filter = function(x) x %in% objs)
## break
## }
## else if (verbose)
## message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n",
## name, paste(names(rds), collapse = ",")),
## domain = NA)
## }
## files <- list.files(p, full.names = TRUE)
## files <- files[grep(name, files, fixed = TRUE)]
## if (length(files) > 1L) {
## o <- match(fileExt(files), dataExts, nomatch = 100L)
## paths0 <- dirname(files)
## paths0 <- factor(paths0, levels = unique(paths0))
## files <- files[order(paths0, o)]
## }
## if (length(files)) {
## for (file in files) {
## if (verbose)
## message("name=", name, ":\t file= ...", .Platform$file.sep,
## basename(file), "::\t", appendLF = FALSE,
## domain = NA)
## ext <- fileExt(file)
## if (basename(file) != paste0(name, ".", ext))
## found <- FALSE
## else {
## found <- TRUE
## switch(ext, R = , r = {
## library("utils")
## sys.source(file, chdir = TRUE, envir = tmp_env)
## }, RData = , rdata = , rda = load(file, envir = tmp_env),
## TXT = , txt = , tab = , tab.gz = , tab.bz2 = ,
## tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name,
## my_read_table(file, header = TRUE, as.is = FALSE),
## envir = tmp_env), CSV = , csv = , csv.gz = ,
## csv.bz2 = , csv.xz = assign(name, my_read_table(file,
## header = TRUE, sep = ";", as.is = FALSE),
## envir = tmp_env), found <- FALSE)
## }
## if (found)
## break
## }
## if (verbose)
## message(if (!found)
## "*NOT* ", "found", domain = NA)
## }
## if (found)
## break
## }
## if (!found) {
## warning(gettextf("data set %s not found", sQuote(name)),
## domain = NA)
## }
## else if (!overwrite) {
## for (o in ls(envir = tmp_env, all.names = TRUE)) {
## if (exists(o, envir = envir, inherits = FALSE))
## warning(gettextf("an object named %s already exists and will not be overwritten",
## sQuote(o)))
## else assign(o, get(o, envir = tmp_env, inherits = FALSE),
## envir = envir)
## }
## rm(tmp_env)
## }
## }
## invisible(names)
## }
## <bytecode: 0x0000019bc9079e48>
## <environment: namespace:utils>
Correlation Value
A correlation value measures the strength and direction of the linear relationship between two variables. It ranges from −1 to +1, where +1 indicates a perfect positive relationship, −1 indicates a perfect negative relationship, and 0 indicates no linear relationship. A positive value means that when one variable increases, the other variable also tends to increase, while a negative value means that one variable increases as the other decreases. Correlation values are commonly used in data analysis to identify patterns and relationships between variables before building statistical models.
## mpg cyl disp hp drat wt qsec vs am gear carb
## mpg 1.0 -0.9 -0.8 -0.8 0.7 -0.9 0.4 0.7 0.6 0.5 -0.6
## cyl -0.9 1.0 0.9 0.8 -0.7 0.8 -0.6 -0.8 -0.5 -0.5 0.5
## disp -0.8 0.9 1.0 0.8 -0.7 0.9 -0.4 -0.7 -0.6 -0.6 0.4
## hp -0.8 0.8 0.8 1.0 -0.4 0.7 -0.7 -0.7 -0.2 -0.1 0.7
## drat 0.7 -0.7 -0.7 -0.4 1.0 -0.7 0.1 0.4 0.7 0.7 -0.1
## wt -0.9 0.8 0.9 0.7 -0.7 1.0 -0.2 -0.6 -0.7 -0.6 0.4
## qsec 0.4 -0.6 -0.4 -0.7 0.1 -0.2 1.0 0.7 -0.2 -0.2 -0.7
## vs 0.7 -0.8 -0.7 -0.7 0.4 -0.6 0.7 1.0 0.2 0.2 -0.6
## am 0.6 -0.5 -0.6 -0.2 0.7 -0.7 -0.2 0.2 1.0 0.8 0.1
## gear 0.5 -0.5 -0.6 -0.1 0.7 -0.6 -0.2 0.2 0.8 1.0 0.3
## carb -0.6 0.5 0.4 0.7 -0.1 0.4 -0.7 -0.6 0.1 0.3 1.0
## mpg cyl disp hp drat
## mpg 0.000000e+00 6.112687e-10 9.380327e-10 1.787835e-07 1.776240e-05
## cyl 6.112687e-10 0.000000e+00 1.802838e-12 3.477861e-09 8.244636e-06
## disp 9.380327e-10 1.802838e-12 0.000000e+00 7.142679e-08 5.282022e-06
## hp 1.787835e-07 3.477861e-09 7.142679e-08 0.000000e+00 9.988772e-03
## drat 1.776240e-05 8.244636e-06 5.282022e-06 9.988772e-03 0.000000e+00
## wt 1.293959e-10 1.217567e-07 1.222320e-11 4.145827e-05 4.784260e-06
## qsec 1.708199e-02 3.660533e-04 1.314404e-02 5.766253e-06 6.195826e-01
## vs 3.415937e-05 1.843018e-08 5.235012e-06 2.940896e-06 1.167553e-02
## am 2.850207e-04 2.151207e-03 3.662114e-04 1.798309e-01 4.726790e-06
## gear 5.400948e-03 4.173297e-03 9.635921e-04 4.930119e-01 8.360110e-06
## carb 1.084446e-03 1.942340e-03 2.526789e-02 7.827810e-07 6.211834e-01
## wt qsec vs am gear
## mpg 1.293959e-10 1.708199e-02 3.415937e-05 2.850207e-04 5.400948e-03
## cyl 1.217567e-07 3.660533e-04 1.843018e-08 2.151207e-03 4.173297e-03
## disp 1.222320e-11 1.314404e-02 5.235012e-06 3.662114e-04 9.635921e-04
## hp 4.145827e-05 5.766253e-06 2.940896e-06 1.798309e-01 4.930119e-01
## drat 4.784260e-06 6.195826e-01 1.167553e-02 4.726790e-06 8.360110e-06
## wt 0.000000e+00 3.388683e-01 9.798492e-04 1.125440e-05 4.586601e-04
## qsec 3.388683e-01 0.000000e+00 1.029669e-06 2.056621e-01 2.425344e-01
## vs 9.798492e-04 1.029669e-06 0.000000e+00 3.570439e-01 2.579439e-01
## am 1.125440e-05 2.056621e-01 3.570439e-01 0.000000e+00 5.834043e-08
## gear 4.586601e-04 2.425344e-01 2.579439e-01 5.834043e-08 0.000000e+00
## carb 1.463861e-02 4.536949e-05 6.670496e-04 7.544526e-01 1.290291e-01
## carb
## mpg 1.084446e-03
## cyl 1.942340e-03
## disp 2.526789e-02
## hp 7.827810e-07
## drat 6.211834e-01
## wt 1.463861e-02
## qsec 4.536949e-05
## vs 6.670496e-04
## am 7.544526e-01
## gear 1.290291e-01
## carb 0.000000e+00
Correlation with “GGCorrplot”
Ggcorrplot is a function used to visualize a correlation matrix in a clear and attractive graphical form. It comes from the package ggcorrplot, which is built on top of ggplot2 in the R ecosystem.
Circle Correlation Plot
ggcorrplot(cor,method = "circle", colors = c("#F2E3BB", "#C0B87A", "#427A43", "#005F02"), outline.color = "white", title = "Correlation with GGCorrplot")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
## Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Square Correlation Plot 1
ggcorrplot(cor,method = "square", colors = c("#FF5F00", "#FF8C00", "#FFC300", "#FFD400"), outline.color = "white", title = "Correlation with GGCorrplot", type = "lower")
Square Correlation Plot 2
The plot displays the correlation matrix using square shapes, and only the lower triangle of the matrix is shown to avoid repeating the same information. The color gradient represents the strength of the correlation, while the numerical correlation values are displayed inside each square.
ggcorrplot(cor,method = "square", colors = c("#FF5F00", "#FF8C00", "#FFC300", "#FFD400"), outline.color = "white", title = "Correlation with GGCorrplot", type = "lower", lab = TRUE, lab_size = 4)
Correlation with “Corrplot”
corrplot() is a function used to visualize a correlation matrix in graphical form. It comes from the corrplot package in R. The main purpose of this function is to make it easier to understand the relationships between many variables at once. Although a correlation matrix provides important information, it often contains many numbers that are difficult to interpret quickly. The corrplot() function solves this problem by converting the matrix into a visual representation.
Number Correlation Plot 1
corrplot(cor, method='number', tl.col = "black", title = "Correlation with Corrplot", mar = c(0,0,2,0), number.cex = 0.8)
Number Correlation Plot 2
corrplot(cor, method='number', tl.col = "black", title = "Correlation with Corrplot", mar = c(0,0,2,0), number.cex = 0.8, type = "upper", col=c("#C0B87A","#9BC265","#427A43","#005F02"))
Number Correlation Plot 3
This chart shows the correlations between variables and also indicates whether the relationships are statistically significant. In addition to displaying the correlation values, the visualization uses results from hypothesis tests to determine if the correlations are meaningful. Correlations that meet the chosen significance threshold are highlighted, while those that do not may be hidden or marked differently. As a result, the chart helps focus attention on relationships that are statistically supported rather than showing all correlations equally.
corrplot(cor, method='number', tl.col = "black", title = "Check Correlation Significancy", mar = c(0,0,2,0), number.cex = 0.8, type = "upper", col=c("#C0B87A","#9BC265","#427A43","#005F02"), p.mat = cor.sig$p,sig.level = 0.05)