Correlation Test Between Two Variables in R

Install and load required R packages

# Install ggpubr only when it is not already available, then attach it.
# Straight ASCII quotes are required: typographic quotes are a syntax error in R.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library("ggpubr")

Import your data into R

# Work on a copy of the built-in mtcars data set
my_data <- mtcars
# Preview the first six rows
head(my_data, n = 6)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Visualize your data using scatter plots

# Attach ggpubr, which provides ggscatter()
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
# Scatter plot of wt against mpg with a regression line, its confidence
# band and the Pearson correlation coefficient printed on the plot
ggscatter(
  my_data,
  x = "mpg",
  y = "wt",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson",
  xlab = "Miles/(US) gallon",
  ylab = "Weight (1000 lbs)"
)

Preliminary test to check the test assumptions

# Normality check for mpg (Shapiro-Wilk)
shapiro.test(x = my_data$mpg) # => p = 0.1229
## 
##  Shapiro-Wilk normality test
## 
## data:  my_data$mpg
## W = 0.94756, p-value = 0.1229
# Normality check for wt (Shapiro-Wilk)
shapiro.test(x = my_data$wt) # => p = 0.09
## 
##  Shapiro-Wilk normality test
## 
## data:  my_data$wt
## W = 0.94326, p-value = 0.09265
# Q-Q plot for mpg
ggqqplot(my_data$mpg, ylab = "MPG")

# Q-Q plot for wt
ggqqplot(my_data$wt, ylab = "WT")

Pearson correlation test

# Pearson product-moment correlation test between wt and mpg
res <- cor.test(
  x = my_data$wt,
  y = my_data$mpg,
  method = "pearson"
)
# Print the test result
res
## 
##  Pearson's product-moment correlation
## 
## data:  my_data$wt and my_data$mpg
## t = -9.559, df = 30, p-value = 1.294e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9338264 -0.7440872
## sample estimates:
##        cor 
## -0.8676594

Interpretation of the result

### The p-value of the test is $1.294 \times 10^{-10}$, which is less than the significance level alpha = 0.05. We can conclude that wt and mpg are significantly correlated, with a correlation coefficient of -0.87 and a p-value of $1.294 \times 10^{-10}$.

Kendall rank correlation test

# Kendall rank correlation test between wt and mpg.
# The data contain tied values, so an exact p-value cannot be computed.
# exact = FALSE requests the normal approximation explicitly instead of
# triggering the "Cannot compute exact p-value with ties" warning; the
# statistic and p-value are unchanged.
res2 <- cor.test(my_data$wt, my_data$mpg, method = "kendall", exact = FALSE)
# Print the test result
res2
## 
##  Kendall's rank correlation tau
## 
## data:  my_data$wt and my_data$mpg
## z = -5.7981, p-value = 6.706e-09
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##        tau 
## -0.7278321

Spearman rank correlation coefficient

# Spearman rank correlation test between wt and mpg.
# The data contain tied values, so an exact p-value cannot be computed.
# exact = FALSE requests the asymptotic approximation explicitly instead of
# triggering the "Cannot compute exact p-value with ties" warning; the
# statistic and p-value are unchanged.
# NOTE: res2 is reused here, replacing the Kendall result stored earlier.
res2 <- cor.test(my_data$wt, my_data$mpg, method = "spearman", exact = FALSE)
# Print the test result
res2
## 
##  Spearman's rank correlation rho
## 
## data:  my_data$wt and my_data$mpg
## S = 10292, p-value = 1.488e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.886422

Correlation Matrix: Analyze, Format and Visualize

Compute correlation matrix in R

# Load the built-in mtcars data set
data("mtcars")
# Keep six columns, selected by name (columns 1, 3, 4, 5, 6 and 7 of mtcars)
my_data <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
# Show the first six rows
head(my_data, n = 6)
##                    mpg disp  hp drat    wt  qsec
## Mazda RX4         21.0  160 110 3.90 2.620 16.46
## Mazda RX4 Wag     21.0  160 110 3.90 2.875 17.02
## Datsun 710        22.8  108  93 3.85 2.320 18.61
## Hornet 4 Drive    21.4  258 110 3.08 3.215 19.44
## Hornet Sportabout 18.7  360 175 3.15 3.440 17.02
## Valiant           18.1  225 105 2.76 3.460 20.22
# Correlation matrix (cor() uses the Pearson method by default)
res <- cor(my_data)
# Display it rounded to two decimal places
round(res, digits = 2)
##        mpg  disp    hp  drat    wt  qsec
## mpg   1.00 -0.85 -0.78  0.68 -0.87  0.42
## disp -0.85  1.00  0.79 -0.71  0.89 -0.43
## hp   -0.78  0.79  1.00 -0.45  0.66 -0.71
## drat  0.68 -0.71 -0.45  1.00 -0.71  0.09
## wt   -0.87  0.89  0.66 -0.71  1.00 -0.17
## qsec  0.42 -0.43 -0.71  0.09 -0.17  1.00
# Same computation, but rows containing missing values are dropped first
cor(my_data, use = "complete.obs")
##             mpg       disp         hp        drat         wt        qsec
## mpg   1.0000000 -0.8475514 -0.7761684  0.68117191 -0.8676594  0.41868403
## disp -0.8475514  1.0000000  0.7909486 -0.71021393  0.8879799 -0.43369788
## hp   -0.7761684  0.7909486  1.0000000 -0.44875912  0.6587479 -0.70822339
## drat  0.6811719 -0.7102139 -0.4487591  1.00000000 -0.7124406  0.09120476
## wt   -0.8676594  0.8879799  0.6587479 -0.71244065  1.0000000 -0.17471588
## qsec  0.4186840 -0.4336979 -0.7082234  0.09120476 -0.1747159  1.00000000

Prerequisites

Install Package

# Install corrplot only when it is not already available.
# Straight ASCII quotes are required: typographic quotes are a syntax error in R.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}

To use the rquery.cormat function, you can source it as follows:

# Define rquery.cormat() by sourcing its script from the STHDA site.
# NOTE(review): this executes remote code fetched over plain HTTP; consider
# downloading and reviewing the script, then sourcing a local copy.
source("http://www.sthda.com/upload/rquery_cormat.r")
# Example data: six mtcars columns, selected by name
# (columns 1, 3, 4, 5, 6 and 7 of mtcars)
mydata <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
# Preview the first rows
head(mydata)
##                    mpg disp  hp drat    wt  qsec
## Mazda RX4         21.0  160 110 3.90 2.620 16.46
## Mazda RX4 Wag     21.0  160 110 3.90 2.875 17.02
## Datsun 710        22.8  108  93 3.85 2.320 18.61
## Hornet 4 Drive    21.4  258 110 3.08 3.215 19.44
## Hornet Sportabout 18.7  360 175 3.15 3.440 17.02
## Valiant           18.1  225 105 2.76 3.460 20.22

Computing the correlation matrix

# Correlation matrix of mydata with the default settings.
# The printed result is a list with $r (correlation coefficients, shown as a
# lower triangle), $p (the matching p-values) and $sym (a symbolic summary).
# Presumably this also draws the correlogram, since graph = FALSE is passed
# later in this file to suppress plotting -- confirm against the sourced script.
rquery.cormat(mydata)
## Warning: package 'corrplot' was built under R version 4.3.2
## corrplot 0.92 loaded

## $r
##         hp  disp    wt  qsec  mpg drat
## hp       1                            
## disp  0.79     1                      
## wt    0.66  0.89     1                
## qsec -0.71 -0.43 -0.17     1          
## mpg  -0.78 -0.85 -0.87  0.42    1     
## drat -0.45 -0.71 -0.71 0.091 0.68    1
## 
## $p
##           hp    disp      wt  qsec     mpg drat
## hp         0                                   
## disp 7.1e-08       0                           
## wt   4.1e-05 1.2e-11       0                   
## qsec 5.8e-06   0.013    0.34     0             
## mpg  1.8e-07 9.4e-10 1.3e-10 0.017       0     
## drat    0.01 5.3e-06 4.8e-06  0.62 1.8e-05    0
## 
## $sym
##      hp disp wt qsec mpg drat
## hp   1                       
## disp ,  1                    
## wt   ,  +    1               
## qsec ,  .       1            
## mpg  ,  +    +  .    1       
## drat .  ,    ,       ,   1   
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

Upper triangle of the correlation matrix

# Report only the upper triangle of the correlation matrix
rquery.cormat(mydata, type = "upper")

## $r
##      hp disp   wt  qsec   mpg  drat
## hp    1 0.79 0.66 -0.71 -0.78 -0.45
## disp       1 0.89 -0.43 -0.85 -0.71
## wt              1 -0.17 -0.87 -0.71
## qsec                  1  0.42 0.091
## mpg                         1  0.68
## drat                              1
## 
## $p
##      hp    disp      wt    qsec     mpg    drat
## hp    0 7.1e-08 4.1e-05 5.8e-06 1.8e-07    0.01
## disp          0 1.2e-11   0.013 9.4e-10 5.3e-06
## wt                    0    0.34 1.3e-10 4.8e-06
## qsec                          0   0.017    0.62
## mpg                                   0 1.8e-05
## drat                                          0
## 
## $sym
##      hp disp wt qsec mpg drat
## hp   1  ,    ,  ,    ,   .   
## disp    1    +  .    +   ,   
## wt           1       +   ,   
## qsec            1    .       
## mpg                  1   ,   
## drat                     1   
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

Full correlation matrix

# Report the full (symmetric) correlation matrix
rquery.cormat(mydata, type = "full")

## $r
##         hp  disp    wt   qsec   mpg   drat
## hp    1.00  0.79  0.66 -0.710 -0.78 -0.450
## disp  0.79  1.00  0.89 -0.430 -0.85 -0.710
## wt    0.66  0.89  1.00 -0.170 -0.87 -0.710
## qsec -0.71 -0.43 -0.17  1.000  0.42  0.091
## mpg  -0.78 -0.85 -0.87  0.420  1.00  0.680
## drat -0.45 -0.71 -0.71  0.091  0.68  1.000
## 
## $p
##           hp    disp      wt    qsec     mpg    drat
## hp   0.0e+00 7.1e-08 4.1e-05 5.8e-06 1.8e-07 1.0e-02
## disp 7.1e-08 0.0e+00 1.2e-11 1.3e-02 9.4e-10 5.3e-06
## wt   4.1e-05 1.2e-11 0.0e+00 3.4e-01 1.3e-10 4.8e-06
## qsec 5.8e-06 1.3e-02 3.4e-01 0.0e+00 1.7e-02 6.2e-01
## mpg  1.8e-07 9.4e-10 1.3e-10 1.7e-02 0.0e+00 1.8e-05
## drat 1.0e-02 5.3e-06 4.8e-06 6.2e-01 1.8e-05 0.0e+00
## 
## $sym
##      hp disp wt qsec mpg drat
## hp   1                       
## disp ,  1                    
## wt   ,  +    1               
## qsec ,  .       1            
## mpg  ,  +    +  .    1       
## drat .  ,    ,       ,   1   
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

Change the colors of the correlogram

# Build a 20-step blue -> white -> red palette
col <- colorRampPalette(c("blue", "white", "red"))(20)
# Pass the custom palette to rquery.cormat; assigning the result keeps the
# returned list from being printed
cormat <- rquery.cormat(mydata, type = "full", col = col)

Draw a heatmap

# Request a heatmap instead of the default graph type; assigning the result
# keeps the returned list from being printed
cormat <- rquery.cormat(mydata, graphType = "heatmap")

Format the correlation table

# Flattened table: one row per variable pair with its correlation and p-value.
# graph = FALSE skips the plot.
rquery.cormat(mydata, type = "flatten", graph = FALSE)
## $r
##     row column    cor       p
## 1    hp   disp  0.790 7.1e-08
## 2    hp     wt  0.660 4.1e-05
## 3  disp     wt  0.890 1.2e-11
## 4    hp   qsec -0.710 5.8e-06
## 5  disp   qsec -0.430 1.3e-02
## 6    wt   qsec -0.170 3.4e-01
## 7    hp    mpg -0.780 1.8e-07
## 8  disp    mpg -0.850 9.4e-10
## 9    wt    mpg -0.870 1.3e-10
## 10 qsec    mpg  0.420 1.7e-02
## 11   hp   drat -0.450 1.0e-02
## 12 disp   drat -0.710 5.3e-06
## 13   wt   drat -0.710 4.8e-06
## 14 qsec   drat  0.091 6.2e-01
## 15  mpg   drat  0.680 1.8e-05
## 
## $p
## NULL
## 
## $sym
## NULL